Skip to content

Commit

Permalink
fix bug in html parsing
Browse files Browse the repository at this point in the history
add html comments
  • Loading branch information
pedroth committed Nov 14, 2023
1 parent 7bdc00a commit 1288864
Show file tree
Hide file tree
Showing 22 changed files with 575 additions and 120 deletions.
47 changes: 40 additions & 7 deletions dist/node/CodeRender/CodeRender.js
Original file line number Diff line number Diff line change
Expand Up @@ -47335,7 +47335,7 @@ function buildDom(nodeType) {
dom.innerHTML = innerHtml;
if (children.length > 0) {
children.forEach((child) => {
if (!child.build)
if (!child.build || child.isEmpty())
return;
dom.appendChild(child.build());
});
Expand Down Expand Up @@ -47366,6 +47366,7 @@ function buildDom(nodeType) {
domNode.getLazyActions = () => lazyActions;
domNode.getType = () => nodeType;
domNode.getRef = () => (f) => f(maybe(ref));
domNode.isEmpty = () => !nodeType;
return domNode;
}
var childrenToString = function({
Expand All @@ -47389,6 +47390,8 @@ var childrenToString = function({
};
var startTagToString = function({ nodeType, attrs, isFormatted }) {
const result = [];
if (!nodeType)
return "";
result.push(`<${nodeType}`);
result.push(...Object.entries(attrs).map(([attr, value]) => ` ${attr}="${value}" `));
result.push(`>`);
Expand All @@ -47397,6 +47400,8 @@ var startTagToString = function({ nodeType, attrs, isFormatted }) {
return result;
};
var endTagToString = function({ nodeType, isFormatted, n }) {
if (!nodeType)
return "";
const indentation = Array(n).fill(" ").join("");
const result = [];
if (isFormatted)
Expand Down Expand Up @@ -47453,6 +47458,9 @@ function eatNSymbol(n, symbolPredicate) {
/**
 * Skips the leading run of space tokens in a token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts only tokens
 * whose `type` is a single space character; tabs and newlines are not matched.
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpaces(tokenStream) {
  const isSpaceToken = (token) => token.type === " ";
  return eatSymbolsWhile(tokenStream, isSpaceToken);
}
/**
 * Skips the leading run of whitespace tokens (space, tab, newline) in a
 * token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts tokens whose
 * `type` is exactly " ", "\t" or "\n".
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpacesTabsAndNewLines(tokenStream) {
  const whitespaceTypes = [" ", "\t", "\n"];
  const isWhitespaceToken = (token) => whitespaceTypes.includes(token.type);
  return eatSymbolsWhile(tokenStream, isWhitespaceToken);
}
function eatSymbolsWhile(tokenStream, predicate) {
let s = tokenStream;
while (!tokenStream.isEmpty()) {
Expand Down Expand Up @@ -47725,6 +47733,8 @@ var tokenBuilder = () => {
var TOKENS_PARSERS = [
tokenRepeat("#", 6),
tokenRepeat("$", 2),
tokenSymbol("<!--"),
tokenSymbol("-->"),
tokenSymbol("*"),
tokenSymbol("_"),
tokenSymbol(CUSTOM_SYMBOL),
Expand Down Expand Up @@ -48296,16 +48306,19 @@ var parseHtml = function(stream2) {
}, () => {
const { left: EmptyTag, right: nextStream } = parseEmptyTag(stream2);
return pair({ type: TYPES.html, EmptyTag }, nextStream);
}, () => {
const { left: CommentTag, right: nextStream } = parseCommentTag(stream2);
return pair({ type: TYPES.html, CommentTag }, nextStream);
});
};
var parseStartTag = function(stream2) {
const token = stream2.head();
if (token.type === "<") {
const nextStream1 = eatSpaces(stream2.tail());
const { left: tagName, right: nextStream2 } = parseAlphaNumName(nextStream1);
const nextStream3 = eatSpaces(nextStream2);
const nextStream3 = eatSpacesTabsAndNewLines(nextStream2);
const { left: Attrs, right: nextStream4 } = parseAttrs(nextStream3);
const nextStream5 = eatSpaces(nextStream4);
const nextStream5 = eatSpacesTabsAndNewLines(nextStream4);
if (nextStream5.head().type === ">") {
return pair({ type: TYPES.startTag, tag: tagName.text, Attrs }, nextStream5.tail());
}
Expand All @@ -48317,15 +48330,27 @@ var parseEmptyTag = function(stream2) {
if (token.type === "<") {
const nextStream1 = eatSpaces(stream2.tail());
const { left: tagName, right: nextStream2 } = parseAlphaNumName(nextStream1);
const nextStream3 = eatSpaces(nextStream2);
const nextStream3 = eatSpacesTabsAndNewLines(nextStream2);
const { left: Attrs, right: nextStream4 } = parseAttrs(nextStream3);
const nextStream5 = eatSpaces(nextStream4);
const nextStream5 = eatSpacesTabsAndNewLines(nextStream4);
if (nextStream5.head().type === "/>") {
return pair({ type: TYPES.emptyTag, tag: tagName.text, Attrs }, nextStream5.tail());
}
}
throw new Error(`Error occurred while parsing EmptyTag,` + stream2.toString());
};
/**
 * Parses an HTML comment (`<!-- ... -->`) from the token stream.
 *
 * Steps, as visible here:
 *  1. the head token must have type "<!--";
 *  2. `parseAnyBut` is run on the remaining stream with a predicate matching
 *     the "-->" token;
 *  3. when the collected `textArray` is non-empty, one more token is skipped
 *     and a `{ type: TYPES.commentTag }` node is returned.
 *
 * NOTE(review): because of the non-empty check, an empty comment (`<!---->`)
 * is presumably rejected — confirm whether that is intended.
 *
 * @param stream2 - token stream positioned at the candidate comment.
 * @returns a pair of the comment node (left) and the stream after it (right).
 * @throws {Error} "Error occurred while parsing CommentTag, ..." from the
 *   `orCatch` handler when the stream is not accepted as a comment.
 */
var parseCommentTag = function(stream2) {
  return success(stream2).filter((nextStream) => {
    return nextStream.head().type === "<!--";
  }).map((nextStream) => {
    const { left: AnyBut, right: nextStream1 } = parseAnyBut((token) => token.type === "-->")(nextStream.tail());
    if (AnyBut.textArray.length > 0)
      return pair({ type: TYPES.commentTag }, nextStream1.tail());
    // Placeholder only: the error reported to callers is the one thrown in orCatch below.
    throw new Error(`Dummy error. Real error to be thrown in _orCatch_ function`);
  }).orCatch(() => {
    // Previously reported "parsing Attr" (copy-paste slip); name the rule that actually failed.
    throw new Error(`Error occurred while parsing CommentTag, ${stream2.toString()}`);
  });
};
function parseAlphaNumName(tokenStream) {
const strBuffer = [];
let s = tokenStream;
Expand Down Expand Up @@ -48353,7 +48378,7 @@ var parseCharAlphaNumName = function(charStream) {
var parseAttrs = function(stream2) {
return or(() => {
const { left: Attr, right: nextStream } = parseAttr(stream2);
const nextStreamNoSpaces = eatSpaces(nextStream);
const nextStreamNoSpaces = eatSpacesTabsAndNewLines(nextStream);
const { left: Attrs, right: nextStream1 } = parseAttrs(nextStreamNoSpaces);
return pair({
type: TYPES.attrs,
Expand Down Expand Up @@ -48462,7 +48487,7 @@ var parseInnerHtmlTypes = function(stream2) {
});
};
var parseEndTag = function(stream2) {
const filteredStream = eatSymbolsWhile(stream2, (token2) => token2.type === " " || token2.type === "\t" || token2.type === "\n");
const filteredStream = eatSpacesTabsAndNewLines(stream2);
const token = filteredStream.head();
if (token.type === "</") {
const nextStream1 = eatSpaces(filteredStream.tail());
Expand Down Expand Up @@ -48516,6 +48541,7 @@ var TYPES = {
html: "html",
startTag: "startTag",
emptyTag: "emptyTag",
commentTag: "commentTag",
innerHtml: "innerHtml",
innerHtmlTypes: "innerHtmlTypes",
endTag: "endTag",
Expand Down Expand Up @@ -62717,6 +62743,10 @@ class Render {
{
predicate: (h) => !!h.EmptyTag,
value: (h) => this.renderEmptyTag(h.EmptyTag)
},
{
predicate: (h) => !!h.CommentTag,
value: (h) => this.renderCommentTag(h.CommentTag)
}
])(html);
}
Expand Down Expand Up @@ -62758,6 +62788,9 @@ class Render {
attributes.forEach(({ attributeName, attributeValue }) => container.attr(attributeName, attributeValue));
return container;
}
renderCommentTag(commentTag) {
return buildDom();
}
renderNablaText(text2) {
const { left: Expression } = parseExpression(tokenizer(stream(text2)));
if (Expression.expressions.length > 0) {
Expand Down
12 changes: 11 additions & 1 deletion dist/node/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ function buildDom(nodeType) {
dom.innerHTML = innerHtml;
if (children.length > 0) {
children.forEach((child) => {
if (!child.build)
if (!child.build || child.isEmpty())
return;
dom.appendChild(child.build());
});
Expand Down Expand Up @@ -153,6 +153,7 @@ function buildDom(nodeType) {
domNode.getLazyActions = () => lazyActions;
domNode.getType = () => nodeType;
domNode.getRef = () => (f) => f(maybe(ref));
domNode.isEmpty = () => !nodeType;
return domNode;
}
var childrenToString = function({
Expand All @@ -176,6 +177,8 @@ var childrenToString = function({
};
var startTagToString = function({ nodeType, attrs, isFormatted }) {
const result = [];
if (!nodeType)
return "";
result.push(`<${nodeType}`);
result.push(...Object.entries(attrs).map(([attr, value]) => ` ${attr}="${value}" `));
result.push(`>`);
Expand All @@ -184,6 +187,8 @@ var startTagToString = function({ nodeType, attrs, isFormatted }) {
return result;
};
var endTagToString = function({ nodeType, isFormatted, n }) {
if (!nodeType)
return "";
const indentation = Array(n).fill(" ").join("");
const result = [];
if (isFormatted)
Expand Down Expand Up @@ -240,6 +245,9 @@ function eatNSymbol(n, symbolPredicate) {
/**
 * Skips the leading run of space tokens in a token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts only tokens
 * whose `type` is a single space character; tabs and newlines are not matched.
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpaces(tokenStream) {
  const isSpaceToken = (token) => token.type === " ";
  return eatSymbolsWhile(tokenStream, isSpaceToken);
}
/**
 * Skips the leading run of whitespace tokens (space, tab, newline) in a
 * token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts tokens whose
 * `type` is exactly " ", "\t" or "\n".
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpacesTabsAndNewLines(tokenStream) {
  const whitespaceTypes = [" ", "\t", "\n"];
  const isWhitespaceToken = (token) => whitespaceTypes.includes(token.type);
  return eatSymbolsWhile(tokenStream, isWhitespaceToken);
}
function eatSymbolsWhile(tokenStream, predicate) {
let s = tokenStream;
while (!tokenStream.isEmpty()) {
Expand Down Expand Up @@ -512,6 +520,8 @@ var tokenBuilder = () => {
var TOKENS_PARSERS = [
tokenRepeat("#", 6),
tokenRepeat("$", 2),
tokenSymbol("<!--"),
tokenSymbol("-->"),
tokenSymbol("*"),
tokenSymbol("_"),
tokenSymbol(CUSTOM_SYMBOL),
Expand Down
49 changes: 41 additions & 8 deletions dist/node/MathRender.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ function buildDom(nodeType) {
dom.innerHTML = innerHtml;
if (children.length > 0) {
children.forEach((child) => {
if (!child.build)
if (!child.build || child.isEmpty())
return;
dom.appendChild(child.build());
});
Expand Down Expand Up @@ -153,6 +153,7 @@ function buildDom(nodeType) {
domNode.getLazyActions = () => lazyActions;
domNode.getType = () => nodeType;
domNode.getRef = () => (f) => f(maybe(ref));
domNode.isEmpty = () => !nodeType;
return domNode;
}
var childrenToString = function({
Expand All @@ -176,6 +177,8 @@ var childrenToString = function({
};
var startTagToString = function({ nodeType, attrs, isFormatted }) {
const result = [];
if (!nodeType)
return "";
result.push(`<${nodeType}`);
result.push(...Object.entries(attrs).map(([attr, value]) => ` ${attr}="${value}" `));
result.push(`>`);
Expand All @@ -184,6 +187,8 @@ var startTagToString = function({ nodeType, attrs, isFormatted }) {
return result;
};
var endTagToString = function({ nodeType, isFormatted, n }) {
if (!nodeType)
return "";
const indentation = Array(n).fill(" ").join("");
const result = [];
if (isFormatted)
Expand Down Expand Up @@ -240,6 +245,9 @@ function eatNSymbol(n, symbolPredicate) {
/**
 * Skips the leading run of space tokens in a token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts only tokens
 * whose `type` is a single space character; tabs and newlines are not matched.
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpaces(tokenStream) {
  const isSpaceToken = (token) => token.type === " ";
  return eatSymbolsWhile(tokenStream, isSpaceToken);
}
/**
 * Skips the leading run of whitespace tokens (space, tab, newline) in a
 * token stream.
 *
 * Delegates to `eatSymbolsWhile` with a predicate that accepts tokens whose
 * `type` is exactly " ", "\t" or "\n".
 *
 * @param tokenStream - stream of tokens to advance.
 * @returns whatever `eatSymbolsWhile` returns for that predicate.
 */
function eatSpacesTabsAndNewLines(tokenStream) {
  const whitespaceTypes = [" ", "\t", "\n"];
  const isWhitespaceToken = (token) => whitespaceTypes.includes(token.type);
  return eatSymbolsWhile(tokenStream, isWhitespaceToken);
}
function eatSymbolsWhile(tokenStream, predicate) {
let s = tokenStream;
while (!tokenStream.isEmpty()) {
Expand Down Expand Up @@ -512,6 +520,8 @@ var tokenBuilder = () => {
var TOKENS_PARSERS = [
tokenRepeat("#", 6),
tokenRepeat("$", 2),
tokenSymbol("<!--"),
tokenSymbol("-->"),
tokenSymbol("*"),
tokenSymbol("_"),
tokenSymbol(CUSTOM_SYMBOL),
Expand Down Expand Up @@ -1083,16 +1093,19 @@ var parseHtml = function(stream2) {
}, () => {
const { left: EmptyTag, right: nextStream } = parseEmptyTag(stream2);
return pair({ type: TYPES.html, EmptyTag }, nextStream);
}, () => {
const { left: CommentTag, right: nextStream } = parseCommentTag(stream2);
return pair({ type: TYPES.html, CommentTag }, nextStream);
});
};
var parseStartTag = function(stream2) {
const token = stream2.head();
if (token.type === "<") {
const nextStream1 = eatSpaces(stream2.tail());
const { left: tagName, right: nextStream2 } = parseAlphaNumName(nextStream1);
const nextStream3 = eatSpaces(nextStream2);
const nextStream3 = eatSpacesTabsAndNewLines(nextStream2);
const { left: Attrs, right: nextStream4 } = parseAttrs(nextStream3);
const nextStream5 = eatSpaces(nextStream4);
const nextStream5 = eatSpacesTabsAndNewLines(nextStream4);
if (nextStream5.head().type === ">") {
return pair({ type: TYPES.startTag, tag: tagName.text, Attrs }, nextStream5.tail());
}
Expand All @@ -1104,15 +1117,27 @@ var parseEmptyTag = function(stream2) {
if (token.type === "<") {
const nextStream1 = eatSpaces(stream2.tail());
const { left: tagName, right: nextStream2 } = parseAlphaNumName(nextStream1);
const nextStream3 = eatSpaces(nextStream2);
const nextStream3 = eatSpacesTabsAndNewLines(nextStream2);
const { left: Attrs, right: nextStream4 } = parseAttrs(nextStream3);
const nextStream5 = eatSpaces(nextStream4);
const nextStream5 = eatSpacesTabsAndNewLines(nextStream4);
if (nextStream5.head().type === "/>") {
return pair({ type: TYPES.emptyTag, tag: tagName.text, Attrs }, nextStream5.tail());
}
}
throw new Error(`Error occurred while parsing EmptyTag,` + stream2.toString());
};
/**
 * Parses an HTML comment (`<!-- ... -->`) from the token stream.
 *
 * Steps, as visible here:
 *  1. the head token must have type "<!--";
 *  2. `parseAnyBut` is run on the remaining stream with a predicate matching
 *     the "-->" token;
 *  3. when the collected `textArray` is non-empty, one more token is skipped
 *     and a `{ type: TYPES.commentTag }` node is returned.
 *
 * NOTE(review): because of the non-empty check, an empty comment (`<!---->`)
 * is presumably rejected — confirm whether that is intended.
 *
 * @param stream2 - token stream positioned at the candidate comment.
 * @returns a pair of the comment node (left) and the stream after it (right).
 * @throws {Error} "Error occurred while parsing CommentTag, ..." from the
 *   `orCatch` handler when the stream is not accepted as a comment.
 */
var parseCommentTag = function(stream2) {
  return success(stream2).filter((nextStream) => {
    return nextStream.head().type === "<!--";
  }).map((nextStream) => {
    const { left: AnyBut, right: nextStream1 } = parseAnyBut((token) => token.type === "-->")(nextStream.tail());
    if (AnyBut.textArray.length > 0)
      return pair({ type: TYPES.commentTag }, nextStream1.tail());
    // Placeholder only: the error reported to callers is the one thrown in orCatch below.
    throw new Error(`Dummy error. Real error to be thrown in _orCatch_ function`);
  }).orCatch(() => {
    // Previously reported "parsing Attr" (copy-paste slip); name the rule that actually failed.
    throw new Error(`Error occurred while parsing CommentTag, ${stream2.toString()}`);
  });
};
function parseAlphaNumName(tokenStream) {
const strBuffer = [];
let s = tokenStream;
Expand Down Expand Up @@ -1140,7 +1165,7 @@ var parseCharAlphaNumName = function(charStream) {
var parseAttrs = function(stream2) {
return or(() => {
const { left: Attr, right: nextStream } = parseAttr(stream2);
const nextStreamNoSpaces = eatSpaces(nextStream);
const nextStreamNoSpaces = eatSpacesTabsAndNewLines(nextStream);
const { left: Attrs, right: nextStream1 } = parseAttrs(nextStreamNoSpaces);
return pair({
type: TYPES.attrs,
Expand Down Expand Up @@ -1249,7 +1274,7 @@ var parseInnerHtmlTypes = function(stream2) {
});
};
var parseEndTag = function(stream2) {
const filteredStream = eatSymbolsWhile(stream2, (token2) => token2.type === " " || token2.type === "\t" || token2.type === "\n");
const filteredStream = eatSpacesTabsAndNewLines(stream2);
const token = filteredStream.head();
if (token.type === "</") {
const nextStream1 = eatSpaces(filteredStream.tail());
Expand Down Expand Up @@ -1303,6 +1328,7 @@ var TYPES = {
html: "html",
startTag: "startTag",
emptyTag: "emptyTag",
commentTag: "commentTag",
innerHtml: "innerHtml",
innerHtmlTypes: "innerHtmlTypes",
endTag: "endTag",
Expand Down Expand Up @@ -12661,7 +12687,7 @@ var controlWordWhitespaceRegexString = "(" + controlWordRegexString + ")" + spac
var controlSpaceRegexString = "\\\\(\n|[ \r\t]+\n?)[ \r\t]*";
var combiningDiacriticalMarkString = "[\u0300-\u036F]";
var combiningDiacriticalMarksEndRegex = new RegExp(combiningDiacriticalMarkString + "+$");
var tokenRegexString = "(" + spaceRegexString + "+)|" + (controlSpaceRegexString + "|") + "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + (combiningDiacriticalMarkString + "*") + "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + (combiningDiacriticalMarkString + "*|\\\\verb\\*([^]).*?\\4|\\\\verb([^*a-zA-Z]).*?\\5|\\\\verb\\*([^]).*?\\4|\\\\verb([^*a-zA-Z]).*?\\5") + ("|" + controlWordWhitespaceRegexString) + ("|" + controlSymbolRegexString + ")");
var tokenRegexString = "(" + spaceRegexString + "+)|" + (controlSpaceRegexString + "|") + "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + (combiningDiacriticalMarkString + "*") + "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + (combiningDiacriticalMarkString + "*|\\\\verb\\*([^]).*?\\4|\\\\verb([^*a-zA-Z]).*?\\5") + ("|" + controlWordWhitespaceRegexString) + ("|" + controlSymbolRegexString + ")");

class Lexer2 {
constructor(input, settings) {
Expand Down Expand Up @@ -15504,6 +15530,10 @@ class Render {
{
predicate: (h) => !!h.EmptyTag,
value: (h) => this.renderEmptyTag(h.EmptyTag)
},
{
predicate: (h) => !!h.CommentTag,
value: (h) => this.renderCommentTag(h.CommentTag)
}
])(html);
}
Expand Down Expand Up @@ -15545,6 +15575,9 @@ class Render {
attributes.forEach(({ attributeName, attributeValue }) => container.attr(attributeName, attributeValue));
return container;
}
renderCommentTag(commentTag) {
return buildDom();
}
renderNablaText(text2) {
const { left: Expression } = parseExpression(tokenizer(stream(text2)));
if (Expression.expressions.length > 0) {
Expand Down
Loading

0 comments on commit 1288864

Please sign in to comment.