Switch from cheerio to DOMParser (#10929)
* Add tests around feature_latex_maths * Switch from cheerio to DOMParser * strict * Iterate
This commit is contained in:
parent
151b0efe73
commit
72d1bd910a
7 changed files with 98 additions and 99 deletions
|
@ -19,7 +19,6 @@ limitations under the License.
|
|||
|
||||
import React, { LegacyRef, ReactElement, ReactNode } from "react";
|
||||
import sanitizeHtml from "sanitize-html";
|
||||
import { load as cheerio } from "cheerio";
|
||||
import classNames from "classnames";
|
||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
||||
import { merge, split } from "lodash";
|
||||
|
@ -549,27 +548,19 @@ export function bodyToHtml(content: IContent, highlights: Optional<string[]>, op
|
|||
}
|
||||
|
||||
safeBody = sanitizeHtml(formattedBody!, sanitizeParams);
|
||||
const phtml = cheerio(safeBody, {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const isPlainText = phtml.html() === phtml.root().text();
|
||||
const phtml = new DOMParser().parseFromString(safeBody, "text/html");
|
||||
const isPlainText = phtml.body.innerHTML === phtml.body.textContent;
|
||||
isHtmlMessage = !isPlainText;
|
||||
|
||||
if (isHtmlMessage && SettingsStore.getValue("feature_latex_maths")) {
|
||||
// @ts-ignore - The types for `replaceWith` wrongly expect
|
||||
// Cheerio instance to be returned.
|
||||
phtml('div, span[data-mx-maths!=""]').replaceWith(function (i, e) {
|
||||
return katex.renderToString(decode(phtml(e).attr("data-mx-maths")), {
|
||||
[...phtml.querySelectorAll<HTMLElement>("div, span[data-mx-maths]")].forEach((e) => {
|
||||
e.outerHTML = katex.renderToString(decode(e.getAttribute("data-mx-maths")), {
|
||||
throwOnError: false,
|
||||
// @ts-ignore - `e` can be an Element, not just a Node
|
||||
displayMode: e.name == "div",
|
||||
displayMode: e.tagName == "DIV",
|
||||
output: "htmlAndMathml",
|
||||
});
|
||||
});
|
||||
safeBody = phtml.html();
|
||||
safeBody = phtml.body.innerHTML;
|
||||
}
|
||||
} else if (highlighter) {
|
||||
safeBody = highlighter.applyHighlights(escapeHtml(plainBody), safeHighlights!).join("");
|
||||
|
|
|
@ -16,7 +16,6 @@ limitations under the License.
|
|||
*/
|
||||
|
||||
import { encode } from "html-entities";
|
||||
import { load as cheerio } from "cheerio";
|
||||
import escapeHtml from "escape-html";
|
||||
|
||||
import Markdown from "../Markdown";
|
||||
|
@ -133,8 +132,7 @@ export function htmlSerializeFromMdIfNeeded(md: string, { forceHTML = false } =
|
|||
});
|
||||
});
|
||||
|
||||
// make sure div tags always start on a new line, otherwise it will confuse
|
||||
// the markdown parser
|
||||
// make sure div tags always start on a new line, otherwise it will confuse the markdown parser
|
||||
md = md.replace(/(.)<div/g, function (m, p1) {
|
||||
return `${p1}\n<div`;
|
||||
});
|
||||
|
@ -143,39 +141,29 @@ export function htmlSerializeFromMdIfNeeded(md: string, { forceHTML = false } =
|
|||
const parser = new Markdown(md);
|
||||
if (!parser.isPlainText() || forceHTML) {
|
||||
// feed Markdown output to HTML parser
|
||||
const phtml = cheerio(parser.toHTML(), {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const phtml = new DOMParser().parseFromString(parser.toHTML(), "text/html");
|
||||
|
||||
if (SettingsStore.getValue("feature_latex_maths")) {
|
||||
// original Markdown without LaTeX replacements
|
||||
const parserOrig = new Markdown(orig);
|
||||
const phtmlOrig = cheerio(parserOrig.toHTML(), {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const phtmlOrig = new DOMParser().parseFromString(parserOrig.toHTML(), "text/html");
|
||||
|
||||
// since maths delimiters are handled before Markdown,
|
||||
// code blocks could contain mangled content.
|
||||
// replace code blocks with original content
|
||||
phtmlOrig("code").each(function (i) {
|
||||
phtml("code").eq(i).text(phtmlOrig("code").eq(i).text());
|
||||
[...phtmlOrig.getElementsByTagName("code")].forEach((e, i) => {
|
||||
phtml.getElementsByTagName("code").item(i)!.textContent = e.textContent;
|
||||
});
|
||||
|
||||
// add fallback output for latex math, which should not be interpreted as markdown
|
||||
phtml("div, span").each(function (i, e) {
|
||||
const tex = phtml(e).attr("data-mx-maths");
|
||||
[...phtml.querySelectorAll("div, span")].forEach((e, i) => {
|
||||
const tex = e.getAttribute("data-mx-maths");
|
||||
if (tex) {
|
||||
phtml(e).html(`<code>${tex}</code>`);
|
||||
e.innerHTML = `<code>${tex}</code>`;
|
||||
}
|
||||
});
|
||||
}
|
||||
return phtml.html();
|
||||
return phtml.body.innerHTML;
|
||||
}
|
||||
// ensure removal of escape backslashes in non-Markdown messages
|
||||
if (md.indexOf("\\") > -1) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue