Fix a variety of issues with HTML → Markdown conversion (#8004)

* Fix a variety of issues with HTML → Markdown conversion

Signed-off-by: Robin Townsend <robin@robin.town>

* Fix lint

Signed-off-by: Robin Townsend <robin@robin.town>

* Fix @room pill formatting not being applied to link text

Signed-off-by: Robin Townsend <robin@robin.town>
This commit is contained in:
Robin 2022-03-09 07:43:05 -05:00 committed by GitHub
parent 65691202f7
commit c10ac9e4a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 422 additions and 260 deletions

View file

@ -0,0 +1,178 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`editor/deserialize html messages escapes angle brackets 1`] = `
Array [
Object {
"text": "\\\\> \\\\\\\\<del>no formatting here\\\\\\\\</del>",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes asterisks 1`] = `
Array [
Object {
"text": "\\\\*hello\\\\*",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backslashes 1`] = `
Array [
Object {
"text": "C:\\\\\\\\My Documents",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backticks in code blocks 1`] = `
Array [
Object {
"text": "\`\`this → \` is a backtick\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "and here are 3 of them:",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`\`",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backticks outside of code blocks 1`] = `
Array [
Object {
"text": "some \\\\\`backticks\\\\\`",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes square brackets 1`] = `
Array [
Object {
"text": "\\\\[not an actual link\\\\](https://example.org)",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes underscores 1`] = `
Array [
Object {
"text": "\\\\_\\\\_emphasis\\\\_\\\\_",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages preserves nested formatting 1`] = `
Array [
Object {
"text": "a<sub>b_c**d<u>e</u>**_</sub>",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages preserves nested quotes 1`] = `
Array [
Object {
"text": "> foo",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "> ",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "> > bar",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages surrounds lists with newlines 1`] = `
Array [
Object {
"text": "foo",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "- bar",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "baz",
"type": "plain",
},
]
`;

View file

@ -237,18 +237,6 @@ describe('editor/deserialize', function() {
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" });
});
it('non tight lists', () => {
    // Ordered list whose items each wrap their text in a <p> element.
    const html = "<ol><li><p>Start</p></li><li><p>Continue</p></li><li><p>Finish</p></li></ol>";
    const message = htmlMessage(html);
    const parts = normalize(parseEvent(message, createPartCreator()));

    // Eight parts are expected in total; only the first seven are compared below.
    const expectedLeadingParts = [
        { type: "plain", text: "1. Start" },
        { type: "newline", text: "\n" },
        { type: "newline", text: "\n" },
        { type: "plain", text: "2. Continue" },
        { type: "newline", text: "\n" },
        { type: "newline", text: "\n" },
        { type: "plain", text: "3. Finish" },
    ];

    expect(parts.length).toBe(8);
    expectedLeadingParts.forEach((expectedPart, index) => {
        expect(parts[index]).toStrictEqual(expectedPart);
    });
});
it('nested unordered lists', () => {
const html = "<ul><li>Oak<ul><li>Spruce<ul><li>Birch</li></ul></li></ul></li></ul>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
@ -269,13 +257,13 @@ describe('editor/deserialize', function() {
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
});
it('nested tight lists', () => {
it('nested lists', () => {
const html = "<ol><li>Oak\n<ol><li>Spruce\n<ol><li>Birch</li></ol></li></ol></li></ol>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts.length).toBe(5);
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak" });
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak\n" });
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce` });
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce\n` });
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
});
@ -291,5 +279,56 @@ describe('editor/deserialize', function() {
expect(parts.length).toBe(1);
expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" });
});
it('preserves nested quotes', () => {
    // Input: a <blockquote> containing text followed by a second, nested <blockquote>.
    const message = htmlMessage("<blockquote>foo<blockquote>bar</blockquote></blockquote>");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('surrounds lists with newlines', () => {
    // Input: a single-item <ul> with plain text directly before and after it.
    const message = htmlMessage("foo<ul><li>bar</li></ul>baz");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('preserves nested formatting', () => {
    // Input: <sub>, <em>, <strong> and <u> nested inside one another, four levels deep.
    const message = htmlMessage("a<sub>b<em>c<strong>d<u>e</u></strong></em></sub>");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes backticks in code blocks', () => {
    // Inline <code> containing a single backtick ...
    const inlineCode = "<p><code>this → ` is a backtick</code></p>";
    // ... followed by a <pre><code> block whose content contains three backticks in a row.
    const codeBlock = "<pre><code>and here are 3 of them:\n```</code></pre>";
    const message = htmlMessage(inlineCode + codeBlock);
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes backticks outside of code blocks', () => {
    // Input: plain text containing literal backticks, with no <code> element involved.
    const message = htmlMessage("some `backticks`");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes backslashes', () => {
    // Input: plain text containing one literal backslash (written as "\\" in the JS source).
    const message = htmlMessage("C:\\My Documents");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes asterisks', () => {
    // Input: plain text wrapped in literal asterisks, with no HTML formatting tags.
    const message = htmlMessage("*hello*");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes underscores', () => {
    // Input: plain text wrapped in literal double underscores, with no HTML formatting tags.
    const message = htmlMessage("__emphasis__");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes square brackets', () => {
    // Input: plain text shaped like a Markdown link, with no <a> element involved.
    const message = htmlMessage("[not an actual link](https://example.org)");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
it('escapes angle brackets', () => {
    // Input: text starting with a literal ">" and containing <del> tags that are each
    // preceded by one literal backslash (written as "\\" in the JS source).
    const message = htmlMessage("> \\<del>no formatting here\\</del>");
    const deserialized = parseEvent(message, createPartCreator());
    // The normalized parts are compared against the stored Jest snapshot.
    expect(normalize(deserialized)).toMatchSnapshot();
});
});
});