Don't consider textual characters to be emoji (#12582)

* Don't consider textual characters to be emoji

We were using emojibase-regex to match emoji within messages. However, the docs (https://emojibase.dev/docs/regex/) state that this regex matches both emoji and text presentation characters. This is not what we want, and will result in false positives for characters like '↔' that could turn into an emoji if paired with a variation selector. Unfortunately, none of the other regexes provided by Emojibase do what we want either (https://github.com/milesj/emojibase/issues/174). In the meantime, browser support for the RGI_Emoji character sequence class has made it feasible to write an emoji regex by hand, so that's what I've done.

* Add a fallback for BIGEMOJI_REGEX as well
This commit is contained in:
Robin 2024-07-04 13:48:07 -04:00 committed by GitHub
parent 489bc32674
commit c61eca8c24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 98 additions and 12 deletions

View file

@ -107,6 +107,12 @@ describe("bodyToHtml", () => {
expect(html).toMatchInlineSnapshot(`"<span class="mx_EventTile_searchHighlight">test</span> foo &lt;b&gt;bar"`);
});
it("generates big emoji for emoji made of multiple characters", () => {
    // The body mixes three multi-code-point emoji: a ZWJ family sequence, an
    // arrow in emoji presentation, and a regional-indicator flag. The recorded
    // snapshot expects each to be wrapped in its own mx_Emoji span and the
    // whole message to be rendered with the big-emoji class.
    const rendered = render(
        bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement,
    );
    expect(rendered.asFragment()).toMatchSnapshot();
});
it("should generate big emoji for an emoji-only reply to a message", () => {
const { asFragment } = render(
bodyToHtml(
@ -132,6 +138,12 @@ describe("bodyToHtml", () => {
expect(asFragment()).toMatchSnapshot();
});
it("does not mistake characters in text presentation mode for emoji", () => {
    // Both characters here are meant to be shown as plain text rather than as
    // emoji; the recorded snapshot expects an ordinary mx_EventTile_body span
    // with no mx_Emoji wrappers and no big-emoji class.
    const rendered = render(
        bodyToHtml({ body: "↔ ❗︎", msgtype: "m.text" }, [], {}) as ReactElement,
    );
    expect(rendered.asFragment()).toMatchSnapshot();
});
describe("feature_latex_maths", () => {
beforeEach(() => {
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");

View file

@ -1,5 +1,16 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = `
<DocumentFragment>
<span
class="mx_EventTile_body"
dir="auto"
>
↔ ❗︎
</span>
</DocumentFragment>
`;
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"<p>hello</p><div>world</div>"`;
@ -8,6 +19,36 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hel
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = `
<DocumentFragment>
<span
class="mx_EventTile_body mx_EventTile_bigEmoji"
dir="auto"
>
<span
class="mx_Emoji"
title=":man-woman-girl-boy:"
>
👨‍👩‍👧‍👦
</span>
<span
class="mx_Emoji"
title=":left_right_arrow:"
>
↔️
</span>
<span
class="mx_Emoji"
title=":flag-is:"
>
🇮🇸
</span>
</span>
</DocumentFragment>
`;
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
<DocumentFragment>
<span