Don't consider textual characters to be emoji (#12582)

* Don't consider textual characters to be emoji

We were using emojibase-regex to match emoji within messages. However, the docs (https://emojibase.dev/docs/regex/) state that this regex matches both emoji-presentation and text-presentation characters. This is not what we want: it results in false positives for characters like '↔' that only turn into an emoji when paired with a variation selector. Unfortunately, none of the other regexes provided by Emojibase do what we want either (https://github.com/milesj/emojibase/issues/174). In the meantime, browser support for the RGI_Emoji Unicode property of strings (`\p{RGI_Emoji}`, available in regular expressions with the `v` flag) has made it feasible to write an emoji regex by hand, so that's what I've done.

* Add a fallback for BIGEMOJI_REGEX as well
This commit is contained in:
Robin 2024-07-04 13:48:07 -04:00 committed by GitHub
parent 489bc32674
commit c61eca8c24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 98 additions and 12 deletions

View file

@ -15,11 +15,10 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
import { EMOJI_REGEX, unicodeToShortcode } from "../HtmlUtils";
import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
@ -197,7 +196,7 @@ abstract class BasePart {
abstract class PlainBasePart extends BasePart {
protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean {
if (chr === "\n" || EMOJIBASE_REGEX.test(chr)) {
if (chr === "\n" || EMOJI_REGEX.test(chr)) {
return false;
}
// when not pasting or dropping text, reject characters that should start a pill candidate
@ -375,7 +374,7 @@ class NewlinePart extends BasePart implements IBasePart {
export class EmojiPart extends BasePart implements IBasePart {
protected acceptsInsertion(chr: string, offset: number): boolean {
return EMOJIBASE_REGEX.test(chr);
return EMOJI_REGEX.test(chr);
}
protected acceptsRemoval(position: number, chr: string): boolean {
@ -573,7 +572,7 @@ export class PartCreator {
case "\n":
return new NewlinePart();
default:
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
if (EMOJI_REGEX.test(getFirstGrapheme(input))) {
return new EmojiPart();
}
return new PlainPart();
@ -650,7 +649,7 @@ export class PartCreator {
let plainText = "";
for (const data of graphemeSegmenter.segment(text)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (EMOJI_REGEX.test(data.segment)) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";