Skip to content

Commit

Permalink
Fix autolinking errors caused by the regex and by missing email domain validation
Browse files Browse the repository at this point in the history
Fix some edge cases in the autolinking regex, validate email domains
before creating mailto links, and add unit tests for these cases.

Fixes: #19462
  • Loading branch information
ryzokuken committed Feb 19, 2025
1 parent a857ca3 commit 70e2873
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 13 deletions.
4 changes: 4 additions & 0 deletions test/unit/autolinker_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ describe("autolinker", function () {
"[email protected]", // Keep the original case.
"mailto:[email protected]",
],
["[email protected]", "mailto:[email protected]"],
["[email protected]", "mailto:[email protected]"],
["<[email protected]>", "mailto:[email protected]"],
]);
});

Expand Down Expand Up @@ -140,6 +143,7 @@ describe("autolinker", function () {
"http//[00:00:00:00:00:00", // Invalid IPv6 address.
"http//[]", // Empty IPv6 address.
"abc.example.com", // URL without scheme.
"JD?M$0QP)lKn06l1apKDC@\\qJ4B!!(5m+j.7F790m", // Not a valid email.
].join("\n")
);
expect(matches.length).toEqual(0);
Expand Down
32 changes: 19 additions & 13 deletions web/autolinker.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,31 +96,37 @@ class Autolinker {
static #regex;

static findLinks(text) {
// Regex can be tested and verified at https://regex101.com/r/zgDwPE/1.
// Regex can be tested and verified at https://regex101.com/r/rXoLiT/2.
this.#regex ??=
/\b(?:https?:\/\/|mailto:|www\.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+|\b[[\S--@]--\{]+@[\S--.]+\.[[\S--\[]--\p{P}]{2,}/gmv;
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;

const [normalizedText, diffs] = normalize(text);
const matches = normalizedText.matchAll(this.#regex);
const links = [];
for (const match of matches) {
const raw =
match[0].startsWith("www.") ||
match[0].startsWith("mailto:") ||
match[0].startsWith("http://") ||
match[0].startsWith("https://")
? match[0]
: `mailto:${match[0]}`;
const url = createValidAbsoluteUrl(raw, null, {
const [url, emailDomain] = match;
let raw;
if (
url.startsWith("www.") ||
url.startsWith("http://") ||
url.startsWith("https://")
) {
raw = url;
} else if (URL.canParse(`http://${emailDomain}`)) {
raw = url.startsWith("mailto:") ? url : `mailto:${url}`;
} else {
continue;
}
const absoluteURL = createValidAbsoluteUrl(raw, null, {
addDefaultProtocol: true,
});
if (url) {
if (absoluteURL) {
const [index, length] = getOriginalIndex(
diffs,
match.index,
match[0].length
url.length
);
links.push({ url: url.href, index, length });
links.push({ url: absoluteURL.href, index, length });
}
}
return links;
Expand Down

0 comments on commit 70e2873

Please sign in to comment.