Skip to content

Commit f4043b0

Browse files
Merge pull request #20038 from calixteman/bug1974112
Don't remove a dash at the end of a line when guessing urls (bug 1974112)
2 parents 85b67f1 + bb6b421 commit f4043b0

File tree

3 files changed

+18
-2
lines changed

3 files changed

+18
-2
lines changed

test/unit/autolinker_spec.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,13 @@ describe("autolinker", function () {
195195
["httptest@email.com", "mailto:httptest@email.com"],
196196
]);
197197
});
198+
199+
it("shouldn't remove the dash when it's at the end of a line (bug 1974112)", function () {
200+
testLinks([
201+
[
202+
"https://github.com/pypi/linehaul-cloud-\nfunction",
203+
"https://github.com/pypi/linehaul-cloud-function",
204+
],
205+
]);
206+
});
198207
});

web/autolinker.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ class Autolinker {
138138
this.#regex ??=
139139
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
140140

141-
const [normalizedText, diffs] = normalize(text);
141+
const [normalizedText, diffs] = normalize(text, { ignoreDashEOL: true });
142142
const matches = normalizedText.matchAll(this.#regex);
143143
const links = [];
144144
for (const match of matches) {

web/pdf_find_controller.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ const NFKC_CHARS_TO_NORMALIZE = new Map();
9797
let noSyllablesRegExp = null;
9898
let withSyllablesRegExp = null;
9999

100-
function normalize(text) {
100+
function normalize(text, options = {}) {
101101
// The diacritics in the text or in the query can be composed or not.
102102
// So we use a decomposed text using NFD (and the same for the query)
103103
// in order to be sure that diacritics are in the same order.
@@ -118,6 +118,7 @@ function normalize(text) {
118118
}
119119

120120
const hasSyllables = syllablePositions.length > 0;
121+
const ignoreDashEOL = options.ignoreDashEOL ?? false;
121122

122123
let normalizationRegex;
123124
if (!hasSyllables && noSyllablesRegExp) {
@@ -294,6 +295,12 @@ function normalize(text) {
294295
}
295296

296297
if (p5) {
298+
if (ignoreDashEOL) {
299+
// Keep the - but remove the EOL.
300+
shiftOrigin += 1;
301+
eol += 1;
302+
return p5.slice(0, -1);
303+
}
297304
// In "X-\ny", "-\n" is removed because a hyphen at the end of a line
298305
// between two letters is likely here to mark a break in a word.
299306
// If X is encoded with UTF-32 then it can have a length greater than 1.

0 commit comments

Comments
 (0)