Skip to content

Commit 1ebc896

Browse files
authored
Merge pull request #20019 from calixteman/bug1885505
Get the text under highlight/squiggly/underline/strikethrough annotations (bug 1885505)
2 parents 7d3b2a6 + 3bdc5d5 commit 1ebc896

File tree

10 files changed

+449
-10
lines changed

10 files changed

+449
-10
lines changed

src/core/annotation.js

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,6 +1247,10 @@ class Annotation {
12471247
return null;
12481248
}
12491249

1250+
get overlaysTextContent() {
1251+
return false;
1252+
}
1253+
12501254
get hasTextContent() {
12511255
return false;
12521256
}
@@ -4711,6 +4715,10 @@ class HighlightAnnotation extends MarkupAnnotation {
47114715
}
47124716
}
47134717

4718+
get overlaysTextContent() {
4719+
return true;
4720+
}
4721+
47144722
static createNewDict(annotation, xref, { apRef, ap }) {
47154723
const { color, oldAnnotation, opacity, rect, rotation, user, quadPoints } =
47164724
annotation;
@@ -4835,6 +4843,10 @@ class UnderlineAnnotation extends MarkupAnnotation {
48354843
this.data.popupRef = null;
48364844
}
48374845
}
4846+
4847+
get overlaysTextContent() {
4848+
return true;
4849+
}
48384850
}
48394851

48404852
class SquigglyAnnotation extends MarkupAnnotation {
@@ -4879,6 +4891,10 @@ class SquigglyAnnotation extends MarkupAnnotation {
48794891
this.data.popupRef = null;
48804892
}
48814893
}
4894+
4895+
get overlaysTextContent() {
4896+
return true;
4897+
}
48824898
}
48834899

48844900
class StrikeOutAnnotation extends MarkupAnnotation {
@@ -4918,6 +4934,10 @@ class StrikeOutAnnotation extends MarkupAnnotation {
49184934
this.data.popupRef = null;
49194935
}
49204936
}
4937+
4938+
get overlaysTextContent() {
4939+
return true;
4940+
}
49214941
}
49224942

49234943
class StampAnnotation extends MarkupAnnotation {

src/core/document.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ import { calculateMD5 } from "./calculate_md5.js";
6666
import { Catalog } from "./catalog.js";
6767
import { clearGlobalCaches } from "./cleanup_helper.js";
6868
import { DatasetReader } from "./dataset_reader.js";
69+
import { Intersector } from "./intersector.js";
6970
import { Linearization } from "./parser.js";
7071
import { NullStream } from "./stream.js";
7172
import { ObjectLoader } from "./object_loader.js";
@@ -632,6 +633,7 @@ class Page {
632633
includeMarkedContent,
633634
disableNormalization,
634635
sink,
636+
intersector = null,
635637
}) {
636638
const contentStreamPromise = this.getContentStream();
637639
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
@@ -658,6 +660,7 @@ class Page {
658660
sink,
659661
viewBox: this.view,
660662
lang,
663+
intersector,
661664
});
662665
}
663666

@@ -707,6 +710,8 @@ class Page {
707710
intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
708711
intentPrint = !!(intent & RenderingIntentFlag.PRINT);
709712

713+
const highlightedAnnotations = [];
714+
710715
for (const annotation of annotations) {
711716
// Get the annotation even if it's hidden because
712717
// JS can change its display.
@@ -732,9 +737,29 @@ class Page {
732737
);
733738
})
734739
);
740+
} else if (annotation.overlaysTextContent && isVisible) {
741+
highlightedAnnotations.push(annotation);
735742
}
736743
}
737744

745+
if (highlightedAnnotations.length > 0) {
746+
const intersector = new Intersector(highlightedAnnotations);
747+
textContentPromises.push(
748+
this.extractTextContent({
749+
handler,
750+
task,
751+
includeMarkedContent: false,
752+
disableNormalization: false,
753+
sink: null,
754+
viewBox: this.view,
755+
lang: null,
756+
intersector,
757+
}).then(() => {
758+
intersector.setText();
759+
})
760+
);
761+
}
762+
738763
await Promise.all(textContentPromises);
739764
return annotationsData;
740765
}

src/core/evaluator.js

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2362,6 +2362,7 @@ class PartialEvaluator {
23622362
disableNormalization = false,
23632363
keepWhiteSpace = false,
23642364
prevRefs = null,
2365+
intersector = null,
23652366
}) {
23662367
const objId = stream.dict?.objId;
23672368
const seenRefs = new RefSet(prevRefs);
@@ -2506,6 +2507,7 @@ class PartialEvaluator {
25062507
transform = textContentItem.prevTransform,
25072508
fontName = textContentItem.fontName,
25082509
}) {
2510+
intersector?.addExtraChar(" ");
25092511
textContent.items.push({
25102512
str: " ",
25112513
dir: "ltr",
@@ -2964,9 +2966,21 @@ class PartialEvaluator {
29642966

29652967
if (!font.vertical) {
29662968
scaledDim *= textState.textHScale;
2969+
intersector?.addGlyph(
2970+
getCurrentTextTransform(),
2971+
scaledDim,
2972+
0,
2973+
glyph.unicode
2974+
);
29672975
textState.translateTextMatrix(scaledDim, 0);
29682976
textChunk.width += scaledDim;
29692977
} else {
2978+
intersector?.addGlyph(
2979+
getCurrentTextTransform(),
2980+
0,
2981+
scaledDim,
2982+
glyph.unicode
2983+
);
29702984
textState.translateTextMatrix(0, scaledDim);
29712985
scaledDim = Math.abs(scaledDim);
29722986
textChunk.height += scaledDim;
@@ -2985,8 +2999,12 @@ class PartialEvaluator {
29852999
// alignment issues between the textLayer and the canvas if the text
29863000
// contains e.g. tabs (fixes issue6612.pdf).
29873001
textChunk.str.push(" ");
3002+
intersector?.addExtraChar(" ");
3003+
}
3004+
3005+
if (!intersector) {
3006+
textChunk.str.push(glyphUnicode);
29883007
}
2989-
textChunk.str.push(glyphUnicode);
29903008

29913009
if (charSpacing) {
29923010
if (!font.vertical) {
@@ -3002,6 +3020,7 @@ class PartialEvaluator {
30023020
}
30033021

30043022
function appendEOL() {
3023+
intersector?.addExtraChar("\n");
30053024
resetLastChars();
30063025
if (textContentItem.initialized) {
30073026
textContentItem.hasEOL = true;
@@ -3027,6 +3046,7 @@ class PartialEvaluator {
30273046
if (textContentItem.initialized) {
30283047
resetLastChars();
30293048
textContentItem.str.push(" ");
3049+
intersector?.addExtraChar(" ");
30303050
}
30313051
return false;
30323052
}
@@ -3078,7 +3098,7 @@ class PartialEvaluator {
30783098
if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
30793099
return;
30803100
}
3081-
sink.enqueue(textContent, length);
3101+
sink?.enqueue(textContent, length);
30823102
textContent.items = [];
30833103
textContent.styles = Object.create(null);
30843104
}
@@ -3088,7 +3108,7 @@ class PartialEvaluator {
30883108
return new Promise(function promiseBody(resolve, reject) {
30893109
const next = function (promise) {
30903110
enqueueChunk(/* batch = */ true);
3091-
Promise.all([promise, sink.ready]).then(function () {
3111+
Promise.all([promise, sink?.ready]).then(function () {
30923112
try {
30933113
promiseBody(resolve, reject);
30943114
} catch (ex) {
@@ -3341,7 +3361,7 @@ class PartialEvaluator {
33413361
},
33423362

33433363
get desiredSize() {
3344-
return sink.desiredSize;
3364+
return sink.desiredSize ?? 0;
33453365
},
33463366

33473367
get ready() {
@@ -3359,7 +3379,7 @@ class PartialEvaluator {
33593379
: resources,
33603380
stateManager: xObjStateManager,
33613381
includeMarkedContent,
3362-
sink: sinkWrapper,
3382+
sink: sink && sinkWrapper,
33633383
seenStyles,
33643384
viewBox,
33653385
lang,
@@ -3499,7 +3519,7 @@ class PartialEvaluator {
34993519
}
35003520
break;
35013521
} // switch
3502-
if (textContent.items.length >= sink.desiredSize) {
3522+
if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
35033523
// Wait for ready, if we reach highWaterMark.
35043524
stop = true;
35053525
break;

0 commit comments

Comments
 (0)