Skip to content

Commit

Permalink
Merge pull request #19518 from Snuffleupagus/JpegStream-EXIF-replace
Browse files Browse the repository at this point in the history
Move the EXIF-block replacement into `JpegStream` (PR 19356 follow-up)
  • Loading branch information
calixteman authored Feb 20, 2025
2 parents c64d3d7 + d5ce35f commit 34ef74c
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
17 changes: 15 additions & 2 deletions src/core/jpeg_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,23 @@ class JpegStream extends DecodeStream {
if (!bytes) {
return null;
}
const data = this.#skipUselessBytes(bytes);
if (!JpegImage.canUseImageDecoder(data, jpegOptions.colorTransform)) {
let data = this.#skipUselessBytes(bytes);
const useImageDecoder = JpegImage.canUseImageDecoder(
data,
jpegOptions.colorTransform
);
if (!useImageDecoder) {
return null;
}
if (useImageDecoder.exifStart) {
// Replace the entire EXIF-block with dummy data, to ensure that a
// non-default EXIF orientation won't cause the image to be rotated
// when using `ImageDecoder` (fixes bug1942064.pdf).
//
// Copy the data first, to avoid modifying the original PDF document.
data = data.slice();
data.fill(0x00, useImageDecoder.exifStart, useImageDecoder.exifEnd);
}
decoder = new ImageDecoder({
data,
type: "image/jpeg",
Expand Down
26 changes: 16 additions & 10 deletions src/core/jpg.js
Original file line number Diff line number Diff line change
Expand Up @@ -782,8 +782,11 @@ function readDataBlock(data, offset) {
}

const array = data.subarray(offset, endOffset);
offset += array.length;
return { appData: array, newOffset: offset };
return {
appData: array,
oldOffset: offset,
newOffset: offset + array.length,
};
}

function skipData(data, offset) {
Expand All @@ -805,6 +808,7 @@ class JpegImage {
}

static canUseImageDecoder(data, colorTransform = -1) {
let exifOffsets = null;
let offset = 0;
let numComponents = null;
let fileMarker = readUint16(data, offset);
Expand All @@ -820,7 +824,7 @@ class JpegImage {
case 0xffe1: // APP1 - Exif
// TODO: Remove this once https://github.com/w3c/webcodecs/issues/870
// is fixed.
const { appData, newOffset } = readDataBlock(data, offset);
const { appData, oldOffset, newOffset } = readDataBlock(data, offset);
offset = newOffset;

// 'Exif\x00\x00'
Expand All @@ -832,10 +836,12 @@ class JpegImage {
appData[4] === 0 &&
appData[5] === 0
) {
// Replace the entire EXIF-block with dummy data, to ensure that a
// non-default EXIF orientation won't cause the image to be rotated
// when using `ImageDecoder` (fixes bug1942064.pdf).
appData.fill(0x00, 6);
if (exifOffsets) {
throw new JpegError("Duplicate EXIF-blocks found.");
}
// Don't do the EXIF-block replacement here (see `JpegStream` instead),
// since replacing it in-place would modify the original PDF document.
exifOffsets = { exifStart: oldOffset + 6, exifEnd: newOffset };
}
fileMarker = readUint16(data, offset);
offset += 2;
Expand All @@ -861,12 +867,12 @@ class JpegImage {
offset += 2;
}
if (numComponents === 4) {
return false;
return null;
}
if (numComponents === 3 && colorTransform === 0) {
return false;
return null;
}
return true;
return exifOffsets || {};
}

parse(data, { dnlScanLines = null } = {}) {
Expand Down
23 changes: 23 additions & 0 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2235,6 +2235,29 @@ describe("api", function () {
expect(data.length).toEqual(basicApiFileLength);
});

// Regression test (bug 1942064): after the JPEG image on the first page has
// been parsed, the data returned by `getData` must still equal the bytes that
// were originally fetched, i.e. the EXIF-block must not have been overwritten.
it("gets data from PDF document with JPEG image containing EXIF-data (bug 1942064)", async function () {
  const pdfPath = TEST_PDFS_PATH + "bug1942064.pdf";
  const originalBytes = await DefaultFileReaderFactory.fetch({ path: pdfPath });

  // Sanity check to make sure that we fetched the entire PDF file.
  expect(originalBytes instanceof Uint8Array).toEqual(true);
  expect(originalBytes.length).toEqual(10719);

  // Pass a copy to `getDocument`, keeping `originalBytes` as the reference
  // for the comparison below.
  const loadingTask = getDocument(originalBytes.slice());
  const pdfDoc = await loadingTask.promise;

  // Requesting the operator list triggers parsing of the JPEG image.
  const firstPage = await pdfDoc.getPage(1);
  await firstPage.getOperatorList();

  const documentBytes = await pdfDoc.getData();
  expect(documentBytes instanceof Uint8Array).toEqual(true);
  // Ensure that the EXIF-block wasn't modified.
  expect(originalBytes).toEqual(documentBytes);

  await loadingTask.destroy();
});

it("gets download info", async function () {
const downloadInfo = await pdfDocument.getDownloadInfo();
expect(downloadInfo).toEqual({ length: basicApiFileLength });
Expand Down

0 comments on commit 34ef74c

Please sign in to comment.