Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change getPDFFileNameFromURL to ignore data: URLs for performance reasons (issue 8263) #8321

Merged
merged 3 commits into from
Apr 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -1190,17 +1190,16 @@ var createBlob = function createBlob(data, contentType) {
if (typeof Blob !== 'undefined') {
return new Blob([data], { type: contentType });
}
warn('The "Blob" constructor is not supported.');
throw new Error('The "Blob" constructor is not supported.');
};

var createObjectURL = (function createObjectURLClosure() {
// Blob/createObjectURL is not available, falling back to data schema.
var digits =
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';

return function createObjectURL(data, contentType, forceDataSchema) {
if (!forceDataSchema &&
typeof URL !== 'undefined' && URL.createObjectURL) {
return function createObjectURL(data, contentType, forceDataSchema = false) {
if (!forceDataSchema) {
var blob = createBlob(data, contentType);
return URL.createObjectURL(blob);
}
Expand Down
136 changes: 132 additions & 4 deletions test/unit/ui_utils_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('pdfjs-test/unit/ui_utils_spec', ['exports',
'pdfjs-web/ui_utils'], factory);
'pdfjs-web/ui_utils', 'pdfjs/shared/util'], factory);
} else if (typeof exports !== 'undefined') {
factory(exports, require('../../web/ui_utils.js'));
factory(exports, require('../../web/ui_utils.js'),
require('../../src/shared/util.js'));
} else {
factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils);
factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils,
root.pdfjsSharedUtil);
}
}(this, function (exports, webUiUtils) {
}(this, function (exports, webUiUtils, sharedUtil) {

var binarySearchFirstItem = webUiUtils.binarySearchFirstItem;
var getPDFFileNameFromURL = webUiUtils.getPDFFileNameFromURL;
var EventBus = webUiUtils.EventBus;
var createObjectURL = sharedUtil.createObjectURL;

describe('ui_utils', function() {
describe('binary search', function() {
Expand Down Expand Up @@ -57,6 +61,130 @@ describe('ui_utils', function() {
});
});

describe('getPDFFileNameFromURL', function() {
  // Each spec exercises the function with a relative and/or an absolute URL
  // and checks the filename that is extracted (or the fallback that is used).

  it('gets PDF filename', function() {
    var relativeUrl = '/pdfs/file1.pdf';
    var absoluteUrl = 'http://www.example.com/pdfs/file2.pdf';

    expect(getPDFFileNameFromURL(relativeUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(absoluteUrl)).toEqual('file2.pdf');
  });

  it('gets fallback filename', function() {
    var relativeUrl = '/pdfs/file1.txt';
    var absoluteUrl = 'http://www.example.com/pdfs/file2.txt';

    // Neither URL contains a ".pdf" name, so the default must be returned.
    expect(getPDFFileNameFromURL(relativeUrl)).toEqual('document.pdf');
    expect(getPDFFileNameFromURL(absoluteUrl)).toEqual('document.pdf');
  });

  it('gets custom fallback filename', function() {
    var relativeResult = getPDFFileNameFromURL('/pdfs/file1.txt',
                                               'qwerty1.pdf');
    expect(relativeResult).toEqual('qwerty1.pdf');

    var absoluteResult = getPDFFileNameFromURL(
      'http://www.example.com/pdfs/file2.txt', 'qwerty2.pdf');
    expect(absoluteResult).toEqual('qwerty2.pdf');

    // The empty string is a legitimate custom fallback and must be kept.
    var emptyFallbackResult = getPDFFileNameFromURL('/pdfs/file3.txt', '');
    expect(emptyFallbackResult).toEqual('');
  });

  it('gets PDF filename from URL containing leading/trailing whitespace',
      function() {
    var paddedRelativeUrl = ' /pdfs/file1.pdf ';
    var paddedAbsoluteUrl = ' http://www.example.com/pdfs/file2.pdf ';

    expect(getPDFFileNameFromURL(paddedRelativeUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(paddedAbsoluteUrl)).toEqual('file2.pdf');
  });

  it('gets PDF filename from query string', function() {
    var relativeUrl = '/pdfs/pdfs.html?name=file1.pdf';
    var absoluteUrl = 'http://www.example.com/pdfs/pdf.html?file2.pdf';

    expect(getPDFFileNameFromURL(relativeUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(absoluteUrl)).toEqual('file2.pdf');
  });

  it('gets PDF filename from hash string', function() {
    var relativeUrl = '/pdfs/pdfs.html#name=file1.pdf';
    var absoluteUrl = 'http://www.example.com/pdfs/pdf.html#file2.pdf';

    expect(getPDFFileNameFromURL(relativeUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(absoluteUrl)).toEqual('file2.pdf');
  });

  it('gets correct PDF filename when multiple ones are present', function() {
    // The name found in the path is expected to win over the one that
    // appears in the query string or in the hash.
    var relativeUrl = '/pdfs/file1.pdf?name=file.pdf';
    var absoluteUrl = 'http://www.example.com/pdfs/file2.pdf#file.pdf';

    expect(getPDFFileNameFromURL(relativeUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(absoluteUrl)).toEqual('file2.pdf');
  });

  it('gets PDF filename from URI-encoded data', function() {
    var plainUrl = 'http://www.example.com/pdfs/file1.pdf';
    var plainUrlWithQuery = 'http://www.example.com/pdfs/file.txt?file2.pdf';

    expect(getPDFFileNameFromURL(encodeURIComponent(plainUrl))).
      toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(encodeURIComponent(plainUrlWithQuery))).
      toEqual('file2.pdf');
  });

  it('gets PDF filename from data mistaken for URI-encoded', function() {
    // The "%XX" sequences are expected to come back unchanged.
    var invalidSequenceResult = getPDFFileNameFromURL('/pdfs/%AA.pdf');
    expect(invalidSequenceResult).toEqual('%AA.pdf');

    var encodedSlashResult = getPDFFileNameFromURL('/pdfs/%2F.pdf');
    expect(encodedSlashResult).toEqual('%2F.pdf');
  });

  it('gets PDF filename from (some) standard protocols', function() {
    var httpUrl = 'http://www.example.com/file1.pdf';
    var httpsUrl = 'https://www.example.com/file2.pdf';
    var fileUrl = 'file:///path/to/files/file3.pdf';
    var ftpUrl = 'ftp://www.example.com/file4.pdf';

    expect(getPDFFileNameFromURL(httpUrl)).toEqual('file1.pdf');
    expect(getPDFFileNameFromURL(httpsUrl)).toEqual('file2.pdf');
    expect(getPDFFileNameFromURL(fileUrl)).toEqual('file3.pdf');
    expect(getPDFFileNameFromURL(ftpUrl)).toEqual('file4.pdf');
  });

  it('gets PDF filename from query string appended to "blob:" URL',
      function() {
    var bytes = new Uint8Array([1, 2, 3, 4, 5]);
    var objectUrl = createObjectURL(bytes, 'application/pdf');

    // Guard: the remaining expectation only makes sense for a "blob:" URL.
    var isBlobUrl = objectUrl.indexOf('blob:') === 0;
    expect(isBlobUrl).toEqual(true);

    var urlWithQuery = objectUrl + '?file.pdf';
    expect(getPDFFileNameFromURL(urlWithQuery)).toEqual('file.pdf');
  });

  it('gets fallback filename from query string appended to "data:" URL',
      function() {
    var bytes = new Uint8Array([1, 2, 3, 4, 5]);
    var forceDataSchema = true;
    var objectUrl = createObjectURL(bytes, 'application/pdf',
                                    forceDataSchema);

    // Guard: the remaining expectations only make sense for a "data:" URL.
    var isDataUrl = objectUrl.indexOf('data:') === 0;
    expect(isDataUrl).toEqual(true);

    var urlWithQuery = objectUrl + '?file1.pdf';
    expect(getPDFFileNameFromURL(urlWithQuery)).toEqual('document.pdf');

    // A "data:" URL must still be recognised when whitespace precedes it.
    var paddedUrlWithQuery = ' ' + objectUrl + '?file2.pdf';
    expect(getPDFFileNameFromURL(paddedUrlWithQuery)).toEqual('document.pdf');
  });
});

describe('EventBus', function () {
it('dispatch event', function () {
var eventBus = new EventBus();
Expand Down
16 changes: 8 additions & 8 deletions web/pdf_attachment_viewer.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@ var PDFAttachmentViewer = (function PDFAttachmentViewerClosure() {
/**
* @private
*/
_bindPdfLink:
function PDFAttachmentViewer_bindPdfLink(button, content, filename) {
_bindPdfLink(button, content, filename) {
if (PDFJS.disableCreateObjectURL) {
throw new Error('bindPdfLink: ' +
'Unsupported "PDFJS.disableCreateObjectURL" value.');
}
var blobUrl;
button.onclick = function() {
if (!blobUrl) {
blobUrl = createObjectURL(
content, 'application/pdf', PDFJS.disableCreateObjectURL);
blobUrl = createObjectURL(content, 'application/pdf');
}
var viewerUrl;
if (typeof PDFJSDev === 'undefined' || PDFJSDev.test('GENERIC')) {
Expand All @@ -97,10 +99,8 @@ var PDFAttachmentViewer = (function PDFAttachmentViewerClosure() {
// eslint-disable-next-line no-undef
viewerUrl = chrome.runtime.getURL('/content/web/viewer.html') +
'?file=' + encodeURIComponent(blobUrl + '#' + filename);
} else {
} else if (PDFJSDev.test('FIREFOX || MOZCENTRAL')) {
// Let Firefox's content handler catch the URL and display the PDF.
// In Firefox PDFJS.disableCreateObjectURL is always false, so
// blobUrl is always a blob:-URL and never a data:-URL.
viewerUrl = blobUrl + '?' + encodeURIComponent(filename);
}
window.open(viewerUrl);
Expand Down Expand Up @@ -151,7 +151,7 @@ var PDFAttachmentViewer = (function PDFAttachmentViewerClosure() {
div.className = 'attachmentsItem';
var button = document.createElement('button');
button.textContent = filename;
if (/\.pdf$/i.test(filename)) {
if (/\.pdf$/i.test(filename) && !PDFJS.disableCreateObjectURL) {
this._bindPdfLink(button, item.content, filename);
} else {
this._bindLink(button, item.content, filename);
Expand Down
27 changes: 19 additions & 8 deletions web/ui_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -353,24 +353,35 @@ function noContextMenuHandler(e) {
e.preventDefault();
}

/**
 * Checks whether a URL uses the "data:" scheme, ignoring any leading
 * whitespace and the letter case of the scheme.
 *
 * Leading whitespace is skipped one character at a time and only the five
 * characters that follow are inspected; the rest of the string is never
 * copied or scanned (presumably because "data:" URLs can be very large).
 *
 * @param {string} url - The URL to inspect.
 * @returns {boolean} `true` if the first non-whitespace characters of `url`
 *   are "data:" (case-insensitive), `false` otherwise.
 */
function isDataSchema(url) {
  var start = 0, length = url.length;
  // A single character that trims to the empty string is whitespace.
  while (start < length && url[start].trim() === '') {
    start++;
  }
  // `substring` clamps the end index, so short inputs are handled safely;
  // it replaces the deprecated `substr` without changing the result.
  return url.substring(start, start + 5).toLowerCase() === 'data:';
}

/**
* Returns the filename or guessed filename from the url (see issue 3455).
* url {String} The original PDF location.
* defaultFilename {string} The value to return if the file name is unknown.
* @return {String} Guessed PDF file name.
* @param {string} url - The original PDF location.
* @param {string} defaultFilename - The value returned if the filename is
* unknown, or the protocol is unsupported.
* @returns {string} Guessed PDF filename.
*/
function getPDFFileNameFromURL(url, defaultFilename) {
if (typeof defaultFilename === 'undefined') {
defaultFilename = 'document.pdf';
function getPDFFileNameFromURL(url, defaultFilename = 'document.pdf') {
if (isDataSchema(url)) {
console.warn('getPDFFileNameFromURL: ' +
'ignoring "data:" URL for performance reasons.');
return defaultFilename;
}
var reURI = /^(?:(?:[^:]+:)?\/\/[^\/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/;
// SCHEME HOST 1.PATH 2.QUERY 3.REF
// Pattern to get last matching NAME.pdf
var reFilename = /[^\/?#=]+\.pdf\b(?!.*\.pdf\b)/i;
var splitURI = reURI.exec(url);
var suggestedFilename = reFilename.exec(splitURI[1]) ||
reFilename.exec(splitURI[2]) ||
reFilename.exec(splitURI[3]);
reFilename.exec(splitURI[2]) ||
reFilename.exec(splitURI[3]);
if (suggestedFilename) {
suggestedFilename = suggestedFilename[0];
if (suggestedFilename.indexOf('%') !== -1) {
Expand Down