From b63ef7a8b6527205a4b51c5e1dc468aaeabf934f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 3 Mar 2016 13:07:22 +0100 Subject: [PATCH] Refactor `LinkAnnotation` slightly to add `data.url`/`data.dest` at the end This patch also makes sure that all URLs are converted to the correct encoding. --- src/core/annotation.js | 105 ++++++++++++++++++-------------- src/display/annotation_layer.js | 2 +- src/shared/util.js | 2 +- 3 files changed, 60 insertions(+), 49 deletions(-) diff --git a/src/core/annotation.js b/src/core/annotation.js index b77ab4eabf5df..23769d0414896 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -37,6 +37,7 @@ var AnnotationFlag = sharedUtil.AnnotationFlag; var AnnotationType = sharedUtil.AnnotationType; var OPS = sharedUtil.OPS; var Util = sharedUtil.Util; +var isString = sharedUtil.isString; var isArray = sharedUtil.isArray; var isInt = sharedUtil.isInt; var isValidUrl = sharedUtil.isValidUrl; @@ -705,68 +706,78 @@ var LinkAnnotation = (function LinkAnnotationClosure() { var data = this.data; data.annotationType = AnnotationType.LINK; - var action = dict.get('A'); + var action = dict.get('A'), url, dest; if (action && isDict(action)) { var linkType = action.get('S').name; - if (linkType === 'URI') { - var url = action.get('URI'); - if (isName(url)) { - // Some bad PDFs do not put parentheses around relative URLs. - url = '/' + url.name; - } else if (url) { - url = addDefaultProtocolToUrl(url); - } - // TODO: pdf spec mentions urls can be relative to a Base - // entry in the dictionary. - if (!isValidUrl(url, false)) { - url = ''; - } - // According to ISO 32000-1:2008, section 12.6.4.7, - // URI should to be encoded in 7-bit ASCII. - // Some bad PDFs may have URIs in UTF-8 encoding, see Bugzilla 1122280. - try { - data.url = stringToUTF8String(url); - } catch (e) { - // Fall back to a simple copy. 
- data.url = url; - } - } else if (linkType === 'GoTo') { - data.dest = action.get('D'); - } else if (linkType === 'GoToR') { - var urlDict = action.get('F'); - if (isDict(urlDict)) { - // We assume that the 'url' is a Filspec dictionary - // and fetch the url without checking any further - url = urlDict.get('F') || ''; - } + switch (linkType) { + case 'URI': + url = action.get('URI'); + if (isName(url)) { + // Some bad PDFs do not put parentheses around relative URLs. + url = '/' + url.name; + } else if (url) { + url = addDefaultProtocolToUrl(url); + } + // TODO: pdf spec mentions urls can be relative to a Base + // entry in the dictionary. + break; - // TODO: pdf reference says that GoToR - // can also have 'NewWindow' attribute - if (!isValidUrl(url, false)) { - url = ''; - } - data.url = url; - data.dest = action.get('D'); - } else if (linkType === 'Named') { - data.action = action.get('N').name; - } else { - warn('unrecognized link type: ' + linkType); + case 'GoTo': + dest = action.get('D'); + break; + + case 'GoToR': + var urlDict = action.get('F'); + if (isDict(urlDict)) { + // We assume that the 'url' is a Filespec dictionary + // and fetch the url without checking any further + url = urlDict.get('F') || ''; + } + + // TODO: pdf reference says that GoToR + // can also have 'NewWindow' attribute + dest = action.get('D'); + break; + + case 'Named': + data.action = action.get('N').name; + break; + + default: + warn('unrecognized link type: ' + linkType); + } + } else if (dict.has('Dest')) { // Simple destination link. + dest = dict.get('Dest'); + } + + if (url) { + if (isValidUrl(url, /* allowRelative = */ false)) { + data.url = tryConvertUrlEncoding(url); } - } else if (dict.has('Dest')) { - // simple destination link - var dest = dict.get('Dest'); + } + if (dest) { data.dest = isName(dest) ? dest.name : dest; } } // Lets URLs beginning with 'www.' default to using the 'http://' protocol. 
function addDefaultProtocolToUrl(url) { - if (url && url.indexOf('www.') === 0) { + if (isString(url) && url.indexOf('www.') === 0) { return ('http://' + url); } return url; } + function tryConvertUrlEncoding(url) { + // According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded + // in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280. + try { + return stringToUTF8String(url); + } catch (e) { + return url; + } + } + Util.inherit(LinkAnnotation, Annotation, {}); return LinkAnnotation; diff --git a/src/display/annotation_layer.js b/src/display/annotation_layer.js index 4ef44aa6b5352..5a6cba848fc24 100644 --- a/src/display/annotation_layer.js +++ b/src/display/annotation_layer.js @@ -284,7 +284,7 @@ var LinkAnnotationElement = (function LinkAnnotationElementClosure() { if (this.data.action) { this._bindNamedAction(link, this.data.action); } else { - this._bindLink(link, ('dest' in this.data) ? this.data.dest : null); + this._bindLink(link, (this.data.dest || null)); } } diff --git a/src/shared/util.js b/src/shared/util.js index d63fab4a34395..485a343390697 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -312,7 +312,7 @@ function isSameOrigin(baseUrl, otherUrl) { // Validates if URL is safe and allowed, e.g. to avoid XSS. function isValidUrl(url, allowRelative) { - if (!url) { + if (!url || typeof url !== 'string') { return false; } // RFC 3986 (http://tools.ietf.org/html/rfc3986#section-3.1)