Skip to content

Commit

Permalink
Merge pull request #10185 from Snuffleupagus/find-less-normalization
Browse files Browse the repository at this point in the history
Reduce the number of redundant text normalization operations in `PDFFindController`
  • Loading branch information
timvandermeij authored Oct 28, 2018
2 parents ed4ac1b + 5dc12f9 commit a77ac5f
Showing 1 changed file with 27 additions and 14 deletions.
41 changes: 27 additions & 14 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ const CHARACTERS_TO_NORMALIZE = {
'\u00BE': '3/4', // Vulgar fraction three quarters
};

// Cached character-class regex; built on the first `normalize` call.
let normalizationRegex = null;

/**
 * Replaces every character that is a key of `CHARACTERS_TO_NORMALIZE` with
 * the replacement string mapped to it.
 *
 * @param {string} text - The text to normalize.
 * @return {string} The text with all mapped characters substituted.
 */
function normalize(text) {
  // The key set never changes, so the regex only needs to be compiled once
  // and can then be shared by all subsequent calls.
  normalizationRegex = normalizationRegex ||
    new RegExp(`[${Object.keys(CHARACTERS_TO_NORMALIZE).join('')}]`, 'g');

  return text.replace(normalizationRegex,
                      (match) => CHARACTERS_TO_NORMALIZE[match]);
}

/**
* @typedef {Object} PDFFindControllerOptions
* @property {IPDFLinkService} linkService - The navigation/linking service.
Expand All @@ -59,10 +71,6 @@ class PDFFindController {

this._reset();
eventBus.on('findbarclose', this._onFindBarClose.bind(this));

// Compile the regular expression for text normalization once.
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
this._normalizationRegex = new RegExp(`[${replace}]`, 'g');
}

get highlightMatches() {
Expand Down Expand Up @@ -152,7 +160,7 @@ class PDFFindController {
matchIdx: null,
};
this._extractTextPromises = [];
this._pageContents = []; // Stores the text for each page.
this._pageContents = []; // Stores the normalized text for each page.
this._matchesCountTotal = 0;
this._pagesToSearch = null;
this._pendingFindMatches = Object.create(null);
Expand All @@ -164,10 +172,15 @@ class PDFFindController {
this._firstPageCapability = createPromiseCapability();
}

_normalize(text) {
return text.replace(this._normalizationRegex, function(ch) {
return CHARACTERS_TO_NORMALIZE[ch];
});
/**
* @return {string} The (current) normalized search query.
*/
get _query() {
if (this._state.query !== this._rawQuery) {
this._rawQuery = this._state.query;
this._normalizedQuery = normalize(this._state.query);
}
return this._normalizedQuery;
}

/**
Expand Down Expand Up @@ -304,8 +317,8 @@ class PDFFindController {
}

_calculateMatch(pageIndex) {
let pageContent = this._normalize(this._pageContents[pageIndex]);
let query = this._normalize(this._state.query);
let pageContent = this._pageContents[pageIndex];
let query = this._query;
const { caseSensitive, entireWord, phraseSearch, } = this._state;

if (query.length === 0) {
Expand Down Expand Up @@ -362,8 +375,8 @@ class PDFFindController {
strBuf.push(textItems[j].str);
}

// Store the page content (text items) as one string.
this._pageContents[i] = strBuf.join('');
// Store the normalized page content (text items) as one string.
this._pageContents[i] = normalize(strBuf.join(''));
extractTextCapability.resolve(i);
}, (reason) => {
console.error(`Unable to get text content for page ${i + 1}`, reason);
Expand Down Expand Up @@ -423,7 +436,7 @@ class PDFFindController {
}

// If there's no query there's no point in searching.
if (this._state.query === '') {
if (this._query === '') {
this._updateUIState(FindState.FOUND);
return;
}
Expand Down

0 comments on commit a77ac5f

Please sign in to comment.