Skip to content

Commit

Permalink
feat(min_length): add minlength option for loading resources (#18)
Browse files: browse the repository at this point in the history
  • Loading branch information
missinglink authored May 14, 2019
1 parent 2b6edfc commit 1522b2a
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 13 deletions.
13 changes: 5 additions & 8 deletions classifier/CompoundStreetClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@ class CompoundStreetClassifier extends WordClassifier {
setup () {
// load street suffixes
this.suffixes = {}
libpostal.load(this.suffixes, ['de'], 'concatenated_suffixes_separable.txt')

// remove any suffixes which contain less than 3 characters (excluding a period)
// this removes suffixes such as 'r.' which can be ambiguous
for (let token in this.suffixes) {
if (token.length < 3) {
delete this.suffixes[token]
}
}
libpostal.load(this.suffixes, ['de'], 'concatenated_suffixes_separable.txt', {
// remove any suffixes which contain less than 3 characters (excluding a period)
// this removes suffixes such as 'r.' which can be ambiguous
minlength: 3
})

// whitelist
this.suffixes.park = true
Expand Down
3 changes: 2 additions & 1 deletion classifier/PersonalTitleClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class PersonalTitleClassifier extends PhraseClassifier {
setup () {
this.index = {}
libpostal.load(this.index, libpostal.languages, 'personal_titles.txt', {
replace: [/\.$/, '']
replace: [/\.$/, ''],
minlength: 2
})
}

Expand Down
11 changes: 9 additions & 2 deletions resources/libpostal/libpostal.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ function _normalize (cell, options) {
if (options && options.replace) {
value = value.replace(options.replace[0], options.replace[1])
}
if (options && options.minlength) {
if (value.length < options.minlength) { return '' }
}
if (options && options.lowercase) {
value = value.toLowerCase()
}
Expand All @@ -36,14 +39,18 @@ function _normalize (cell, options) {
/**
 * Build a callback which normalizes a raw cell and records the result
 * as a key of `index`.
 *
 * @param {Object} index - object used as a lookup table; accepted values are set to `true`.
 * @param {Object} [options] - passed through unchanged to `_normalize`.
 * @returns {Function} callback accepting a single raw cell.
 */
function _add (index, options) {
  return cell => {
    const value = _normalize(cell, options)
    // _normalize returns '' for cells rejected by the minlength option;
    // skip those so that an empty-string key is never indexed
    if (value && value.length) {
      index[value] = true
    }
  }
}

/**
 * Build a callback which normalizes a raw cell and deletes the matching
 * key from `index`.
 *
 * @param {Object} index - object used as a lookup table.
 * @param {Object} [options] - passed through unchanged to `_normalize`.
 * @returns {Function} callback accepting a single raw cell.
 */
function _remove (index, options) {
  return cell => {
    const value = _normalize(cell, options)
    // _normalize returns '' for cells rejected by the minlength option;
    // a rejected cell must not delete anything from the index
    if (value && value.length) {
      delete index[value]
    }
  }
}

Expand Down
11 changes: 9 additions & 2 deletions resources/whosonfirst/whosonfirst.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ function _normalize (cell, options) {
if (options && options.replace) {
value = value.replace(options.replace[0], options.replace[1])
}
if (options && options.minlength) {
if (value.length < options.minlength) { return '' }
}
if (options && options.lowercase) {
value = value.toLowerCase()
}
Expand All @@ -40,14 +43,18 @@ function _normalize (cell, options) {
/**
 * Build a callback which normalizes a raw cell and adds the result to `set`.
 *
 * @param {Set} set - collection receiving the accepted values.
 * @param {Object} [options] - passed through unchanged to `_normalize`.
 * @returns {Function} callback accepting a single raw cell.
 */
function _add (set, options) {
  return cell => {
    const value = _normalize(cell, options)
    // _normalize returns '' for cells rejected by the minlength option;
    // skip those so that the empty string never becomes a member
    if (value && value.length) {
      set.add(value)
    }
  }
}

/**
 * Build a callback which normalizes a raw cell and removes the result
 * from `set`.
 *
 * @param {Set} set - collection to remove values from.
 * @param {Object} [options] - passed through unchanged to `_normalize`.
 * @returns {Function} callback accepting a single raw cell.
 */
function _remove (set, options) {
  return cell => {
    const value = _normalize(cell, options)
    // _normalize returns '' for cells rejected by the minlength option;
    // a rejected cell must not remove anything from the set
    if (value && value.length) {
      set.delete(value)
    }
  }
}

Expand Down

0 comments on commit 1522b2a

Please sign in to comment.