Skip to content

Commit

Permalink
feat(classifier): Add personal title classifier (fra addresses)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joxit committed May 6, 2019
1 parent 4e0c663 commit 2bc6c22
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 6 deletions.
10 changes: 10 additions & 0 deletions classification/PersonalTitleClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

/**
 * Classification carrying the 'personal_title' label.
 *
 * All other state is set up by the parent Classification constructor,
 * which receives both arguments untouched.
 */
class PersonalTitleClassification extends Classification {
  /**
   * @param {*} confidence - passed straight through to Classification
   * @param {*} meta - passed straight through to Classification
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // the label is what distinguishes this classification from its siblings
    this.label = 'personal_title'
  }
}

module.exports = PersonalTitleClassification
24 changes: 24 additions & 0 deletions classification/PersonalTitleClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./PersonalTitleClassification')

module.exports.tests = {}

// Registers a test checking the state of a classification that was
// constructed without any arguments.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const instance = new Classification()

    t.false(instance.public)
    t.equals(instance.label, 'personal_title')
    t.equals(instance.confidence, 1.0)
    t.deepEqual(instance.meta, {})

    t.end()
  })
}

// Runs every test case registered on module.exports.tests, prefixing each
// test name with the name of the unit under test.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`PersonalTitleClassification: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
2 changes: 1 addition & 1 deletion classifier/PersonClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const libpostal = require('../resources/libpostal/libpostal')
class PersonClassifier extends PhraseClassifier {
setup () {
this.index = {}
libpostal.load(this.index, ['all'], 'people.txt', { lowercase: true })
libpostal.load(this.index, ['all', 'fr'], 'people.txt', { lowercase: true })
}

each (span) {
Expand Down
3 changes: 2 additions & 1 deletion classifier/PersonClassifier.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ module.exports.tests.contains_numerals = (test) => {
module.exports.tests.classify = (test) => {
let valid = [
'Martin Luther King', 'm l k', 'MLK',
'John Fitzgerald Kennedy', 'j f k', 'JFK'
'John Fitzgerald Kennedy', 'j f k', 'JFK',
'cdg', 'Charles De Gaulle'
]

valid.forEach(token => {
Expand Down
27 changes: 27 additions & 0 deletions classifier/PersonalTitleClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const PhraseClassifier = require('./super/PhraseClassifier')
const PersonalTitleClassification = require('../classification/PersonalTitleClassification')
const libpostal = require('../resources/libpostal/libpostal')

// dictionaries sourced from the libpostal project
// see: https://github.com/openvenues/libpostal

/**
 * Classifier which marks a span as a PersonalTitleClassification when its
 * normalised text (minus one trailing period) is a key of the index loaded
 * from the libpostal 'personal_titles.txt' dictionaries.
 */
class PersonalTitleClassifier extends PhraseClassifier {
  /**
   * Builds `this.index` from the 'personal_titles.txt' dictionary of every
   * language listed in `libpostal.languages`.
   *
   * The `replace` option carries the same trailing-period pattern used in
   * `each()`, presumably so that stored entries and looked-up tokens are
   * stripped the same way (NOTE(review): confirm against libpostal.load).
   */
  setup () {
    this.index = {}
    libpostal.load(this.index, libpostal.languages, 'personal_titles.txt', {
      replace: [/\.$/, '']
    })
  }

  /**
   * Classifies a single span in place.
   *
   * @param {Object} span - span to inspect; reads `span.contains.numerals`
   *   and `span.norm`, and calls `span.classify()` on a match
   */
  each (span) {
    // skip spans which contain numbers
    if (span.contains.numerals) { return }

    // drop a single trailing period before the lookup
    const token = span.norm.replace(/\.$/, '')

    // use an inverted index for full token matching as it's O(1)
    // the builtin is invoked via Object.prototype so that a dictionary
    // entry named 'hasOwnProperty' can never shadow it on the index object
    if (Object.prototype.hasOwnProperty.call(this.index, token)) {
      span.classify(new PersonalTitleClassification(1))
    }
  }
}

module.exports = PersonalTitleClassifier
50 changes: 50 additions & 0 deletions classifier/PersonalTitleClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
const PersonalTitleClassifier = require('./PersonalTitleClassifier')
const PersonalTitleClassification = require('../classification/PersonalTitleClassification')
const Span = require('../tokenization/Span')

module.exports.tests = {}

/**
 * Test helper: wraps `body` in a Span, runs a fresh PersonalTitleClassifier
 * over it and hands the span back for inspection.
 *
 * @param {string} body - text used to construct the Span
 * @returns {Span} the span after `each()` has been applied to it
 */
function classify (body) {
  const classifier = new PersonalTitleClassifier()
  const span = new Span(body)

  classifier.each(span, null, 1)

  return span
}

// Registers a test checking that a span flagged as containing numerals
// is left without any classification.
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const classifier = new PersonalTitleClassifier()
    const span = new Span('example')

    // force the flag on, regardless of the span's actual text
    span.contains.numerals = true
    classifier.each(span, null, 1)

    t.deepEqual(span.classifications, {})
    t.end()
  })
}

// Registers one test per token; each token is expected to receive exactly
// one PersonalTitleClassification with a confidence of 1.0.
module.exports.tests.classify = (test) => {
  const titles = ['Général', 'General', 'gal', 'Saint', 'st', 'cdt']

  for (const title of titles) {
    test(`classify: ${title}`, (t) => {
      const span = classify(title)
      const expected = {
        PersonalTitleClassification: new PersonalTitleClassification(1.0)
      }

      t.deepEqual(span.classifications, expected)
      t.end()
    })
  }
}

// Runs every test case registered on module.exports.tests, prefixing each
// test name with the name of the unit under test.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`PersonalTitleClassifier: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
69 changes: 65 additions & 4 deletions classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module.exports = [
]
},
{
// Rue Montmartre
// Rue Montmartre or Boulevard Charles De Gaulle
confidence: 0.88,
Class: StreetClassification,
scheme: [
Expand All @@ -26,7 +26,7 @@ module.exports = [
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['AlphaClassification'],
is: ['AlphaClassification', 'PersonClassification'],
not: ['StreetClassification', 'IntersectionClassification']
}
]
Expand Down Expand Up @@ -141,7 +141,7 @@ module.exports = [
]
},
{
// Rue Du Paris
// Rue De Paris
confidence: 0.8,
Class: StreetClassification,
scheme: [
Expand All @@ -154,7 +154,7 @@ module.exports = [
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['AlphaClassification'],
is: ['AlphaClassification', 'PersonClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
Expand Down Expand Up @@ -182,6 +182,67 @@ module.exports = [
}
]
},
{
// Rue Saint Anne
confidence: 0.81,
Class: StreetClassification,
scheme: [
{
is: ['StreetPrefixClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['PersonalTitleClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['AlphaClassification', 'GivenNameClassification', 'PersonClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// Boulevard du Général Charles De Gaulle
confidence: 0.81,
Class: StreetClassification,
scheme: [
{
is: ['StreetPrefixClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['StopWordClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['PersonalTitleClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['AlphaClassification', 'GivenNameClassification', 'PersonClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// Avenue Aristide Briand or Allée Victor Hugo
confidence: 0.92,
Class: StreetClassification,
scheme: [
{
is: ['StreetPrefixClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['GivenNameClassification', 'AlphaClassification'],
not: ['StreetClassification', 'IntersectionClassification']
},
{
is: ['SurnameClassification'],
not: ['StreetClassification', 'StreetPrefixClassification']
}
]
},
{
// +++ Main Street
confidence: 0.84,
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const PersonClassifier = require('../classifier/PersonClassifier')
const GivenNameClassifier = require('../classifier/GivenNameClassifier')
const SurnameClassifier = require('../classifier/SurnameClassifier')
const PersonalSuffixClassifier = require('../classifier/PersonalSuffixClassifier')
const PersonalTitleClassifier = require('../classifier/PersonalTitleClassifier')
const ChainClassifier = require('../classifier/ChainClassifier')
const IntersectionClassifier = require('../classifier/IntersectionClassifier')
// const MultiStreetClassifier = require('../classifier/MultiStreetClassifier')
Expand Down Expand Up @@ -49,6 +50,7 @@ class AddressParser extends Parser {
new GivenNameClassifier(),
new SurnameClassifier(),
new PersonalSuffixClassifier(),
new PersonalTitleClassifier(),
new ChainClassifier(),
new WhosOnFirstClassifier(),

Expand Down
24 changes: 24 additions & 0 deletions test/address.fra.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,30 @@ const testcase = (test, common) => {
assert('Rue de la Paix', [
{ street: 'Rue de la Paix' }
], true)

assert('Boulevard du Général Charles De Gaulle', [
{ street: 'Boulevard du Général Charles De Gaulle' }
], true)

assert('11 Boulevard Saint Germains', [
{ housenumber: '11' }, { street: 'Boulevard Saint Germains' }
], true)

assert('Rue Saint Anne', [
{ street: 'Rue Saint Anne' }
], true)

assert('Boulevard Charles De Gaulle', [
{ street: 'Boulevard Charles De Gaulle' }
], true)

assert('Allée Victor Hugo', [
{ street: 'Allée Victor Hugo' }
], true)

assert('Avenue Aristide Briand', [
{ street: 'Avenue Aristide Briand' }
], true)
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit 2bc6c22

Please sign in to comment.