Skip to content

Commit

Permalink
feat(final_token_classifications): add final token classifications, improve autocomplete parsing (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored May 14, 2019
1 parent 1522b2a commit f590df2
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 2 deletions.
10 changes: 10 additions & 0 deletions classification/FinalTokenClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

// Classification labelled 'final_token'.
// Both constructor arguments are handed straight to the parent
// Classification constructor; only the label is set here.
class FinalTokenClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)

    // label identifying this classification type
    this.label = 'final_token'
  }
}

module.exports = FinalTokenClassification
24 changes: 24 additions & 0 deletions classification/FinalTokenClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./FinalTokenClassification')

// registry of test cases; each entry receives the (test, common) pair
module.exports.tests = {}

// a classification built without arguments must expose the expected
// label together with the default confidence, meta and visibility
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const classification = new Classification()

    t.false(classification.public)
    t.equals(classification.label, 'final_token')
    t.equals(classification.confidence, 1.0)
    t.deepEqual(classification.meta, {})
    t.end()
  })
}

// entry point for the test runner: invokes every registered test case,
// prefixing each test name with the name of the unit under test
module.exports.all = (tape, common) => {
  const test = (name, testFunction) => tape(`FinalTokenClassification: ${name}`, testFunction)

  const suite = module.exports.tests
  for (const testCase in suite) {
    suite[testCase](test, common)
  }
}
10 changes: 10 additions & 0 deletions classification/FinalTokenSingleCharacterClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

// Classification labelled 'final_token_single_character'.
// Both constructor arguments are handed straight to the parent
// Classification constructor; only the label is set here.
class FinalTokenSingleCharacterClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)

    // label identifying this classification type
    this.label = 'final_token_single_character'
  }
}

module.exports = FinalTokenSingleCharacterClassification
24 changes: 24 additions & 0 deletions classification/FinalTokenSingleCharacterClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./FinalTokenSingleCharacterClassification')

// registry of test cases; each entry receives the (test, common) pair
module.exports.tests = {}

// a classification built without arguments must expose the expected
// label together with the default confidence, meta and visibility
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const classification = new Classification()

    t.false(classification.public)
    t.equals(classification.label, 'final_token_single_character')
    t.equals(classification.confidence, 1.0)
    t.deepEqual(classification.meta, {})
    t.end()
  })
}

// entry point for the test runner: invokes every registered test case,
// prefixing each test name with the name of the unit under test
module.exports.all = (tape, common) => {
  const test = (name, testFunction) => tape(`FinalTokenSingleCharacterClassification: ${name}`, testFunction)

  const suite = module.exports.tests
  for (const testCase in suite) {
    suite[testCase](test, common)
  }
}
24 changes: 24 additions & 0 deletions classifier/FinalTokenClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const BaseClassifier = require('./super/BaseClassifier')
const FinalTokenClassification = require('../classification/FinalTokenClassification')
const FinalTokenSingleCharacterClassification = require('../classification/FinalTokenSingleCharacterClassification')

// classify the final token with 'FinalTokenClassification'
// and also with a 'FinalTokenSingleCharacterClassification' if it's only
// a single character in length.
// note: this can be useful for improving autocomplete.

class FinalTokenClassifier extends BaseClassifier {
  // Marks the last 'child' span of the last section.
  // Does nothing when there are no sections, or when the last
  // section yields no 'child' spans.
  classify (tokenizer) {
    const sections = tokenizer.section
    if (sections.length < 1) { return }

    // only the final section is inspected
    const tokens = sections[sections.length - 1].graph.findAll('child')
    if (tokens.length < 1) { return }

    const finalToken = tokens[tokens.length - 1]
    finalToken.classify(new FinalTokenClassification(1.0))

    // a one-character normalized form gets the extra classification
    if (finalToken.norm.length === 1) {
      finalToken.classify(new FinalTokenSingleCharacterClassification(1.0))
    }
  }
}

module.exports = FinalTokenClassifier
82 changes: 82 additions & 0 deletions classifier/FinalTokenClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
const FinalTokenClassifier = require('./FinalTokenClassifier')
const Tokenizer = require('../tokenization/Tokenizer')

// registry of test cases; each entry receives the (test, common) pair
module.exports.tests = {}

// Tokenize `body`, run the FinalTokenClassifier over it and return, per
// classification name, the array of 'child' spans carrying that
// classification (in section order, then span order).
function classify (body) {
  const classifier = new FinalTokenClassifier()
  const tokenizer = new Tokenizer(body)
  classifier.classify(tokenizer)

  // one bucket per classification name of interest
  const found = {
    FinalTokenClassification: [],
    FinalTokenSingleCharacterClassification: []
  }
  const names = Object.keys(found)

  tokenizer.section.forEach(section => {
    section.graph.findAll('child').forEach(span => {
      names.forEach(name => {
        if (span.classifications.hasOwnProperty(name)) {
          found[name].push(span)
        }
      })
    })
  })

  return found
}

// For each input, check which span bodies end up carrying the final token
// classification and the single-character variant.
module.exports.tests.classify = (test) => {
  // expected span bodies per classification, listed per input
  const cases = [
    { name: 'empty string', input: '', finalToken: [], singleCharacter: [] },
    { name: 'A', input: 'A', finalToken: ['A'], singleCharacter: ['A'] },
    { name: 'A B', input: 'A B', finalToken: ['B'], singleCharacter: ['B'] },
    { name: 'A BC', input: 'A BC', finalToken: ['BC'], singleCharacter: [] },
    { name: 'A BC, D', input: 'A BC, D', finalToken: ['D'], singleCharacter: ['D'] }
  ]

  // assert the number of spans first, then the body of each expected span
  const assertBodies = (t, spans, expected) => {
    t.equals(spans.length, expected.length)
    expected.forEach((body, i) => {
      t.equals(spans[i].body, body)
    })
  }

  cases.forEach(({ name, input, finalToken, singleCharacter }) => {
    test(`classify: ${name}`, (t) => {
      const result = classify(input)
      assertBodies(t, result.FinalTokenClassification, finalToken)
      assertBodies(t, result.FinalTokenSingleCharacterClassification, singleCharacter)
      t.end()
    })
  })
}

// entry point for the test runner: invokes every registered test case,
// prefixing each test name with the name of the unit under test
module.exports.all = (tape, common) => {
  const test = (name, testFunction) => tape(`FinalTokenClassifier: ${name}`, testFunction)

  const suite = module.exports.tests
  for (const testCase in suite) {
    suite[testCase](test, common)
  }
}
4 changes: 2 additions & 2 deletions classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ module.exports = [
},
{
// Main Street West
confidence: 0.83,
confidence: 0.88,
Class: StreetClassification,
scheme: [
{
Expand All @@ -329,7 +329,7 @@ module.exports = [
},
{
is: ['DirectionalClassification'],
not: ['StreetClassification', 'IntersectionClassification']
not: ['StreetClassification', 'IntersectionClassification', 'FinalTokenSingleCharacterClassification']
}
]
}
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const Parser = require('./Parser')
const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
const FinalTokenClassifier = require('../classifier/FinalTokenClassifier')
const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
Expand Down Expand Up @@ -34,6 +35,7 @@ class AddressParser extends Parser {
[
// generic word classifiers
new AlphaNumericClassifier(),
new FinalTokenClassifier(),

// word classifiers
new HouseNumberClassifier(),
Expand Down
12 changes: 12 additions & 0 deletions test/functional.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ const testcase = (test, common) => {
assert('Portland Cafe Portland OR', [
{ locality: 'Portland' }, { region: 'OR' }
], true)

// trailing directional causes issue with autocomplete
assert('1 Foo St N', [{ housenumber: '1' }, { street: 'Foo St' }], true)
assert('1 Foo St S', [{ housenumber: '1' }, { street: 'Foo St' }], true)
assert('1 Foo St E', [{ housenumber: '1' }, { street: 'Foo St' }], true)
assert('1 Foo St W', [{ housenumber: '1' }, { street: 'Foo St' }], true)

// ...but we allow two letter directionals
assert('1 Foo St NW', [{ housenumber: '1' }, { street: 'Foo St NW' }], true)
assert('1 Foo St NE', [{ housenumber: '1' }, { street: 'Foo St NE' }], true)
assert('1 Foo St SW', [{ housenumber: '1' }, { street: 'Foo St SW' }], true)
assert('1 Foo St SE', [{ housenumber: '1' }, { street: 'Foo St SE' }], true)
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit f590df2

Please sign in to comment.