Skip to content

Commit

Permalink
Number Parser: Refactor for improved algorithm
Browse files Browse the repository at this point in the history
- Correctly handles prefix and suffix literals; #353;
- Loose Matching: This implementation is now much closer to UTS#35 7.1.2 Loose
  Matching http://unicode.org/reports/tr35/#Loose_Matching and fixes all
  reported cases that are related to it, including #288;
- Regression: Drop scientific notation parsing support, which wasn't documented
  anyway and shall be implemented by #533.

Ref #292
Fixes #353

Fixes #46
Fixes #288
Fixes #443
Fixes #457
Fixes #492
Fixes #587
Fixes #644
  • Loading branch information
rxaviers committed Dec 13, 2016
1 parent 639da54 commit bba6a5e
Show file tree
Hide file tree
Showing 27 changed files with 717 additions and 271 deletions.
64 changes: 58 additions & 6 deletions doc/api/number/number-parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,71 @@ esParser( "3,14" );
Some more examples.

```javascript
var parser = Globalize( "en" ).numberParser();
var enParser = Globalize( "en" ).numberParser();

enParser( "12,735" );
// > 12735

parser( "12,735.00" );
enParser( "12,735.00" );
// > 12735

parser( "6.626E-34" );
// > 6.626e-34
Globalize( "en" ).numberParser({ style: "percent" })( "100%" );
// > 1

parser( "∞" );
enParser( "∞" );
// > Infinity

parser( "invalid-stuff" );
enParser( "-3" );
// > -3

enParser( "-∞" );
// > -Infinity

enParser( "invalid-stuff" );
// > NaN

enParser( "invalid-stuff-that-includes-number-123" );
// > NaN

enParser( "invalid-stuff-123-that-includes-number" );
// > NaN

enParser( "123-invalid-stuff-that-includes-number" );
// > NaN

// Invalid decimal separator. (note `.` is used as decimal separator for English)
enParser( "3,14" );
// > NaN

// Invalid grouping separator position.
enParser( "127,35.00" );
// > NaN
```

Loose matching examples.

```js
// Swedish
var svParser = Globalize( "sv" ).numberParser();

// Loose matching for spaces [:Zs:].
// 1: Uses regular space SPACE U+0020.
// 2: Uses NO-BREAK SPACE U+00A0, e.g., "1\xA0000,50" = "1 000,50".
svParser( "1 000,50" ); /* 1 */
// > 1000.5
svParser( "1\xA0000,50" ); /* 2 */
// > 1000.5

// Finnish.
var fiParser = Globalize( "fi" ).numberParser();

// Loose matching for hyphen minus [:Dash:].
// 1: Uses regular minus sign HYPHEN-MINUS U+002D.
// 2: Uses MINUS SIGN U+2212, e.g., "\u22123" = "−3".
fiParser( "-3" ); /* 1 */
// > -3
fiParser( "\u22123" ); /* 2 */
// > -3
```

For improved performance on iterations, first create the parser. Then, reuse it
Expand Down
1 change: 1 addition & 0 deletions src/build/intro-date.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ var createError = Globalize._createError,
formatMessage = Globalize._formatMessage,
numberSymbol = Globalize._numberSymbol,
regexpEscape = Globalize._regexpEscape,
removeLiteralQuotes = Globalize._removeLiteralQuotes,
runtimeBind = Globalize._runtimeBind,
stringPad = Globalize._stringPad,
validateCldr = Globalize._validateCldr,
Expand Down
1 change: 0 additions & 1 deletion src/build/intro-number.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
}(this, function( Cldr, Globalize ) {

var createError = Globalize._createError,
objectExtend = Globalize._objectExtend,
regexpEscape = Globalize._regexpEscape,
runtimeBind = Globalize._runtimeBind,
stringPad = Globalize._stringPad,
Expand Down
11 changes: 4 additions & 7 deletions src/date/format.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ define([
"./pattern-re",
"./start-of",
"./timezone-hour-format",
"./week-days"
"./week-days",
"../util/remove-literal-quotes"
], function( dateDayOfWeek, dateDayOfYear, dateMillisecondsInDay, datePatternRe, dateStartOf,
dateTimezoneHourFormat, dateWeekDays ) {
dateTimezoneHourFormat, dateWeekDays, removeLiteralQuotes ) {

/**
* format( date, properties )
Expand Down Expand Up @@ -248,11 +249,7 @@ return function( date, numberFormatters, properties ) {

// ' literals.
case "'":
current = current.replace( /''/, "'" );
if ( length > 2 ) {
current = current.slice( 1, -1 );
}
ret = current;
ret = removeLiteralQuotes( current );
break;

// Anything else is considered a literal, including [ ,:/.@#], Chinese, Japanese, and
Expand Down
11 changes: 4 additions & 7 deletions src/date/tokenizer.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
define([
"./pattern-re",
"../util/regexp/escape",
"../util/regexp/n"
], function( datePatternRe, regexpEscape, regexpN ) {
"../util/regexp/n",
"../util/remove-literal-quotes"
], function( datePatternRe, regexpEscape, regexpN, removeLiteralQuotes ) {

/**
* tokenizer( value, pattern, properties )
Expand Down Expand Up @@ -368,11 +369,7 @@ return function( value, numberParser, properties ) {

case "'":
token.type = "literal";
current = current.replace( /''/, "'" );
if ( length > 2 ) {
current = current.slice( 1, -1 );
}
tokenRe = new RegExp( regexpEscape( current ) );
tokenRe = new RegExp( regexpEscape( removeLiteralQuotes( current ) ) );
break;

default:
Expand Down
6 changes: 4 additions & 2 deletions src/number.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ define([
"./number/parse-properties",
"./number/pattern",
"./number/symbol",
"./util/remove-literal-quotes",
"./util/string/pad",

"cldr/event",
Expand All @@ -24,7 +25,7 @@ define([
validateDefaultLocale, validateParameterPresence, validateParameterRange,
validateParameterTypeNumber, validateParameterTypePlainObject, validateParameterTypeString,
numberFormatterFn, numberFormatProperties, numberNumberingSystem, numberParserFn,
numberParseProperties, numberPattern, numberSymbol, stringPad ) {
numberParseProperties, numberPattern, numberSymbol, removeLiteralQuotes, stringPad ) {

function validateDigits( properties ) {
var minimumIntegerDigits = properties[ 2 ],
Expand Down Expand Up @@ -125,7 +126,7 @@ Globalize.prototype.numberParser = function( options ) {
pattern = numberPattern( options.style || "decimal", cldr );
}

properties = numberParseProperties( pattern, cldr );
properties = numberParseProperties( pattern, cldr, options );

cldr.off( "get", validateCldr );

Expand Down Expand Up @@ -177,6 +178,7 @@ Globalize._createErrorUnsupportedFeature = createErrorUnsupportedFeature;
Globalize._numberNumberingSystem = numberNumberingSystem;
Globalize._numberPattern = numberPattern;
Globalize._numberSymbol = numberSymbol;
Globalize._removeLiteralQuotes = removeLiteralQuotes;
Globalize._stringPad = stringPad;
Globalize._validateParameterTypeNumber = validateParameterTypeNumber;
Globalize._validateParameterTypeString = validateParameterTypeString;
Expand Down
11 changes: 4 additions & 7 deletions src/number/format.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
define([
"./format/grouping-separator",
"./format/integer-fraction-digits",
"./format/significant-digits"
"./format/significant-digits",
"../util/remove-literal-quotes"
], function( numberFormatGroupingSeparator, numberFormatIntegerFractionDigits,
numberFormatSignificantDigits ) {
numberFormatSignificantDigits, removeLiteralQuotes ) {

/**
* format( number, properties )
Expand Down Expand Up @@ -101,11 +102,7 @@ return function( number, properties ) {

// Literals
if ( literal ) {
literal = literal.replace( /''/, "'" );
if ( literal.length > 2 ) {
literal = literal.slice( 1, -1 );
}
return literal;
return removeLiteralQuotes( literal );
}

// Symbols
Expand Down
40 changes: 0 additions & 40 deletions src/number/number-re.js

This file was deleted.

Loading

0 comments on commit bba6a5e

Please sign in to comment.