Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Native Digits Support #71045

Merged
merged 6 commits into from
Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,21 @@ public void DigitSubstitutionListTest(string cultureName, DigitShapes shape)
}
}

/// <summary>
/// Supplies (culture name, expected native digits) rows for <see cref="TestNativeDigits"/>.
/// </summary>
/// <returns>
/// One <c>object[]</c> per culture: element 0 is the culture name, element 1 is a
/// ten-element <c>string[]</c> holding the expected digits zero through nine, in order.
/// </returns>
public static IEnumerable<object[]> NativeDigitTestData()
{
    // Chakma digits are U+11136..U+1113F. They lie outside the BMP, so each digit is a
    // surrogate pair (two UTF-16 code units). U+1E950..U+1E959 are the Adlam digits and
    // do not belong to this locale.
    yield return new object[] { "ccp-Cakm-BD", new string[] { "\U00011136", "\U00011137", "\U00011138", "\U00011139", "\U0001113A", "\U0001113B", "\U0001113C", "\U0001113D", "\U0001113E", "\U0001113F" }};

    // Arabic-Indic digits (U+0660..U+0669).
    yield return new object[] { "ar-SA", new string[] {"\u0660", "\u0661", "\u0662", "\u0663", "\u0664", "\u0665", "\u0666", "\u0667", "\u0668", "\u0669" }};

    // Plain ASCII digits.
    yield return new object[] { "en-US", new string[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" }};

    // Extended Arabic-Indic digits (U+06F0..U+06F9).
    yield return new object[] { "ur-IN", new string[] { "\u06F0", "\u06F1", "\u06F2", "\u06F3", "\u06F4", "\u06F5", "\u06F6", "\u06F7", "\u06F8", "\u06F9" }};
}

/// <summary>
/// Condition for <c>ConditionalTheory</c>: true only when the reported ICU major version
/// is at least 66 and the platform is not the browser.
/// </summary>
public static bool FullICUPlatform
{
    get
    {
        // The ICU version is checked first, then the browser exclusion.
        return PlatformDetection.ICUVersion.Major >= 66 && PlatformDetection.IsNotBrowser;
    }
}

/// <summary>
/// Checks that the culture's <c>NumberFormat.NativeDigits</c> matches the expected
/// digit strings, element by element.
/// </summary>
/// <param name="cultureName">Name of the culture to look up.</param>
/// <param name="nativeDigits">Expected native digits for that culture.</param>
[ConditionalTheory(nameof(FullICUPlatform))]
[MemberData(nameof(NativeDigitTestData))]
public void TestNativeDigits(string cultureName, string[] nativeDigits)
{
    NumberFormatInfo numberFormat = CultureInfo.GetCultureInfo(cultureName).NumberFormat;
    string[] actualDigits = numberFormat.NativeDigits;

    Assert.Equal(nativeDigits, actualDigits);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2138,7 +2138,7 @@ private string[] GetNativeDigits()
{
string[] result = NumberFormatInfo.s_asciiDigits;

// LOCALE_SNATIVEDIGITS (array of 10 single character strings).
// NLS LOCALE_SNATIVEDIGITS (array of 10 single character strings). In case of ICU, the buffer can be longer.
string digits = GetLocaleInfoCoreUserOverride(LocaleStringData.Digits);

// If digits.Length < NumberFormatInfo.s_asciiDigits.Length, the native digits setting is messed up on the host machine.
Expand All @@ -2148,31 +2148,49 @@ private string[] GetNativeDigits()
return result;
}

// Try to check if the digits are all ASCII so we can avoid the array allocation and use the static array NumberFormatInfo.s_asciiDigits instead.
// If we have non-ASCII digits, we should exit the loop very quickly.
int i = 0;
while (i < NumberFormatInfo.s_asciiDigits.Length)
{
if (digits[i] != NumberFormatInfo.s_asciiDigits[i][0])
{
break;
}
i++;
}
// In ICU we separate the digits with the '\uFFFF' character

if (i >= NumberFormatInfo.s_asciiDigits.Length)
if (digits.StartsWith("0\uFFFF1\uFFFF2\uFFFF3\uFFFF4\uFFFF5\uFFFF6\uFFFF7\uFFFF8\uFFFF9\uFFFF", StringComparison.Ordinal) || // ICU common cases
digits.StartsWith("0123456789", StringComparison.Ordinal)) // NLS common cases
{
return result;
}

// we have non-ASCII digits
// Non-ASCII digits

// Check if values coming from ICU separated by 0xFFFF
int ffffPos = digits.IndexOf('\uFFFF');

result = new string[10];
for (i = 0; i < result.Length; i++)
if (ffffPos < 0) // NLS case
{
result[i] = char.ToString(digits[i]);
for (int i = 0; i < result.Length; i++)
{
result[i] = char.ToString(digits[i]);
}

return result;
}

return result;
// ICU case

int start = 0;
int index = 0;

do
{
result[index++] = digits.Substring(start, ffffPos - start);
start = ++ffffPos;
while ((uint)ffffPos < (uint)digits.Length && digits[ffffPos] != '\uFFFF')
{
ffffPos++;
}

} while (ffffPos < digits.Length && index < 10);

Debug.Assert(index >= 10, $"Couldn't read native digits for '{_sWindowsName}' successfully.");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be index == 10? result is exactly 10 items long, so if we tried to do result[10], it would have thrown.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we can do that. I am not sure if this is worth another PR though.


return index < 10 ? NumberFormatInfo.s_asciiDigits : result;
}

internal void GetNFIValues(NumberFormatInfo nfi)
Expand Down
74 changes: 52 additions & 22 deletions src/native/libs/System.Globalization.Native/pal_localeStringData.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@ Obtains the value of a DecimalFormatSymbols
static UErrorCode GetLocaleInfoDecimalFormatSymbol(const char* locale,
UNumberFormatSymbol symbol,
UChar* value,
int32_t valueLength)
int32_t valueLength,
int32_t* symbolLength)
{
UErrorCode status = U_ZERO_ERROR;
UNumberFormat* pFormat = unum_open(UNUM_DECIMAL, NULL, 0, locale, NULL, &status);
unum_getSymbol(pFormat, symbol, value, valueLength, &status);

int32_t lengthResult = unum_getSymbol(pFormat, symbol, value, valueLength, &status);
if (symbolLength != NULL)
{
*symbolLength = lengthResult;
}
unum_close(pFormat);
return status;
}
Expand All @@ -39,14 +45,15 @@ static UErrorCode GetDigitSymbol(const char* locale,
UNumberFormatSymbol symbol,
int digit,
UChar* value,
int32_t valueLength)
int32_t valueLength,
int32_t* symbolLength)
{
if (U_FAILURE(previousStatus))
{
return previousStatus;
}

return GetLocaleInfoDecimalFormatSymbol(locale, symbol, value + digit, valueLength - digit);
return GetLocaleInfoDecimalFormatSymbol(locale, symbol, value + digit, valueLength - digit, symbolLength);
}

/*
Expand Down Expand Up @@ -279,26 +286,49 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName,
}
break;
case LocaleString_ThousandSeparator:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_GROUPING_SEPARATOR_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_DecimalSeparator:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_DECIMAL_SEPARATOR_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_Digits:
status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength);
// symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous
for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL; symbol++)
{
int charIndex = symbol - UNUM_ONE_DIGIT_SYMBOL + 1;
status = GetDigitSymbol(
locale, status, (UNumberFormatSymbol)symbol, charIndex, value, valueLength);
// A native digit can be more than one 16-bit character (e.g. the ccp-Cakm-BD locale, which uses surrogate pairs to represent its native digits).
// We'll separate the native digits in the returned buffer by the character '\uFFFF'.
int32_t symbolLength = 0;
status = GetDigitSymbol(locale, status, UNUM_ZERO_DIGIT_SYMBOL, 0, value, valueLength, &symbolLength);

int32_t charIndex = symbolLength;

if (U_SUCCESS(status) && (uint32_t)charIndex < (uint32_t)valueLength)
{
value[charIndex++] = 0xFFFF;

// symbols UNUM_ONE_DIGIT to UNUM_NINE_DIGIT are contiguous
for (int32_t symbol = UNUM_ONE_DIGIT_SYMBOL; symbol <= UNUM_NINE_DIGIT_SYMBOL && charIndex < valueLength - 3; symbol++)
{
status = GetDigitSymbol(locale, status, (UNumberFormatSymbol)symbol, charIndex, value, valueLength, &symbolLength);
charIndex += symbolLength;
if (!U_SUCCESS(status) || (uint32_t)charIndex >= (uint32_t)valueLength)
{
break;
}

value[charIndex++] = 0xFFFF;
}

if ((uint32_t)charIndex < (uint32_t)valueLength)
{
value[charIndex] = 0;
}
}
}
break;
case LocaleString_MonetarySymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_CURRENCY_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_Iso4217MonetarySymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INTL_CURRENCY_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_CurrencyEnglishName:
status = GetLocaleCurrencyName(locale, false, value, valueLength);
Expand All @@ -307,11 +337,11 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName,
status = GetLocaleCurrencyName(locale, true, value, valueLength);
break;
case LocaleString_MonetaryDecimalSeparator:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_SEPARATOR_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_MonetaryThousandSeparator:
status =
GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength);
GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_AMDesignator:
status = GetLocaleInfoAmPm(locale, true, value, valueLength);
Expand All @@ -320,10 +350,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName,
status = GetLocaleInfoAmPm(locale, false, value, valueLength);
break;
case LocaleString_PositiveSign:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PLUS_SIGN_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_NegativeSign:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_MINUS_SIGN_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_Iso639LanguageTwoLetterName:
status = GetLocaleIso639LanguageTwoLetterName(locale, value, valueLength);
Expand All @@ -338,10 +368,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName,
status = GetLocaleIso3166CountryCode(locale, value, valueLength);
break;
case LocaleString_NaNSymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_NAN_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_PositiveInfinitySymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_INFINITY_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_ParentName:
{
Expand All @@ -358,10 +388,10 @@ int32_t GlobalizationNative_GetLocaleInfoString(const UChar* localeName,
break;
}
case LocaleString_PercentSymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERCENT_SYMBOL, value, valueLength, NULL);
break;
case LocaleString_PerMilleSymbol:
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength);
status = GetLocaleInfoDecimalFormatSymbol(locale, UNUM_PERMILL_SYMBOL, value, valueLength, NULL);
break;
default:
status = U_UNSUPPORTED_ERROR;
Expand Down