Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[browser][non-icu] HybridGlobalization checking for prefix/suffix #84920

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/design/features/hybrid-globalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,15 @@ hiraganaBig.localeCompare(katakanaSmall, "en-US", { sensitivity: "base" }) // 0;
`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace`

`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase`



**String starts with / ends with**

Affected public APIs:
- CompareInfo.IsPrefix
- CompareInfo.IsSuffix
- String.StartsWith
- String.EndsWith

The Web API does not expose locale-sensitive `endsWith`/`startsWith` functions. As a workaround, a locale-sensitive string segmenter is combined with locale-sensitive comparison. In addition to the compare-option limitations described under **String comparison**, this approach has the limitations described under **String indexing**.
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ internal static unsafe partial class JsGlobalization
{
// Internal-call declaration (MethodImplOptions.InternalCall): there is no managed body here;
// the implementation is supplied outside this file.
// Inputs are a culture name, two char buffers passed as pointer + length pairs, and the
// CompareOptions to apply. The result is an int.
// NOTE(review): presumably the int is a negative/zero/positive ordering result and
// exceptionMessage carries an error description on failure — confirm against the implementation.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int CompareString(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);

// Internal-call declaration (MethodImplOptions.InternalCall): there is no managed body here;
// the implementation is supplied outside this file.
// Inputs are a culture name, two char buffers passed as pointer + length pairs, the
// CompareOptions to apply, and a pointer that receives an int. The result is a bool.
// NOTE(review): presumably returns true when str1 begins with str2 under the given culture and
// options, and writes the matched length of str1 through matchLengthPtr; exceptionMessage
// presumably carries an error description on failure — confirm against the implementation.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool StartsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, int* matchLengthPtr);

// Internal-call declaration (MethodImplOptions.InternalCall): there is no managed body here;
// the implementation is supplied outside this file.
// Inputs are a culture name, two char buffers passed as pointer + length pairs, the
// CompareOptions to apply, and a pointer that receives an int. The result is a bool.
// NOTE(review): presumably returns true when str1 ends with str2 under the given culture and
// options, and writes the matched length of str1 through matchLengthPtr; exceptionMessage
// presumably carries an error description on failure — confirm against the implementation.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, int* matchLengthPtr);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIsPrefixTests
public class CompareInfoIsPrefixTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;

public static IEnumerable<object[]> IsPrefix_TestData()
{
// Empty strings
Expand All @@ -31,8 +25,15 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "dz", "d", CompareOptions.None, true, 1 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.Ordinal, true, 1 };
if (PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, true, 1 };
}
else
{
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
}

// Turkish
yield return new object[] { s_turkishCompare, "interesting", "I", CompareOptions.None, false, 0 };
Expand All @@ -56,14 +57,16 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true, 1 };
yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true, 1 };

// Weightless comparisons
yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true, 0 };
yield return new object[] { s_invariantCompare, "\u200dxy", "x", CompareOptions.None, true, 2 };
yield return new object[] { s_invariantCompare, "xy", "\u200d\u200dx", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\0\0xy", "x", CompareOptions.None, true, 3 };

// Surrogates
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true, 2 };
Expand All @@ -76,42 +79,53 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };

// Ignore symbols
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };

// NULL character
yield return new object[] { s_invariantCompare, "a\u0000b", "a\u0000b", CompareOptions.None, true, 3 };
yield return new object[] { s_invariantCompare, "b\u0000a", "b\u0000b", CompareOptions.None, false, 0 };

// Platform differences
bool useNls = PlatformDetection.IsNlsGlobalization;
if (useNls)
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, true, 1 };
}
else
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, false, 0 };
}

// ICU bugs
// UInt16 overflow: https://unicode-org.atlassian.net/browse/ICU-20832 fixed in https://github.com/unicode-org/icu/pull/840 (ICU 65)
if (useNls || PlatformDetection.ICUVersion.Major >= 65)
// Excluded for HybridGlobalization: the JS engine fails with "Fatal javascript OOM in Committing semi space failed."
if (!PlatformDetection.IsHybridGlobalizationOnBrowser && (useNls || PlatformDetection.ICUVersion.Major >= 65))
{
yield return new object[] { s_frenchCompare, "b", new string('a', UInt16.MaxValue + 1), CompareOptions.None, false, 0 };
}

// Prefixes where matched length does not equal value string length
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 };
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
}
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
}

[Theory]
Expand Down Expand Up @@ -139,7 +153,7 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa
using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(value);
valueBoundedMemory.MakeReadonly();

Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
// Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
}
Expand All @@ -150,7 +164,7 @@ public void IsPrefix_UnassignedUnicode()
bool result = PlatformDetection.IsNlsGlobalization ? true : false;
int expectedMatchLength = (result) ? 6 : 0;
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIsSuffixTests
public class CompareInfoIsSuffixTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;
private static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;

public static IEnumerable<object[]> IsSuffix_TestData()
{
// Empty strings
Expand All @@ -32,12 +25,27 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "dz", "z", CompareOptions.None, true, 1 };
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, false, 0 };
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.Ordinal, true, 1 };
if (PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, true, 1 };
}
else
{
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, false, 0 };
}

// Slovak
yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, false, 0 };
yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, false, 0 };
if (PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, true, 1 };
yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, true, 4 };
}
else
{
yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, false, 0 };
yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, false, 0 };
}
yield return new object[] { s_slovakCompare, "chh", "H", CompareOptions.IgnoreCase, true, 1 };

// Turkish
Expand All @@ -62,7 +70,7 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.None, true, 1 };
Expand All @@ -71,6 +79,8 @@ public static IEnumerable<object[]> IsSuffix_TestData()
// Weightless comparisons
yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true, 0 };
yield return new object[] { s_invariantCompare, "xy\u200d", "y", CompareOptions.None, true, 2 };
yield return new object[] { s_invariantCompare, "xy", "y\u200d\u200d", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "xy\0\0", "y", CompareOptions.None, true, 3 };

// Surrogates
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true, 2 };
Expand All @@ -83,11 +93,12 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };

// Ignore symbols
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.None, false, 0 };

// NULL character
yield return new object[] { s_invariantCompare, "a\u0000b", "a\u0000b", CompareOptions.None, true, 3 };
yield return new object[] { s_invariantCompare, "a\u0000b", "a\u0000b", CompareOptions.None, true, 3 }; // suffix had null removed and now it's returning 2 instead of 3
yield return new object[] { s_invariantCompare, "a\u0000b", "b\u0000b", CompareOptions.None, false, 0 };

// Platform differences
Expand All @@ -106,12 +117,15 @@ public static IEnumerable<object[]> IsSuffix_TestData()
}

// Suffixes where matched length does not equal value string length
yield return new object[] { s_invariantCompare, "xyzdz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
yield return new object[] { s_invariantCompare, "xyz\u01F3", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
yield return new object[] { s_germanCompare, "xyz Strasse", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_germanCompare, "xyz Strasse", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "xyzdz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
yield return new object[] { s_invariantCompare, "xyz\u01F3", "dz", supportedIgnoreNonSpaceOption, true, 1 };
yield return new object[] { s_germanCompare, "xyz Strasse", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
}
yield return new object[] { s_germanCompare, "xyz Strasse", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
}

[Theory]
Expand Down Expand Up @@ -151,7 +165,7 @@ public void IsSuffix_UnassignedUnicode()
int expectedMatchLength = (result) ? 6 : 0;

IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
}

[Fact]
Expand Down
Loading