Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lock protect nullability cache of symbolic regex node #60942

Merged
merged 5 commits into from
Nov 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ internal static class CharKind
/// <summary>Extracts the kind of the next character from a context by discarding its low three bits.</summary>
internal static uint Next(uint context)
{
    return context >> 3;
}

/// <summary>Creates the context of the previous and the next character kinds.</summary>
/// <summary>Encodes the pair (prevKind, nextKind) using 6 bits</summary>
internal static uint Context(uint prevKind, uint nextKind)
{
    // nextKind is shifted past the low three bits, which carry prevKind.
    uint shiftedNext = nextKind << 3;
    return shiftedNext | prevKind;
}

/// <summary>Exclusive maximum context (limit) is 64 because a context uses bit-shifting where each kind needs 3 bits.</summary>
internal const int ContextLimit = 64;

internal static string DescribePrev(uint i) => i switch
{
StartStop => @"\A",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,7 @@ public DfaMatchingState<TSetType> TakeTransition(
Debug.Assert(builder._delta is not null);

int offset = (currentState.Id << builder._mintermsCount) | mintermId;
return
builder._delta[offset] ??
matcher.CreateNewTransition(currentState, minterm, offset);
return Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(currentState, minterm, offset);
}
}

Expand Down Expand Up @@ -391,7 +389,7 @@ public DfaMatchingState<TSetType> TakeTransition(
DfaMatchingState<TSetType> nextStates = builder.MkState(oneState, currentStates.PrevCharKind);

int offset = (nextStates.Id << builder._mintermsCount) | mintermId;
DfaMatchingState<TSetType> p = builder._delta[offset] ?? matcher.CreateNewTransition(nextStates, minterm, offset);
DfaMatchingState<TSetType> p = Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(nextStates, minterm, offset);

// Observe that if p.Node is an Or it will be flattened.
union = builder.MkOr2(union, p.Node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ namespace System.Text.RegularExpressions.Symbolic
internal sealed class SymbolicRegexNode<S> where S : notnull
{
internal const string EmptyCharClass = "[]";
/// <summary>Some byte other than 0 to represent true</summary>
internal const byte TrueByte = 1;
/// <summary>Some byte other than 0 to represent false</summary>
internal const byte FalseByte = 2;
/// <summary>The undefined value is the default value 0</summary>
internal const byte UndefinedByte = 0;

internal readonly SymbolicRegexBuilder<S> _builder;
internal readonly SymbolicRegexKind _kind;
Expand All @@ -23,7 +29,11 @@ internal sealed class SymbolicRegexNode<S> where S : notnull
internal readonly SymbolicRegexNode<S>? _right;
internal readonly SymbolicRegexSet<S>? _alts;

private Dictionary<uint, bool>? _nullabilityCache;
/// <summary>
/// Caches nullability of this node for any given context (0 &lt;= context &lt; ContextLimit)
/// when _info.StartsWithSomeAnchor and _info.CanBeNullable are true. Otherwise the cache is null.
/// </summary>
private byte[]? _nullabilityCache;

private S _startSet;

Expand All @@ -50,6 +60,7 @@ private SymbolicRegexNode(SymbolicRegexBuilder<S> builder, SymbolicRegexKind kin
_info = info;
_hashcode = ComputeHashCode();
_startSet = ComputeStartSet();
_nullabilityCache = info.StartsWithSomeAnchor && info.CanBeNullable ? new byte[CharKind.ContextLimit] : null;
}

private bool _isInternalizedUnion;
Expand Down Expand Up @@ -162,92 +173,100 @@ static void AppendToList(SymbolicRegexNode<S> concat, List<SymbolicRegexNode<S>>
/// <param name="context">kind info for previous and next characters</param>
internal bool IsNullableFor(uint context)
{
if (!_info.StartsWithSomeAnchor)
return IsNullable;

if (!_info.CanBeNullable)
return false;
if (_nullabilityCache is null)
{
// If _nullabilityCache is null then IsNullable == CanBeNullable.
// Observe that IsNullable == true always implies CanBeNullable == true,
// and that when the node does not start with an anchor,
// IsNullable == false implies CanBeNullable == false.
return _info.IsNullable;
}

if (!StackHelper.TryEnsureSufficientExecutionStack())
{
return StackHelper.CallOnEmptyStack(IsNullableFor, context);
}

// Initialize the nullability cache for this node.
_nullabilityCache ??= new Dictionary<uint, bool>();
Debug.Assert(context < CharKind.ContextLimit);

if (!_nullabilityCache.TryGetValue(context, out bool is_nullable))
// If nullability has been computed for the given context then return it
byte b = Volatile.Read(ref _nullabilityCache[context]);
if (b != UndefinedByte)
{
switch (_kind)
{
case SymbolicRegexKind.Loop:
Debug.Assert(_left is not null);
is_nullable = _lower == 0 || _left.IsNullableFor(context);
break;
return b == TrueByte;
}

case SymbolicRegexKind.Concat:
Debug.Assert(_left is not null && _right is not null);
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
break;
// Otherwise compute the nullability recursively for the given context
bool is_nullable;
switch (_kind)
{
case SymbolicRegexKind.Loop:
Debug.Assert(_left is not null);
is_nullable = _lower == 0 || _left.IsNullableFor(context);
break;

case SymbolicRegexKind.Or:
case SymbolicRegexKind.And:
Debug.Assert(_alts is not null);
is_nullable = _alts.IsNullableFor(context);
break;
case SymbolicRegexKind.Concat:
Debug.Assert(_left is not null && _right is not null);
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
break;

case SymbolicRegexKind.Not:
Debug.Assert(_left is not null);
is_nullable = !_left.IsNullableFor(context);
break;
case SymbolicRegexKind.Or:
case SymbolicRegexKind.And:
Debug.Assert(_alts is not null);
is_nullable = _alts.IsNullableFor(context);
break;

case SymbolicRegexKind.StartAnchor:
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
break;
case SymbolicRegexKind.Not:
Debug.Assert(_left is not null);
is_nullable = !_left.IsNullableFor(context);
break;

case SymbolicRegexKind.EndAnchor:
is_nullable = CharKind.Next(context) == CharKind.StartStop;
break;
case SymbolicRegexKind.StartAnchor:
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
break;

case SymbolicRegexKind.BOLAnchor:
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
break;
case SymbolicRegexKind.EndAnchor:
is_nullable = CharKind.Next(context) == CharKind.StartStop;
break;

case SymbolicRegexKind.EOLAnchor:
// End-Of-Line anchor is nullable when the next character is Newline or Stop
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
break;
case SymbolicRegexKind.BOLAnchor:
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
break;

case SymbolicRegexKind.WBAnchor:
// test that prev char is word letter iff next is not word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
break;
case SymbolicRegexKind.EOLAnchor:
// End-Of-Line anchor is nullable when the next character is Newline or Stop
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
break;

case SymbolicRegexKind.NWBAnchor:
// test that prev char is word letter iff next is word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
break;
case SymbolicRegexKind.WBAnchor:
// test that prev char is word letter iff next is not word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
break;

case SymbolicRegexKind.EndAnchorZ:
// \Z anchor is nullable when the next character is either the last Newline or Stop
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
break;
case SymbolicRegexKind.NWBAnchor:
// test that prev char is word letter iff next is word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
break;

default: //SymbolicRegexKind.EndAnchorZRev:
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
break;
}
case SymbolicRegexKind.EndAnchorZ:
// \Z anchor is nullable when the next character is either the last Newline or Stop
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
break;

_nullabilityCache[context] = is_nullable;
default: // SymbolicRegexKind.EndAnchorZRev:
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
break;
}

Volatile.Write(ref _nullabilityCache[context], is_nullable ? TrueByte : FalseByte);

return is_nullable;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1827,5 +1827,33 @@ public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, Ti
}, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default)).ToArray());
}
}

[Theory]
[MemberData(nameof(MatchWordsInAnchoredRegexes_TestData))]
public async Task MatchWordsInAnchoredRegexes(RegexEngine engine, RegexOptions options, string pattern, string input, (int, int)[] matches)
{
    // These tests exercise corner cases of matches involving anchors.
    // For NonBacktracking they are meant to cover most contexts of the
    // nullability cache (_nullabilityCache) in SymbolicRegexNode.
    Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
    MatchCollection ms = r.Matches(input);

    // Each expected entry is an (index, length) pair; the number of matches must agree first.
    Assert.Equal(matches.Length, ms.Count);
    for (int i = 0; i < matches.Length; i++)
    {
        // Assert.Equal takes (expected, actual); keeping that order makes failure messages
        // report the expected and actual values under the right labels.
        Assert.Equal(matches[i].Item1, ms[i].Index);
        Assert.Equal(matches[i].Item2, ms[i].Length);
    }
}

/// <summary>
/// Produces, for every available engine, anchored patterns together with their input
/// and the expected (index, length) pair of each match.
/// </summary>
public static IEnumerable<object[]> MatchWordsInAnchoredRegexes_TestData()
{
    // Packs one theory row; a fresh expected-matches array is created for every call.
    object[] Row(RegexEngine rowEngine, RegexOptions rowOptions, string rowPattern, string rowInput, params (int, int)[] expected)
    {
        return new object[] { rowEngine, rowOptions, rowPattern, rowInput, expected };
    }

    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        // Word boundaries on both sides of a long word.
        yield return Row(engine, RegexOptions.None, @"\b\w{10,}\b", "this is a complicated word in a\nnontrivial sentence", (10, 11), (32, 10));

        // Beginning-of-line anchor in multiline mode.
        yield return Row(engine, RegexOptions.Multiline, @"^\w{10,}\b", "this is a\ncomplicated word in a\nnontrivial sentence", (10, 11), (32, 10));

        // Word boundaries around dates.
        yield return Row(engine, RegexOptions.None, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}\b", "date 10/12/1966 and 10/12/66 are the same", (5, 10), (20, 8));

        // End-of-line anchor in multiline mode.
        yield return Row(engine, RegexOptions.Multiline, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}$", "date 10/12/1966\nand 10/12/66\nare the same", (5, 10), (20, 8));
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,9 @@ public void TestConjunctionOverCounting(string conjunct1, string conjunct2, stri
Assert.Contains("conditional", e.Message);
}
}
#endregion


#region Random input generation tests
public static IEnumerable<object[]> GenerateRandomMembers_TestData()
{
string[] patterns = new string[] { @"pa[5\$s]{2}w[o0]rd$", @"\w\d+", @"\d{10}" };
Expand All @@ -536,7 +537,7 @@ public static IEnumerable<object[]> GenerateRandomMembers_TestData()
{
foreach (string input in inputs)
{
yield return new object[] {engine, pattern, input, !negative };
yield return new object[] { engine, pattern, input, !negative };
}
}
}
Expand Down