diff --git a/docs/parsers.md b/docs/parsers.md index 0bc0e5f4..0787ac24 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -553,7 +553,7 @@ Result: ### Deferred -Creates a parser that can be references before it is actually defined. This is used when there is a cyclic dependency between parsers. +Creates a parser that can be referenced before it is actually defined. This is used when there is a cyclic dependency between parsers. ```c# Deferred Deferred() @@ -777,6 +777,7 @@ Result: ``` failure: "Unexpected char c" ``` + ### When Adds some additional logic for a parser to succeed. @@ -823,6 +824,15 @@ Parser Discard() Parser Discard(U value) ``` +### Lookup + +Builds a parser that lists all possible matches to improve performance. Most parsers implement `ISeekable` in order to give `OneOf` a way to build a lookup table and identify the potential next parsers in the chain. Some parsers don't implement `ISeekable` because they are built too late, like `Deferred`. The `Lookup` parser works around that limitation. 
+ +```c# +Parser Lookup(params ReadOnlySpan expectedChars) +Parser Lookup(params ISeekable[] parsers) +``` + ## Other parsers ### AnyCharBefore diff --git a/docs/writing.md b/docs/writing.md index 1c3ab20f..22e5770e 100644 --- a/docs/writing.md +++ b/docs/writing.md @@ -25,12 +25,15 @@ public override bool Parse(ParseContext context, ref ParseResult(parseResult1.Value, parseResult2.Value)); + + context.ExitParser(this); return true; } context.Scanner.Cursor.ResetPosition(start); } + context.ExitParser(this); return false; } ``` @@ -51,10 +54,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) { if (parser.Parse(context, ref result)) { + context.ExitParser(this); return true; } } + context.ExitParser(this); return false; } ``` diff --git a/src/Parlot/CharMap.cs b/src/Parlot/CharMap.cs index bf034827..f248398f 100644 --- a/src/Parlot/CharMap.cs +++ b/src/Parlot/CharMap.cs @@ -1,12 +1,19 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Runtime.CompilerServices; namespace Parlot; +/// +/// Combines maps of ascii and non-ascii characters. +/// If all characters are ascii, the non-ascii dictionary is not used. +/// internal sealed class CharMap where T : class { + public static MethodInfo IndexerMethodInfo = typeof(CharMap).GetMethod("get_Item", BindingFlags.Public | BindingFlags.Instance)!; + private readonly T[] _asciiMap = new T[128]; private Dictionary? 
_nonAsciiMap; diff --git a/src/Parlot/Compilation/CompilationContext.cs b/src/Parlot/Compilation/CompilationContext.cs index 91c60eb2..c932081b 100644 --- a/src/Parlot/Compilation/CompilationContext.cs +++ b/src/Parlot/Compilation/CompilationContext.cs @@ -1,4 +1,4 @@ -using Parlot.Fluent; +using Parlot.Fluent; using System; using System.Collections.Generic; using System.Linq.Expressions; @@ -48,7 +48,7 @@ public CompilationContext() /// Gets or sets whether the current compilation phase should ignore the results of the parsers. /// /// - /// When set to false, the compiled statements don't need to record and define the property. + /// When set to true, the compiled statements don't need to record and define the property. /// This is done to optimize compiled parser that are usually used for pattern matching only. /// public bool DiscardResult { get; set; } @@ -85,7 +85,12 @@ public CompilationResult CreateCompilationResult(Type valueType, bool defaultSuc result.Variables.Add(valueVariable); result.Body.Add(Expression.Assign(successVariable, Expression.Constant(defaultSuccess, typeof(bool)))); - result.Body.Add(Expression.Assign(valueVariable, defaultValue ?? Expression.Default(valueType))); + + // Don't need to assign a type's default value + if (defaultValue != null) + { + result.Body.Add(Expression.Assign(valueVariable, defaultValue ?? Expression.Default(valueType))); + } return result; } diff --git a/src/Parlot/Compilation/CompiledParser.cs b/src/Parlot/Compilation/CompiledParser.cs index 36e7169a..a94a4027 100644 --- a/src/Parlot/Compilation/CompiledParser.cs +++ b/src/Parlot/Compilation/CompiledParser.cs @@ -1,4 +1,4 @@ -using Parlot.Fluent; +using Parlot.Fluent; using System; namespace Parlot.Compilation; @@ -26,12 +26,15 @@ public class CompiledParser : Parser, ICompiledParser public CompiledParser(Func> parse, Parser source) { + Name = "Compiled"; _parse = parse ?? 
throw new ArgumentNullException(nameof(parse)); Source = source; } public override bool Parse(ParseContext context, ref ParseResult result) { + context.EnterParser(this); + var cursor = context.Scanner.Cursor; var start = cursor.Offset; var parsed = _parse(context); @@ -39,9 +42,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (parsed.Item1) { result.Set(start, cursor.Offset, parsed.Item2); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } } diff --git a/src/Parlot/Compilation/ExpressionHelper.cs b/src/Parlot/Compilation/ExpressionHelper.cs index a59ad922..eea8c23a 100644 --- a/src/Parlot/Compilation/ExpressionHelper.cs +++ b/src/Parlot/Compilation/ExpressionHelper.cs @@ -1,4 +1,4 @@ -using Parlot.Fluent; +using Parlot.Fluent; using System; using System.Linq.Expressions; using System.Reflection; @@ -8,7 +8,7 @@ namespace Parlot.Compilation; public static class ExpressionHelper { - internal static readonly MethodInfo ParserContext_SkipWhiteSpaceMethod = typeof(ParseContext).GetMethod(nameof(ParseContext.SkipWhiteSpace), Array.Empty())!; + internal static readonly MethodInfo ParserContext_SkipWhiteSpaceMethod = typeof(ParseContext).GetMethod(nameof(ParseContext.SkipWhiteSpace), [])!; internal static readonly MethodInfo ParserContext_WhiteSpaceParser = typeof(ParseContext).GetProperty(nameof(ParseContext.WhiteSpaceParser))?.GetGetMethod()!; internal static readonly MethodInfo Scanner_ReadText_NoResult = typeof(Scanner).GetMethod(nameof(Parlot.Scanner.ReadText), [typeof(ReadOnlySpan), typeof(StringComparison)])!; internal static readonly MethodInfo Scanner_ReadChar = typeof(Scanner).GetMethod(nameof(Parlot.Scanner.ReadChar), [typeof(char)])!; @@ -23,23 +23,21 @@ public static class ExpressionHelper internal static readonly MethodInfo Scanner_ReadDoubleQuotedString = typeof(Scanner).GetMethod(nameof(Parlot.Scanner.ReadDoubleQuotedString), [])!; internal static readonly MethodInfo 
Scanner_ReadQuotedString = typeof(Scanner).GetMethod(nameof(Parlot.Scanner.ReadQuotedString), [])!; - internal static readonly MethodInfo Cursor_Advance = typeof(Cursor).GetMethod(nameof(Parlot.Cursor.Advance), Array.Empty())!; + internal static readonly MethodInfo Cursor_Advance = typeof(Cursor).GetMethod(nameof(Parlot.Cursor.Advance), [])!; internal static readonly MethodInfo Cursor_AdvanceNoNewLines = typeof(Cursor).GetMethod(nameof(Parlot.Cursor.AdvanceNoNewLines), [typeof(int)])!; internal static readonly MethodInfo Cursor_ResetPosition = typeof(Cursor).GetMethod("ResetPosition")!; + internal static readonly MethodInfo Character_IsWhiteSpace = typeof(Character).GetMethod(nameof(Character.IsWhiteSpace))!; internal static readonly ConstructorInfo Exception_ToString = typeof(Exception).GetConstructor([typeof(string)])!; internal static readonly ConstructorInfo TextSpan_Constructor = typeof(TextSpan).GetConstructor([typeof(string), typeof(int), typeof(int)])!; internal static readonly MethodInfo ReadOnlySpan_ToString = typeof(ReadOnlySpan).GetMethod(nameof(ToString), [])!; - + internal static readonly MethodInfo MemoryExtensions_AsSpan = typeof(MemoryExtensions).GetMethod(nameof(MemoryExtensions.AsSpan), [typeof(string)])!; - public static Expression ArrayEmpty() => ((Expression>)(() => Array.Empty())).Body; public static Expression New() where T : new() => ((Expression>)(() => new T())).Body; - public static readonly Expression> CharacterIsInRange = (cursor, b, c) => Character.IsInRange(cursor.Current, b, c); - //public static Expression NewOptionalResult(this CompilationContext _, Expression hasValue, Expression value) => Expression.New(GetOptionalResult_Constructor(), [hasValue, value]); public static Expression NewTextSpan(this CompilationContext _, Expression buffer, Expression offset, Expression count) => Expression.New(TextSpan_Constructor, [buffer, offset, count]); public static MemberExpression Scanner(this CompilationContext context) => 
Expression.Field(context.ParseContext, "Scanner"); @@ -53,11 +51,28 @@ public static class ExpressionHelper public static MemberExpression Buffer(this CompilationContext context) => Expression.Field(context.Scanner(), "Buffer"); public static Expression ThrowObject(this CompilationContext _, Expression o) => Expression.Throw(Expression.New(Exception_ToString, Expression.Call(o, o.Type.GetMethod("ToString", [])!))); public static Expression ThrowParseException(this CompilationContext context, Expression message) => Expression.Throw(Expression.New(typeof(ParseException).GetConstructors().First(), [message, context.Position()])); + public static Expression BreakPoint(this CompilationContext _, Expression state, Action action) => Expression.Invoke(Expression.Constant(action, typeof(Action)), Expression.Convert(state, typeof(object))); public static MethodCallExpression ReadSingleQuotedString(this CompilationContext context) => Expression.Call(context.Scanner(), Scanner_ReadSingleQuotedString); public static MethodCallExpression ReadDoubleQuotedString(this CompilationContext context) => Expression.Call(context.Scanner(), Scanner_ReadDoubleQuotedString); public static MethodCallExpression ReadQuotedString(this CompilationContext context) => Expression.Call(context.Scanner(), Scanner_ReadQuotedString); public static MethodCallExpression ReadChar(this CompilationContext context, char c) => Expression.Call(context.Scanner(), Scanner_ReadChar, Expression.Constant(c)); + + // Surprisingly, writing the same direct code with this helper is slower than calling scanner.ReadChar() + public static Expression ReadCharInlined(this CompilationContext context, char c, CompilationResult result) + { + var constant = Expression.Constant(c); + return Expression.IfThen( + Expression.Equal(context.Current(), constant), // if (cursor.Current == 'c') + Expression.Block( + Expression.Assign(result.Success, TrueExpression), + context.Advance(), + context.DiscardResult + ? 
Expression.Empty() + : Expression.Assign(result.Value, constant) + ) + ); + } public static MethodCallExpression ReadDecimal(this CompilationContext context) => Expression.Call(context.Scanner(), Scanner_ReadDecimal); public static MethodCallExpression ReadDecimal(this CompilationContext context, Expression allowLeadingSign, Expression allowDecimalSeparator, Expression allowGroupSeparator, Expression allowExponent, Expression number, Expression decimalSeparator, Expression groupSeparator) => Expression.Call(context.Scanner(), Scanner_ReadDecimalAllArguments, allowLeadingSign, allowDecimalSeparator, allowGroupSeparator, allowExponent, number, decimalSeparator, groupSeparator); public static MethodCallExpression ReadInteger(this CompilationContext context) => Expression.Call(context.Scanner(), Scanner_ReadInteger); @@ -88,4 +103,6 @@ public static MethodCallExpression ParserSkipWhiteSpace(this CompilationContext { return Expression.Call(context.ParseContext, ParserContext_SkipWhiteSpaceMethod); } + + public static ConstantExpression TrueExpression { get; } = Expression.Constant(true, typeof(bool)); } diff --git a/src/Parlot/Fluent/Always.cs b/src/Parlot/Fluent/Always.cs index 632e891e..8152bdf3 100644 --- a/src/Parlot/Fluent/Always.cs +++ b/src/Parlot/Fluent/Always.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -12,6 +12,7 @@ public sealed class Always : Parser, ICompilable public Always(T value) { + Name = "Always"; _value = value; } @@ -21,6 +22,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) result.Set(context.Scanner.Cursor.Offset, context.Scanner.Cursor.Offset, _value); + context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/Between.cs b/src/Parlot/Fluent/Between.cs index 2835f379..dcd49465 100644 --- a/src/Parlot/Fluent/Between.cs +++ b/src/Parlot/Fluent/Between.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using 
Parlot.Compilation; using Parlot.Rewriting; using System; using System.Linq.Expressions; @@ -23,6 +23,8 @@ public Between(Parser before, Parser parser, Parser after) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"Between({before.Name},{parser.Name},{after.Name})"; } public bool CanSeek { get; } @@ -43,6 +45,8 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (!_before.Parse(context, ref parsedA)) { + context.ExitParser(this); + // Don't reset position since _before should do it return false; } @@ -50,6 +54,8 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (!_parser.Parse(context, ref result)) { cursor.ResetPosition(start); + + context.ExitParser(this); return false; } @@ -58,9 +64,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (!_after.Parse(context, ref parsedB)) { cursor.ResetPosition(start); + + context.ExitParser(this); return false; } + context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/Capture.cs b/src/Parlot/Fluent/Capture.cs index aa950032..983ab21c 100644 --- a/src/Parlot/Fluent/Capture.cs +++ b/src/Parlot/Fluent/Capture.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -10,6 +10,7 @@ public sealed class Capture : Parser, ICompilable public Capture(Parser parser) { _parser = parser; + Name = $"{parser.Name} (Capture)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -28,11 +29,11 @@ public override bool Parse(ParseContext context, ref ParseResult resul result.Set(start.Offset, end, new TextSpan(context.Scanner.Buffer, start.Offset, length)); + context.ExitParser(this); return true; } - context.Scanner.Cursor.ResetPosition(start); - + context.ExitParser(this); return false; } @@ -61,31 +62,25 @@ public CompilationResult Compile(CompilationContext context) // // success = true; // } - 
// else - // { - // context.Scanner.Cursor.ResetPosition(start); - // } - var startOffset = context.Offset(start); + var startOffset = result.DeclareVariable($"startOffset{context.NextNumber}", context.Offset(start)); result.Body.Add( Expression.Block( parserCompileResult.Variables, Expression.Block(parserCompileResult.Body), - Expression.IfThenElse( - parserCompileResult.Success, - Expression.Block( - context.DiscardResult - ? Expression.Empty() - : Expression.Assign(result.Value, + Expression.IfThen( + test: parserCompileResult.Success, + ifTrue: Expression.Block( + // Never discard result here, that would nullify this parser + Expression.Assign(result.Value, context.NewTextSpan( context.Buffer(), startOffset, Expression.Subtract(context.Offset(), startOffset) )), Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))) - ), - context.ResetPosition(start) + ) ) ) ); diff --git a/src/Parlot/Fluent/CharLiteral.cs b/src/Parlot/Fluent/CharLiteral.cs index c793af29..9bd3cfca 100644 --- a/src/Parlot/Fluent/CharLiteral.cs +++ b/src/Parlot/Fluent/CharLiteral.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System.Linq.Expressions; @@ -10,6 +10,7 @@ public CharLiteral(char c) { Char = c; ExpectedChars = [c]; + Name = $"Char('{c}')"; } public char Char { get; } @@ -31,9 +32,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) var start = cursor.Offset; cursor.Advance(); result.Set(start, cursor.Offset, Char); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/Deferred.cs b/src/Parlot/Fluent/Deferred.cs index e3b1e5f1..dde910cb 100644 --- a/src/Parlot/Fluent/Deferred.cs +++ b/src/Parlot/Fluent/Deferred.cs @@ -1,21 +1,48 @@ -using Parlot.Compilation; +using Parlot.Compilation; +using Parlot.Rewriting; using System; +#if NET using System.Linq; +#endif using System.Linq.Expressions; namespace Parlot.Fluent; 
-public sealed class Deferred : Parser, ICompilable +public sealed class Deferred : Parser, ICompilable, ISeekable { - public Parser? Parser { get; set; } + private Parser? _parser; + + public Parser? Parser + { + get => _parser; + set + { + _parser = value ?? throw new ArgumentNullException(nameof(value)); + Name = $"{_parser.Name} (Deferred)"; + } + } + + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } public Deferred() { + Name = "Deferred"; } - public Deferred(Func, Parser> parser) + public Deferred(Func, Parser> parser) : this() { Parser = parser(this); + + if (Parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } } public override bool Parse(ParseContext context, ref ParseResult result) @@ -25,7 +52,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) throw new InvalidOperationException("Parser has not been initialized"); } - return Parser.Parse(context, ref result); + context.EnterParser(this); + + var outcome = Parser.Parse(context, ref result); + + context.ExitParser(this); + return outcome; } private bool _initialized; @@ -61,24 +93,25 @@ public CompilationResult Compile(CompilationContext context) var parserCompileResult = Parser.Build(context); var resultExpression = Expression.Variable(typeof(ValueTuple), $"result{context.NextNumber}"); - - var returnLabelTarget = Expression.Label(typeof(ValueTuple)); - var returnLabelExpression = Expression.Label(returnLabelTarget, resultExpression); + var returnTarget = Expression.Label(typeof(ValueTuple)); + var returnExpression = Expression.Return(returnTarget, resultExpression, typeof(ValueTuple)); + var returnLabel = Expression.Label(returnTarget, defaultValue: Expression.New(typeof(ValueTuple))); var lambda = Expression.Lambda>>( - Expression.Block( - typeof(ValueTuple), - 
parserCompileResult.Variables.Append(resultExpression), - Expression.Block(parserCompileResult.Body), - Expression.Assign(resultExpression, Expression.New( - typeof(ValueTuple).GetConstructor([typeof(bool), typeof(T)])!, - parserCompileResult.Success, - context.DiscardResult ? Expression.Default(parserCompileResult.Value.Type) : parserCompileResult.Value)), - returnLabelExpression), - true, - context.ParseContext) - ; + Expression.Block( + type: typeof(ValueTuple), + variables: parserCompileResult.Variables.Append(resultExpression), + Expression.Block(parserCompileResult.Body), + Expression.Assign(resultExpression, Expression.New( + typeof(ValueTuple).GetConstructor([typeof(bool), typeof(T)])!, + parserCompileResult.Success, + context.DiscardResult ? Expression.Default(parserCompileResult.Value.Type) : parserCompileResult.Value)), + returnExpression, + returnLabel), + true, + context.ParseContext + ); // Store the source lambda for debugging context.Lambdas.Add(lambda); diff --git a/src/Parlot/Fluent/Discard.cs b/src/Parlot/Fluent/Discard.cs index e0fbe97f..6b4eebfe 100644 --- a/src/Parlot/Fluent/Discard.cs +++ b/src/Parlot/Fluent/Discard.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -15,6 +15,8 @@ public Discard(Parser parser, U value) { _parser = parser; _value = value; + + Name = $"{parser.Name} (Discard)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -26,9 +28,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (_parser.Parse(context, ref parsed)) { result.Set(parsed.Start, parsed.End, _value); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/Else.cs b/src/Parlot/Fluent/Else.cs index 2afada2e..e4bdada6 100644 --- a/src/Parlot/Fluent/Else.cs +++ b/src/Parlot/Fluent/Else.cs @@ -1,4 +1,5 @@ -using Parlot.Compilation; +using Parlot.Compilation; 
+using Parlot.Rewriting; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -6,15 +7,30 @@ namespace Parlot.Fluent; /// /// Returns a default value if the previous parser failed. /// -public sealed class Else : Parser, ICompilable +public sealed class Else : Parser, ICompilable, ISeekable { private readonly Parser _parser; private readonly T _value; + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } + public Else(Parser parser, T value) { _parser = parser; _value = value; + + if (_parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } + + Name = $"{parser.Name} (Else)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -26,6 +42,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) result.Set(result.Start, result.End, _value); } + context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/Eof.cs b/src/Parlot/Fluent/Eof.cs index eced51a6..ec4f01d5 100644 --- a/src/Parlot/Fluent/Eof.cs +++ b/src/Parlot/Fluent/Eof.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -13,6 +13,7 @@ public sealed class Eof : Parser, ICompilable public Eof(Parser parser) { _parser = parser; + Name = $"{parser.Name} (Eof)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -21,9 +22,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (_parser.Parse(context, ref result) && context.Scanner.Cursor.Eof) { + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/Error.cs b/src/Parlot/Fluent/Error.cs index e52056ee..6b4046d8 100644 --- a/src/Parlot/Fluent/Error.cs +++ b/src/Parlot/Fluent/Error.cs @@ -1,19 +1,35 @@ -using System; +using System; using System.Linq; 
using System.Linq.Expressions; using Parlot.Compilation; +using Parlot.Rewriting; namespace Parlot.Fluent; -public sealed class ElseError : Parser, ICompilable +public sealed class ElseError : Parser, ICompilable, ISeekable { private readonly Parser _parser; private readonly string _message; + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } + public ElseError(Parser parser, string message) { _parser = parser ?? throw new ArgumentNullException(nameof(parser)); _message = message; + + if (_parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } + + Name = $"{parser.Name} (ElseError)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -22,9 +38,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (!_parser.Parse(context, ref result)) { + context.ExitParser(this); throw new ParseException(_message, context.Scanner.Cursor.Position); } + context.ExitParser(this); return true; } @@ -81,6 +99,8 @@ public Error(Parser parser, string message) { _parser = parser ?? 
throw new ArgumentNullException(nameof(parser)); _message = message; + + Name = $"{parser.Name} (Error)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -89,9 +109,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (_parser.Parse(context, ref result)) { + context.ExitParser(this); throw new ParseException(_message, context.Scanner.Cursor.Position); } + context.ExitParser(this); return false; } @@ -127,15 +149,30 @@ public CompilationResult Compile(CompilationContext context) } } -public sealed class Error : Parser, ICompilable +public sealed class Error : Parser, ICompilable, ISeekable { private readonly Parser _parser; private readonly string _message; + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } + public Error(Parser parser, string message) { _parser = parser ?? throw new ArgumentNullException(nameof(parser)); _message = message; + + if (_parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } + + Name = $"{parser.Name} (Error)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -146,9 +183,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (_parser.Parse(context, ref parsed)) { + context.ExitParser(this); throw new ParseException(_message, context.Scanner.Cursor.Position); } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/Identifier.cs b/src/Parlot/Fluent/Identifier.cs index a22c33ce..d9afad34 100644 --- a/src/Parlot/Fluent/Identifier.cs +++ b/src/Parlot/Fluent/Identifier.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq.Expressions; using System.Reflection; @@ -17,6 +17,8 @@ public Identifier(Func? extraStart = null, Func? 
extraPa { _extraStart = extraStart; _extraPart = extraPart; + + Name = "Identifier"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -41,9 +43,12 @@ public override bool Parse(ParseContext context, ref ParseResult resul var end = context.Scanner.Cursor.Offset; result.Set(start, end, new TextSpan(context.Scanner.Buffer, start, end - start)); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/IdentifierLiteral.cs b/src/Parlot/Fluent/IdentifierLiteral.cs index 556154a4..1f0a0ce8 100644 --- a/src/Parlot/Fluent/IdentifierLiteral.cs +++ b/src/Parlot/Fluent/IdentifierLiteral.cs @@ -19,6 +19,8 @@ public IdentifierLiteral(SearchValues startSearchValues, SearchValues result) @@ -29,6 +31,7 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (span.Length == 0 || !_startSearchValues.Contains(span[0])) { + context.ExitParser(this); return false; } @@ -40,6 +43,8 @@ public override bool Parse(ParseContext context, ref ParseResult resul var start = context.Scanner.Cursor.Position.Offset; context.Scanner.Cursor.AdvanceNoNewLines(size); result.Set(start, start + size, new TextSpan(context.Scanner.Buffer, start, size)); + + context.ExitParser(this); return true; } } diff --git a/src/Parlot/Fluent/ListOfCharsLiteral.cs b/src/Parlot/Fluent/ListOfCharsLiteral.cs new file mode 100644 index 00000000..e01af89c --- /dev/null +++ b/src/Parlot/Fluent/ListOfCharsLiteral.cs @@ -0,0 +1,81 @@ +#if !NET8_0_OR_GREATER +using Parlot.Rewriting; +using System; + +namespace Parlot.Fluent; + +internal sealed class ListOfChars : Parser, ISeekable +{ + private readonly CharMap _map = new(); + private readonly int _minSize; + private readonly int _maxSize; + private readonly bool _hasNewLine; + + public bool CanSeek { get; } = true; + + public char[] ExpectedChars { get; } + + public bool SkipWhitespace { get; } + + public ListOfChars(string values, int minSize = 1, int maxSize = 0) 
+ { + foreach (var c in values) + { + _map.Set(c, new object()); + + if (Character.IsNewLine(c)) + { + _hasNewLine = true; + } + } + + ExpectedChars = values.ToCharArray(); + _minSize = minSize; + _maxSize = maxSize; + + Name = $"AnyOf({values})"; + } + + public override bool Parse(ParseContext context, ref ParseResult result) + { + context.EnterParser(this); + + var cursor = context.Scanner.Cursor; + var span = cursor.Span; + var start = cursor.Offset; + + var size = 0; + var maxLength = _maxSize > 0 ? Math.Min(span.Length, _maxSize) : span.Length; + + for (var i = 0; i < maxLength; i++) + { + if (_map[span[i]] == null) + { + break; + } + + size++; + } + + if (size < _minSize) + { + context.ExitParser(this); + return false; + } + + if (_hasNewLine) + { + cursor.Advance(size); + } + else + { + cursor.AdvanceNoNewLines(size); + } + + result.Set(start, start + size, new TextSpan(context.Scanner.Buffer, start, size)); + + context.ExitParser(this); + return true; + } +} +#endif diff --git a/src/Parlot/Fluent/NonWhiteSpaceLiteral.cs b/src/Parlot/Fluent/NonWhiteSpaceLiteral.cs index d8a382b7..437c64fa 100644 --- a/src/Parlot/Fluent/NonWhiteSpaceLiteral.cs +++ b/src/Parlot/Fluent/NonWhiteSpaceLiteral.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -10,12 +10,16 @@ public sealed class NonWhiteSpaceLiteral : Parser, ICompilable public NonWhiteSpaceLiteral(bool includeNewLines = true) { _includeNewLines = includeNewLines; + Name = "NonWhiteSpaceLiteral"; } public override bool Parse(ParseContext context, ref ParseResult result) { + context.EnterParser(this); + if (context.Scanner.Cursor.Eof) { + context.ExitParser(this); return false; } @@ -34,10 +38,13 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (start == end) { + context.ExitParser(this); return false; } result.Set(start, end, new TextSpan(context.Scanner.Buffer, start, end - start)); + + 
context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/Not.cs b/src/Parlot/Fluent/Not.cs index eb141640..eaa7996b 100644 --- a/src/Parlot/Fluent/Not.cs +++ b/src/Parlot/Fluent/Not.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq.Expressions; @@ -11,6 +11,8 @@ public sealed class Not : Parser, ICompilable public Not(Parser parser) { _parser = parser ?? throw new ArgumentNullException(nameof(parser)); + + Name = $"Not ({parser.Name}"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -21,10 +23,13 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (!_parser.Parse(context, ref result)) { + context.ExitParser(this); return true; } context.Scanner.Cursor.ResetPosition(start); + + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/NumberLiteral.cs b/src/Parlot/Fluent/NumberLiteral.cs index 2c9f8df5..9791dc06 100644 --- a/src/Parlot/Fluent/NumberLiteral.cs +++ b/src/Parlot/Fluent/NumberLiteral.cs @@ -1,4 +1,4 @@ -#if NET8_0_OR_GREATER +#if NET8_0_OR_GREATER using Parlot.Compilation; using Parlot.Rewriting; using System; @@ -69,6 +69,8 @@ public NumberLiteral(NumberOptions numberOptions = NumberOptions.Number, char de } // Exponent can't be a starting char + + Name = "NumberLiteral"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -85,12 +87,15 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (T.TryParse(number, _numberStyles, _culture, out var value)) { result.Set(start, end, value); + + context.ExitParser(this); return true; } } context.Scanner.Cursor.ResetPosition(reset); + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/NumberLiteralBase.cs b/src/Parlot/Fluent/NumberLiteralBase.cs index 00ec3cdd..4301db26 100644 --- a/src/Parlot/Fluent/NumberLiteralBase.cs +++ b/src/Parlot/Fluent/NumberLiteralBase.cs @@ -1,4 +1,5 @@ -using 
Parlot.Compilation; +using Parlot.Compilation; +using Parlot.Rewriting; using System; using System.Globalization; using System.Linq.Expressions; @@ -11,7 +12,7 @@ namespace Parlot.Fluent; /// This class is used as a base class for custom number parsers which don't implement INumber after .NET 7.0. /// /// -public abstract class NumberLiteralBase : Parser, ICompilable +public abstract class NumberLiteralBase : Parser, ICompilable, ISeekable { private static readonly MethodInfo _defaultTryParseMethodInfo = typeof(T).GetMethod("TryParse", [typeof(string), typeof(NumberStyles), typeof(IFormatProvider), typeof(T).MakeByRefType()])!; @@ -25,6 +26,12 @@ public abstract class NumberLiteralBase : Parser, ICompilable private readonly bool _allowGroupSeparator; private readonly bool _allowExponent; + public bool CanSeek => true; + + public char[] ExpectedChars { get; set; } = []; + + public bool SkipWhitespace => false; + public abstract bool TryParseNumber(ReadOnlySpan s, NumberStyles style, IFormatProvider provider, out T value); public NumberLiteralBase(NumberOptions numberOptions = NumberOptions.Number, char decimalSeparator = NumberLiterals.DefaultDecimalSeparator, char groupSeparator = NumberLiterals.DefaultGroupSeparator, MethodInfo? 
tryParseMethodInfo = null) @@ -46,6 +53,27 @@ public NumberLiteralBase(NumberOptions numberOptions = NumberOptions.Number, cha _allowDecimalSeparator = (numberOptions & NumberOptions.AllowDecimalSeparator) != 0; _allowGroupSeparator = (numberOptions & NumberOptions.AllowGroupSeparators) != 0; _allowExponent = (numberOptions & NumberOptions.AllowExponent) != 0; + + var expectedChars = "0123456789"; + + if (_allowLeadingSign) + { + expectedChars += "+-"; + } + + if (_allowDecimalSeparator) + { + expectedChars += _decimalSeparator; + } + + if (_allowExponent) + { + expectedChars += "eE"; + } + + ExpectedChars = expectedChars.ToCharArray(); + + Name = "NumberLiteral"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -62,12 +90,15 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (TryParseNumber(number, _numberStyles, _culture, out T value)) { result.Set(start, end, value); + + context.ExitParser(this); return true; } } context.Scanner.Cursor.ResetPosition(reset); + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/OneOf.ABT.cs b/src/Parlot/Fluent/OneOf.ABT.cs index 04356bde..951022d2 100644 --- a/src/Parlot/Fluent/OneOf.ABT.cs +++ b/src/Parlot/Fluent/OneOf.ABT.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq.Expressions; @@ -15,6 +15,8 @@ public OneOf(Parser parserA, Parser parserB) { _parserA = parserA ?? throw new ArgumentNullException(nameof(parserA)); _parserB = parserB ?? 
throw new ArgumentNullException(nameof(parserB)); + + Name = $"OneOf ({parserA.Name}, {parserB.Name})"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -27,6 +29,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) { result.Set(resultA.Start, resultA.End, resultA.Value); + context.ExitParser(this); return true; } @@ -36,9 +39,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) { result.Set(resultB.Start, resultB.End, resultB.Value); + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/OneOf.cs b/src/Parlot/Fluent/OneOf.cs index 9e6f007a..08fec929 100644 --- a/src/Parlot/Fluent/OneOf.cs +++ b/src/Parlot/Fluent/OneOf.cs @@ -14,67 +14,118 @@ namespace Parlot.Fluent; /// public sealed class OneOf : Parser, ICompilable, ISeekable { - private readonly Parser[] _parsers; + // Used as a lookup for OneOf to find other OneOf parsers that could + // be invoked when there is no match. + + public const char OtherSeekableChar = '\0'; internal readonly CharMap>>? _map; + internal readonly List>? _otherParsers; + + private readonly CharMap>> _lambdaMap = new(); + private Func>? _lambdaOtherParsers; public OneOf(Parser[] parsers) { - _parsers = parsers ?? throw new ArgumentNullException(nameof(parsers)); + Parsers = parsers ?? throw new ArgumentNullException(nameof(parsers)); // We can't build a lookup table if there is only one parser - if (_parsers.Length <= 1) + if (Parsers.Count <= 1) { return; } - // If all parsers are seekable we can build a lookup table - if (_parsers.All(x => x is ISeekable seekable && seekable.CanSeek)) - { - var lookupTable = new Dictionary>>(); + // Technically we shouldn't be able to build a lookup table if not all parsers are seekable. + // For instance, "or" | identifier is not seekable since 'identifier' is not seekable. + // Also the order of the parsers need to be conserved in the lookup table results. 
+ // The solution is to add the parsers that are not seekable in all the groups, and + // keep the relative order of each. So if the parsers p1, p2 ('a'), p3, p4 ('b') are available, + // the group 'a' will have p1, p2, p3 and the group 'b' will have p1, p3, p4. + // And then when we parse, we need to try the non-seekable parsers when the _map doesn't have the character to look up. - foreach (var parser in _parsers) - { - var expectedChars = (parser as ISeekable)!.ExpectedChars; + // NB: We could extract the lookup logic as a separate parser and then have each lookup group be a OneOf parser. - foreach (var c in expectedChars) + if (Parsers.Any(x => x is ISeekable seekable)) + { + var lookupTable = Parsers + .Where(p => p is ISeekable seekable && seekable.CanSeek) + .Cast() + .SelectMany(s => s.ExpectedChars.Select(x => (Key: x, Parser: (Parser)s))) + .GroupBy(s => s.Key) + .ToDictionary(group => group.Key, group => new List>()); + + foreach (var parser in Parsers) + { + if (parser is ISeekable seekable && seekable.CanSeek) { - if (!lookupTable.TryGetValue(c, out var list)) + // If the parser is a Seekable wrapper, skip it and add the concrete parser directly + var decoratedParser = (parser as Seekable)?.Parser ??
parser; + + foreach (var c in seekable.ExpectedChars) { - list = new List>(); - lookupTable[c] = list; + if (c != OtherSeekableChar) + { + lookupTable[c].Add(decoratedParser); + } + else + { + _otherParsers ??= []; + _otherParsers.Add(decoratedParser); + } } - - list.Add(parser); } - } - - if (lookupTable.Count <= 1) - { - // If all parsers have the same first char, no need to use a lookup table + else + { + _otherParsers ??= []; + _otherParsers.Add(parser); - ExpectedChars = lookupTable!.Keys.ToArray(); - CanSeek = true; - lookupTable = null; + foreach (var entry in lookupTable) + { + entry.Value.Add(parser); + } + } } - if (_parsers.All(x => x is ISeekable seekable && seekable.SkipWhitespace)) + // If only some parsers use SkipWhiteSpace, we can't use a lookup table + // Meaning all or none is fine, but not just some + if (Parsers.All(x => x is ISeekable seekable && seekable.SkipWhitespace)) { - // All parsers can start with white spaces SkipWhitespace = true; - } - if (_parsers.Any(x => x is ISeekable seekable && seekable.SkipWhitespace)) + // Remove the SkipWhiteSpace parser if we can + Parsers = Parsers.Select(x => x is SkipWhiteSpace skip ? skip.Parser : x).ToArray(); + } + else if (Parsers.Any(x => x is ISeekable seekable && seekable.SkipWhitespace)) { - // If not all parsers accept a white space, we can't use a lookup table since the order matters lookupTable = null; } - if (lookupTable != null) + lookupTable?.Remove(OtherSeekableChar); + var expectedChars = string.Join(",", lookupTable?.Keys.ToArray() ?? []); + + Name = $"OneOf ({string.Join(",", Parsers.Select(x => x.Name))}) on '{expectedChars}'"; + if (lookupTable != null && lookupTable.Count > 0) { - CanSeek = true; _map = new CharMap>>(lookupTable); - ExpectedChars = _map.ExpectedChars.ToArray(); + + // This parser is only seekable if there isn't a parser + // that can't be reached without a lookup.
+ // However we use a trick to match other parsers + // by assigning them to `OtherSeekableChar` such that + // we can pass on this collection through parsers + // that forward the ISeekable implementation (e.g. Error, Then) + // This way we make more OneOf parsers seekable. + + if (_otherParsers == null) + { + CanSeek = true; + ExpectedChars = _map.ExpectedChars; + } + else + { + CanSeek = true; + ExpectedChars = [.. _map.ExpectedChars, OtherSeekableChar]; + _map.Set(OtherSeekableChar, _otherParsers); + } } } } @@ -85,7 +136,7 @@ public OneOf(Parser[] parsers) public bool SkipWhitespace { get; } - public Parser[] Parsers => _parsers; + public IReadOnlyList> Parsers { get; } public override bool Parse(ParseContext context, ref ParseResult result) { @@ -93,63 +144,55 @@ public override bool Parse(ParseContext context, ref ParseResult result) var cursor = context.Scanner.Cursor; - if (_map != null) - { - if (SkipWhitespace) - { - var start = context.Scanner.Cursor.Position; - - context.SkipWhiteSpace(); + var start = context.Scanner.Cursor.Position; - var seekableParsers = _map[cursor.Current]; - - if (seekableParsers != null) - { - var length = seekableParsers!.Count; + if (SkipWhitespace) + { + context.SkipWhiteSpace(); + } - for (var i = 0; i < length; i++) - { - if (seekableParsers[i].Parse(context, ref result)) - { - return true; - } - } - } + if (_map != null) + { + var seekableParsers = _map[cursor.Current] ?? 
_otherParsers; - context.Scanner.Cursor.ResetPosition(start); - } - else + if (seekableParsers != null) { - var seekableParsers = _map[cursor.Current]; + var length = seekableParsers.Count; - if (seekableParsers != null) + for (var i = 0; i < length; i++) { - var length = seekableParsers!.Count; - - for (var i = 0; i < length; i++) + if (seekableParsers[i].Parse(context, ref result)) { - if (seekableParsers[i].Parse(context, ref result)) - { - return true; - } + context.ExitParser(this); + return true; } } } } else { - var parsers = _parsers; - var length = parsers.Length; + var parsers = Parsers; + var length = parsers.Count; for (var i = 0; i < length; i++) { if (parsers[i].Parse(context, ref result)) { + context.ExitParser(this); return true; } } } + // We only need to reset the position if we are skipping whitespaces + // as the parsers would have reverted their own state + + if (SkipWhitespace) + { + context.Scanner.Cursor.ResetPosition(start); + } + + context.ExitParser(this); return false; } @@ -157,182 +200,244 @@ public CompilationResult Compile(CompilationContext context) { var result = context.CreateCompilationResult(); - Expression block = Expression.Empty(); + // var reset = context.Scanner.Cursor.Position; - //if (_map != null) - // For now don't use lookup maps for compiled code as there is no fast option in that case. + ParameterExpression? reset = null; - if (false) + if (SkipWhitespace) { - // Lookup table is converted to a switch expression + reset = context.DeclarePositionVariable(result); + result.Body.Add(context.ParserSkipWhiteSpace()); + } - // switch (Cursor.Current) - // { - // case 'a' : - // parse1 instructions - // - // if (parser1.Success) - // { - // success = true; - // value = parse1.Value; - // } - // - // break; // implicit in SwitchCase expression - // - // case 'b' : - // ... 
- // } + Expression block = Expression.Empty(); -#pragma warning disable CS0162 // Unreachable code detected - var cases = _map.ExpectedChars.Select(key => - { - Expression group = Expression.Empty(); + if (_map != null) + { + // Switch is too slow, even for 2 elements compared to CharMap + // Expression are also not optimized like the compiler can do. - var parsers = _map[key]; + // UseSwitch(); + UseLookup(); - // The list is reversed since the parsers are unwrapped - foreach (var parser in parsers!.ToArray().Reverse()) + void UseLookup() + { + //var seekableParsers = _map[cursor.Current] ?? _otherParsers; + + //if (seekableParsers != null) + //{ + // var length = seekableParsers.Count; + + // for (var i = 0; i < length; i++) + // { + // if (seekableParsers[i].Parse(context, ref result)) + // { + // context.ExitParser(this); + // return true; + // } + // } + //} + + var charMapSetMethodInfo = typeof(CharMap>>).GetMethod("Set")!; + var nullSeekableParser = Expression.Constant(null, typeof(Func>)); + + foreach (var key in _map!.ExpectedChars) { - var groupResult = parser.Build(context); + Expression group = Expression.Empty(); - group = Expression.Block( - groupResult.Variables, - Expression.Block(groupResult.Body), - Expression.IfThenElse( - groupResult.Success, - Expression.Block( - Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))), - context.DiscardResult - ? 
Expression.Empty() - : Expression.Assign(result.Value, groupResult.Value) - ), - group - ) + var parsers = _map[key]; + var lambdaSuccess = Expression.Variable(typeof(bool), $"successL{context.NextNumber}"); + var lambdaResult = Expression.Variable(typeof(T), $"resultL{context.NextNumber}"); + + // The list is reversed since the parsers are unwrapped + foreach (var parser in parsers!.ToArray().Reverse()) + { + var groupResult = parser.Build(context); + + // lambdaSuccess and lambdaResult will be registered at the top of the method + + group = Expression.Block( + groupResult.Variables, + Expression.Block(groupResult.Body), + Expression.IfThenElse( + groupResult.Success, + Expression.Block( + Expression.Assign(lambdaSuccess, Expression.Constant(true, typeof(bool))), + context.DiscardResult + ? Expression.Empty() + : Expression.Assign(lambdaResult, groupResult.Value) + ), + group + ) + ); + } + + var resultExpression = Expression.Variable(typeof(ValueTuple), $"result{context.NextNumber}"); + var returnTarget = Expression.Label(typeof(ValueTuple)); + var returnExpression = Expression.Return(returnTarget, resultExpression, typeof(ValueTuple)); + var returnLabel = Expression.Label(returnTarget, defaultValue: Expression.New(typeof(ValueTuple))); + + var groupBlock = (BlockExpression)group; + + var lambda = Expression.Lambda>>( + body: Expression.Block( + type: typeof(ValueTuple), + variables: groupBlock.Variables + .Append(resultExpression) + .Append(lambdaSuccess) + .Append(lambdaResult), + group, + Expression.Assign( + resultExpression, + Expression.New( + typeof(ValueTuple).GetConstructor([typeof(bool), typeof(T)])!, + lambdaSuccess, + context.DiscardResult ? 
+ Expression.Constant(default(T), typeof(T)) : + lambdaResult) + ), + returnExpression, + returnLabel), + name: $"_map_{key}_{context.NextNumber}", + parameters: [context.ParseContext] // Only the name is used, so it will match the ones inside each compiler ); - } - return (Key: (uint)key, Body: group); + context.Lambdas.Add(lambda); - }).ToArray(); -#pragma warning restore CS0162 // Unreachable code detected + if (key == OtherSeekableChar) + { + _lambdaOtherParsers = lambda.Compile(); + } + else + { + _lambdaMap.Set(key, lambda.Compile()); + } + } - // Creating the switch expression if we need it - SwitchExpression switchExpr = - Expression.Switch( - Expression.Convert(context.Current(), typeof(uint)), - Expression.Empty(), // no match => success = false - cases.Select(c => Expression.SwitchCase( - c.Body, - Expression.Constant(c.Key) - )).ToArray() - ); + var seekableParsers = result.DeclareVariable>>($"seekableParser{context.NextNumber}", nullSeekableParser); - // Implement binary tree comparison - // Still slow with a few elements + var tupleResult = result.DeclareVariable>($"tupleResult{context.NextNumber}"); - var current = Expression.Variable(typeof(uint), $"current{context.NextNumber}"); - var binarySwitch = Expression.Block( - [current], - Expression.Assign(current, Expression.Convert(context.Current(), typeof(uint))), - BinarySwitch(current, cases) - ); + result.Body.Add( + Expression.Block( + // seekableParser = mapValue[key]; + Expression.Assign(seekableParsers, Expression.Call(Expression.Constant(_lambdaMap), CharMap>>.IndexerMethodInfo, Expression.Convert(context.Current(), typeof(uint)))), + // seekableParser ??= _otherParsers) + Expression.IfThen( + Expression.Equal( + nullSeekableParser, + seekableParsers + ), + Expression.Assign(seekableParsers, Expression.Constant(_lambdaOtherParsers, typeof(Func>))) + ), + Expression.IfThen( + Expression.NotEqual( + nullSeekableParser, + seekableParsers + ), + Expression.Block( + Expression.Assign(tupleResult, 
Expression.Invoke(seekableParsers, context.ParseContext)), + Expression.Assign(result.Success, Expression.Field(tupleResult, "Item1")), + context.DiscardResult + ? Expression.Empty() + : Expression.Assign(result.Value, Expression.Field(tupleResult, "Item2")) + ) + ) + ) + ); + } - static Expression BinarySwitch(Expression num, (uint Key, Expression Body)[] cases) +#pragma warning disable CS8321 // Local function is declared but never used + void UseSwitch() { - if (cases.Length > 3) + // Lookup table is converted to a switch expression + + // switch (Cursor.Current) + // { + // case 'a' : + // parse1 instructions + // + // if (parser1.Success) + // { + // success = true; + // value = parse1.Value; + // } + // + // break; // implicit in SwitchCase expression + // + // case 'b' : + // ... + // } + + var cases = _map!.ExpectedChars.Where(x => x != OtherSeekableChar).Select(key => { - // Split comparison in two recursive comparisons for each part of the cases - - var lowerCount = (int)Math.Round((double)cases.Length / 2, MidpointRounding.ToEven); - var lowerValues = cases.Take(lowerCount).ToArray(); - var higherValues = cases.Skip(lowerCount).ToArray(); - - return Expression.IfThenElse( - Expression.LessThanOrEqual(num, Expression.Constant(lowerValues[^1].Key)), - // This value or lower - BinarySwitch(num, lowerValues), - // Higher values - BinarySwitch(num, higherValues) - ); - } - else if (cases.Length == 1) - { - return Expression.IfThen( - Expression.Equal(Expression.Constant(cases[0].Key), num), - cases[0].Body - ); - } - else if (cases.Length == 2) - { - return Expression.IfThenElse( - Expression.NotEqual(Expression.Constant(cases[0].Key), num), - Expression.IfThen( - Expression.Equal(Expression.Constant(cases[1].Key), num), - cases[1].Body), - cases[0].Body); - } - else // cases.Length == 3 - { - return Expression.IfThenElse( - Expression.NotEqual(Expression.Constant(cases[0].Key), num), - Expression.IfThenElse( - 
Expression.NotEqual(Expression.Constant(cases[1].Key), num), - Expression.IfThen( - Expression.Equal(Expression.Constant(cases[2].Key), num), - cases[2].Body), - cases[1].Body), - cases[0].Body); - } - } + Expression group = Expression.Empty(); - // Implement lookup - // Doesn't work since each method can update the main state with the result (closure issue?) + var parsers = _map[key]; - var table = Expression.Variable(typeof(CharMap), $"table{context.NextNumber}"); + // The list is reversed since the parsers are unwrapped + foreach (var parser in parsers!.ToArray().Reverse()) + { + var groupResult = parser.Build(context); + + group = Expression.Block( + groupResult.Variables, + Expression.Block(groupResult.Body), + Expression.IfThenElse( + groupResult.Success, + Expression.Block( + Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))), + context.DiscardResult + ? Expression.Empty() + : Expression.Assign(result.Value, groupResult.Value) + ), + group + ) + ); + } - var indexerMethodInfo = typeof(CharMap).GetMethod("get_Item", [typeof(uint)])!; + return (Key: (uint)key, Body: group); - context.GlobalVariables.Add(table); - var action = result.DeclareVariable($"action{context.NextNumber}"); + }).ToArray(); - var lookupBlock = Expression.Block( - [current, action, table], - [ - Expression.Assign(current, Expression.Convert(context.Current(), typeof(uint))), - // Initialize lookup table once - Expression.IfThen( - Expression.Equal(Expression.Constant(null, typeof(object)), table), - Expression.Block([ - Expression.Assign(table, ExpressionHelper.New>()), - ..cases.Select(c => Expression.Call(table, typeof(CharMap).GetMethod("Set", [typeof(char), typeof(Action)])!, [Expression.Convert(Expression.Constant(c.Key), typeof(char)), Expression.Lambda(c.Body)])) - ] - ) - ), - Expression.Assign(action, Expression.Call(table, indexerMethodInfo, [current])), - Expression.IfThen( - Expression.NotEqual(Expression.Constant(null), action), - 
//ExpressionHelper.ThrowObject(context, current) - Expression.Invoke(action) - ) - ] - ); + // Construct default case body + Expression defaultBody = Expression.Empty(); - if (SkipWhitespace) - { - var start = context.DeclarePositionVariable(result); + if (_otherParsers != null) + { + foreach (var parser in _otherParsers.ToArray().Reverse()) + { + var defaultCompileResult = parser.Build(context); + + defaultBody = Expression.Block( + defaultCompileResult.Variables, + Expression.Block(defaultCompileResult.Body), + Expression.IfThenElse( + defaultCompileResult.Success, + Expression.Block( + Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))), + context.DiscardResult + ? Expression.Empty() + : Expression.Assign(result.Value, defaultCompileResult.Value) + ), + defaultBody + ) + ); + } + } - block = Expression.Block( - context.ParserSkipWhiteSpace(), - binarySwitch, - Expression.IfThen( - Expression.IsFalse(result.Success), - context.ResetPosition(start)) + block = + Expression.Switch( + Expression.Convert(context.Current(), typeof(uint)), + defaultBody, + cases.Select(c => Expression.SwitchCase( + c.Body, + Expression.Constant(c.Key) + )).ToArray() ); } - else - { - block = binarySwitch; - } +#pragma warning restore CS8321 // Local function is declared but never used + } else { @@ -356,7 +461,7 @@ static Expression BinarySwitch(Expression num, (uint Key, Expression Body)[] cas // ... 
// } - foreach (var parser in _parsers.Reverse()) + foreach (var parser in Parsers.Reverse()) { var parserCompileResult = parser.Build(context); @@ -379,6 +484,22 @@ static Expression BinarySwitch(Expression num, (uint Key, Expression Body)[] cas result.Body.Add(block); + // [if skipwhitespace] + // if (!success) + // { + // context.Scanner.Cursor.ResetPosition(begin); + // } + + if (reset != null) + { + result.Body.Add( + Expression.IfThen( + Expression.Not(result.Success), + context.ResetPosition(reset) + ) + ); + } + return result; } } diff --git a/src/Parlot/Fluent/OneOrMany.cs b/src/Parlot/Fluent/OneOrMany.cs index df5a8b95..66db6b00 100644 --- a/src/Parlot/Fluent/OneOrMany.cs +++ b/src/Parlot/Fluent/OneOrMany.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Collections.Generic; @@ -22,6 +22,8 @@ public OneOrMany(Parser parser) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"OneOrMany({parser.Name})"; } public bool CanSeek { get; } @@ -54,12 +56,15 @@ public override bool Parse(ParseContext context, ref ParseResult>(false, Expression.New(typeof(List))); + var result = context.CreateCompilationResult>(); + var results = result.DeclareVariable>($"results{context.NextNumber}", Expression.New(typeof(List))); // value = new List(); // @@ -69,7 +74,7 @@ public CompilationResult Compile(CompilationContext context) // // if (parser1.Success) // { - // results.Add(parse1.Value); + // value.Add(parse1.Value); // } // else // { @@ -85,12 +90,13 @@ public CompilationResult Compile(CompilationContext context) // if (value.Count > 0) // { // success = true; + // result = value; // } // var parserCompileResult = _parser.Build(context); - var breakLabel = Expression.Label("break"); + var breakLabel = Expression.Label($"exitWhile{context.NextNumber}"); var block = Expression.Block( parserCompileResult.Variables, @@ -100,9 +106,9 @@ public 
CompilationResult Compile(CompilationContext context) Expression.IfThenElse( parserCompileResult.Success, Expression.Block( - context.DiscardResult - ? Expression.Empty() - : Expression.Call(result.Value, _listAddMethodInfo, parserCompileResult.Value), + context.DiscardResult ? + Expression.Empty() : + Expression.Call(results, _listAddMethodInfo, parserCompileResult.Value), Expression.Assign(result.Success, Expression.Constant(true)) ), Expression.Break(breakLabel) @@ -111,7 +117,10 @@ public CompilationResult Compile(CompilationContext context) context.Eof(), Expression.Break(breakLabel) )), - breakLabel) + breakLabel), + context.DiscardResult ? + Expression.Empty() : + Expression.Assign(result.Value, results) ); result.Body.Add(block); diff --git a/src/Parlot/Fluent/ParseContext.cs b/src/Parlot/Fluent/ParseContext.cs index 949eec1b..a56b8251 100644 --- a/src/Parlot/Fluent/ParseContext.cs +++ b/src/Parlot/Fluent/ParseContext.cs @@ -38,6 +38,11 @@ public ParseContext(Scanner scanner, bool useNewLines = false) /// public Action? OnEnterParser { get; set; } + /// + /// Delegate that is executed whenever a parser is left. + /// + public Action? OnExitParser { get; set; } + /// /// The parser that is used to parse whitespaces and comments. /// @@ -78,10 +83,18 @@ public void SkipWhiteSpace() } /// - /// Called whenever a parser is invoked. Will be used to detect invalid states and infinite loops. + /// Called whenever a parser is invoked. /// public void EnterParser(Parser parser) { OnEnterParser?.Invoke(parser, this); } + + /// + /// Called whenever a parser exits. 
+ /// + public void ExitParser(Parser parser) + { + OnExitParser?.Invoke(parser, this); + } } diff --git a/src/Parlot/Fluent/Parser.Compile.cs b/src/Parlot/Fluent/Parser.Compile.cs index bc16cc51..f49eb4c2 100644 --- a/src/Parlot/Fluent/Parser.Compile.cs +++ b/src/Parlot/Fluent/Parser.Compile.cs @@ -1,4 +1,4 @@ -using FastExpressionCompiler; +using FastExpressionCompiler; using Parlot.Compilation; using System; using System.Collections.Generic; @@ -17,50 +17,65 @@ public abstract partial class Parser /// A compiled parser. public Parser Compile() { - if (this is ICompiledParser) + lock (this) { - return this; - } + if (this is ICompiledParser) + { + return this; + } + + var compilationContext = new CompilationContext(); - var compilationContext = new CompilationContext(); + var compilationResult = Build(compilationContext); - var compilationResult = Build(compilationContext); + // return value; - // return value; + var resultExpression = Expression.Variable(typeof(ValueTuple), $"result{compilationContext.NextNumber}"); + var returnTarget = Expression.Label(typeof(ValueTuple)); + var returnExpression = Expression.Return(returnTarget, resultExpression, typeof(ValueTuple)); + var returnLabel = Expression.Label(returnTarget, defaultValue: Expression.New(typeof(ValueTuple))); - var returnLabelTarget = Expression.Label(typeof(ValueTuple)); - var returnLabelExpression = Expression.Label(returnLabelTarget, Expression.New(_valueTupleConstructor, compilationResult.Success, compilationResult.Value)); + compilationResult.Variables.Add(resultExpression); + compilationResult.Body.Add( + Expression.Block( + Expression.Assign(resultExpression, Expression.New(_valueTupleConstructor, compilationResult.Success, compilationResult.Value)), + returnExpression, + returnLabel + ) + ); - compilationResult.Body.Add(returnLabelExpression); + // global variables; - // global variables; + // parser variables; - // parser variables; + var allVariables = new List(); + 
allVariables.AddRange(compilationContext.GlobalVariables); + allVariables.AddRange(compilationResult.Variables); - var allVariables = new List(); - allVariables.AddRange(compilationContext.GlobalVariables); - allVariables.AddRange(compilationResult.Variables); + // global statements; - // global statements; + // parser statements; - // parser statements; + var allExpressions = new List(); + allExpressions.AddRange(compilationContext.GlobalExpressions); + allExpressions.AddRange(compilationResult.Body); - var allExpressions = new List(); - allExpressions.AddRange(compilationContext.GlobalExpressions); - allExpressions.AddRange(compilationResult.Body); + var body = Expression.Block( + typeof(ValueTuple), + allVariables, + allExpressions + ); - var body = Expression.Block( - typeof(ValueTuple), - allVariables, - allExpressions - ); + var result = Expression.Lambda>>(body, compilationContext.ParseContext); - var result = Expression.Lambda>>(body, compilationContext.ParseContext); + // In Debug mode, inspected the generated code with + // result.ToCSharpString(); - var parser = result.CompileFast(); + var parser = result.CompileFast(); - // parser is a Func, so we use CompiledParser to encapsulate it in a Parser - return new CompiledParser(parser, this); + // parser is a Func, so we use CompiledParser to encapsulate it in a Parser + return new CompiledParser(parser, this); + } } /// diff --git a/src/Parlot/Fluent/Parser.TryParse.cs b/src/Parlot/Fluent/Parser.TryParse.cs index 4a83705d..adb6ad25 100644 --- a/src/Parlot/Fluent/Parser.TryParse.cs +++ b/src/Parlot/Fluent/Parser.TryParse.cs @@ -1,4 +1,4 @@ -using System.Threading; +using System.Threading; namespace Parlot.Fluent; @@ -7,6 +7,8 @@ public abstract partial class Parser private int _invocations; private volatile Parser? _compiledParser; + public string? Name { get; set; } + public T? 
Parse(string text) { var context = new ParseContext(new Scanner(text)); @@ -39,7 +41,9 @@ private Parser CheckCompiled(ParseContext context) // Any other concurrent call here will return 'this'. This prevents multiple compilations of // the same parser, and a lock. - if (Interlocked.Increment(ref _invocations) == context.CompilationThreshold) + if (context.CompilationThreshold > 0 && + _invocations < context.CompilationThreshold && + Interlocked.Increment(ref _invocations) == context.CompilationThreshold) { return _compiledParser = this.Compile(); } @@ -85,4 +89,6 @@ public bool TryParse(ParseContext context, out T value, out ParseError? error) value = default!; return false; } + + public override string ToString() => $"{Name ?? GetType().Name}"; } diff --git a/src/Parlot/Fluent/Parser.cs b/src/Parlot/Fluent/Parser.cs index 95ee2f40..c954734f 100644 --- a/src/Parlot/Fluent/Parser.cs +++ b/src/Parlot/Fluent/Parser.cs @@ -1,4 +1,6 @@ -using System; +using Parlot.Rewriting; +using System; +using System.Linq; namespace Parlot.Fluent; @@ -51,6 +53,15 @@ public abstract partial class Parser /// public Parser Error(string message) => new Error(this, message); + /// + /// Names a parser. + /// + public Parser Named(string name) + { + this.Name = name; + return this; + } + /// /// Builds a parser that verifies the previous parser result matches a predicate. /// @@ -80,4 +91,14 @@ public abstract partial class Parser /// Builds a parser that returns a default value if the previous parser fails. /// public Parser Else(T value) => new Else(this, value); + + /// + /// Builds a parser that lists all possible matches to improve performance. + /// + public Parser Lookup(params ReadOnlySpan expectedChars) => new Seekable(this, expectedChars); + + /// + /// Builds a parser that lists all possible matches to improve performance. 
+ /// + public Parser Lookup(params ISeekable[] parsers) => new Seekable(this, parsers.SelectMany(x => x.ExpectedChars).ToArray()); } diff --git a/src/Parlot/Fluent/Parsers.cs b/src/Parlot/Fluent/Parsers.cs index 8a1d22f1..e10d793f 100644 --- a/src/Parlot/Fluent/Parsers.cs +++ b/src/Parlot/Fluent/Parsers.cs @@ -208,7 +208,15 @@ public Parser Identifier(Func? extraStart = null, FuncThe set of char to match. /// The minimum number of matches required. Defaults to 1. /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. - public Parser AnyOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => AnyOf(SearchValues.Create(values), minSize, maxSize); + public Parser AnyOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => new SearchValuesCharLiteral(values, minSize, maxSize); +#else + /// + /// Builds a parser that matches a list of chars. + /// + /// The set of char to match. + /// The minimum number of matches required. Defaults to 1. + /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. + public Parser AnyOf(string values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize); #endif } @@ -294,5 +302,13 @@ public Parser Number(NumberOptions numberOptions = NumberOptions.Number, c /// The minimum number of matches required. Defaults to 1. /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. public Parser AnyOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => AnyOf(SearchValues.Create(values), minSize, maxSize); +#else + /// + /// Builds a parser that matches a list of chars. + /// + /// The set of char to match. + /// The minimum number of matches required. Defaults to 1. + /// When the parser reaches the maximum number of matches it returns . Defaults to 0, i.e. no maximum size. 
+ public Parser AnyOf(string values, int minSize = 1, int maxSize = 0) => Parsers.SkipWhiteSpace(new ListOfChars(values, minSize, maxSize)); #endif } diff --git a/src/Parlot/Fluent/PatternLiteral.cs b/src/Parlot/Fluent/PatternLiteral.cs index 7ae1708f..344a05c4 100644 --- a/src/Parlot/Fluent/PatternLiteral.cs +++ b/src/Parlot/Fluent/PatternLiteral.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq.Expressions; @@ -15,6 +15,8 @@ public PatternLiteral(Func predicate, int minSize = 1, int maxSize = _predicate = predicate ?? throw new ArgumentNullException(nameof(predicate)); _minSize = minSize; _maxSize = maxSize; + + Name = "PatternLiteral"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -23,6 +25,7 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (context.Scanner.Cursor.Eof || !_predicate(context.Scanner.Cursor.Current)) { + context.ExitParser(this); return false; } @@ -43,12 +46,14 @@ public override bool Parse(ParseContext context, ref ParseResult resul var end = context.Scanner.Cursor.Offset; result.Set(start, end, new TextSpan(context.Scanner.Buffer, start, end - start)); + context.ExitParser(this); return true; } // When the size constraint has not been met the parser may still have advanced the cursor. 
context.Scanner.Cursor.ResetPosition(startPosition); + context.ExitParser(this); return false; } @@ -93,7 +98,7 @@ public CompilationResult Compile(CompilationContext context) // #endif // } - var breakLabel = Expression.Label("break"); + var breakLabel = Expression.Label($"break{context.NextNumber}"); result.Body.Add( Expression.Loop( diff --git a/src/Parlot/Fluent/SearchValuesCharLiteral.cs b/src/Parlot/Fluent/SearchValuesCharLiteral.cs index 3faa0329..46081850 100644 --- a/src/Parlot/Fluent/SearchValuesCharLiteral.cs +++ b/src/Parlot/Fluent/SearchValuesCharLiteral.cs @@ -1,20 +1,39 @@ #if NET8_0_OR_GREATER +using Parlot.Rewriting; using System; using System.Buffers; namespace Parlot.Fluent; -internal sealed class SearchValuesCharLiteral : Parser +internal sealed class SearchValuesCharLiteral : Parser, ISeekable { private readonly SearchValues _searchValues; private readonly int _minSize; private readonly int _maxSize; + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } + public SearchValuesCharLiteral(SearchValues searchValues, int minSize = 1, int maxSize = 0) { _searchValues = searchValues ?? 
throw new ArgumentNullException(nameof(searchValues)); _minSize = minSize; _maxSize = maxSize; + Name = $"AnyOf({searchValues})"; + } + + public SearchValuesCharLiteral(ReadOnlySpan searchValues, int minSize = 1, int maxSize = 0) + { + _searchValues = SearchValues.Create(searchValues); + _minSize = minSize; + _maxSize = maxSize; + + CanSeek = true; + ExpectedChars = searchValues.ToArray(); + Name = $"AnyOf('{searchValues}')"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -23,30 +42,44 @@ public override bool Parse(ParseContext context, ref ParseResult resul var span = context.Scanner.Cursor.Span; + if (_minSize > span.Length) + { + return false; + } + // First char not matching the searched values var index = span.IndexOfAnyExcept(_searchValues); + var size = 0; + if (index != -1) { // Too small? if (index == 0 || index < _minSize) { + context.ExitParser(this); return false; } - // Too large? - if (_maxSize > 0 && index > _maxSize) - { - return false; - } + size = index; + } + else + { + // If index == -1 the whole input is a match + size = span.Length; } - // If index == -1 the whole input is a match - var size = index == -1 ? span.Length : index; + // Too large? Take only the request size + if (_maxSize > 0 && size > _maxSize) + { + size = _maxSize; + } var start = context.Scanner.Cursor.Position.Offset; context.Scanner.Cursor.Advance(size); result.Set(start, start + size, new TextSpan(context.Scanner.Buffer, start, size)); + + context.ExitParser(this); return true; } } diff --git a/src/Parlot/Fluent/Seekable.cs b/src/Parlot/Fluent/Seekable.cs new file mode 100644 index 00000000..aa867845 --- /dev/null +++ b/src/Parlot/Fluent/Seekable.cs @@ -0,0 +1,63 @@ +using Parlot.Compilation; +using Parlot.Rewriting; +using System; +using System.Linq.Expressions; + +namespace Parlot.Fluent; + +/// +/// Wraps an existing parser as an implementation by provide the seekable properties. 
+/// +internal sealed class Seekable : Parser, ISeekable +{ + public bool CanSeek { get; } + + public char[] ExpectedChars { get; set; } + + public bool SkipWhitespace { get; } + + public Parser Parser { get; } + + public Seekable(Parser parser, params ReadOnlySpan expectedChars) + { + Parser = parser ?? throw new ArgumentNullException(nameof(parser)); + ExpectedChars = expectedChars.ToArray(); + SkipWhitespace = parser is SkipWhiteSpace; + + if (Parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } + + Name = $"{parser.Name} (Seekable)"; + } + + public override bool Parse(ParseContext context, ref ParseResult result) + { + context.EnterParser(this); + + var success = Parser.Parse(context, ref result); + + context.ExitParser(this); + return success; + } + + public CompilationResult Compile(CompilationContext context) + { + var result = context.CreateCompilationResult(true); + + var parserCompileResult = Parser.Build(context, requireResult: true); + + var block = Expression.Block( + parserCompileResult.Variables, + parserCompileResult.Body + ); + + result.Body.Add(block); + + return result; + } +} + diff --git a/src/Parlot/Fluent/Separated.cs b/src/Parlot/Fluent/Separated.cs index 5885eda9..f3e15839 100644 --- a/src/Parlot/Fluent/Separated.cs +++ b/src/Parlot/Fluent/Separated.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Collections.Generic; @@ -25,6 +25,7 @@ public Separated(Parser separator, Parser parser) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + Name = $"Separated({separator.Name}, {parser.Name})"; } public bool CanSeek { get; } @@ -66,6 +67,7 @@ public override bool Parse(ParseContext context, ref ParseResult parser1, Parser parser2) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = 
$"And({parser1.Name}, {parser2.Name})"; } public bool CanSeek { get; } @@ -43,12 +45,15 @@ public override bool Parse(ParseContext context, ref ParseResult(parseResult1.Value, parseResult2.Value)); + + context.ExitParser(this); return true; } context.Scanner.Cursor.ResetPosition(start); } + context.ExitParser(this); return false; } @@ -86,6 +91,8 @@ Parser lastParser ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"And({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -115,12 +122,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser lastParser) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"And({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -186,12 +198,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser lastParser ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"And({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -258,12 +275,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser lastPa ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"And({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -331,6 +353,8 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser la ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"And({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -406,6 +433,8 @@ public override bool Parse(ParseContext context, ref ParseResult parser1, Parser parser2) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser1.Name}, {parser2.Name})"; } public bool CanSeek { get; } @@ -45,12 +47,15 @@ public override bool Parse(ParseContext 
context, ref ParseResult result) if (_parser2.Parse(context, ref parseResult2)) { result.Set(parseResult1.Start, parseResult2.End, parseResult1.Value); + + context.ExitParser(this); return true; } context.Scanner.Cursor.ResetPosition(start); } + context.ExitParser(this); return false; } @@ -145,6 +150,8 @@ Parser lastParser ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -173,12 +180,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser lastPar ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -243,12 +255,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser las ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -314,12 +331,15 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -387,6 +409,8 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Parser ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -462,6 +489,8 @@ public override bool Parse(ParseContext context, ref ParseResult> parser, Pa ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"AndSkip({parser.Name}, {lastParser.Name})"; } public bool CanSeek { get; } @@ -538,6 +570,8 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (_parser2.Parse(context, 
ref parseResult2)) { result.Set(parseResult1.Start, parseResult2.End, parseResult2.Value); + + context.ExitParser(this); return true; } context.Scanner.Cursor.ResetPosition(start); } + context.ExitParser(this); return false; } @@ -173,12 +176,15 @@ public override bool Parse(ParseContext context, ref ParseResult : Parser, ICompilable, ISeekable { - private readonly Parser _parser; + public Parser Parser { get; } public SkipWhiteSpace(Parser parser) { - _parser = parser ?? throw new ArgumentNullException(nameof(parser)); + Parser = parser ?? throw new ArgumentNullException(nameof(parser)); + + Name = $"{Name} (SkipWhiteSpace)"; if (parser is ISeekable seekable) { @@ -32,11 +34,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) var cursor = context.Scanner.Cursor; - // If we know there is no custom whitespace parser we can skip the skipper by checking if the - // current char is not part of the common alphanumeric chars - if (context.WhiteSpaceParser is null && Character.IsInRange(cursor.Current, (char)33, (char)126)) + // Shortcut for common scenario + if (context.WhiteSpaceParser is null && !Character.IsWhiteSpaceOrNewLine(cursor.Current)) { - return _parser.Parse(context, ref result); + context.ExitParser(this); + return Parser.Parse(context, ref result); } var start = cursor.Position; @@ -44,13 +46,15 @@ public override bool Parse(ParseContext context, ref ParseResult result) // Use the scanner's logic to ignore whitespaces since it knows about multi-line grammars context.SkipWhiteSpace(); - if (_parser.Parse(context, ref result)) + if (Parser.Parse(context, ref result)) { + context.ExitParser(this); return true; } cursor.ResetPosition(start); + context.ExitParser(this); return false; } @@ -60,23 +64,20 @@ public CompilationResult Compile(CompilationContext context) var start = context.DeclarePositionVariable(result); - var parserCompileResult = _parser.Build(context); + var parserCompileResult = Parser.Build(context); result.Body.Add( 
Expression.Block( parserCompileResult.Variables, - Expression.IfThen( - test: Expression.IsFalse(Expression.And( - Expression.Equal(Expression.Call(context.ParseContext, ExpressionHelper.ParserContext_WhiteSpaceParser), Expression.Default(typeof(Parser))), - Expression.Invoke(ExpressionHelper.CharacterIsInRange, [ExpressionHelper.Cursor(context), Expression.Constant((char)33), Expression.Constant((char)126)]))), - ifTrue: context.ParserSkipWhiteSpace() - ), + context.ParserSkipWhiteSpace(), Expression.Block( Expression.Block(parserCompileResult.Body), Expression.IfThenElse( parserCompileResult.Success, Expression.Block( - context.DiscardResult ? Expression.Empty() : Expression.Assign(result.Value, parserCompileResult.Value), + context.DiscardResult ? + Expression.Empty() : + Expression.Assign(result.Value, parserCompileResult.Value), Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))) ), context.ResetPosition(start) diff --git a/src/Parlot/Fluent/StringLiteral.cs b/src/Parlot/Fluent/StringLiteral.cs index 53559b26..03b41691 100644 --- a/src/Parlot/Fluent/StringLiteral.cs +++ b/src/Parlot/Fluent/StringLiteral.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Linq.Expressions; @@ -34,6 +34,8 @@ public StringLiteral(StringLiteralQuotes quotes) StringLiteralQuotes.SingleOrDouble => SingleOrDoubleQuotes, _ => [] }; + + Name = "StringLiteral"; } public bool CanSeek { get; } = true; @@ -64,10 +66,13 @@ public override bool Parse(ParseContext context, ref ParseResult resul var decoded = Character.DecodeString(new TextSpan(context.Scanner.Buffer, start + 1, end - start - 2)); result.Set(start, end, decoded); + + context.ExitParser(this); return true; } else { + context.ExitParser(this); return false; } } diff --git a/src/Parlot/Fluent/Switch.cs b/src/Parlot/Fluent/Switch.cs index 588e047b..2460cd4e 100644 --- a/src/Parlot/Fluent/Switch.cs +++ b/src/Parlot/Fluent/Switch.cs @@ 
-1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq; using System.Linq.Expressions; @@ -20,14 +20,19 @@ public Switch(Parser previousParser, Func> action) { _previousParser = previousParser ?? throw new ArgumentNullException(nameof(previousParser)); _action = action ?? throw new ArgumentNullException(nameof(action)); + + Name = $"{previousParser.Name} (Switch)"; } public override bool Parse(ParseContext context, ref ParseResult result) { + context.EnterParser(this); + var previousResult = new ParseResult(); if (!_previousParser.Parse(context, ref previousResult)) { + context.ExitParser(this); return false; } @@ -35,6 +40,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (nextParser == null) { + context.ExitParser(this); return false; } @@ -43,9 +49,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (nextParser.Parse(context, ref parsed)) { result.Set(parsed.Start, parsed.End, parsed.Value); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/TextBefore.cs b/src/Parlot/Fluent/TextBefore.cs index 9e452d8e..9098ca5b 100644 --- a/src/Parlot/Fluent/TextBefore.cs +++ b/src/Parlot/Fluent/TextBefore.cs @@ -43,6 +43,8 @@ public TextBefore(Parser delimiter, bool canBeEmpty = false, bool failOnEof = #endif _canJumpToNextExpectedChar = true; } + + Name = $"TextBefore({delimiter.Name})"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -71,6 +73,8 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (_failOnEof) { context.Scanner.Cursor.ResetPosition(start); + + context.ExitParser(this); return false; } @@ -78,10 +82,13 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (length == 0 && !_canBeEmpty) { + context.ExitParser(this); return false; } result.Set(start.Offset, previous.Offset, new TextSpan(context.Scanner.Buffer, 
start.Offset, length)); + + context.ExitParser(this); return true; } @@ -98,10 +105,13 @@ public override bool Parse(ParseContext context, ref ParseResult resul if (length == 0 && !_canBeEmpty) { + context.ExitParser(this); return false; } result.Set(start.Offset, previous.Offset, new TextSpan(context.Scanner.Buffer, start.Offset, length)); + + context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/TextLiteral.cs b/src/Parlot/Fluent/TextLiteral.cs index a0aea08d..04b88c84 100644 --- a/src/Parlot/Fluent/TextLiteral.cs +++ b/src/Parlot/Fluent/TextLiteral.cs @@ -14,6 +14,8 @@ public sealed class TextLiteral : Parser, ICompilable, ISeekable public TextLiteral(string text, StringComparison comparisonType) { + Name = $"TextLiteral(\"{text}\")"; + Text = text ?? throw new ArgumentNullException(nameof(text)); _comparisonType = comparisonType; _hasNewLines = text.Any(Character.IsNewLine); @@ -74,9 +76,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) } result.Set(start, cursor.Offset, Text); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } @@ -89,12 +94,6 @@ public CompilationResult Compile(CompilationContext context) // success = true; // value = Text; // } - // - // [if skipWhiteSpace] - // if (!success) - // { - // resetPosition(beginning); - // } var ifReadText = Expression.IfThen( Expression.Call( diff --git a/src/Parlot/Fluent/Then.cs b/src/Parlot/Fluent/Then.cs index 749ca57f..e3d7d8fa 100644 --- a/src/Parlot/Fluent/Then.cs +++ b/src/Parlot/Fluent/Then.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Linq; @@ -29,6 +29,8 @@ private Then(Parser parser) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"{parser.Name} (Then)"; } public Then(Parser parser, Func action) : this(parser) @@ -54,12 +56,12 @@ public Then(Parser parser, U value) : this(parser) public 
override bool Parse(ParseContext context, ref ParseResult result) { - context.EnterParser(this); - var parsed = new ParseResult(); if (_parser.Parse(context, ref parsed)) { + context.EnterParser(this); + if (_action1 != null) { result.Set(parsed.Start, parsed.End, _action1.Invoke(parsed.Value)); @@ -74,9 +76,11 @@ public override bool Parse(ParseContext context, ref ParseResult result) result.Set(parsed.Start, parsed.End, _value!); } + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } diff --git a/src/Parlot/Fluent/When.cs b/src/Parlot/Fluent/When.cs index 1c5f3e7c..dce8f015 100644 --- a/src/Parlot/Fluent/When.cs +++ b/src/Parlot/Fluent/When.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System; using System.Linq; using System.Linq.Expressions; @@ -18,6 +18,8 @@ public When(Parser parser, Func action) { _action = action ?? throw new ArgumentNullException(nameof(action)); _parser = parser ?? throw new ArgumentNullException(nameof(parser)); + + Name = $"{parser.Name} (When)"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -33,6 +35,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) context.Scanner.Cursor.ResetPosition(start); } + context.ExitParser(this); return valid; } diff --git a/src/Parlot/Fluent/WhiteSpaceLiteral.cs b/src/Parlot/Fluent/WhiteSpaceLiteral.cs index f0c7131f..75e85abb 100644 --- a/src/Parlot/Fluent/WhiteSpaceLiteral.cs +++ b/src/Parlot/Fluent/WhiteSpaceLiteral.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using System.Linq.Expressions; namespace Parlot.Fluent; @@ -10,6 +10,8 @@ public sealed class WhiteSpaceLiteral : Parser, ICompilable public WhiteSpaceLiteral(bool includeNewLines) { _includeNewLines = includeNewLines; + + Name = "WhiteSpaceLiteral"; } public override bool Parse(ParseContext context, ref ParseResult result) @@ -31,11 +33,13 @@ public override bool Parse(ParseContext context, ref 
ParseResult resul if (start == end) { + context.ExitParser(this); return false; } result.Set(start, context.Scanner.Cursor.Offset, new TextSpan(context.Scanner.Buffer, start, end - start)); + context.ExitParser(this); return true; } diff --git a/src/Parlot/Fluent/ZeroOrMany.cs b/src/Parlot/Fluent/ZeroOrMany.cs index 9620d228..17612ac2 100644 --- a/src/Parlot/Fluent/ZeroOrMany.cs +++ b/src/Parlot/Fluent/ZeroOrMany.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Collections.Generic; @@ -23,6 +23,8 @@ public ZeroOrMany(Parser parser) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"ZeroOrMany({parser.Name})"; } public bool CanSeek { get; } @@ -61,6 +63,8 @@ public override bool Parse(ParseContext context, ref ParseResult)[]); + + context.ExitParser(this); return true; } @@ -104,7 +108,7 @@ public CompilationResult Compile(CompilationContext context) var parserCompileResult = _parser.Build(context); - var breakLabel = Expression.Label("break"); + var breakLabel = Expression.Label($"break{context.NextNumber}"); var block = Expression.Loop( diff --git a/src/Parlot/Fluent/ZeroOrOne.cs b/src/Parlot/Fluent/ZeroOrOne.cs index ae830318..d66cf286 100644 --- a/src/Parlot/Fluent/ZeroOrOne.cs +++ b/src/Parlot/Fluent/ZeroOrOne.cs @@ -1,4 +1,4 @@ -using Parlot.Compilation; +using Parlot.Compilation; using Parlot.Rewriting; using System; using System.Linq.Expressions; @@ -20,6 +20,8 @@ public ZeroOrOne(Parser parser, T defaultValue) ExpectedChars = seekable.ExpectedChars; SkipWhitespace = seekable.SkipWhitespace; } + + Name = $"ZeroOrOne({parser.Name})"; } public bool CanSeek { get; } @@ -39,6 +41,7 @@ public override bool Parse(ParseContext context, ref ParseResult result) result.Set(parsed.Start, parsed.End, success ? 
parsed.Value : _defaultValue); // ZeroOrOne always succeeds + context.ExitParser(this); return true; } diff --git a/src/Parlot/Parlot.csproj b/src/Parlot/Parlot.csproj index 6c146ab1..1c9b828e 100644 --- a/src/Parlot/Parlot.csproj +++ b/src/Parlot/Parlot.csproj @@ -12,6 +12,7 @@ $(NoWarn);1591 true enable + Latest diff --git a/src/Samples/Calc/FluentParser.cs b/src/Samples/Calc/FluentParser.cs index 4af41931..e6f2053e 100644 --- a/src/Samples/Calc/FluentParser.cs +++ b/src/Samples/Calc/FluentParser.cs @@ -1,4 +1,4 @@ -using Parlot.Fluent; +using Parlot.Fluent; using static Parlot.Fluent.Parsers; namespace Parlot.Tests.Calc; @@ -36,30 +36,32 @@ static FluentParser() var closeParen = Terms.Char(')'); // "(" expression ")" - var groupExpression = Between(openParen, expression, closeParen); + var groupExpression = Between(openParen, expression, closeParen).Named("group"); // primary => NUMBER | "(" expression ")"; - var primary = number.Or(groupExpression); + var primary = number.Or(groupExpression).Named("primary"); // ( "-" ) unary | primary; var unary = primary.Unary( (minus, x => new NegateExpression(x)) - ); + ).Named("unary"); // multiplicative => unary ( ( "/" | "*" ) unary )* ; var multiplicative = unary.LeftAssociative( (divided, static (a, b) => new Division(a, b)), (times, static (a, b) => new Multiplication(a, b)) - ); + ).Named("multiplicative"); // additive => multiplicative(("-" | "+") multiplicative) * ; var additive = multiplicative.LeftAssociative( (plus, static (a, b) => new Addition(a, b)), (minus, static (a, b) => new Subtraction(a, b)) - ); + ).Named("additive"); expression.Parser = additive; + expression.Named("expression"); + Expression = expression; } } diff --git a/test/Parlot.Benchmarks/ExprBench.cs b/test/Parlot.Benchmarks/ExprBench.cs index cac083ac..f82b5f14 100644 --- a/test/Parlot.Benchmarks/ExprBench.cs +++ b/test/Parlot.Benchmarks/ExprBench.cs @@ -1,4 +1,4 @@ -using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes; using 
BenchmarkDotNet.Configs; using Parlot.Benchmarks.PidginParsers; using Parlot.Fluent; @@ -9,7 +9,7 @@ namespace Parlot.Benchmarks; [MemoryDiagnoser, GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), ShortRunJob] public class ExprBench { - private readonly Parser _parser = new(); + private readonly Parser _parser = FluentParser.Expression; private readonly Parser _compiled = FluentParser.Expression.Compile(); private const string _expression1 = "3 - 1 / 2 + 1"; diff --git a/test/Parlot.Benchmarks/RegexBenchmarks.cs b/test/Parlot.Benchmarks/RegexBenchmarks.cs index aaab8b35..669d24e0 100644 --- a/test/Parlot.Benchmarks/RegexBenchmarks.cs +++ b/test/Parlot.Benchmarks/RegexBenchmarks.cs @@ -1,4 +1,4 @@ -using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Configs; using Parlot.Fluent; using System; @@ -31,7 +31,8 @@ public partial class RegexBenchmarks public static readonly string Email = "sebastien.ros@gmail.com"; - public RegexBenchmarks() + [GlobalSetup] + public void Setup() { if (RegexEmail() != Email) throw new Exception(nameof(RegexEmail)); if (RegexEmailCompiled() != Email) throw new Exception(nameof(RegexEmailCompiled)); diff --git a/test/Parlot.Benchmarks/SwitchExpressionBenchmarks.cs b/test/Parlot.Benchmarks/SwitchExpressionBenchmarks.cs new file mode 100644 index 00000000..80e76f37 --- /dev/null +++ b/test/Parlot.Benchmarks/SwitchExpressionBenchmarks.cs @@ -0,0 +1,69 @@ +using BenchmarkDotNet.Attributes; +using Parlot.Fluent; +using System; +using System.Linq; +using static Parlot.Fluent.Parsers; + +namespace Parlot.Benchmarks; + + +// | Method | Length | Mean | Error | StdDev | Gen0 | Allocated | +// |-------------------- |------- |---------:|---------:|---------:|-------:|----------:| +// | LookupMatchFluent | 2 | 25.47 ns | 6.780 ns | 0.372 ns | 0.0091 | 152 B | +// | LookupMatchCompiled | 2 | 39.50 ns | 5.996 ns | 0.329 ns | 0.0091 | 152 B | +// | LookupMissFluent | 2 | 20.71 ns | 5.622 ns | 0.308 ns | 0.0091 
| 152 B | +// | LookupMissCompiled | 2 | 26.30 ns | 5.586 ns | 0.306 ns | 0.0091 | 152 B | +// | LookupMatchFluent | 255 | 24.55 ns | 1.340 ns | 0.073 ns | 0.0091 | 152 B | +// | LookupMatchCompiled | 255 | 39.80 ns | 0.845 ns | 0.046 ns | 0.0091 | 152 B | +// | LookupMissFluent | 255 | 24.83 ns | 8.213 ns | 0.450 ns | 0.0091 | 152 B | +// | LookupMissCompiled | 255 | 27.73 ns | 4.930 ns | 0.270 ns | 0.0091 | 152 B | + +// Lookups win all the time, switches, loops,are no match. +// The compiled code is slower, even when using the Compiled wrapper over the fluent implementation. + +[MemoryDiagnoser, ShortRunJob] +public class SwitchExpressionBenchmarks +{ + private Parser _fluent; + private Parser _compiled; + private const int MaxValue = 127; + private string _matchString; + private string _missString; + + [Params(2, 255)] + public int Length { get; set; } + + [GlobalSetup] + public void Setup() + { + var parsers = Enumerable.Range(1, Length).Select(i => Literals.Char((char)(Random.Shared.Next(MaxValue-1)))).ToArray(); + _fluent = OneOf(parsers); + _compiled = _fluent.Compile(); + _matchString = ((CharLiteral)parsers[(int)(Length * 0.7)]).Char.ToString(); + _missString = ((char)MaxValue).ToString(); + } + + [Benchmark] + public char LookupMatchFluent() + { + return _fluent.Parse(_matchString); + } + + [Benchmark] + public char LookupMatchCompiled() + { + return _compiled.Parse(_matchString); + } + + [Benchmark] + public char LookupMissFluent() + { + return _fluent.Parse(_missString); + } + + [Benchmark] + public char LookupMissCompiled() + { + return _compiled.Parse(_missString); + } +} diff --git a/test/Parlot.Tests/BenchmarksTests.cs b/test/Parlot.Tests/BenchmarksTests.cs index 5c0f27db..7d50aa19 100644 --- a/test/Parlot.Tests/BenchmarksTests.cs +++ b/test/Parlot.Tests/BenchmarksTests.cs @@ -1,5 +1,6 @@ #if NET9_0_OR_GREATER using Parlot.Benchmarks; +using System.ComponentModel.DataAnnotations; using Xunit; namespace Parlot.Tests; @@ -239,5 +240,21 @@ public void 
ParlotEmail() var result = benchmarks.ParlotEmail(); Assert.Equal(RegexBenchmarks.Email, result); } + + [Fact] + public void ParlotLookupFluent() + { + var benchmarks = new SwitchExpressionBenchmarks() { Length = 2 }; + benchmarks.Setup(); + var result = benchmarks.LookupMatchFluent(); + } + + [Fact] + public void ParlotLookupCompiled() + { + var benchmarks = new SwitchExpressionBenchmarks() { Length = 2 }; + benchmarks.Setup(); + var result = benchmarks.LookupMatchCompiled(); + } } #endif diff --git a/test/Parlot.Tests/CompileTests.cs b/test/Parlot.Tests/CompileTests.cs index bf478b27..a973b2e1 100644 --- a/test/Parlot.Tests/CompileTests.cs +++ b/test/Parlot.Tests/CompileTests.cs @@ -186,11 +186,11 @@ public void ShouldCompileZeroOrMany() [Fact] public void ShouldCompileOneOrMany() { - var parser = OneOrMany(Terms.Text("hello").Or(Terms.Text("world"))).Compile(); + var parser = OneOrMany(Terms.Text("hello")).Compile(); - var result = parser.Parse(" hello world hello"); + var result = parser.Parse(" hello hello hello"); - Assert.Equal(new[] { "hello", "world", "hello" }, result); + Assert.Equal(new[] { "hello", "hello", "hello" }, result); } [Fact] @@ -241,7 +241,7 @@ public void ShouldcompiledSeparated() [Fact] public void SeparatedShouldNotBeConsumedIfNotFollowedByValueCompiled() { - // This test ensures that the separator is not consumed if there is no valid net value. + // This test ensures that the separator is not consumed if there is no valid next value. 
var parser = Separated(Terms.Char(','), Terms.Decimal()).AndSkip(Terms.Char(',')).And(Terms.Identifier()).Then(x => true).Compile(); @@ -307,9 +307,12 @@ public override bool Parse(ParseContext context, ref ParseResult result) if (context.Scanner.ReadChar(Char)) { result.Set(start, context.Scanner.Cursor.Offset, Char); + + context.ExitParser(this); return true; } + context.ExitParser(this); return false; } } diff --git a/test/Parlot.Tests/FluentTests.cs b/test/Parlot.Tests/FluentTests.cs index c81ca21c..8767134a 100644 --- a/test/Parlot.Tests/FluentTests.cs +++ b/test/Parlot.Tests/FluentTests.cs @@ -3,10 +3,8 @@ using System.Collections.Generic; using System.Globalization; using System.Numerics; +using System.Xml; using Xunit; -#if NET8_0_OR_GREATER -using System.Buffers; -#endif using static Parlot.Fluent.Parsers; @@ -511,9 +509,8 @@ public void ShouldParseEmails() Assert.Equal(_email, result.ToString()); } -#if NET8_0_OR_GREATER [Fact] - public void ShouldParseEmailsWithSearchValues() + public void ShouldParseEmailsWithAnyOf() { var letterOrDigitChars = "01234567890abcdefghijklmnopqrstuvwxyz"; @@ -531,7 +528,6 @@ public void ShouldParseEmailsWithSearchValues() Assert.True(Email.TryParse(_email, out var result)); Assert.Equal(_email, result.ToString()); } -#endif [Fact] public void ShouldParseEof() @@ -713,6 +709,39 @@ public void OneOfShouldNotFailWithLookupConflicts() Assert.True(parser.TryParse("abc", out _)); } + [Fact] + public void OneOfShouldHandleSkipWhiteSpaceMix() + { + var parser = Literals.Text("a").Or(Terms.Text("b")); + + Assert.True(parser.TryParse("a", out _)); + Assert.True(parser.TryParse("b", out _)); + Assert.False(parser.TryParse(" a", out _)); + Assert.True(parser.TryParse(" b", out _)); + } + + [Fact] + public void OneOfShouldHandleParsedWhiteSpace() + { + var parser = Literals.Text("a").Or(AnyCharBefore(Literals.Text("c"), false, true).Then(x => x.ToString())); + + Assert.True(parser.TryParse("a", out _)); + 
Assert.False(parser.TryParse("b", out _)); + Assert.False(parser.TryParse(" a", out _)); + Assert.True(parser.TryParse("\rcde", out _)); + } + + [Fact] + public void OneOfShouldHandleContextualWhiteSpace() + { + var parser = Terms.Text("a").Or(Terms.Text("b")); + + Assert.True(parser.TryParse(new ParseContext(new Scanner("\rb")), out _, out _)); + Assert.True(parser.TryParse(new ParseContext(new Scanner(" b")), out _, out _)); + Assert.False(parser.TryParse(new ParseContext(new Scanner("\rb"), useNewLines: true), out _, out _)); + Assert.True(parser.TryParse(new ParseContext(new Scanner(" b"), useNewLines: true), out _, out _)); + } + [Fact] public void SkipWhiteSpaceShouldResponseParseContextUseNewLines() { @@ -1036,4 +1065,51 @@ public void NumberShouldNotParseOverflow(string source) { Assert.False(Literals.Number().TryParse(source, out var _)); } + + [Theory] + [InlineData("a", "a", "a")] + [InlineData("a", "aa", "aa")] + [InlineData("a", "aaaa", "aaaa")] + [InlineData("ab", "ab", "ab")] + [InlineData("ba", "ab", "ab")] + [InlineData("abc", "aaabbbccc", "aaabbbccc")] + [InlineData("a", "aaab", "aaa")] + [InlineData("aa", "aaaab", "aaaa")] + public void AnyOfShouldMatch(string chars, string source, string expected) + { + Assert.Equal(expected, Literals.AnyOf(chars).Parse(source).ToString()); + } + + [Theory] + [InlineData("a", "b")] + [InlineData("a", "bbb")] + [InlineData("abc", "dabc")] + public void AnyOfShouldNotMAtch(string chars, string source) + { + Assert.False(Literals.AnyOf(chars).TryParse(source, out var _)); + } + [Fact] + public void AnyOfShouldRespectSizeConstraints() + { + Assert.False(Literals.AnyOf("a", minSize: 4).TryParse("aaa", out _)); + Assert.False(Literals.AnyOf("a", minSize: 2).TryParse("ab", out _)); + Assert.False(Literals.AnyOf("a", minSize: 3).TryParse("ab", out _)); + Assert.Equal("aa", Literals.AnyOf("a", minSize: 2, maxSize: 2).Parse("aa")); + Assert.Equal("aa", Literals.AnyOf("a", minSize: 2, maxSize: 3).Parse("aa")); + 
Assert.Equal("a", Literals.AnyOf("a", maxSize: 1).Parse("aa")); + Assert.Equal("aaaa", Literals.AnyOf("a", minSize: 2, maxSize: 4).Parse("aaaaaa")); + Assert.False(Literals.AnyOf("a", minSize: 2, maxSize: 2).TryParse("a", out _)); + } + + [Fact] + public void AnyOfShouldResetPositionWhenFalse() + { + Assert.False(Literals.AnyOf("a", minSize: 3) + .And(Literals.AnyOf("Z")) + .TryParse("aaZZ", out _)); + + Assert.True(Literals.AnyOf("a", minSize: 3) + .And(Literals.AnyOf("Z")) + .TryParse("aaaZZ", out _)); + } } diff --git a/test/Parlot.Tests/Models/FakeParser.cs b/test/Parlot.Tests/Models/FakeParser.cs new file mode 100644 index 00000000..c7055f3b --- /dev/null +++ b/test/Parlot.Tests/Models/FakeParser.cs @@ -0,0 +1,46 @@ +using System; +using Parlot.Fluent; +using Parlot.Rewriting; + +namespace Parlot.Tests.Models; + +public partial class RewriteTests +{ + public sealed class FakeParser : Parser, ISeekable + { + public FakeParser() + { + } + + public T Result { get; set; } + public bool CanSeek { get; set; } + public char[] ExpectedChars { get; set; } + public bool SkipWhitespace { get; set; } + public bool Success { get; set; } + public bool ThrowOnParse {get; set;} + public Action OnParse {get; set;} + + public override bool Parse(ParseContext context, ref ParseResult result) + { + if (ThrowOnParse) + { + throw new InvalidOperationException(); + } + + context.EnterParser(this); + + OnParse?.Invoke(context); + + if (Success) + { + result.Set(0, 0, Result); + + context.ExitParser(this); + return true; + } + + context.ExitParser(this); + return false; + } + } +} diff --git a/test/Parlot.Tests/Models/FakeSeekable.cs b/test/Parlot.Tests/Models/FakeSeekable.cs deleted file mode 100644 index 689987b6..00000000 --- a/test/Parlot.Tests/Models/FakeSeekable.cs +++ /dev/null @@ -1,34 +0,0 @@ -using Parlot.Fluent; -using Parlot.Rewriting; - -namespace Parlot.Tests.Models; - -public partial class RewriteTests -{ - public sealed class FakeSeekable : Parser, ISeekable - { - 
public FakeSeekable() - { - } - - public string Text { get; set; } - public bool CanSeek { get; set; } - public char[] ExpectedChars { get; set; } - public bool SkipWhiteSpace { get; } - public bool SkipWhitespace { get; set; } - public bool Success { get; set; } - - public override bool Parse(ParseContext context, ref ParseResult result) - { - context.EnterParser(this); - - if (Success) - { - result.Set(0, 0, Text); - return true; - } - - return false; - } - } -} diff --git a/test/Parlot.Tests/RewriteTests.cs b/test/Parlot.Tests/RewriteTests.cs index 9f608388..e518097b 100644 --- a/test/Parlot.Tests/RewriteTests.cs +++ b/test/Parlot.Tests/RewriteTests.cs @@ -24,6 +24,7 @@ public void TextLiteralShouldBeSeekable() public void SkipWhiteSpaceShouldBeSeekable() { var text = Terms.Text("hello"); + var seekable = text as ISeekable; Assert.NotNull(seekable); @@ -36,6 +37,7 @@ public void SkipWhiteSpaceShouldBeSeekable() public void CharLiteralShouldBeSeekable() { var text = Literals.Char('a'); + var seekable = text as ISeekable; Assert.NotNull(seekable); @@ -44,12 +46,15 @@ public void CharLiteralShouldBeSeekable() Assert.False(seekable.SkipWhitespace); } - [Fact] - public void OneOfShouldRewriteAllSeekable() + [Theory] + [InlineData(true)] + [InlineData(false)] + public void OneOfShouldRewriteAllSeekable(bool compile) { - var hello = new FakeSeekable { CanSeek = true, ExpectedChars = new[] { 'a' }, SkipWhitespace = false, Success = true, Text = "hello" }; - var goodbye = new FakeSeekable { CanSeek = true, ExpectedChars = new[] { 'b' }, SkipWhitespace = false, Success = true, Text = "goodbye" }; + var hello = new FakeParser { CanSeek = true, ExpectedChars = ['a'], SkipWhitespace = false, Success = true, Result = "hello" }; + var goodbye = new FakeParser { CanSeek = true, ExpectedChars = ['b'], SkipWhitespace = false, Success = true, Result = "goodbye" }; var oneof = Parsers.OneOf(hello, goodbye); + if (compile) oneof = oneof.Compile(); Assert.Equal("hello", 
oneof.Parse("a")); Assert.Equal("goodbye", oneof.Parse("b")); @@ -57,24 +62,144 @@ public void OneOfShouldRewriteAllSeekable() } [Fact] - public void OneOfShouldRewriteAllSeekableCompiled() + public void LookupTableInvokesNonSeekableInOrder() + { + var p1 = new FakeParser { CanSeek = false, ExpectedChars = ['a'], SkipWhitespace = false, Success = true, Result = "a" }; + var p2 = new FakeParser { CanSeek = true, ExpectedChars = ['b'], SkipWhitespace = false, Success = true, Result = "b" }; + var p3 = new FakeParser { CanSeek = false, ExpectedChars = ['c'], SkipWhitespace = false, Success = true, Result = "c" }; + + var p = OneOf(p1, p2, p3); + + // Parsing 'd' such that there is no match in the lookup and it invokes non-seekable parsers + Assert.Equal("a", p.Parse("d")); + + p1.Success = false; + + // We know the first non-seekable parser is invoked, now check if it invokes the other + // ones if the first fails + Assert.Equal("c", p.Parse("d")); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void LookupTableSkipsParsers(bool compile) { - var helloOrGoodbye = Parsers.OneOf(Terms.Text("hello"), Terms.Text("goodbye")).Compile(); + var p1 = new FakeParser { CanSeek = true, ExpectedChars = ['a'], ThrowOnParse = true }; + var p2 = new FakeParser { CanSeek = true, ExpectedChars = ['b'], SkipWhitespace = false, Success = true, Result = "b" }; + var p3 = new FakeParser { CanSeek = true, ExpectedChars = ['c'], SkipWhitespace = false, Success = true, Result = "c" }; + + var p = OneOf(p1, p2, p3); + if (compile) p = p.Compile(); - Assert.Equal("hello", helloOrGoodbye.Parse(" hello")); - Assert.Equal("goodbye", helloOrGoodbye.Parse(" goodbye")); - Assert.Null(helloOrGoodbye.Parse("yo!")); + Assert.Equal("b", p.Parse("b")); + Assert.Equal("c", p.Parse("c")); } [Fact] - public void OneOfShouldNotRewriteIfOneIsNotSeekable() + public void LookupTableInvokesAllParserWithSameLookup() { - var hello = new FakeSeekable { CanSeek = true, ExpectedChars = new[] { 'a' 
}, SkipWhitespace = false, Success = true, Text = "hello" }; - var goodbye = OneOf(Parsers.Literals.Text("goodbye")); - var oneof = Parsers.OneOf(goodbye, hello); + var p1 = new FakeParser { CanSeek = false, ExpectedChars = ['a'], Success = false, Result = "a" }; + var p2 = new FakeParser { CanSeek = true, ExpectedChars = ['b'], Success = true, Result = "b" }; + var p3 = new FakeParser { CanSeek = true, ExpectedChars = ['b'], Success = true, Result = "c" }; + + var p = OneOf(p1, p2, p3); + + // Parsing 'b' such that there is a match in the lookup and it invokes all parsers + Assert.Equal("b", p.Parse("b")); - Assert.Equal("hello", oneof.Parse("a")); - Assert.Equal("goodbye", oneof.Parse("goodbye")); - Assert.Equal("hello", oneof.Parse("b")); // b is not found in "goodbye" so the next parser is checked and always succeeds true + p2.Success = false; + + // We know the first seekable parser is invoked, now check if it invokes others in the same lookup + Assert.Equal("c", p.Parse("b")); + } + + [Fact] + public void OneOfIsSeekableIfAllAreSeekable() + { + // OneOf can create a lookup table based on ISeekable. + // However it can only be an ISeekable itself if all its parsers are. + // If one is not, then the caller would not be able to invoke it. + // This test ensures that such a parser is correctly invoked. + + var pa = new FakeParser { CanSeek = true, ExpectedChars = ['a'], Success = true, Result = "a" }; + var pb = new FakeParser { CanSeek = true, ExpectedChars = ['b'], Success = true, Result = "b" }; + var pc = new FakeParser { CanSeek = false, ExpectedChars = ['c'], Success = false, Result = "c" }; + var pd = new FakeParser { CanSeek = false, ExpectedChars = ['d'], Success = true, Result = "d" }; + + // This one should be seekable because it only contains seekable parsers + var p1 = OneOf(pa, pb); + + Assert.True(p1 is ISeekable seekable1 && seekable1.CanSeek); + + // This one should not be seekable because none of its parsers are. 
+ var p2 = OneOf(pc, pd); + + Assert.False(p2 is ISeekable seekable2 && seekable2.CanSeek); + + var p3 = OneOf(p1, p2); + + Assert.Equal("a", p3.Parse("a")); + Assert.Equal("b", p3.Parse("b")); + Assert.Equal("d", p3.Parse("c")); + + // Because p2 is non-seekable and pd always succeeds, anything else returns 'd' + Assert.Equal("d", p3.Parse("d")); + Assert.Equal("d", p3.Parse("e")); + + } + + [Fact] + public void OneOfCanForwardSeekable() + { + // OneOf can create a lookup table based on ISeekable. + // However it can only be an ISeekable itself if all its parsers are. + // If one is not, then the caller would not be able to invoke it. + // This test ensures that such a parser is correctly invoked. + + var pa = new FakeParser { CanSeek = true, ExpectedChars = ['a'], Success = true, Result = "a" }; + var pb = new FakeParser { CanSeek = false, ExpectedChars = ['b'], Success = true, Result = "b" }; + var pc = new FakeParser { CanSeek = true, ExpectedChars = ['c'], Success = true, Result = "c" }; + var pd = new FakeParser { CanSeek = false, ExpectedChars = ['d'], Success = true, Result = "d" }; + + // These are seekable because at least one of their parsers is. 
+ var p1 = OneOf(pa, pb); + var p2 = OneOf(pc, pd); + + Assert.True(p1 is ISeekable seekable1 && seekable1.CanSeek); + Assert.Equal(['a', '\0'], ((ISeekable)p1).ExpectedChars); + Assert.True(p2 is ISeekable seekable2 && seekable2.CanSeek); + Assert.Equal(['c', '\0'], ((ISeekable)p2).ExpectedChars); + + var p3 = OneOf(p1, p2); + + Assert.Equal("a", p3.Parse("a")); + Assert.Equal("b", p3.Parse("b")); + Assert.Equal("c", p3.Parse("c")); + Assert.Equal("b", p3.Parse("d")); + + pb.Success = false; + + Assert.Equal("a", p3.Parse("a")); + Assert.Equal("d", p3.Parse("b")); + Assert.Equal("c", p3.Parse("c")); + Assert.Equal("d", p3.Parse("d")); + } + + [Fact] + public void OneOfCompiled() + { + var pa = new FakeParser { CanSeek = true, ExpectedChars = ['a'], Success = true, Result = "a" }; + var pb = new FakeParser { CanSeek = true, ExpectedChars = ['b'], Success = true, Result = "b" }; + var pc = new FakeParser { CanSeek = false, Success = true, Result = "c" }; + + var p1 = OneOf(pa, pb, pc).Compile(); + Assert.Equal("a", p1.Parse("a")); + Assert.Equal("b", p1.Parse("b")); + Assert.Equal("c", p1.Parse("c")); + + pc.Success = false; + + Assert.Null(p1.Parse("c")); } }