From 9b3f05296e365e47b152054cbb9d82c205ea4dce Mon Sep 17 00:00:00 2001 From: Paul Wankadia Date: Sun, 13 Aug 2023 13:04:51 +0000 Subject: [PATCH] Add support for `(?<name>expr)`. This follows https://github.com/google/re2/commit/6148386 (and https://github.com/golang/go/commit/ee61186) to some extent. --- java/com/google/re2j/Parser.java | 15 ++++++++------- javatests/com/google/re2j/ParserTest.java | 10 ++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/java/com/google/re2j/Parser.java b/java/com/google/re2j/Parser.java index a26971e0..0e73e701 100644 --- a/java/com/google/re2j/Parser.java +++ b/java/com/google/re2j/Parser.java @@ -1053,21 +1053,22 @@ private void parsePerlFlags(StringIterator t) throws PatternSyntaxException { // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. + // Google source tree, (?P<name>expr) and (?<name>expr) are the + // dominant forms of named captures and both are supported. String s = t.rest(); - if (s.startsWith("(?P<")) { + if (s.startsWith("(?P<") || s.startsWith("(?<")) { // Pull out name. - int end = s.indexOf('>'); + int begin = s.charAt(2) == 'P' ? 4 : 3; + int end = s.indexOf('>', begin); if (end < 0) { throw new PatternSyntaxException(ERR_INVALID_NAMED_CAPTURE, s); } - String name = s.substring(4, end); // "name" + String name = s.substring(begin, end); // "name" t.skipString(name); - t.skip(5); // "(?P<>" + t.skip(begin + 1); // "(?P<>" or "(?<>" if (!isValidCaptureName(name)) { throw new PatternSyntaxException( - ERR_INVALID_NAMED_CAPTURE, s.substring(0, end)); // "(?P<name>" + ERR_INVALID_NAMED_CAPTURE, s.substring(0, end + 1)); // "(?P<name>" or "(?<name>" } // Like ordinary capture, but named. 
Regexp re = op(Regexp.Op.LEFT_PAREN); diff --git a/javatests/com/google/re2j/ParserTest.java b/javatests/com/google/re2j/ParserTest.java index 82d4813b..87cd152a 100644 --- a/javatests/com/google/re2j/ParserTest.java +++ b/javatests/com/google/re2j/ParserTest.java @@ -232,6 +232,7 @@ public boolean applies(int r) { // Test named captures {"(?Pa)", "cap{name:lit{a}}"}, + {"(?a)", "cap{name:lit{a}}"}, // Case-folded literals {"[Aa]", "litfold{A}"}, @@ -530,12 +531,20 @@ private static String runesToString(int[] runes) { "(?Pa)", "(?P<>a)", + "(?a", + "(?", + "(?a)", + "(?<>a)", "[a-Z]", "(?i)[a-Z]", "a{100000}", "a{100000,}", // Group names may not be repeated "(?Pbar)(?Pbaz)", + "(?Pbar)(?baz)", + "(?bar)(?Pbaz)", + "(?bar)(?baz)", "\\x", // https://github.com/google/re2j/issues/103 "\\xv", // https://github.com/google/re2j/issues/103 }; @@ -550,6 +559,7 @@ private static String runesToString(int[] runes) { "\\Q\\\\\\\\\\E", "(?:a)", "(?Pa)", + "(?a)", }; private static final String[] ONLY_POSIX = {