File: EmbeddedLanguages\RegularExpressions\RegexParser.cs
Web Access
Project: src\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions;
 
using static EmbeddedSyntaxHelpers;
using static RegexHelpers;
using RegexAlternatingSequenceList = EmbeddedSeparatedSyntaxNodeList<RegexKind, RegexNode, RegexSequenceNode>;
using RegexNodeOrToken = EmbeddedSyntaxNodeOrToken<RegexKind, RegexNode>;
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
 
/// <summary>
/// Produces a <see cref="RegexTree"/> from a sequence of <see cref="VirtualChar"/> characters.
///
/// Importantly, this parser attempts to replicate diagnostics with almost the exact same text
/// as the native .NET regex parser.  This is important so that users get an understandable
/// experience where it appears to them that this is all one cohesive system and that the IDE
/// will let them discover and fix the same issues they would encounter when previously trying
/// to just compile and execute these regexes.
/// </summary>
/// <remarks>
/// Invariants we try to maintain (and should consider a bug if we do not): l 1. If the .NET
/// regex parser does not report an error for a given pattern, we should not either. it would be
/// very bad if we told the user there was something wrong with there pattern when there really
/// wasn't.
///
/// 2. If the .NET regex parser does report an error for a given pattern, we should either not
/// report an error (not recommended) or report the same error at an appropriate location in the
/// pattern.  Not reporting the error can be confusing as the user will think their pattern is
/// ok, when it really is not.  However, it can be acceptable to do this as it's not telling
/// them that something is actually wrong, and it may be too difficult to find and report the
/// same error.  Note: there is only one time we do this in this parser (see the deviation
/// documented in <see cref="ParsePossibleEcmascriptBackreferenceEscape"/>).
///
/// Note1: "report the same error" means that we will attempt to report the error using the same
/// text the .NET regex parser uses for its error messages.  This is so that the user is not
/// confused when they use the IDE vs running the regex by getting different messages for the
/// same issue.
///
/// Note2: the above invariants make life difficult at times.  This happens due to the fact that
/// the .NET parser is multi-pass.  Meaning it does a first scan (which may report errors), then
/// does the full parse.  This means that it might report an error in a later location during
/// the initial scan than it would during the parse.  We replicate that behavior to follow the
/// second invariant.
///
/// Note3: It would be nice if we could check these invariants at runtime, so we could control
/// our behavior by the behavior of the real .NET regex engine.  For example, if the .NET regex
/// engine did not report any issues, we could suppress any diagnostics we generated and we
/// could log an NFW to record which pattern we deviated on so we could fix the issue for a
/// future release.  However, we cannot do this as the .NET regex engine has no guarantees about
/// its performance characteristics.  For example, certain regex patterns might end up causing
/// that engine to consume unbounded amounts of CPU and memory.  This is because the .NET regex
/// engine is not just a parser, but something that builds an actual recognizer using techniques
/// that are not necessarily bounded.  As such, while we test ourselves around it during our
/// tests, we cannot do the same at runtime as part of the IDE.
///
/// This parser was based off the corefx RegexParser based at:
/// https://github.com/dotnet/corefx/blob/f759243d724f462da0bcef54e86588f8a55352c6/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs#L1
///
/// Note4: The .NET parser itself changes over time (for example to fix behavior that even it
/// thinks is buggy).  When this happens, we have to make a choice as to which behavior to
/// follow. In general, the overall principle is that we should follow the more lenient
/// behavior.  If we end up taking the more strict interpretation we risk giving people an error
/// during design time that they would not get at runtime.  It's far worse to have that than to
/// not report an error, even though one might happen later.
/// </remarks>
internal partial struct RegexParser
{
    private readonly ImmutableDictionary<string, TextSpan> _captureNamesToSpan;
    private readonly ImmutableDictionary<int, TextSpan> _captureNumbersToSpan;
 
    private RegexLexer _lexer;
    private RegexOptions _options;
    private RegexToken _currentToken;
    private int _recursionDepth;
 
    private RegexParser(
        VirtualCharSequence text, RegexOptions options,
        ImmutableDictionary<string, TextSpan> captureNamesToSpan,
        ImmutableDictionary<int, TextSpan> captureNumbersToSpan) : this()
    {
        _lexer = new RegexLexer(text);
        _options = options;
 
        _captureNamesToSpan = captureNamesToSpan;
        _captureNumbersToSpan = captureNumbersToSpan;
 
        // Get the first token.  It is allowed to have trivia on it.
        ConsumeCurrentToken(allowTrivia: true);
    }
 
    /// <summary>
    /// Returns the latest token the lexer has produced, and then asks the lexer to 
    /// produce the next token after that.
    /// </summary>
    /// <param name="allowTrivia">Whether or not trivia is allowed on the next token
    /// produced.  In the .NET parser trivia is only allowed on a few constructs,
    /// and our parser mimics that behavior.  Note that even if trivia is allowed,
    /// the type of trivia that can be scanned depends on the current RegexOptions.
    /// For example, if <see cref="RegexOptions.IgnorePatternWhitespace"/> is currently
    /// enabled, then '#...' comments are allowed.  Otherwise, only '(?#...)' comments
    /// are allowed.</param>
    private RegexToken ConsumeCurrentToken(bool allowTrivia)
    {
        var previous = _currentToken;
        _currentToken = _lexer.ScanNextToken(allowTrivia, _options);
        return previous;
    }
 
    /// <summary>
    /// Given an input text, and set of options, parses out a fully representative syntax tree 
    /// and list of diagnostics.  Parsing should always succeed, except in the case of the stack 
    /// overflowing.
    /// </summary>
    public static RegexTree? TryParse(VirtualCharSequence text, RegexOptions options)
    {
        if (text.IsDefault)
        {
            return null;
        }
 
        try
        {
            // Parse the tree once, to figure out the capture groups.  These are needed
            // to then parse the tree again, as the captures will affect how we interpret
            // certain things (i.e. escape references) and what errors will be reported.
            //
            // This is necessary as .NET regexes allow references to *future* captures.
            // As such, we don't know when we're seeing a reference if it's to something
            // that exists or not.
            var tree1 = new RegexParser(text, options,
                ImmutableDictionary<string, TextSpan>.Empty,
                ImmutableDictionary<int, TextSpan>.Empty).ParseTree();
 
            var (captureNames, captureNumbers) = CaptureInfoAnalyzer.Analyze(text, tree1.Root, options);
 
            var tree2 = new RegexParser(
                text, options, captureNames, captureNumbers).ParseTree();
            return tree2;
        }
        catch (InsufficientExecutionStackException)
        {
            return null;
        }
    }
 
    private RegexTree ParseTree()
    {
        // Most callers to ParseAlternatingSequences are from group constructs.  As those
        // constructs will have already consumed the open paren, they don't want this sub-call
        // to consume through close-paren tokens as they want that token for themselves.
        // However, we're the topmost call and have not consumed an open paren.  And, we want
        // this call to consume all the way to the end, eating up excess close-paren tokens that
        // are encountered.
        var expression = this.ParseAlternatingSequences(consumeCloseParen: true, isConditional: false);
        Debug.Assert(_lexer.Position == _lexer.Text.Length);
        Debug.Assert(_currentToken.Kind == RegexKind.EndOfFile);
 
        var root = new RegexCompilationUnit(expression, _currentToken);
 
        using var _1 = PooledHashSet<EmbeddedDiagnostic>.GetInstance(out var seenDiagnostics);
        using var _2 = ArrayBuilder<EmbeddedDiagnostic>.GetInstance(out var diagnostics);
        CollectDiagnostics(root, seenDiagnostics, diagnostics);
 
        return new RegexTree(
            _lexer.Text, root, diagnostics.ToImmutable(),
            _captureNamesToSpan, _captureNumbersToSpan);
    }
 
    private void CollectDiagnostics(
        RegexNode node, HashSet<EmbeddedDiagnostic> seenDiagnostics, ArrayBuilder<EmbeddedDiagnostic> diagnostics)
    {
        try
        {
            _recursionDepth++;
            StackGuard.EnsureSufficientExecutionStack(_recursionDepth);
            CollectDiagnosticsWorker(node, seenDiagnostics, diagnostics);
        }
        finally
        {
            _recursionDepth--;
        }
    }
 
    private void CollectDiagnosticsWorker(RegexNode node, HashSet<EmbeddedDiagnostic> seenDiagnostics, ArrayBuilder<EmbeddedDiagnostic> diagnostics)
    {
        foreach (var child in node)
        {
            if (child.IsNode)
            {
                CollectDiagnostics(child.Node, seenDiagnostics, diagnostics);
            }
            else
            {
                var token = child.Token;
                foreach (var trivia in token.LeadingTrivia)
                {
                    AddUniqueDiagnostics(seenDiagnostics, trivia.Diagnostics, diagnostics);
                }
 
                // We never place trailing trivia on regex tokens.
                Debug.Assert(token.TrailingTrivia.IsEmpty);
                AddUniqueDiagnostics(seenDiagnostics, token.Diagnostics, diagnostics);
            }
        }
    }
 
    /// <summary>
    /// It's very common to have duplicated diagnostics.  For example, consider "((". This will
    /// have two 'missing )' diagnostics, both at the end.  Reporting both isn't helpful, so we
    /// filter duplicates out here.
    /// </summary>
    private static void AddUniqueDiagnostics(
        HashSet<EmbeddedDiagnostic> seenDiagnostics, ImmutableArray<EmbeddedDiagnostic> from, ArrayBuilder<EmbeddedDiagnostic> to)
    {
        foreach (var diagnostic in from)
        {
            if (seenDiagnostics.Add(diagnostic))
            {
                to.Add(diagnostic);
            }
        }
    }
 
    private RegexAlternationNode ParseAlternatingSequences(
        bool consumeCloseParen, bool isConditional)
    {
        try
        {
            _recursionDepth++;
            StackGuard.EnsureSufficientExecutionStack(_recursionDepth);
            return ParseAlternatingSequencesWorker(consumeCloseParen, isConditional);
        }
        finally
        {
            _recursionDepth--;
        }
    }
 
    /// <summary>
    /// Parses out code of the form: ...|...|... This is the type of code you have at the top level of a regex, or
    /// inside any grouping construct.  Note that sequences can be empty in .NET regex.  i.e. the following is
    /// legal:
    ///
    ///     ...||...
    ///
    /// An empty sequence just means "match at every position in the test string".
    /// </summary>
    private RegexAlternationNode ParseAlternatingSequencesWorker(
        bool consumeCloseParen, bool isConditional)
    {
        using var _ = ArrayBuilder<RegexNodeOrToken>.GetInstance(out var builder);
        builder.Add(ParseSequence(consumeCloseParen));
 
        while (_currentToken.Kind == RegexKind.BarToken)
        {
            // Trivia allowed between the | and the next token.
            var barToken = ConsumeCurrentToken(allowTrivia: true);
            if (isConditional && builder.Count >= 3)
            {
                // a conditional alternative expression only allows two cases (the true and false branches). We
                // already have seen both once we have 3 items (`t | f`).  Error on any further cases we see.
                barToken = barToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Too_many_bars_in_conditional_grouping,
                    barToken.GetSpan()));
            }
 
            builder.Add(barToken);
            builder.Add(ParseSequence(consumeCloseParen));
        }
 
        return new RegexAlternationNode(new RegexAlternatingSequenceList(builder.ToImmutable()));
    }
 
    private RegexSequenceNode ParseSequence(bool consumeCloseParen)
    {
        using var _1 = ArrayBuilder<RegexExpressionNode>.GetInstance(out var builder);
        while (ShouldConsumeSequenceElement(consumeCloseParen))
        {
            var last = builder.Count == 0 ? null : builder.Last();
            builder.Add(ParsePrimaryExpressionAndQuantifiers(last));
        }
 
        // We will commonly get tons of text nodes in a row.  For example, the regex `abc` will be three text nodes
        // in a row.  To help save on memory try to merge that into one single text node.
        using var _2 = ArrayBuilder<RegexExpressionNode>.GetInstance(out var sequence);
        MergeTextNodes(builder, sequence);
 
        return new RegexSequenceNode(sequence.ToImmutable());
    }
 
    private static void MergeTextNodes(ArrayBuilder<RegexExpressionNode> list, ArrayBuilder<RegexExpressionNode> final)
    {
        // Iterate all the nodes in the sequence we have, adding them directly to
        // `final` if they are not text nodes.  If they are text nodes, we attempt
        // to keep merging them with any following text nodes as long as well.
        for (var index = 0; index < list.Count;)
        {
            var current = list[index];
            if (current.Kind != RegexKind.Text)
            {
                // Not a text node.  Just add as-is, and move to the next node.
                index++;
                final.Add(current);
                continue;
            }
 
            // Got a text node.  Try to combine it with all following nodes.
            index = MergeAndAddAdjacentTextNodes(list, final, index);
        }
 
        return;
 
        // local functions
 
        static int MergeAndAddAdjacentTextNodes(
            ArrayBuilder<RegexExpressionNode> list,
            ArrayBuilder<RegexExpressionNode> final,
            int index)
        {
            var startIndex = index;
            var startTextNode = (RegexTextNode)list[startIndex];
 
            // Keep walking forward as long as we hit text nodes and we can 
            // merge that text node with the previous text node.
            index++;
            var lastTextNode = startTextNode;
            for (; index < list.Count; index++)
            {
                var currentNode = list[index];
                if (!CanMerge(lastTextNode, currentNode))
                {
                    // Hit something we couldn't merge with our last text node
                    // Break out and merge what we have so far.  'index' will
                    // be pointing at the right node for our caller.
                    break;
                }
 
                lastTextNode = (RegexTextNode)currentNode;
            }
 
            // If didn't have multiple text nodes in a row.  Just return the
            // starting node.  Otherwise, create one text node that has a token
            // that spans from the start of the first node to the end of the last node.
            final.Add(startTextNode == lastTextNode
                ? startTextNode
                : new RegexTextNode(CreateToken(
                    RegexKind.TextToken, startTextNode.TextToken.LeadingTrivia,
                    VirtualCharSequence.FromBounds(
                        startTextNode.TextToken.VirtualChars,
                        lastTextNode.TextToken.VirtualChars))));
 
            return index;
        }
 
        // Local functions
        static bool CanMerge(RegexTextNode lastNode, RegexExpressionNode next)
        {
            if (next.Kind == RegexKind.Text)
            {
                var lastTextToken = lastNode.TextToken;
                var nextTextToken = ((RegexTextNode)next).TextToken;
 
                // Can't merge if the next text node has leading trivia. Also, conservatively 
                // don't allow merging if there are diagnostics or values for these tokens.  
                // We might be able to support that, but it's easier to not do anything that 
                // might break an expectation someone might have downstream.                    /
                if (lastTextToken.Diagnostics.Length == 0 &&
                    nextTextToken.Diagnostics.Length == 0 &&
                    lastTextToken.Value == null &&
                    nextTextToken.Value == null &&
                    nextTextToken.LeadingTrivia.Length == 0)
                {
                    lastTextToken.VirtualChars.AssertAdjacentTo(nextTextToken.VirtualChars);
                    return true;
                }
            }
 
            return false;
        }
    }
 
    private readonly bool ShouldConsumeSequenceElement(bool consumeCloseParen)
    {
        if (_currentToken.Kind == RegexKind.EndOfFile)
        {
            return false;
        }
 
        if (_currentToken.Kind == RegexKind.BarToken)
        {
            return false;
        }
 
        if (_currentToken.Kind == RegexKind.CloseParenToken)
        {
            return consumeCloseParen;
        }
 
        return true;
    }
 
    private RegexExpressionNode ParsePrimaryExpressionAndQuantifiers(RegexExpressionNode? lastExpression)
    {
        var current = ParsePrimaryExpression(lastExpression);
        if (current.Kind == RegexKind.SimpleOptionsGrouping)
        {
            // Simple options (i.e. "(?i-x)" can't have quantifiers attached to them).
            return current;
        }
 
        return _currentToken.Kind switch
        {
            RegexKind.AsteriskToken => ParseZeroOrMoreQuantifier(current),
            RegexKind.PlusToken => ParseOneOrMoreQuantifier(current),
            RegexKind.QuestionToken => ParseZeroOrOneQuantifier(current),
            RegexKind.OpenBraceToken => TryParseNumericQuantifier(current, _currentToken),
            _ => current,
        };
    }
 
    private RegexExpressionNode TryParseLazyQuantifier(RegexQuantifierNode quantifier)
    {
        if (_currentToken.Kind != RegexKind.QuestionToken)
        {
            return quantifier;
        }
 
        // Whitespace allowed after the question and the next sequence element.
        return new RegexLazyQuantifierNode(quantifier,
            ConsumeCurrentToken(allowTrivia: true));
    }
 
    private RegexExpressionNode ParseZeroOrMoreQuantifier(RegexPrimaryExpressionNode current)
    {
        // Whitespace allowed between the quantifier and the possible following ? or next sequence item.
        return TryParseLazyQuantifier(new RegexZeroOrMoreQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true)));
    }
 
    private RegexExpressionNode ParseOneOrMoreQuantifier(RegexPrimaryExpressionNode current)
    {
        // Whitespace allowed between the quantifier and the possible following ? or next sequence item.
        return TryParseLazyQuantifier(new RegexOneOrMoreQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true)));
    }
 
    private RegexExpressionNode ParseZeroOrOneQuantifier(RegexPrimaryExpressionNode current)
    {
        // Whitespace allowed between the quantifier and the possible following ? or next sequence item.
        return TryParseLazyQuantifier(new RegexZeroOrOneQuantifierNode(current, ConsumeCurrentToken(allowTrivia: true)));
    }
 
    private RegexExpressionNode TryParseNumericQuantifier(
        RegexPrimaryExpressionNode expression, RegexToken openBraceToken)
    {
        var start = _lexer.Position;
 
        if (!TryParseNumericQuantifierParts(
                out var firstNumberToken,
                out var commaToken,
                out var secondNumberToken,
                out var closeBraceToken))
        {
            _currentToken = openBraceToken;
            _lexer.Position = start;
            return expression;
        }
 
        var quantifier = CreateQuantifier(
            expression, openBraceToken, firstNumberToken, commaToken,
            secondNumberToken, closeBraceToken);
 
        return TryParseLazyQuantifier(quantifier);
    }
 
    private static RegexQuantifierNode CreateQuantifier(
        RegexPrimaryExpressionNode expression,
        RegexToken openBraceToken, RegexToken firstNumberToken, RegexToken? commaToken,
        RegexToken? secondNumberToken, RegexToken closeBraceToken)
    {
        if (commaToken != null)
        {
            return secondNumberToken != null
                ? new RegexClosedNumericRangeQuantifierNode(expression, openBraceToken, firstNumberToken, commaToken.Value, secondNumberToken.Value, closeBraceToken)
                : new RegexOpenNumericRangeQuantifierNode(expression, openBraceToken, firstNumberToken, commaToken.Value, closeBraceToken);
        }
 
        return new RegexExactNumericQuantifierNode(expression, openBraceToken, firstNumberToken, closeBraceToken);
    }
 
    private bool TryParseNumericQuantifierParts(
        out RegexToken firstNumberToken, out RegexToken? commaToken,
        out RegexToken? secondNumberToken, out RegexToken closeBraceToken)
    {
        firstNumberToken = default;
        commaToken = null;
        secondNumberToken = null;
        closeBraceToken = default;
 
        var firstNumber = _lexer.TryScanNumber();
        if (firstNumber == null)
        {
            return false;
        }
 
        firstNumberToken = firstNumber.Value;
 
        // Nothing allowed between {x,n}
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind == RegexKind.CommaToken)
        {
            commaToken = _currentToken;
 
            var start = _lexer.Position;
            secondNumberToken = _lexer.TryScanNumber();
 
            if (secondNumberToken == null)
            {
                // Nothing allowed between {x,n}
                ResetToPositionAndConsumeCurrentToken(start, allowTrivia: false);
            }
            else
            {
                var secondNumberTokenLocal = secondNumberToken.Value;
 
                // Nothing allowed between {x,n}
                ConsumeCurrentToken(allowTrivia: false);
 
                var val1 = (int)firstNumberToken.Value;
                var val2 = (int)secondNumberTokenLocal.Value;
 
                if (val2 < val1)
                {
                    secondNumberTokenLocal = secondNumberTokenLocal.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                        FeaturesResources.Illegal_x_y_with_x_less_than_y,
                        secondNumberTokenLocal.GetSpan()));
                    secondNumberToken = secondNumberTokenLocal;
                }
            }
        }
 
        if (_currentToken.Kind != RegexKind.CloseBraceToken)
        {
            return false;
        }
 
        // Whitespace allowed between the quantifier and the possible following ? or next sequence item.
        closeBraceToken = ConsumeCurrentToken(allowTrivia: true);
        return true;
    }
 
    private void ResetToPositionAndConsumeCurrentToken(int position, bool allowTrivia)
    {
        _lexer.Position = position;
        ConsumeCurrentToken(allowTrivia);
    }
 
    private RegexPrimaryExpressionNode ParsePrimaryExpression(RegexExpressionNode? lastExpression)
    {
        return _currentToken.Kind switch
        {
            RegexKind.DotToken => ParseWildcard(),
            RegexKind.CaretToken => ParseStartAnchor(),
            RegexKind.DollarToken => ParseEndAnchor(),
            RegexKind.BackslashToken => ParseEscape(_currentToken, allowTriviaAfterEnd: true),
            RegexKind.OpenBracketToken => ParseCharacterClass(),
            RegexKind.OpenParenToken => ParseGrouping(),
            RegexKind.CloseParenToken => ParseUnexpectedCloseParenToken(),
            RegexKind.OpenBraceToken => ParsePossibleUnexpectedNumericQuantifier(lastExpression),
            RegexKind.AsteriskToken or RegexKind.PlusToken or RegexKind.QuestionToken => ParseUnexpectedQuantifier(lastExpression),
            _ => ParseText(),
        };
    }
 
    private RegexPrimaryExpressionNode ParsePossibleUnexpectedNumericQuantifier(RegexExpressionNode? lastExpression)
    {
        // Native parser looks for something like {0,1} in a top level sequence and reports
        // an explicit error that that's not allowed.  However, something like {0, 1} is fine
        // and is treated as six textual tokens.
        var openBraceToken = _currentToken.With(kind: RegexKind.TextToken);
        var start = _lexer.Position;
 
        if (TryParseNumericQuantifierParts(
                out _, out _, out _, out _))
        {
            // Report that a numeric quantifier isn't allowed here.
            CheckQuantifierExpression(lastExpression, ref openBraceToken);
        }
 
        // Started with { but wasn't a numeric quantifier.  This is totally legal and is just
        // a textual sequence.  Restart, scanning this token as a normal sequence element.
        ResetToPositionAndConsumeCurrentToken(start, allowTrivia: true);
        return new RegexTextNode(openBraceToken);
    }
 
    private RegexPrimaryExpressionNode ParseUnexpectedCloseParenToken()
    {
        var token = _currentToken.With(kind: RegexKind.TextToken).AddDiagnosticIfNone(
            new EmbeddedDiagnostic(FeaturesResources.Too_many_close_parens, _currentToken.GetSpan()));
 
        // Technically, since an error occurred, we can do whatever we want here.  However,
        // the spirit of the native parser is that top level sequence elements are allowed
        // to have trivia.  So that's the behavior we mimic.
        ConsumeCurrentToken(allowTrivia: true);
        return new RegexTextNode(token);
    }
 
    private RegexPrimaryExpressionNode ParseText()
    {
        var token = ConsumeCurrentToken(allowTrivia: true);
        Debug.Assert(token.Value == null);
 
        // Allow trivia between this piece of text and the next sequence element
        return new RegexTextNode(token.With(kind: RegexKind.TextToken));
    }
 
    private RegexPrimaryExpressionNode ParseEndAnchor()
    {
        // Allow trivia between this anchor and the next sequence element
        return new RegexAnchorNode(RegexKind.EndAnchor, ConsumeCurrentToken(allowTrivia: true));
    }
 
    private RegexPrimaryExpressionNode ParseStartAnchor()
    {
        // Allow trivia between this anchor and the next sequence element
        return new RegexAnchorNode(RegexKind.StartAnchor, ConsumeCurrentToken(allowTrivia: true));
    }
 
    private RegexPrimaryExpressionNode ParseWildcard()
    {
        // Allow trivia between the . and the next sequence element
        return new RegexWildcardNode(ConsumeCurrentToken(allowTrivia: true));
    }
 
    private RegexGroupingNode ParseGrouping()
    {
        var start = _lexer.Position;
 
        // Check what immediately follows the (.  If we have (? it is processed specially.
        // However, we do not treat (? the same as ( ?
        var openParenToken = ConsumeCurrentToken(allowTrivia: false);
 
        switch (_currentToken.Kind)
        {
            case RegexKind.QuestionToken:
                return ParseGroupQuestion(openParenToken, _currentToken);
 
            default:
                // Wasn't (? just parse this as a normal group.
                _lexer.Position = start;
                return ParseSimpleGroup(openParenToken);
        }
    }
 
    private RegexToken ParseGroupingCloseParen()
    {
        switch (_currentToken.Kind)
        {
            case RegexKind.CloseParenToken:
                // Grouping completed normally.  Allow trivia between it and the next sequence element.
                return ConsumeCurrentToken(allowTrivia: true);
 
            default:
                return CreateMissingToken(RegexKind.CloseParenToken).AddDiagnosticIfNone(
                    new EmbeddedDiagnostic(FeaturesResources.Not_enough_close_parens, GetTokenStartPositionSpan(_currentToken)));
        }
    }
 
    private RegexSimpleGroupingNode ParseSimpleGroup(RegexToken openParenToken)
        => new(
            openParenToken, ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen());
 
    private RegexExpressionNode ParseGroupingEmbeddedExpression(RegexOptions embeddedOptions)
    {
        // Save and restore options when we go into, and pop out of a group node.
        var currentOptions = _options;
        _options = embeddedOptions;
 
        // We're parsing the embedded sequence inside the current group.  As this is a sequence
        // we want to allow trivia between the current token we're on, and the first token
        // of the embedded sequence.
        ConsumeCurrentToken(allowTrivia: true);
 
        // When parsing out the sequence don't grab the close paren, that will be for our caller
        // to get.
        var expression = this.ParseAlternatingSequences(consumeCloseParen: false, isConditional: false);
        _options = currentOptions;
        return expression;
    }
 
    private readonly TextSpan GetTokenSpanIncludingEOF(RegexToken token)
        => token.Kind == RegexKind.EndOfFile
            ? GetTokenStartPositionSpan(token)
            : token.GetSpan();
 
    private readonly TextSpan GetTokenStartPositionSpan(RegexToken token)
    {
        return token.Kind == RegexKind.EndOfFile
            ? new TextSpan(_lexer.Text.Last().Span.End, 0)
            : new TextSpan(token.VirtualChars[0].Span.Start, 0);
    }
 
    private RegexGroupingNode ParseGroupQuestion(RegexToken openParenToken, RegexToken questionToken)
    {
        var optionsToken = _lexer.TryScanOptions();
        if (optionsToken != null)
        {
            return ParseOptionsGroupingNode(openParenToken, questionToken, optionsToken.Value);
        }
 
        var afterQuestionPos = _lexer.Position;
 
        // Lots of possible options when we see (?.  Look at the immediately following character
        // (without any allowed spaces) to decide what to parse out next.
        ConsumeCurrentToken(allowTrivia: false);
        switch (_currentToken.Kind)
        {
            case RegexKind.LessThanToken:
                // (?<=...) or (?<!...) or (?<...>...) or (?<...-...>...)
                return ParseLookbehindOrNamedCaptureOrBalancingGrouping(openParenToken, questionToken);
 
            case RegexKind.SingleQuoteToken:
                //  (?'...'...) or (?'...-...'...)
                return ParseNamedCaptureOrBalancingGrouping(
                    openParenToken, questionToken, _currentToken);
 
            case RegexKind.OpenParenToken:
                // alternation construct (?(...) | )
                return ParseConditionalGrouping(openParenToken, questionToken);
 
            case RegexKind.ColonToken:
                return ParseNonCapturingGroupingNode(openParenToken, questionToken);
 
            case RegexKind.EqualsToken:
                return ParsePositiveLookaheadGrouping(openParenToken, questionToken);
 
            case RegexKind.ExclamationToken:
                return ParseNegativeLookaheadGrouping(openParenToken, questionToken);
 
            case RegexKind.GreaterThanToken:
                return ParseAtomicGrouping(openParenToken, questionToken);
 
            default:
                if (_currentToken.Kind != RegexKind.CloseParenToken)
                {
                    // Native parser reports "Unrecognized grouping construct", *except* for (?)
                    openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                        FeaturesResources.Unrecognized_grouping_construct,
                        openParenToken.GetSpan()));
                }
 
                break;
        }
 
        // (?)
        // Parse this as a normal group. The question will immediately error as it's a 
        // quantifier not following anything.
        _lexer.Position = afterQuestionPos - 1;
        return ParseSimpleGroup(openParenToken);
    }
 
    private RegexConditionalGroupingNode ParseConditionalGrouping(RegexToken openParenToken, RegexToken questionToken)
    {
        var innerOpenParenToken = _currentToken;
        var afterInnerOpenParen = _lexer.Position;
 
        var captureToken = _lexer.TryScanNumberOrCaptureName();
        if (captureToken == null)
        {
            return ParseConditionalExpressionGrouping(openParenToken, questionToken);
        }
 
        var capture = captureToken.Value;
 
        RegexToken innerCloseParenToken;
        if (capture.Kind == RegexKind.NumberToken)
        {
            // If it's a numeric group, it has to be immediately followed by a ) and the
            // numeric reference has to exist.
            //
            // That means that (?(4 ) is not treated as an embedded expression but as an
            // error.  This is different from (?(a ) which will be treated as an embedded
            // expression, and different from (?(a) will be treated as an embedded
            // expression or capture group depending on if 'a' is a existing capture name.
 
            ConsumeCurrentToken(allowTrivia: false);
            if (_currentToken.Kind == RegexKind.CloseParenToken)
            {
                innerCloseParenToken = _currentToken;
                if (!HasCapture((int)capture.Value))
                {
                    capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                        FeaturesResources.Reference_to_undefined_group,
                        capture.GetSpan()));
                }
            }
            else
            {
                innerCloseParenToken = CreateMissingToken(RegexKind.CloseParenToken);
                capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Malformed,
                    capture.GetSpan()));
                MoveBackBeforePreviousScan();
            }
        }
        else
        {
            // If it's a capture name, it's ok if that capture doesn't exist.  In that case we
            // will just treat this as an conditional expression.
            if (!HasCapture((string)capture.Value))
            {
                _lexer.Position = afterInnerOpenParen;
                return ParseConditionalExpressionGrouping(openParenToken, questionToken);
            }
 
            // Capture name existed.  For this to be a capture grouping it exactly has to
            // match (?(a)   anything other than a close paren after the ) will make this
            // into a conditional expression.
            ConsumeCurrentToken(allowTrivia: false);
            if (_currentToken.Kind != RegexKind.CloseParenToken)
            {
                _lexer.Position = afterInnerOpenParen;
                return ParseConditionalExpressionGrouping(openParenToken, questionToken);
            }
 
            innerCloseParenToken = _currentToken;
        }
 
        // Was (?(name) or (?(num)  and name/num was a legal capture name.  Parse
        // this out as a conditional grouping.  Because we're going to be parsing out
        // an embedded sequence, allow trivia before the first element.
        ConsumeCurrentToken(allowTrivia: true);
        var result = ParseConditionalGroupingResult();
 
        return new RegexConditionalCaptureGroupingNode(
            openParenToken, questionToken,
            innerOpenParenToken, capture, innerCloseParenToken,
            result, ParseGroupingCloseParen());
    }
 
    private readonly bool HasCapture(int value)
        => _captureNumbersToSpan.ContainsKey(value);
 
    private readonly bool HasCapture(string value)
        => _captureNamesToSpan.ContainsKey(value);
 
    private void MoveBackBeforePreviousScan()
    {
        if (_currentToken.Kind != RegexKind.EndOfFile)
        {
            // Move back to un-consume whatever we just consumed.
            _lexer.Position--;
        }
    }
 
    private RegexConditionalGroupingNode ParseConditionalExpressionGrouping(
        RegexToken openParenToken, RegexToken questionToken)
    {
        // Reproduce very specific errors the .NET regex parser looks for.  Technically,
        // we would error out in these cases no matter what.  However, it means we can
        // stringently enforce that our parser produces the same errors as the native one.
        //
        // Move back before the (
        _lexer.Position--;
        if (_lexer.IsAt("(?#"))
        {
            var pos = _lexer.Position;
            var comment = _lexer.ScanComment(options: default);
            Contract.ThrowIfFalse(comment.HasValue);
            _lexer.Position = pos;
 
            if (comment.Value.Diagnostics.Length > 0)
            {
                openParenToken = openParenToken.AddDiagnosticIfNone(comment.Value.Diagnostics[0]);
            }
            else
            {
                openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Alternation_conditions_cannot_be_comments,
                    openParenToken.GetSpan()));
            }
        }
        else if (_lexer.IsAt("(?'"))
        {
            openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Alternation_conditions_do_not_capture_and_cannot_be_named,
                openParenToken.GetSpan()));
        }
        else if (_lexer.IsAt("(?<"))
        {
            if (!_lexer.IsAt("(?<!") &&
                !_lexer.IsAt("(?<="))
            {
                openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Alternation_conditions_do_not_capture_and_cannot_be_named,
                    openParenToken.GetSpan()));
            }
        }
 
        // Consume the ( once more.
        ConsumeCurrentToken(allowTrivia: false);
        Debug.Assert(_currentToken.Kind == RegexKind.OpenParenToken);
 
        // Parse out the grouping that starts with the second open paren in (?(
        // this will get us to (?(...)
        var grouping = ParseGrouping();
 
        // Now parse out the embedded expression that follows that.  this will get us to
        // (?(...)...
        var result = ParseConditionalGroupingResult();
 
        // Finally, grab the close paren and produce (?(...)...)
        return new RegexConditionalExpressionGroupingNode(
            openParenToken, questionToken,
            grouping, result, ParseGroupingCloseParen());
    }
 
    private RegexExpressionNode ParseConditionalGroupingResult()
    {
        var currentOptions = _options;
        var result = this.ParseAlternatingSequences(consumeCloseParen: false, isConditional: true);
        _options = currentOptions;
        return result;
    }
 
    private RegexGroupingNode ParseLookbehindOrNamedCaptureOrBalancingGrouping(
        RegexToken openParenToken, RegexToken questionToken)
    {
        var start = _lexer.Position;
 
        // We have  (?<  Look for  (?<=  or  (?<!
        var lessThanToken = ConsumeCurrentToken(allowTrivia: false);
 
        switch (_currentToken.Kind)
        {
            case RegexKind.EqualsToken:
                return new RegexPositiveLookbehindGroupingNode(
                    openParenToken, questionToken, lessThanToken, _currentToken,
                    ParseGroupingEmbeddedExpression(_options | RegexOptions.RightToLeft), ParseGroupingCloseParen());
 
            case RegexKind.ExclamationToken:
                return new RegexNegativeLookbehindGroupingNode(
                    openParenToken, questionToken, lessThanToken, _currentToken,
                    ParseGroupingEmbeddedExpression(_options | RegexOptions.RightToLeft), ParseGroupingCloseParen());
 
            default:
                // Didn't have a lookbehind group.  Parse out as  (?<...>  or  (?<...-...>
                _lexer.Position = start;
                return ParseNamedCaptureOrBalancingGrouping(openParenToken, questionToken, lessThanToken);
        }
    }
 
    private RegexGroupingNode ParseNamedCaptureOrBalancingGrouping(
        RegexToken openParenToken, RegexToken questionToken, RegexToken openToken)
    {
        if (_lexer.Position == _lexer.Text.Length)
        {
            openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Unrecognized_grouping_construct,
                GetSpan(openParenToken, openToken)));
        }
 
        // (?<...>...) or (?<...-...>...)
        // (?'...'...) or (?'...-...'...)
        var captureToken = _lexer.TryScanNumberOrCaptureName();
        if (captureToken == null)
        {
            // Can't have any trivia between the elements in this grouping header.
            ConsumeCurrentToken(allowTrivia: false);
            captureToken = CreateMissingToken(RegexKind.CaptureNameToken);
 
            if (_currentToken.Kind == RegexKind.MinusToken)
            {
                return ParseBalancingGrouping(
                    openParenToken, questionToken, openToken, captureToken.Value);
            }
            else
            {
                openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character,
                    GetTokenSpanIncludingEOF(_currentToken)));
 
                // If we weren't at the end of the text, go back to before whatever character
                // we just consumed.
                MoveBackBeforePreviousScan();
            }
        }
 
        var capture = captureToken.Value;
        if (capture.Kind == RegexKind.NumberToken && (int)capture.Value == 0)
        {
            capture = capture.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Capture_number_cannot_be_zero,
                capture.GetSpan()));
        }
 
        // Can't have any trivia between the elements in this grouping header.
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind == RegexKind.MinusToken)
        {
            // Have  (?<...-  parse out the balancing group form.
            return ParseBalancingGrouping(
                openParenToken, questionToken,
                openToken, capture);
        }
 
        var closeToken = ParseCaptureGroupingCloseToken(ref openParenToken, openToken);
 
        return new RegexCaptureGroupingNode(
            openParenToken, questionToken,
            openToken, capture, closeToken,
            ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen());
    }
 
    private RegexToken ParseCaptureGroupingCloseToken(ref RegexToken openParenToken, RegexToken openToken)
    {
        if ((openToken.Kind == RegexKind.LessThanToken && _currentToken.Kind == RegexKind.GreaterThanToken) ||
            (openToken.Kind == RegexKind.SingleQuoteToken && _currentToken.Kind == RegexKind.SingleQuoteToken))
        {
            return _currentToken;
        }
 
        if (_currentToken.Kind == RegexKind.EndOfFile)
        {
            openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Unrecognized_grouping_construct,
                GetSpan(openParenToken, openToken)));
        }
        else
        {
            openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character,
                _currentToken.GetSpan()));
 
            // Rewind to where we were before seeing this bogus character.
            _lexer.Position--;
        }
 
        return CreateMissingToken(
            openToken.Kind == RegexKind.LessThanToken
                ? RegexKind.GreaterThanToken : RegexKind.SingleQuoteToken);
    }
 
    private RegexBalancingGroupingNode ParseBalancingGrouping(
        RegexToken openParenToken, RegexToken questionToken,
        RegexToken openToken, RegexToken firstCapture)
    {
        var minusToken = _currentToken;
        var secondCapture = _lexer.TryScanNumberOrCaptureName();
        if (secondCapture == null)
        {
            // Invalid group name: Group names must begin with a word character
            ConsumeCurrentToken(allowTrivia: false);
 
            openParenToken = openParenToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Invalid_group_name_Group_names_must_begin_with_a_word_character,
                GetTokenSpanIncludingEOF(_currentToken)));
 
            // If we weren't at the end of the text, go back to before whatever character
            // we just consumed.
            MoveBackBeforePreviousScan();
            secondCapture = CreateMissingToken(RegexKind.CaptureNameToken);
        }
 
        var second = secondCapture.Value;
        CheckCapture(ref second);
 
        // Can't have any trivia between the elements in this grouping header.
        ConsumeCurrentToken(allowTrivia: false);
        var closeToken = ParseCaptureGroupingCloseToken(ref openParenToken, openToken);
 
        return new RegexBalancingGroupingNode(
            openParenToken, questionToken,
            openToken, firstCapture, minusToken, second, closeToken,
            ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen());
    }
 
    private readonly void CheckCapture(ref RegexToken captureToken)
    {
        if (captureToken.IsMissing)
        {
            // Don't need to check for a synthesized error capture token.
            return;
        }
 
        if (captureToken.Kind == RegexKind.NumberToken)
        {
            var val = (int)captureToken.Value;
            if (!HasCapture(val))
            {
                captureToken = captureToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    string.Format(FeaturesResources.Reference_to_undefined_group_number_0, val),
                    captureToken.GetSpan()));
            }
        }
        else
        {
            var val = (string)captureToken.Value;
            if (!HasCapture(val))
            {
                captureToken = captureToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    string.Format(FeaturesResources.Reference_to_undefined_group_name_0, val),
                    captureToken.GetSpan()));
            }
        }
    }
 
    private RegexNonCapturingGroupingNode ParseNonCapturingGroupingNode(RegexToken openParenToken, RegexToken questionToken)
        => new(
            openParenToken, questionToken, _currentToken,
            ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen());
 
    private RegexPositiveLookaheadGroupingNode ParsePositiveLookaheadGrouping(RegexToken openParenToken, RegexToken questionToken)
        => new(
            openParenToken, questionToken, _currentToken,
            ParseGroupingEmbeddedExpression(_options & ~RegexOptions.RightToLeft), ParseGroupingCloseParen());
 
    private RegexNegativeLookaheadGroupingNode ParseNegativeLookaheadGrouping(RegexToken openParenToken, RegexToken questionToken)
        => new(
            openParenToken, questionToken, _currentToken,
            ParseGroupingEmbeddedExpression(_options & ~RegexOptions.RightToLeft), ParseGroupingCloseParen());
 
    private RegexAtomicGroupingNode ParseAtomicGrouping(RegexToken openParenToken, RegexToken questionToken)
        => new(
            openParenToken, questionToken, _currentToken,
            ParseGroupingEmbeddedExpression(_options), ParseGroupingCloseParen());
 
    private RegexGroupingNode ParseOptionsGroupingNode(
        RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken)
    {
        // Only (?opts:...) or (?opts) are allowed.  After the opts must be a : or )
        ConsumeCurrentToken(allowTrivia: false);
        switch (_currentToken.Kind)
        {
            case RegexKind.CloseParenToken:
                // Allow trivia after the options and the next element in the sequence.
                _options = GetNewOptionsFromToken(_options, optionsToken);
                return new RegexSimpleOptionsGroupingNode(
                    openParenToken, questionToken, optionsToken,
                    ConsumeCurrentToken(allowTrivia: true));
 
            case RegexKind.ColonToken:
                return ParseNestedOptionsGroupingNode(openParenToken, questionToken, optionsToken);
 
            default:
                return new RegexSimpleOptionsGroupingNode(
                    openParenToken, questionToken, optionsToken,
                    CreateMissingToken(RegexKind.CloseParenToken).AddDiagnosticIfNone(
                        new EmbeddedDiagnostic(FeaturesResources.Unrecognized_grouping_construct, openParenToken.GetSpan())));
        }
    }
 
    private RegexNestedOptionsGroupingNode ParseNestedOptionsGroupingNode(
        RegexToken openParenToken, RegexToken questionToken, RegexToken optionsToken)
        => new(
            openParenToken, questionToken, optionsToken, _currentToken,
            ParseGroupingEmbeddedExpression(GetNewOptionsFromToken(_options, optionsToken)), ParseGroupingCloseParen());
 
    private static bool IsTextChar(RegexToken currentToken, char ch)
        => currentToken.Kind == RegexKind.TextToken && currentToken.VirtualChars.Length == 1 && currentToken.VirtualChars[0].Value == ch;
 
    private static RegexOptions GetNewOptionsFromToken(RegexOptions currentOptions, RegexToken optionsToken)
    {
        var copy = currentOptions;
        var on = true;
        foreach (var ch in optionsToken.VirtualChars)
        {
            switch (ch.Value)
            {
                case '-': on = false; break;
                case '+': on = true; break;
                default:
                    var newOption = OptionFromCode(ch);
                    if (on)
                    {
                        copy |= newOption;
                    }
                    else
                    {
                        copy &= ~newOption;
                    }
 
                    break;
            }
        }
 
        return copy;
    }
 
    private static RegexOptions OptionFromCode(VirtualChar ch)
    {
        switch (ch.Value)
        {
            case 'i': case 'I': return RegexOptions.IgnoreCase;
            case 'm': case 'M': return RegexOptions.Multiline;
            case 'n': case 'N': return RegexOptions.ExplicitCapture;
            case 's': case 'S': return RegexOptions.Singleline;
            case 'x': case 'X': return RegexOptions.IgnorePatternWhitespace;
            default:
                throw new InvalidOperationException();
        }
    }
 
    private RegexBaseCharacterClassNode ParseCharacterClass()
    {
        var openBracketToken = _currentToken;
        Debug.Assert(openBracketToken.Kind == RegexKind.OpenBracketToken);
        var caretToken = CreateMissingToken(RegexKind.CaretToken);
        var closeBracketToken = CreateMissingToken(RegexKind.CloseBracketToken);
 
        // trivia is not allowed anywhere in a character class
        ConsumeCurrentToken(allowTrivia: false);
        if (_currentToken.Kind == RegexKind.CaretToken)
        {
            caretToken = _currentToken;
        }
        else
        {
            MoveBackBeforePreviousScan();
        }
 
        // trivia is not allowed anywhere in a character class
        ConsumeCurrentToken(allowTrivia: false);
 
        using var _1 = ArrayBuilder<RegexExpressionNode>.GetInstance(out var builder);
        while (_currentToken.Kind != RegexKind.EndOfFile)
        {
            Debug.Assert(_currentToken.VirtualChars.Length == 1);
 
            if (_currentToken.Kind == RegexKind.CloseBracketToken && builder.Count > 0)
            {
                // Allow trivia after the character class, and whatever is next in the sequence.
                closeBracketToken = ConsumeCurrentToken(allowTrivia: true);
                break;
            }
 
            ParseCharacterClassComponents(builder);
        }
 
        // We will commonly get tons of text nodes in a row.  For example, the regex `[abc]` will be three text
        // nodes in a row.  To help save on memory try to merge that into one single text node.
        using var _2 = ArrayBuilder<RegexExpressionNode>.GetInstance(out var contents);
        MergeTextNodes(builder, contents);
 
        if (closeBracketToken.IsMissing)
        {
            closeBracketToken = closeBracketToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Unterminated_character_class_set,
                GetTokenStartPositionSpan(_currentToken)));
        }
 
        var components = new RegexSequenceNode(contents.ToImmutable());
        return caretToken.IsMissing
            ? new RegexCharacterClassNode(openBracketToken, components, closeBracketToken)
            : new RegexNegatedCharacterClassNode(openBracketToken, caretToken, components, closeBracketToken);
    }
 
    private void ParseCharacterClassComponents(ArrayBuilder<RegexExpressionNode> components)
    {
        var left = ParseSingleCharacterClassComponent(isFirst: components.Count == 0, afterRangeMinus: false);
        if (left.Kind is RegexKind.CharacterClassEscape or RegexKind.CategoryEscape ||
            IsEscapedMinus(left))
        {
            // \s or \p{Lu} or \- on the left of a minus doesn't start a range. If there is a following
            // minus, it's just treated textually.
            components.Add(left);
            return;
        }
 
        if (_currentToken.Kind == RegexKind.MinusToken && !_lexer.IsAt("]"))
        {
            // trivia is not allowed anywhere in a character class
            var minusToken = ConsumeCurrentToken(allowTrivia: false);
 
            if (_currentToken.Kind == RegexKind.OpenBracketToken)
            {
                components.Add(left);
                components.Add(ParseCharacterClassSubtractionNode(minusToken));
            }
            else
            {
                // Note that behavior of parsing here changed in .net.  See issue:
                // https://github.com/dotnet/corefx/issues/31786
                //
                // We follow the latest behavior in .net which parses things correctly.
                var right = ParseSingleCharacterClassComponent(isFirst: false, afterRangeMinus: true);
 
                if (TryGetRangeComponentValue(left, out var leftCh) &&
                    TryGetRangeComponentValue(right, out var rightCh) &&
                    leftCh > rightCh)
                {
                    minusToken = minusToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                        FeaturesResources.x_y_range_in_reverse_order,
                        minusToken.GetSpan()));
                }
 
                components.Add(new RegexCharacterClassRangeNode(left, minusToken, right));
            }
        }
        else
        {
            components.Add(left);
        }
    }
 
    private static bool IsEscapedMinus([NotNullWhen(true)] RegexNode? node)
        => node is RegexSimpleEscapeNode simple && IsTextChar(simple.TypeToken, '-');
 
    private static bool TryGetRangeComponentValue(RegexExpressionNode component, out int ch)
    {
        // Don't bother examining the component if it has any errors already.  This also means
        // we don't have to worry about running into invalid escape sequences and the like.
        if (!HasProblem(component))
        {
            return TryGetRangeComponentValueWorker(component, out ch);
        }
 
        ch = default;
        return false;
    }
 
    private static bool TryGetRangeComponentValueWorker(RegexNode component, out int ch)
    {
        switch (component.Kind)
        {
            case RegexKind.SimpleEscape:
                var escapeNode = (RegexSimpleEscapeNode)component;
                ch = MapEscapeChar(escapeNode.TypeToken.VirtualChars[0]).Value;
                return true;
 
            case RegexKind.ControlEscape:
                var controlEscape = (RegexControlEscapeNode)component;
                var controlCh = controlEscape.ControlToken.VirtualChars[0].Value;
 
                // \ca interpreted as \cA
                if (controlCh is >= 'a' and <= 'z')
                {
                    controlCh -= (char)('a' - 'A');
                }
 
                // The control characters have values mapping from the A-Z range to numeric
                // values 1-26.  So, to map that, we subtract 'A' from the value (which would
                // give us 0-25) and then add '1' back to it.
                ch = controlCh - 'A' + 1;
                return true;
 
            case RegexKind.OctalEscape:
                ch = GetCharValue(((RegexOctalEscapeNode)component).OctalText, withBase: 8);
                return true;
 
            case RegexKind.HexEscape:
                ch = GetCharValue(((RegexHexEscapeNode)component).HexText, withBase: 16);
                return true;
 
            case RegexKind.UnicodeEscape:
                ch = GetCharValue(((RegexUnicodeEscapeNode)component).HexText, withBase: 16);
                return true;
 
            case RegexKind.PosixProperty:
                // When the native parser sees [:...:] it treats this as if it just saw '[' and skipped the 
                // rest.
                ch = '[';
                return true;
 
            case RegexKind.Text:
                ch = ((RegexTextNode)component).TextToken.VirtualChars[0].Value;
                return true;
 
            case RegexKind.Sequence:
                var sequence = (RegexSequenceNode)component;
#if DEBUG
                Debug.Assert(sequence.ChildCount > 0);
                for (int i = 0, n = sequence.ChildCount - 1; i < n; i++)
                    Debug.Assert(IsEscapedMinus(sequence.ChildAt(i).Node));
#endif
 
                var last = sequence.ChildAt(sequence.ChildCount - 1).Node;
                Contract.ThrowIfNull(last);
                if (IsEscapedMinus(last))
                    break;
 
                return TryGetRangeComponentValueWorker(last, out ch);
        }
 
        ch = default;
        return false;
    }
 
    private static int GetCharValue(RegexToken hexText, int withBase)
    {
        unchecked
        {
            var total = 0;
            foreach (var vc in hexText.VirtualChars)
            {
                total *= withBase;
                total += HexValue(vc);
            }
 
            return total;
        }
    }
 
    private static int HexValue(VirtualChar ch)
    {
        Debug.Assert(RegexLexer.IsHexChar(ch));
        unchecked
        {
            var temp = ch.Value - '0';
            if (temp is >= 0 and <= 9)
                return temp;
 
            temp = ch.Value - 'a';
            if (temp is >= 0 and <= 5)
                return temp + 10;
 
            temp = ch.Value - 'A';
            if (temp is >= 0 and <= 5)
                return temp + 10;
        }
 
        throw new InvalidOperationException();
    }
 
    private static bool HasProblem(RegexNodeOrToken component)
    {
        if (component.IsNode)
        {
            foreach (var child in component.Node)
            {
                if (HasProblem(child))
                {
                    return true;
                }
            }
        }
        else
        {
            var token = component.Token;
            if (token.IsMissing ||
                token.Diagnostics.Length > 0)
            {
                return true;
            }
 
            foreach (var trivia in token.LeadingTrivia)
            {
                if (trivia.Diagnostics.Length > 0)
                {
                    return true;
                }
            }
        }
 
        return false;
    }
 
    private RegexPrimaryExpressionNode ParseSingleCharacterClassComponent(bool isFirst, bool afterRangeMinus)
    {
        if (_currentToken.Kind == RegexKind.BackslashToken && _lexer.Position < _lexer.Text.Length)
        {
            var backslashToken = _currentToken;
 
            // trivia is not allowed anywhere in a character class, and definitely not between
            // a \ and the following character.
            ConsumeCurrentToken(allowTrivia: false);
            Debug.Assert(_currentToken.VirtualChars.Length == 1);
 
            var nextChar = _currentToken.VirtualChars[0];
            switch (nextChar.Value)
            {
                case 'D':
                case 'd':
                case 'S':
                case 's':
                case 'W':
                case 'w':
                case 'p':
                case 'P':
                    if (afterRangeMinus)
                    {
                        backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                            string.Format(FeaturesResources.Cannot_include_class_0_in_character_range, nextChar),
                            GetSpan(backslashToken, _currentToken)));
                    }
 
                    // move back before the character we just scanned.
                    // trivia is not allowed anywhere in a character class.
 
                    // The above list are character class and category escapes.  ParseEscape can
                    // handle both of those, so we just defer to it.
                    _lexer.Position--;
                    return ParseEscape(backslashToken, allowTriviaAfterEnd: false);
 
                case '-':
                    // trivia is not allowed anywhere in a character class.
 
                    // We just let the basic consumption code pull out a token for us, we then
                    // convert that to text since we treat all characters after the - as text no
                    // matter what.
                    return new RegexSimpleEscapeNode(
                        backslashToken, ConsumeCurrentToken(allowTrivia: false).With(kind: RegexKind.TextToken));
 
                default:
                    // trivia is not allowed anywhere in a character class.
 
                    // Note: it is very intentional that we're calling ParseCharEscape and not
                    // ParseEscape.  Normal escapes are not interpreted the same way inside a
                    // character class.  For example \b is not an anchor in a character class.
                    // And things like \k'...' are not k-captures, etc. etc.  
                    _lexer.Position--;
                    return ParseCharEscape(backslashToken, allowTriviaAfterEnd: false);
            }
        }
 
        if (!afterRangeMinus &&
            !isFirst &&
            _currentToken.Kind == RegexKind.MinusToken &&
            _lexer.IsAt("["))
        {
            // have a trailing subtraction.
            // trivia is not allowed anywhere in a character class
            return ParseCharacterClassSubtractionNode(
                ConsumeCurrentToken(allowTrivia: false));
        }
 
        // From the .NET regex code:
        // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
        // It currently doesn't do anything other than skip the whole thing!
        if (!afterRangeMinus && _currentToken.Kind == RegexKind.OpenBracketToken && _lexer.IsAt(":"))
        {
            var beforeBracketPos = _lexer.Position - 1;
            // trivia is not allowed anywhere in a character class
            ConsumeCurrentToken(allowTrivia: false);
 
            var captureName = _lexer.TryScanCaptureName();
            if (captureName.HasValue && _lexer.IsAt(":]"))
            {
                _lexer.Position += 2;
                var textChars = _lexer.GetSubPattern(beforeBracketPos, _lexer.Position);
                var token = CreateToken(RegexKind.TextToken, [], textChars);
 
                // trivia is not allowed anywhere in a character class
                ConsumeCurrentToken(allowTrivia: false);
                return new RegexPosixPropertyNode(token);
            }
            else
            {
                // Reset to back where we were.
                // trivia is not allowed anywhere in a character class
                _lexer.Position = beforeBracketPos;
                ConsumeCurrentToken(allowTrivia: false);
                Debug.Assert(_currentToken.Kind == RegexKind.OpenBracketToken);
            }
        }
 
        // trivia is not allowed anywhere in a character class
        return new RegexTextNode(
            ConsumeCurrentToken(allowTrivia: false).With(kind: RegexKind.TextToken));
    }
 
    private RegexPrimaryExpressionNode ParseCharacterClassSubtractionNode(RegexToken minusToken)
    {
        var charClass = ParseCharacterClass();
 
        if (_currentToken.Kind is not RegexKind.CloseBracketToken and not RegexKind.EndOfFile)
        {
            minusToken = minusToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.A_subtraction_must_be_the_last_element_in_a_character_class,
                GetTokenStartPositionSpan(minusToken)));
        }
 
        return new RegexCharacterClassSubtractionNode(minusToken, charClass);
    }
 
    /// <summary>
    /// Parses out an escape sequence.  Escape sequences are allowed in top level sequences
    /// and in character classes.  In a top level sequence trivia will be allowed afterwards,
    /// but in a character class trivia is not allowed afterwards.
    /// </summary>
    private RegexEscapeNode ParseEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
 
        // No spaces between \ and next char.
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind == RegexKind.EndOfFile)
        {
            backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Illegal_backslash_at_end_of_pattern,
                backslashToken.GetSpan()));
            return new RegexSimpleEscapeNode(backslashToken, CreateMissingToken(RegexKind.TextToken));
        }
 
        Debug.Assert(_currentToken.VirtualChars.Length == 1);
        switch (_currentToken.VirtualChars[0].Value)
        {
            case 'b':
            case 'B':
            case 'A':
            case 'G':
            case 'Z':
            case 'z':
                return new RegexAnchorEscapeNode(
                    backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd));
 
            case 'w':
            case 'W':
            case 's':
            case 'S':
            case 'd':
            case 'D':
                return new RegexCharacterClassEscapeNode(
                    backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd));
 
            case 'p':
            case 'P':
                return ParseCategoryEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        // Move back to after the backslash
        _lexer.Position--;
        return ParseBasicBackslash(backslashToken, allowTriviaAfterEnd);
    }
 
    private RegexEscapeNode ParseBasicBackslash(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
 
        // No spaces between \ and next char.
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind == RegexKind.EndOfFile)
        {
            backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Illegal_backslash_at_end_of_pattern,
                backslashToken.GetSpan()));
            return new RegexSimpleEscapeNode(backslashToken, CreateMissingToken(RegexKind.TextToken));
        }
 
        Debug.Assert(_currentToken.VirtualChars.Length == 1);
        var ch = _currentToken.VirtualChars[0];
        if (ch == 'k')
        {
            return ParsePossibleKCaptureEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        if (ch.Value is '<' or '\'')
        {
            _lexer.Position--;
            return ParsePossibleCaptureEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        if (ch.Value is >= '1' and <= '9')
        {
            _lexer.Position--;
            return ParsePossibleBackreferenceEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        _lexer.Position--;
        return ParseCharEscape(backslashToken, allowTriviaAfterEnd);
    }
 
    private RegexEscapeNode ParsePossibleBackreferenceEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
        return HasOption(_options, RegexOptions.ECMAScript)
            ? ParsePossibleEcmascriptBackreferenceEscape(backslashToken, allowTriviaAfterEnd)
            : ParsePossibleRegularBackreferenceEscape(backslashToken, allowTriviaAfterEnd);
    }
 
    private RegexEscapeNode ParsePossibleEcmascriptBackreferenceEscape(
        RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        // Small deviation: Ecmascript allows references only to captures that precede
        // this position (unlike .NET which allows references in any direction).  However,
        // because we don't track position, we just consume the entire back-reference.
        //
        // This is addressable if we add position tracking when we locate all the captures.
 
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
        var start = _lexer.Position;
 
        var bestPosition = -1;
        var capVal = 0;
        while (_lexer.Position < _lexer.Text.Length &&
               _lexer.Text[_lexer.Position] is var ch &&
               (ch >= '0' && ch <= '9'))
        {
            unchecked
            {
                capVal *= 10;
                capVal += (ch.Value - '0');
            }
 
            _lexer.Position++;
 
            if (HasCapture(capVal))
            {
                bestPosition = _lexer.Position;
            }
        }
 
        if (bestPosition != -1)
        {
            var numberToken = CreateToken(
                RegexKind.NumberToken, [],
                _lexer.GetSubPattern(start, bestPosition)).With(value: capVal);
            ResetToPositionAndConsumeCurrentToken(bestPosition, allowTrivia: allowTriviaAfterEnd);
            return new RegexBackreferenceEscapeNode(backslashToken, numberToken);
        }
 
        _lexer.Position = start;
        return ParseCharEscape(backslashToken, allowTriviaAfterEnd);
    }
 
    private RegexEscapeNode ParsePossibleRegularBackreferenceEscape(
        RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
        var start = _lexer.Position;
 
        var number = _lexer.TryScanNumber();
        Contract.ThrowIfNull(number);
        var numberToken = number.Value;
        var capVal = (int)numberToken.Value;
        if (HasCapture(capVal) ||
            capVal <= 9)
        {
            CheckCapture(ref numberToken);
 
            ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
            return new RegexBackreferenceEscapeNode(backslashToken, numberToken);
        }
 
        _lexer.Position = start;
        return ParseCharEscape(backslashToken, allowTriviaAfterEnd);
    }
 
    private RegexEscapeNode ParsePossibleCaptureEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
        Debug.Assert(_lexer.Text[_lexer.Position].Value is '<' or '\'');
 
        var afterBackslashPosition = _lexer.Position;
        ScanCaptureParts(allowTriviaAfterEnd, out var openToken, out var capture, out var closeToken);
 
        if (openToken.IsMissing || capture.IsMissing || closeToken.IsMissing)
        {
            _lexer.Position = afterBackslashPosition;
            return ParseCharEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        return new RegexCaptureEscapeNode(
            backslashToken, openToken, capture, closeToken);
    }
 
    private RegexEscapeNode ParsePossibleKCaptureEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        var typeToken = _currentToken;
        var afterBackslashPosition = _lexer.Position - @"k".Length;
 
        ScanCaptureParts(allowTriviaAfterEnd, out var openToken, out var capture, out var closeToken);
        if (openToken.IsMissing)
        {
            backslashToken = backslashToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Malformed_named_back_reference,
                GetSpan(backslashToken, typeToken)));
            return new RegexSimpleEscapeNode(backslashToken, typeToken.With(kind: RegexKind.TextToken));
        }
 
        if (capture.IsMissing || closeToken.IsMissing)
        {
            // Native parser falls back to normal escape scanning, if it doesn't see a capture,
            // or close brace.  For normal .NET regexes, this will then fail later (as \k is not
            // a legal escape), but will succeed for ecmascript regexes.
            _lexer.Position = afterBackslashPosition;
            return ParseCharEscape(backslashToken, allowTriviaAfterEnd);
        }
 
        return new RegexKCaptureEscapeNode(
            backslashToken, typeToken, openToken, capture, closeToken);
    }
 
    private void ScanCaptureParts(
        bool allowTriviaAfterEnd, out RegexToken openToken, out RegexToken capture, out RegexToken closeToken)
    {
        openToken = CreateMissingToken(RegexKind.LessThanToken);
        capture = CreateMissingToken(RegexKind.CaptureNameToken);
        closeToken = CreateMissingToken(RegexKind.GreaterThanToken);
 
        // No trivia allowed in <cap> or 'cap'
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_lexer.Position < _lexer.Text.Length &&
            (_currentToken.Kind == RegexKind.LessThanToken || _currentToken.Kind == RegexKind.SingleQuoteToken))
        {
            openToken = _currentToken;
        }
        else
        {
            return;
        }
 
        var captureToken = _lexer.TryScanNumberOrCaptureName();
        capture = captureToken == null
            ? CreateMissingToken(RegexKind.CaptureNameToken)
            : captureToken.Value;
 
        // No trivia allowed in <cap> or 'cap'
        ConsumeCurrentToken(allowTrivia: false);
        closeToken = CreateMissingToken(RegexKind.GreaterThanToken);
 
        if (!capture.IsMissing &&
            ((openToken.Kind == RegexKind.LessThanToken && _currentToken.Kind == RegexKind.GreaterThanToken) ||
             (openToken.Kind == RegexKind.SingleQuoteToken && _currentToken.Kind == RegexKind.SingleQuoteToken)))
        {
            CheckCapture(ref capture);
            closeToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
        }
    }
 
    private RegexEscapeNode ParseCharEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] == '\\');
 
        // no trivia between \ and the next char
        ConsumeCurrentToken(allowTrivia: false);
        Debug.Assert(_currentToken.VirtualChars.Length == 1);
 
        var ch = _currentToken.VirtualChars[0];
        if (ch.Value is >= '0' and <= '7')
        {
            _lexer.Position--;
            var octalDigits = _lexer.ScanOctalCharacters(_options);
            Debug.Assert(octalDigits.VirtualChars.Length > 0);
 
            ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
            return new RegexOctalEscapeNode(backslashToken, octalDigits);
        }
 
        switch (ch.Value)
        {
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
                return new RegexSimpleEscapeNode(
                    backslashToken, ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd));
            case 'x':
                return ParseHexEscape(backslashToken, allowTriviaAfterEnd);
            case 'u':
                return ParseUnicodeEscape(backslashToken, allowTriviaAfterEnd);
            case 'c':
                return ParseControlEscape(backslashToken, allowTriviaAfterEnd);
            default:
                var typeToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd).With(kind: RegexKind.TextToken);
 
                if (!HasOption(_options, RegexOptions.ECMAScript) && RegexCharClass.IsBoundaryWordChar(ch))
                {
                    typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                        string.Format(FeaturesResources.Unrecognized_escape_sequence_0, ch),
                        typeToken.GetSpan()));
                }
 
                return new RegexSimpleEscapeNode(backslashToken, typeToken);
        }
    }
 
    private RegexEscapeNode ParseUnicodeEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        var typeToken = _currentToken;
        var hexChars = _lexer.ScanHexCharacters(4);
        ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
        return new RegexUnicodeEscapeNode(backslashToken, typeToken, hexChars);
    }
 
    private RegexEscapeNode ParseHexEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        var typeToken = _currentToken;
        var hexChars = _lexer.ScanHexCharacters(2);
        ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
        return new RegexHexEscapeNode(backslashToken, typeToken, hexChars);
    }
 
    private RegexControlEscapeNode ParseControlEscape(RegexToken backslashToken, bool allowTriviaAfterEnd)
    {
        // Nothing allowed between \c and the next char
        var typeToken = ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind == RegexKind.EndOfFile)
        {
            typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Missing_control_character,
                typeToken.GetSpan()));
            return new RegexControlEscapeNode(backslashToken, typeToken, CreateMissingToken(RegexKind.TextToken));
        }
 
        Debug.Assert(_currentToken.VirtualChars.Length == 1);
 
        var ch = _currentToken.VirtualChars[0].Value;
 
        unchecked
        {
            // From: https://github.com/dotnet/corefx/blob/80e220fc7009de0f0611ee6b52d4d5ffd25eb6c7/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs#L1450
 
            // Note: Roslyn accepts a control escape that current .NET parser does not.
            // Specifically: \c[
            //
            // It is a bug that the .NET parser does not support this construct.  The bug was
            // reported at: https://github.com/dotnet/corefx/issues/26501 and was fixed for
            // CoreFx with https://github.com/dotnet/corefx/commit/80e220fc7009de0f0611ee6b52d4d5ffd25eb6c7
            //
            // Because it was a bug, we follow the correct behavior.  That means we will not
            // report a diagnostic for a Regex that someone might run on a previous version of
            // .NET that ends up throwing at runtime.  That's acceptable.  Our goal is to match
            // the latest .NET 'correct' behavior.  Not intermediary points with bugs that have
            // since been fixed.
 
            // \ca interpreted as \cA
            if (ch is >= 'a' and <= 'z')
            {
                ch -= (char)('a' - 'A');
            }
 
            if (ch is >= '@' and <= '_')
            {
                var controlToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd).With(kind: RegexKind.TextToken);
                return new RegexControlEscapeNode(backslashToken, typeToken, controlToken);
            }
            else
            {
                typeToken = typeToken.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                    FeaturesResources.Unrecognized_control_character,
                    _currentToken.GetSpan()));
 
                // Don't consume the bogus control character.
                return new RegexControlEscapeNode(backslashToken, typeToken, CreateMissingToken(RegexKind.TextToken));
            }
        }
    }
 
    private RegexEscapeNode ParseCategoryEscape(RegexToken backslash, bool allowTriviaAfterEnd)
    {
        Debug.Assert(_lexer.Text[_lexer.Position - 1] is var ch && (ch == 'P' || ch == 'p'));
        var typeToken = _currentToken;
 
        var start = _lexer.Position;
 
        if (!TryGetCategoryEscapeParts(
                allowTriviaAfterEnd,
                out var openBraceToken,
                out var categoryToken,
                out var closeBraceToken,
                out var message))
        {
            ResetToPositionAndConsumeCurrentToken(start, allowTrivia: allowTriviaAfterEnd);
            typeToken = typeToken.With(kind: RegexKind.TextToken).AddDiagnosticIfNone(new EmbeddedDiagnostic(
                message, GetSpan(backslash, typeToken)));
            return new RegexSimpleEscapeNode(backslash, typeToken);
        }
 
        return new RegexCategoryEscapeNode(backslash, typeToken, openBraceToken, categoryToken, closeBraceToken);
    }
 
    private bool TryGetCategoryEscapeParts(
        bool allowTriviaAfterEnd,
        out RegexToken openBraceToken,
        out RegexToken categoryToken,
        out RegexToken closeBraceToken,
        [NotNullWhen(false)] out string? message)
    {
        openBraceToken = default;
        categoryToken = default;
        closeBraceToken = default;
        message = null;
 
        if (_lexer.Text.Length - _lexer.Position < "{x}".Length)
        {
            message = FeaturesResources.Incomplete_character_escape;
            return false;
        }
 
        // no whitespace in \p{x}
        ConsumeCurrentToken(allowTrivia: false);
 
        if (_currentToken.Kind != RegexKind.OpenBraceToken)
        {
            message = FeaturesResources.Malformed_character_escape;
            return false;
        }
 
        openBraceToken = _currentToken;
        var category = _lexer.TryScanEscapeCategory();
 
        // no whitespace in \p{x}
        ConsumeCurrentToken(allowTrivia: false);
        if (_currentToken.Kind != RegexKind.CloseBraceToken)
        {
            message = FeaturesResources.Incomplete_character_escape;
            return false;
        }
 
        if (category == null)
        {
            message = FeaturesResources.Unknown_property;
            return false;
        }
 
        categoryToken = category.Value;
        closeBraceToken = ConsumeCurrentToken(allowTrivia: allowTriviaAfterEnd);
        return true;
    }
 
    private RegexTextNode ParseUnexpectedQuantifier(RegexExpressionNode? lastExpression)
    {
        // This is just a bogus element in the higher level sequence.  Allow trivia 
        // after this to abide by the spirit of the native parser.
        var token = ConsumeCurrentToken(allowTrivia: true);
        CheckQuantifierExpression(lastExpression, ref token);
        return new RegexTextNode(token.With(kind: RegexKind.TextToken));
    }
 
    private static void CheckQuantifierExpression(RegexExpressionNode? current, ref RegexToken token)
    {
        if (current == null ||
            current.Kind == RegexKind.SimpleOptionsGrouping)
        {
            token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                FeaturesResources.Quantifier_x_y_following_nothing, token.GetSpan()));
        }
        else if (current is RegexQuantifierNode or RegexLazyQuantifierNode)
        {
            token = token.AddDiagnosticIfNone(new EmbeddedDiagnostic(
                string.Format(FeaturesResources.Nested_quantifier_0, token.VirtualChars.First()), token.GetSpan()));
        }
    }
}