File: System\Text\RegularExpressions\RegexParser.cs
Web Access
Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
 
namespace System.Text.RegularExpressions
{
    /// <summary>Builds a tree of RegexNodes from a regular expression.</summary>
    internal ref struct RegexParser
    {
        // Implementation notes:
        // It would be nice to get rid of the comment modes, since the
        // ScanBlank() calls are just kind of duct-taped in.
 
        private const int EscapeMaxBufferSize = 256;
        private const int OptionStackDefaultSize = 32;
        private const int MaxValueDiv10 = int.MaxValue / 10;
        private const int MaxValueMod10 = int.MaxValue % 10;
 
        private RegexNode? _stack;
        private RegexNode? _group;
        private RegexNode? _alternation;
        private RegexNode? _concatenation;
        private RegexNode? _unit;
 
        private readonly string _pattern;
        private int _pos;
        private readonly CultureInfo _culture;
        private RegexCaseBehavior _caseBehavior;
        private bool _hasIgnoreCaseBackreferenceNodes;
 
        private int _autocap;
        private int _capcount;
        private int _captop;
        private readonly int _capsize;
 
        private readonly Hashtable _caps;
        private Hashtable? _capnames;
 
        private int[]? _capnumlist;
        private List<string>? _capnamelist;
 
        private RegexOptions _options;
        // NOTE: _optionsStack is ValueListBuilder<int> to ensure that
        //       ArrayPool<int>.Shared, not ArrayPool<RegexOptions>.Shared,
        //       will be created if the stackalloc'd capacity is ever exceeded.
        private ValueListBuilder<int> _optionsStack;
 
        private bool _ignoreNextParen; // flag to skip capturing a parentheses group
 
        private RegexParser(string pattern, RegexOptions options, CultureInfo culture, Hashtable caps, int capsize, Hashtable? capnames, Span<int> optionSpan)
        {
            Debug.Assert(pattern != null, "Pattern must be set");
            Debug.Assert(culture != null, "Culture must be set");
 
            _pattern = pattern;
            _options = options;
            _culture = culture;
            _caseBehavior = default;
            _hasIgnoreCaseBackreferenceNodes = false;
            _caps = caps;
            _capsize = capsize;
            _capnames = capnames;
 
            _optionsStack = new ValueListBuilder<int>(optionSpan);
            _stack = null;
            _group = null;
            _alternation = null;
            _concatenation = null;
            _unit = null;
            _pos = 0;
            _autocap = 0;
            _capcount = 0;
            _captop = 0;
            _capnumlist = null;
            _capnamelist = null;
            _ignoreNextParen = false;
        }
 
        /// <summary>Gets the culture to use based on the specified options.</summary>
        internal static CultureInfo GetTargetCulture(RegexOptions options) =>
#pragma warning disable RS1035 // The symbol 'CultureInfo.CurrentCulture' is banned for use by analyzers.
            (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
#pragma warning restore RS1035
 
        public static RegexOptions ParseOptionsInPattern(string pattern, RegexOptions options)
        {
            using var parser = new RegexParser(pattern, options, CultureInfo.InvariantCulture, // since we won't perform case conversions, culture doesn't matter in this case.
                new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
 
            // We don't really need to Count the Captures, but this method will already do a quick
            // pass through the pattern, and will scan the options found and return them as an out
            // parameter, so we use that to get out the pattern inline options.
            parser.CountCaptures(out RegexOptions foundOptionsInPattern);
            parser.Reset(options);
            return foundOptionsInPattern;
        }
 
        public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture)
        {
            using var parser = new RegexParser(pattern, options, culture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
 
            parser.CountCaptures(out _);
            parser.Reset(options);
            RegexNode root = parser.ScanRegex();
 
            int[]? captureNumberList = parser._capnumlist;
            Hashtable? sparseMapping = parser._caps;
            int captop = parser._captop;
 
            int captureCount;
            if (captureNumberList == null || captop == captureNumberList.Length)
            {
                // The capture list isn't sparse.  Null out the capture mapping as it's not necessary,
                // and store the number of captures.
                captureCount = captop;
                sparseMapping = null;
            }
            else
            {
                // The capture list is sparse.  Store the number of captures, and populate the number-to-names-list.
                captureCount = captureNumberList.Length;
                for (int i = 0; i < captureNumberList.Length; i++)
                {
                    sparseMapping[captureNumberList[i]] = i;
                }
            }
 
            return new RegexTree(root, captureCount, parser._capnamelist?.ToArray(), parser._capnames!, sparseMapping, options, parser._hasIgnoreCaseBackreferenceNodes ? culture : null);
        }
 
        /// <summary>This static call constructs a flat concatenation node given a replacement pattern.</summary>
        public static RegexReplacement ParseReplacement(string pattern, RegexOptions options, Hashtable caps, int capsize, Hashtable capnames)
        {
#pragma warning disable RS1035 // The symbol 'CultureInfo.CurrentCulture' is banned for use by analyzers.
            CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
#pragma warning restore RS1035
            using var parser = new RegexParser(pattern, options, culture, caps, capsize, capnames, stackalloc int[OptionStackDefaultSize]);
 
            RegexNode root = parser.ScanReplacement();
            var regexReplacement = new RegexReplacement(pattern, root, caps);
 
            return regexReplacement;
        }
 
        /// <summary>Escapes all metacharacters (including |,(,),[,{,|,^,$,*,+,?,\, spaces and #)</summary>
        public static string Escape(string input)
        {
            int indexOfMetachar = IndexOfMetachar(input.AsSpan());
            return indexOfMetachar < 0
                ? input
                : EscapeImpl(input.AsSpan(), indexOfMetachar);
        }
 
        private static string EscapeImpl(ReadOnlySpan<char> input, int indexOfMetachar)
        {
            // For small inputs we allocate on the stack. In most cases a buffer three
            // times larger the original string should be sufficient as usually not all
            // characters need to be encoded.
            // For larger string we rent the input string's length plus a fixed
            // conservative amount of chars from the ArrayPool.
            ValueStringBuilder vsb = input.Length <= (EscapeMaxBufferSize / 3) ?
                new ValueStringBuilder(stackalloc char[EscapeMaxBufferSize]) :
                new ValueStringBuilder(input.Length + 200);
 
            while (true)
            {
                vsb.Append(input.Slice(0, indexOfMetachar));
                input = input.Slice(indexOfMetachar);
 
                if (input.IsEmpty)
                {
                    break;
                }
 
                char ch = input[0];
 
                switch (ch)
                {
                    case '\n':
                        ch = 'n';
                        break;
                    case '\r':
                        ch = 'r';
                        break;
                    case '\t':
                        ch = 't';
                        break;
                    case '\f':
                        ch = 'f';
                        break;
                }
 
                vsb.Append('\\');
                vsb.Append(ch);
                input = input.Slice(1);
 
                indexOfMetachar = IndexOfMetachar(input);
                if (indexOfMetachar < 0)
                {
                    indexOfMetachar = input.Length;
                }
            }
 
            return vsb.ToString();
        }
 
        /// <summary> Unescapes all metacharacters (including (,),[,],{,},|,^,$,*,+,?,\, spaces and #)</summary>
        public static string Unescape(string input)
        {
            int i = input.IndexOf('\\');
            return i >= 0 ?
                UnescapeImpl(input, i) :
                input;
        }
 
        private static string UnescapeImpl(string input, int i)
        {
            var parser = new RegexParser(input, RegexOptions.None, CultureInfo.InvariantCulture, new Hashtable(), 0, null, stackalloc int[OptionStackDefaultSize]);
 
            // In the worst case the escaped string has the same length.
            // For small inputs we use stack allocation.
            ValueStringBuilder vsb = input.Length <= EscapeMaxBufferSize ?
                new ValueStringBuilder(stackalloc char[EscapeMaxBufferSize]) :
                new ValueStringBuilder(input.Length);
 
            vsb.Append(input.AsSpan(0, i));
            do
            {
                i++;
                parser._pos = i;
                if (i < input.Length)
                {
                    vsb.Append(parser.ScanCharEscape());
                }
 
                i = parser._pos;
                int lastpos = i;
                while (i < input.Length && input[i] != '\\')
                {
                    i++;
                }
 
                vsb.Append(input.AsSpan(lastpos, i - lastpos));
            } while (i < input.Length);
 
            parser.Dispose();
 
            return vsb.ToString();
        }
 
        /// <summary>Resets parsing to the beginning of the pattern</summary>
        private void Reset(RegexOptions options)
        {
            _pos = 0;
            _autocap = 1;
            _ignoreNextParen = false;
            _optionsStack.Length = 0;
            _options = options;
            _stack = null;
        }
 
        public void Dispose() => _optionsStack.Dispose();
 
        /// <summary>The main parsing function</summary>
        private RegexNode ScanRegex()
        {
            char ch;
            bool isQuantifier = false;
 
            // For the main Capture object, strip out the IgnoreCase option. The rest of the nodes will strip it out depending on the content
            // of each node.
            StartGroup(new RegexNode(RegexNodeKind.Capture, (_options & ~RegexOptions.IgnoreCase), 0, -1));
 
            while (_pos < _pattern.Length)
            {
                bool wasPrevQuantifier = isQuantifier;
                isQuantifier = false;
 
                ScanBlank();
 
                int startpos = _pos;
 
                // move past all of the normal characters.  We'll stop when we hit some kind of control character,
                // or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
                if ((_options & RegexOptions.IgnorePatternWhitespace) != 0)
                {
                    while (_pos < _pattern.Length && (!IsSpecialOrSpace(ch = _pattern[_pos]) || (ch == '{' && !IsTrueQuantifier())))
                        _pos++;
                }
                else
                {
                    while (_pos < _pattern.Length && (!IsSpecial(ch = _pattern[_pos]) || (ch == '{' && !IsTrueQuantifier())))
                        _pos++;
                }
 
                int endpos = _pos;
 
                ScanBlank();
 
                if (_pos == _pattern.Length)
                {
                    ch = '!'; // nonspecial, means at end
                }
                else if (IsSpecial(ch = _pattern[_pos]))
                {
                    isQuantifier = IsQuantifier(ch);
                    _pos++;
                }
                else
                {
                    ch = ' '; // nonspecial, means at ordinary char
                }
 
                if (startpos < endpos)
                {
                    int cchUnquantified = endpos - startpos - (isQuantifier ? 1 : 0);
 
                    wasPrevQuantifier = false;
 
                    if (cchUnquantified > 0)
                    {
                        AddToConcatenate(startpos, cchUnquantified, false);
                    }
 
                    if (isQuantifier)
                    {
                        _unit = RegexNode.CreateOneWithCaseConversion(_pattern[endpos - 1], _options, _culture, ref _caseBehavior);
                    }
                }
 
                switch (ch)
                {
                    case '!':
                        goto BreakOuterScan;
 
                    case ' ':
                        goto ContinueOuterScan;
 
                    case '[':
                        {
                            string setString = ScanCharClass((_options & RegexOptions.IgnoreCase) != 0, scanOnly: false)!.ToStringClass();
                            _unit = new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, setString);
                        }
                        break;
 
                    case '(':
                        _optionsStack.Append((int)_options);
                        if (ScanGroupOpen() is RegexNode grouper)
                        {
                            PushGroup();
                            StartGroup(grouper);
                        }
                        else
                        {
                            _optionsStack.Length--;
                        }
                        continue;
 
                    case '|':
                        AddAlternate();
                        goto ContinueOuterScan;
 
                    case ')':
                        if (_stack == null)
                        {
                            throw MakeException(RegexParseError.InsufficientOpeningParentheses, SR.InsufficientOpeningParentheses);
                        }
 
                        AddGroup();
                        PopGroup();
                        _options = (RegexOptions)_optionsStack.Pop();
 
                        if (_unit == null)
                        {
                            goto ContinueOuterScan;
                        }
                        break;
 
                    case '\\':
                        if (_pos == _pattern.Length)
                        {
                            throw MakeException(RegexParseError.UnescapedEndingBackslash, SR.UnescapedEndingBackslash);
                        }
 
                        _unit = ScanBackslash(scanOnly: false)!;
                        break;
 
                    case '^':
                        _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Bol : RegexNodeKind.Beginning, _options);
                        break;
 
                    case '$':
                        _unit = new RegexNode((_options & RegexOptions.Multiline) != 0 ? RegexNodeKind.Eol : RegexNodeKind.EndZ, _options);
                        break;
 
                    case '.':
                        _unit = (_options & RegexOptions.Singleline) != 0 ?
                            new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) :
                            new RegexNode(RegexNodeKind.Notone, _options & ~RegexOptions.IgnoreCase, '\n');
                        break;
 
                    case '{':
                    case '*':
                    case '+':
                    case '?':
                        if (_unit == null)
                        {
                            throw wasPrevQuantifier ?
                                MakeException(RegexParseError.NestedQuantifiersNotParenthesized, SR.Format(SR.NestedQuantifiersNotParenthesized, ch)) :
                                MakeException(RegexParseError.QuantifierAfterNothing, SR.Format(SR.QuantifierAfterNothing, ch));
                        }
                        --_pos;
                        break;
 
                    default:
                        Debug.Fail($"Unexpected char {ch}");
                        break;
                }
 
                ScanBlank();
 
                if (_pos == _pattern.Length || !(isQuantifier = IsTrueQuantifier()))
                {
                    _concatenation!.AddChild(_unit!);
                    _unit = null;
                    goto ContinueOuterScan;
                }
 
                ch = _pattern[_pos++];
 
                // Handle quantifiers
                while (_unit != null)
                {
                    int min = 0, max = 0;
 
                    switch (ch)
                    {
                        case '*':
                            max = int.MaxValue;
                            break;
 
                        case '?':
                            max = 1;
                            break;
 
                        case '+':
                            min = 1;
                            max = int.MaxValue;
                            break;
 
                        case '{':
                            startpos = _pos;
                            max = min = ScanDecimal();
                            if (startpos < _pos)
                            {
                                if (_pos < _pattern.Length && _pattern[_pos] == ',')
                                {
                                    _pos++;
                                    max = _pos == _pattern.Length || _pattern[_pos] == '}' ? int.MaxValue : ScanDecimal();
                                }
                            }
 
                            if (startpos == _pos || _pos == _pattern.Length || _pattern[_pos++] != '}')
                            {
                                _concatenation!.AddChild(_unit!);
                                _unit = null;
                                _pos = startpos - 1;
                                goto ContinueOuterScan;
                            }
 
                            break;
 
                        default:
                            Debug.Fail($"Unexpected char {ch}");
                            break;
                    }
 
                    ScanBlank();
 
                    bool lazy = false;
                    if (_pos < _pattern.Length && _pattern[_pos] == '?')
                    {
                        _pos++;
                        lazy = true;
                    }
 
                    if (min > max)
                    {
                        throw MakeException(RegexParseError.ReversedQuantifierRange, SR.ReversedQuantifierRange);
                    }
 
                    _concatenation!.AddChild(_unit!.MakeQuantifier(lazy, min, max));
                    _unit = null;
                }
 
            ContinueOuterScan:
                ;
            }
 
        BreakOuterScan:
            ;
 
            if (_stack != null)
            {
                throw MakeException(RegexParseError.InsufficientClosingParentheses, SR.InsufficientClosingParentheses);
            }
 
            AddGroup();
 
            return _unit!.FinalOptimize();
        }
 
        /// <summary>Simple parsing for replacement patterns</summary>
        private RegexNode ScanReplacement()
        {
            _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options);
 
            while (_pos < _pattern.Length)
            {
                int startpos = _pos;
 
                _pos = _pattern.IndexOf('$', _pos);
                if (_pos < 0)
                {
                    _pos = _pattern.Length;
                }
 
                AddToConcatenate(startpos, _pos - startpos, isReplacement: true);
 
                if (_pos < _pattern.Length)
                {
                    _pos++;
                    _concatenation.AddChild(ScanDollar());
                    _unit = null;
                }
            }
 
            return _concatenation;
        }
 
        /// <summary>Scans contents of [] (not including []'s), and converts to a RegexCharClass</summary>
        private RegexCharClass? ScanCharClass(bool caseInsensitive, bool scanOnly)
        {
            char ch;
            char chPrev = '\0';
            bool inRange = false;
            bool firstChar = true;
            bool closed = false;
 
            RegexCharClass? charClass = scanOnly ? null : new RegexCharClass();
 
            if (_pos < _pattern.Length && _pattern[_pos] == '^')
            {
                _pos++;
                if (!scanOnly)
                {
                    charClass!.Negate = true;
                }
                if ((_options & RegexOptions.ECMAScript) != 0 && _pattern[_pos] == ']')
                {
                    firstChar = false;
                }
            }
 
            for (; _pos < _pattern.Length; firstChar = false)
            {
                bool translatedChar = false;
                ch = _pattern[_pos++];
                if (ch == ']')
                {
                    if (!firstChar)
                    {
                        closed = true;
                        break;
                    }
                }
                else if (ch == '\\' && _pos < _pattern.Length)
                {
                    switch (ch = _pattern[_pos++])
                    {
                        case 'D':
                        case 'd':
                            if (!scanOnly)
                            {
                                if (inRange)
                                {
                                    throw MakeException(RegexParseError.ShorthandClassInCharacterRange, SR.Format(SR.ShorthandClassInCharacterRange, ch));
                                }
                                charClass!.AddDigit((_options & RegexOptions.ECMAScript) != 0, ch == 'D', _pattern, _pos);
                            }
                            continue;
 
                        case 'S':
                        case 's':
                            if (!scanOnly)
                            {
                                if (inRange)
                                {
                                    throw MakeException(RegexParseError.ShorthandClassInCharacterRange, SR.Format(SR.ShorthandClassInCharacterRange, ch));
                                }
                                charClass!.AddSpace((_options & RegexOptions.ECMAScript) != 0, ch == 'S');
                            }
                            continue;
 
                        case 'W':
                        case 'w':
                            if (!scanOnly)
                            {
                                if (inRange)
                                {
                                    throw MakeException(RegexParseError.ShorthandClassInCharacterRange, SR.Format(SR.ShorthandClassInCharacterRange, ch));
                                }
 
                                charClass!.AddWord((_options & RegexOptions.ECMAScript) != 0, ch == 'W');
                            }
                            continue;
 
                        case 'p':
                        case 'P':
                            if (!scanOnly)
                            {
                                if (inRange)
                                {
                                    throw MakeException(RegexParseError.ShorthandClassInCharacterRange, SR.Format(SR.ShorthandClassInCharacterRange, ch));
                                }
 
                                charClass!.AddCategoryFromName(ParseProperty(), ch != 'p', caseInsensitive, _pattern, _pos);
                            }
                            else
                            {
                                ParseProperty();
                            }
                            continue;
 
                        case '-':
                            if (!scanOnly)
                            {
                                if (inRange)
                                {
                                    if (chPrev > ch)
                                    {
                                        throw MakeException(RegexParseError.ReversedCharacterRange, SR.ReversedCharacterRange);
                                    }
 
                                    charClass!.AddRange(chPrev, ch);
                                    inRange = false;
                                    chPrev = '\0';
                                }
                                else
                                {
                                    charClass!.AddRange(ch, ch);
                                }
                            }
                            continue;
 
                        default:
                            --_pos;
                            ch = ScanCharEscape(); // non-literal character
                            translatedChar = true;
                            break; // this break will only break out of the switch
                    }
                }
 
                if (inRange)
                {
                    inRange = false;
                    if (!scanOnly)
                    {
                        if (ch == '[' && !translatedChar && !firstChar)
                        {
                            // We thought we were in a range, but we're actually starting a subtraction.
                            // In that case, we'll add chPrev to our char class, skip the opening [, and
                            // scan the new character class recursively.
                            charClass!.AddChar(chPrev);
                            charClass.AddSubtraction(ScanCharClass(caseInsensitive, scanOnly)!);
 
                            if (_pos < _pattern.Length && _pattern[_pos] != ']')
                            {
                                throw MakeException(RegexParseError.ExclusionGroupNotLast, SR.ExclusionGroupNotLast);
                            }
                        }
                        else
                        {
                            // a regular range, like a-z
                            if (chPrev > ch)
                            {
                                throw MakeException(RegexParseError.ReversedCharacterRange, SR.ReversedCharacterRange);
                            }
                            charClass!.AddRange(chPrev, ch);
                        }
                    }
                }
                else if (_pos + 1 < _pattern.Length && _pattern[_pos] == '-' && _pattern[_pos + 1] != ']')
                {
                    // this could be the start of a range
                    chPrev = ch;
                    inRange = true;
                    _pos++;
                }
                else if (_pos < _pattern.Length && ch == '-' && !translatedChar && _pattern[_pos] == '[' && !firstChar)
                {
                    // we aren't in a range, and now there is a subtraction.  Usually this happens
                    // only when a subtraction follows a range, like [a-z-[b]]
                    _pos++;
                    RegexCharClass? rcc = ScanCharClass(caseInsensitive, scanOnly);
                    if (!scanOnly)
                    {
                        charClass!.AddSubtraction(rcc!);
 
                        if (_pos < _pattern.Length && _pattern[_pos] != ']')
                        {
                            throw MakeException(RegexParseError.ExclusionGroupNotLast, SR.ExclusionGroupNotLast);
                        }
                    }
                }
                else
                {
                    if (!scanOnly)
                    {
                        charClass!.AddRange(ch, ch);
                    }
                }
            }
 
            if (!closed)
            {
                throw MakeException(RegexParseError.UnterminatedBracket, SR.UnterminatedBracket);
            }
 
            if (!scanOnly && caseInsensitive)
            {
                charClass!.AddCaseEquivalences(_culture);
            }
 
            return charClass;
        }
 
        /// <summary>
        /// Scans chars following a '(' (not counting the '('), and returns
        /// a RegexNode for the type of group scanned, or null if the group
        /// simply changed options (?cimsx-cimsx) or was a comment (#...).
        /// </summary>
        private RegexNode? ScanGroupOpen()
        {
            // just return a RegexNode if we have:
            // 1. "(" followed by nothing
            // 2. "(x" where x != ?
            // 3. "(?)"
            if (_pos == _pattern.Length || _pattern[_pos] != '?' || (_pos + 1 < _pattern.Length && _pattern[_pos + 1] == ')'))
            {
                if ((_options & RegexOptions.ExplicitCapture) != 0 || _ignoreNextParen)
                {
                    _ignoreNextParen = false;
                    return new RegexNode(RegexNodeKind.Group, _options);
                }
                else
                {
                    return new RegexNode(RegexNodeKind.Capture, _options, _autocap++, -1);
                }
            }
 
            _pos++;
 
            while (true)
            {
                if (_pos == _pattern.Length)
                {
                    break;
                }
 
                RegexNodeKind nodeType;
                char close = '>';
                char ch = _pattern[_pos++];
                switch (ch)
                {
                    case ':':
                        // noncapturing group
                        nodeType = RegexNodeKind.Group;
                        break;
 
                    case '=':
                        // lookahead assertion
                        _options &= ~RegexOptions.RightToLeft;
                        nodeType = RegexNodeKind.PositiveLookaround;
                        break;
 
                    case '!':
                        // negative lookahead assertion
                        _options &= ~RegexOptions.RightToLeft;
                        nodeType = RegexNodeKind.NegativeLookaround;
                        break;
 
                    case '>':
                        // atomic subexpression
                        nodeType = RegexNodeKind.Atomic;
                        break;
 
                    case '\'':
                        close = '\'';
                        goto case '<'; // fallthrough
 
                    case '<':
                        if (_pos == _pattern.Length)
                        {
                            goto BreakRecognize;
                        }
 
                        switch (ch = _pattern[_pos++])
                        {
                            case '=':
                                if (close == '\'')
                                {
                                    goto BreakRecognize;
                                }
 
                                // lookbehind assertion
                                _options |= RegexOptions.RightToLeft;
                                nodeType = RegexNodeKind.PositiveLookaround;
                                break;
 
                            case '!':
                                if (close == '\'')
                                {
                                    goto BreakRecognize;
                                }
 
                                // negative lookbehind assertion
                                _options |= RegexOptions.RightToLeft;
                                nodeType = RegexNodeKind.NegativeLookaround;
                                break;
 
                            default:
                                --_pos;
                                int capnum = -1;
                                int uncapnum = -1;
                                bool proceed = false;
 
                                // grab part before -
 
                                if ((uint)(ch - '0') <= 9)
                                {
                                    capnum = ScanDecimal();
 
                                    if (!IsCaptureSlot(capnum))
                                    {
                                        capnum = -1;
                                    }
 
                                    // check if we have bogus characters after the number
                                    if (_pos < _pattern.Length && !(_pattern[_pos] == close || _pattern[_pos] == '-'))
                                    {
                                        throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                    }
 
                                    if (capnum == 0)
                                    {
                                        throw MakeException(RegexParseError.CaptureGroupOfZero, SR.CaptureGroupOfZero);
                                    }
                                }
                                else if (RegexCharClass.IsBoundaryWordChar(ch))
                                {
                                    string capname = ScanCapname();
 
                                    if (_capnames?[capname] is int tmpCapnum)
                                    {
                                        capnum = tmpCapnum;
                                    }
 
                                    // check if we have bogus character after the name
                                    if (_pos < _pattern.Length && !(_pattern[_pos] == close || _pattern[_pos] == '-'))
                                    {
                                        throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                    }
                                }
                                else if (ch == '-')
                                {
                                    proceed = true;
                                }
                                else
                                {
                                    // bad group name - starts with something other than a word character and isn't a number
                                    throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                }
 
                                // grab part after - if any
 
                                if ((capnum != -1 || proceed) && _pos + 1 < _pattern.Length && _pattern[_pos] == '-')
                                {
                                    _pos++;
                                    ch = _pattern[_pos];
 
                                    if ((uint)(ch - '0') <= 9)
                                    {
                                        uncapnum = ScanDecimal();
 
                                        if (!IsCaptureSlot(uncapnum))
                                        {
                                            throw MakeException(RegexParseError.UndefinedNumberedReference, SR.Format(SR.UndefinedNumberedReference, uncapnum));
                                        }
 
                                        // check if we have bogus characters after the number
                                        if (_pos < _pattern.Length && _pattern[_pos] != close)
                                        {
                                            throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                        }
                                    }
                                    else if (RegexCharClass.IsBoundaryWordChar(ch))
                                    {
                                        string uncapname = ScanCapname();
 
                                        uncapnum = _capnames?[uncapname] is int tmpCapnum ?
                                            tmpCapnum :
                                            throw MakeException(RegexParseError.UndefinedNamedReference, SR.Format(SR.UndefinedNamedReference, uncapname));
 
                                        // check if we have bogus character after the name
                                        if (_pos < _pattern.Length && _pattern[_pos] != close)
                                        {
                                            throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                        }
                                    }
                                    else
                                    {
                                        // bad group name - starts with something other than a word character and isn't a number
                                        throw MakeException(RegexParseError.CaptureGroupNameInvalid, SR.CaptureGroupNameInvalid);
                                    }
                                }
 
                                // actually make the node
 
                                if ((capnum != -1 || uncapnum != -1) && _pos < _pattern.Length && _pattern[_pos++] == close)
                                {
                                    return new RegexNode(RegexNodeKind.Capture, _options, capnum, uncapnum);
                                }
                                goto BreakRecognize;
                        }
                        break;
 
                    case '(':
                        // conditional alternation construct (?(...) | )
 
                        int parenPos = _pos;
                        if (_pos < _pattern.Length)
                        {
                            ch = _pattern[_pos];
 
                            // check if the alternation condition is a backref
                            if (ch is >= '0' and <= '9')
                            {
                                int capnum = ScanDecimal();
                                if (_pos < _pattern.Length && _pattern[_pos++] == ')')
                                {
                                    if (IsCaptureSlot(capnum))
                                    {
                                        return new RegexNode(RegexNodeKind.BackreferenceConditional, _options, capnum);
                                    }
 
                                    throw MakeException(RegexParseError.AlternationHasUndefinedReference, SR.Format(SR.AlternationHasUndefinedReference, capnum.ToString()));
                                }
 
                                throw MakeException(RegexParseError.AlternationHasMalformedReference, SR.Format(SR.AlternationHasMalformedReference, capnum.ToString()));
                            }
                            else if (RegexCharClass.IsBoundaryWordChar(ch))
                            {
                                string capname = ScanCapname();
 
                                if (_capnames?[capname] is int tmpCapnum && _pos < _pattern.Length && _pattern[_pos++] == ')')
                                {
                                    return new RegexNode(RegexNodeKind.BackreferenceConditional, _options, tmpCapnum);
                                }
                            }
                        }
                        // not a backref
                        nodeType = RegexNodeKind.ExpressionConditional;
                        _pos = parenPos - 1;       // jump to the start of the parentheses
                        _ignoreNextParen = true;    // but make sure we don't try to capture the insides
 
                        if (_pos + 2 < _pattern.Length && _pattern[_pos + 1] == '?')
                        {
                            // disallow comments in the condition
                            if (_pattern[_pos + 2] == '#')
                            {
                                throw MakeException(RegexParseError.AlternationHasComment, SR.AlternationHasComment);
                            }
 
                            // disallow named capture group (?<..>..) in the condition
                            if (_pattern[_pos + 2] == '\'' || (_pos + 3 < _pattern.Length && _pattern[_pos + 2] == '<' && _pattern[_pos + 3] != '!' && _pattern[_pos + 3] != '='))
                            {
                                throw MakeException(RegexParseError.AlternationHasNamedCapture, SR.AlternationHasNamedCapture);
                            }
                        }
 
                        break;
 
                    default:
                        --_pos;
 
                        nodeType = RegexNodeKind.Group;
                        // Disallow options in the children of a testgroup node
                        if (_group!.Kind != RegexNodeKind.ExpressionConditional)
                        {
                            ScanOptions();
                        }
 
                        if (_pos == _pattern.Length)
                        {
                            goto BreakRecognize;
                        }
 
                        if ((ch = _pattern[_pos++]) == ')')
                        {
                            return null;
                        }
 
                        if (ch != ':')
                        {
                            goto BreakRecognize;
                        }
                        break;
                }
 
                return new RegexNode(nodeType, _options);
            }
 
        BreakRecognize:
            ;
            // break Recognize comes here
 
            throw MakeException(RegexParseError.InvalidGroupingConstruct, SR.InvalidGroupingConstruct);
        }
 
        /// <summary>Scans whitespace or x-mode comments</summary>
        private void ScanBlank()
        {
            while (true)
            {
                if ((_options & RegexOptions.IgnorePatternWhitespace) != 0)
                {
                    while (_pos < _pattern.Length && IsSpace(_pattern[_pos]))
                    {
                        _pos++;
                    }
                }
 
                if ((_options & RegexOptions.IgnorePatternWhitespace) != 0 && _pos < _pattern.Length && _pattern[_pos] == '#')
                {
                    _pos = _pattern.IndexOf('\n', _pos);
                    if (_pos < 0)
                    {
                        _pos = _pattern.Length;
                    }
                }
                else if (_pos + 2 < _pattern.Length && _pattern[_pos + 2] == '#' && _pattern[_pos + 1] == '?' && _pattern[_pos] == '(')
                {
                    _pos = _pattern.IndexOf(')', _pos);
                    if (_pos < 0)
                    {
                        _pos = _pattern.Length;
                        throw MakeException(RegexParseError.UnterminatedComment, SR.UnterminatedComment);
                    }
 
                    _pos++;
                }
                else
                {
                    break;
                }
            }
        }
 
        /// <summary>Scans chars following a '\' (not counting the '\'), and returns a RegexNode for the type of atom scanned</summary>
        private RegexNode? ScanBackslash(bool scanOnly)
        {
            Debug.Assert(_pos < _pattern.Length, "The current reading position must not be at the end of the pattern");
 
            char ch;
            switch (ch = _pattern[_pos])
            {
                case 'b':
                case 'B':
                case 'A':
                case 'G':
                case 'Z':
                case 'z':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(TypeFromCode(ch), _options);
 
                case 'w':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);
 
                case 'W':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);
 
                case 's':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);
 
                case 'S':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);
 
                case 'd':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);
 
                case 'D':
                    _pos++;
                    return scanOnly ? null :
                        new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), (_options & RegexOptions.ECMAScript) != 0 ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);
 
                case 'p':
                case 'P':
                    _pos++;
                    if (scanOnly)
                    {
                        return null;
                    }
 
                    var cc = new RegexCharClass();
                    cc.AddCategoryFromName(ParseProperty(), ch != 'p', (_options & RegexOptions.IgnoreCase) != 0, _pattern, _pos);
                    if ((_options & RegexOptions.IgnoreCase) != 0)
                    {
                        cc.AddCaseEquivalences(_culture);
                    }
 
                    return new RegexNode(RegexNodeKind.Set, (_options & ~RegexOptions.IgnoreCase), cc.ToStringClass());
 
                default:
                    RegexNode? result = ScanBasicBackslash(scanOnly);
                    if (result != null && result.Kind == RegexNodeKind.Backreference && (result.Options & RegexOptions.IgnoreCase) != 0)
                    {
                        _hasIgnoreCaseBackreferenceNodes = true;
                    }
                    return result;
            }
        }
 
        /// <summary>Scans \-style backreferences and character escapes</summary>
        private RegexNode? ScanBasicBackslash(bool scanOnly)
        {
            Debug.Assert(_pos < _pattern.Length, "The current reading position must not be at the end of the pattern");
 
            int backpos = _pos;
            char close = '\0';
            bool angled = false;
            char ch = _pattern[_pos];
 
            // allow \k<foo> instead of \<foo>, which is now deprecated
 
            if (ch == 'k')
            {
                if (_pos + 1 < _pattern.Length)
                {
                    _pos++;
                    ch = _pattern[_pos++];
                    if (ch is '<' or '\'')
                    {
                        angled = true;
                        close = (ch == '\'') ? '\'' : '>';
                    }
                }
 
                if (!angled || _pos == _pattern.Length)
                {
                    throw MakeException(RegexParseError.MalformedNamedReference, SR.MalformedNamedReference);
                }
 
                ch = _pattern[_pos];
            }
 
            // Note angle without \g
 
            else if ((ch == '<' || ch == '\'') && _pos + 1 < _pattern.Length)
            {
                angled = true;
                close = (ch == '\'') ? '\'' : '>';
                _pos++;
                ch = _pattern[_pos];
            }
 
            // Try to parse backreference: \<1>
 
            if (angled && ch >= '0' && ch <= '9')
            {
                int capnum = ScanDecimal();
 
                if (_pos < _pattern.Length && _pattern[_pos++] == close)
                {
                    return
                        scanOnly ? null :
                        IsCaptureSlot(capnum) ? new RegexNode(RegexNodeKind.Backreference, _options, capnum) :
                        throw MakeException(RegexParseError.UndefinedNumberedReference, SR.Format(SR.UndefinedNumberedReference, capnum.ToString()));
                }
            }
 
            // Try to parse backreference or octal: \1
 
            else if (!angled && ch >= '1' && ch <= '9')
            {
                if ((_options & RegexOptions.ECMAScript) != 0)
                {
                    int capnum = -1;
                    int newcapnum = ch - '0';
                    int pos = _pos - 1;
                    while (newcapnum <= _captop)
                    {
                        if (IsCaptureSlot(newcapnum) && (_caps == null || (int)_caps[newcapnum]! < pos))
                        {
                            capnum = newcapnum;
                        }
 
                        _pos++;
                        if (_pos == _pattern.Length || (ch = _pattern[_pos]) < '0' || ch > '9')
                        {
                            break;
                        }
 
                        newcapnum = newcapnum * 10 + (ch - '0');
                    }
 
                    if (capnum >= 0)
                    {
                        return scanOnly ? null : new RegexNode(RegexNodeKind.Backreference, _options, capnum);
                    }
                }
                else
                {
                    int capnum = ScanDecimal();
 
                    if (scanOnly)
                    {
                        return null;
                    }
 
                    if (IsCaptureSlot(capnum))
                    {
                        return new RegexNode(RegexNodeKind.Backreference, _options, capnum);
                    }
 
                    if (capnum <= 9)
                    {
                        throw MakeException(RegexParseError.UndefinedNumberedReference, SR.Format(SR.UndefinedNumberedReference, capnum.ToString()));
                    }
                }
            }
 
            // Try to parse backreference: \<foo>
 
            else if (angled && RegexCharClass.IsBoundaryWordChar(ch))
            {
                string capname = ScanCapname();
 
                if (_pos < _pattern.Length && _pattern[_pos++] == close)
                {
                    return
                        scanOnly ? null :
                        _capnames?[capname] is int tmpCapnum ? new RegexNode(RegexNodeKind.Backreference, _options, tmpCapnum) :
                        throw MakeException(RegexParseError.UndefinedNamedReference, SR.Format(SR.UndefinedNamedReference, capname));
                }
            }
 
            // Not backreference: must be char code
 
            _pos = backpos;
            ch = ScanCharEscape();
 
            return !scanOnly ?
                RegexNode.CreateOneWithCaseConversion(ch, _options, _culture, ref _caseBehavior) :
                null;
        }
 
        /// <summary>Scans $ patterns recognized within replacement patterns</summary>
        private RegexNode ScanDollar()
        {
            if (_pos == _pattern.Length)
            {
                return RegexNode.CreateOneWithCaseConversion('$', _options, _culture, ref _caseBehavior);
            }
 
            char ch = _pattern[_pos];
            bool angled;
            int backpos = _pos;
            int lastEndPos = backpos;
 
            // Note angle
 
            if (ch == '{' && _pos + 1 < _pattern.Length)
            {
                angled = true;
                _pos++;
                ch = _pattern[_pos];
            }
            else
            {
                angled = false;
            }
 
            // Try to parse backreference: \1 or \{1} or \{cap}
 
            if (ch is >= '0' and <= '9')
            {
                if (!angled && (_options & RegexOptions.ECMAScript) != 0)
                {
                    int capnum = -1;
                    int newcapnum = ch - '0';
                    _pos++;
                    if (IsCaptureSlot(newcapnum))
                    {
                        capnum = newcapnum;
                        lastEndPos = _pos;
                    }
 
                    while (_pos < _pattern.Length && (ch = _pattern[_pos]) >= '0' && ch <= '9')
                    {
                        int digit = ch - '0';
                        if (newcapnum > MaxValueDiv10 || (newcapnum == MaxValueDiv10 && digit > MaxValueMod10))
                        {
                            throw MakeException(RegexParseError.QuantifierOrCaptureGroupOutOfRange, SR.QuantifierOrCaptureGroupOutOfRange);
                        }
 
                        newcapnum = newcapnum * 10 + digit;
 
                        _pos++;
                        if (IsCaptureSlot(newcapnum))
                        {
                            capnum = newcapnum;
                            lastEndPos = _pos;
                        }
                    }
                    _pos = lastEndPos;
                    if (capnum >= 0)
                    {
                        return new RegexNode(RegexNodeKind.Backreference, _options, capnum);
                    }
                }
                else
                {
                    int capnum = ScanDecimal();
                    if (!angled || _pos < _pattern.Length && _pattern[_pos++] == '}')
                    {
                        if (IsCaptureSlot(capnum))
                        {
                            return new RegexNode(RegexNodeKind.Backreference, _options, capnum);
                        }
                    }
                }
            }
            else if (angled && RegexCharClass.IsBoundaryWordChar(ch))
            {
                string capname = ScanCapname();
                if (_pos < _pattern.Length && _pattern[_pos++] == '}')
                {
                    if (_capnames?[capname] is int tmpCapnum)
                    {
                        return new RegexNode(RegexNodeKind.Backreference, _options, tmpCapnum);
                    }
                }
            }
            else if (!angled)
            {
                int capnum = 1;
 
                switch (ch)
                {
                    case '$':
                        _pos++;
                        return RegexNode.CreateOneWithCaseConversion('$', _options, _culture, ref _caseBehavior);
 
                    case '&':
                        capnum = 0;
                        break;
 
                    case '`':
                        capnum = RegexReplacement.LeftPortion;
                        break;
 
                    case '\'':
                        capnum = RegexReplacement.RightPortion;
                        break;
 
                    case '+':
                        capnum = RegexReplacement.LastGroup;
                        break;
 
                    case '_':
                        capnum = RegexReplacement.WholeString;
                        break;
                }
 
                if (capnum != 1)
                {
                    _pos++;
                    return new RegexNode(RegexNodeKind.Backreference, _options, capnum);
                }
            }
 
            // unrecognized $: literalize
 
            _pos = backpos;
            return RegexNode.CreateOneWithCaseConversion('$', _options, _culture, ref _caseBehavior);
        }
 
        /// <summary>Scans a capture name: consumes word chars</summary>
        private string ScanCapname()
        {
            int startpos = _pos;
 
            while (_pos < _pattern.Length)
            {
                if (!RegexCharClass.IsBoundaryWordChar(_pattern[_pos++]))
                {
                    --_pos;
                    break;
                }
            }
 
            return _pattern.Substring(startpos, _pos - startpos);
        }
 
        /// <summary>Scans up to three octal digits (stops before exceeding 0377)</summary>
        private char ScanOctal()
        {
            // Consume octal chars only up to 3 digits and value 0377
            int c = Math.Min(3, _pattern.Length - _pos);
            int d;
            int i;
            for (i = 0; c > 0 && (uint)(d = _pattern[_pos] - '0') <= 7; c -= 1)
            {
                _pos++;
                i = (i * 8) + d;
                if ((_options & RegexOptions.ECMAScript) != 0 && i >= 0x20)
                {
                    break;
                }
            }
 
            // Octal codes only go up to 255.  Any larger and the behavior that Perl follows
            // is simply to truncate the high bits.
            i &= 0xFF;
 
            return (char)i;
        }
 
        /// <summary>Scans any number of decimal digits (pegs value at 2^31-1 if too large)</summary>
        private int ScanDecimal()
        {
            int i = 0;
            int d;
 
            while (_pos < _pattern.Length && (uint)(d = (char)(_pattern[_pos] - '0')) <= 9)
            {
                _pos++;
 
                if (i > MaxValueDiv10 || (i == MaxValueDiv10 && d > MaxValueMod10))
                {
                    throw MakeException(RegexParseError.QuantifierOrCaptureGroupOutOfRange, SR.QuantifierOrCaptureGroupOutOfRange);
                }
 
                i = (i * 10) + d;
            }
 
            return i;
        }
 
        /// <summary>Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)</summary>
        private char ScanHex(int c)
        {
            int i = 0;
 
            if (_pos + c <= _pattern.Length)
            {
                for (; c > 0; c -= 1)
                {
                    char ch = _pattern[_pos++];
                    int result = HexConverter.FromChar(ch);
                    if (result == 0xFF)
                    {
                        break;
                    }
 
                    i = (i * 0x10) + result;
                }
            }
 
            if (c > 0)
            {
                throw MakeException(RegexParseError.InsufficientOrInvalidHexDigits, SR.InsufficientOrInvalidHexDigits);
            }
 
            return (char)i;
        }
 
        /// <summary>Grabs and converts an ASCII control character</summary>
        private char ScanControl()
        {
            if (_pos == _pattern.Length)
            {
                throw MakeException(RegexParseError.MissingControlCharacter, SR.MissingControlCharacter);
            }
 
            char ch = _pattern[_pos++];
 
            // \ca interpreted as \cA
 
            if ((uint)(ch - 'a') <= 'z' - 'a')
            {
                ch = (char)(ch - ('a' - 'A'));
            }
 
            if ((ch = (char)(ch - '@')) < ' ')
            {
                return ch;
            }
 
            throw MakeException(RegexParseError.UnrecognizedControlCharacter, SR.UnrecognizedControlCharacter);
        }
 
        /// <summary>Scans cimsx-cimsx option string, stops at the first unrecognized char.</summary>
        private void ScanOptions()
        {
            for (bool off = false; _pos < _pattern.Length; _pos++)
            {
                char ch = _pattern[_pos];
 
                if (ch == '-')
                {
                    off = true;
                }
                else if (ch == '+')
                {
                    off = false;
                }
                else
                {
                    RegexOptions options = (char)(ch | 0x20) switch
                    {
                        'i' => RegexOptions.IgnoreCase,
                        'm' => RegexOptions.Multiline,
                        'n' => RegexOptions.ExplicitCapture,
                        's' => RegexOptions.Singleline,
                        'x' => RegexOptions.IgnorePatternWhitespace,
                        _ => RegexOptions.None,
                    };
                    if (options == 0)
                    {
                        return;
                    }
 
                    if (off)
                    {
                        _options &= ~options;
                    }
                    else
                    {
                        _options |= options;
                    }
                }
            }
        }
 
        /// <summary>Scans \ code for escape codes that map to single Unicode chars.</summary>
        private char ScanCharEscape()
        {
            char ch = _pattern[_pos++];
 
            if (ch is >= '0' and <= '7')
            {
                --_pos;
                return ScanOctal();
            }
 
            switch (ch)
            {
                case 'x':
                    return ScanHex(2);
                case 'u':
                    return ScanHex(4);
                case 'a':
                    return '\u0007';
                case 'b':
                    return '\b';
                case 'e':
                    return '\e';
                case 'f':
                    return '\f';
                case 'n':
                    return '\n';
                case 'r':
                    return '\r';
                case 't':
                    return '\t';
                case 'v':
                    return '\u000B';
                case 'c':
                    return ScanControl();
                default:
                    if ((_options & RegexOptions.ECMAScript) == 0 && RegexCharClass.IsBoundaryWordChar(ch))
                    {
                        throw MakeException(RegexParseError.UnrecognizedEscape, SR.Format(SR.UnrecognizedEscape, ch));
                    }
                    return ch;
            }
        }
 
        /// <summary>Scans X for \p{X} or \P{X}</summary>
        private string ParseProperty()
        {
            if (_pos + 2 >= _pattern.Length)
            {
                throw MakeException(RegexParseError.InvalidUnicodePropertyEscape, SR.InvalidUnicodePropertyEscape);
            }
 
            char ch = _pattern[_pos++];
            if (ch != '{')
            {
                throw MakeException(RegexParseError.MalformedUnicodePropertyEscape, SR.MalformedUnicodePropertyEscape);
            }
 
            int startpos = _pos;
            while (_pos < _pattern.Length)
            {
                ch = _pattern[_pos++];
                if (!(RegexCharClass.IsBoundaryWordChar(ch) || ch == '-'))
                {
                    --_pos;
                    break;
                }
            }
 
            string capname = _pattern.Substring(startpos, _pos - startpos);
 
            if (_pos == _pattern.Length || _pattern[_pos++] != '}')
            {
                throw MakeException(RegexParseError.InvalidUnicodePropertyEscape, SR.InvalidUnicodePropertyEscape);
            }
 
            return capname;
        }
 
        /// <summary>Returns the node kind for zero-length assertions with a \ code.</summary>
        private readonly RegexNodeKind TypeFromCode(char ch) =>
            ch switch
            {
                'b' => (_options & RegexOptions.ECMAScript) != 0 ? RegexNodeKind.ECMABoundary : RegexNodeKind.Boundary,
                'B' => (_options & RegexOptions.ECMAScript) != 0 ? RegexNodeKind.NonECMABoundary : RegexNodeKind.NonBoundary,
                'A' => RegexNodeKind.Beginning,
                'G' => RegexNodeKind.Start,
                'Z' => RegexNodeKind.EndZ,
                'z' => RegexNodeKind.End,
                _ => RegexNodeKind.Nothing,
            };
 
        /// <summary>
        /// A prescanner for deducing the slots used for captures by doing a partial tokenization of the pattern.
        /// </summary>
        private void CountCaptures(out RegexOptions optionsFoundInPattern)
        {
            NoteCaptureSlot(0, 0);
            optionsFoundInPattern = RegexOptions.None;
            _autocap = 1;
 
            while (_pos < _pattern.Length)
            {
                int pos = _pos;
                char ch = _pattern[_pos++];
                switch (ch)
                {
                    case '\\':
                        if (_pos < _pattern.Length)
                        {
                            ScanBackslash(scanOnly: true);
                        }
                        break;
 
                    case '#':
                        if ((_options & RegexOptions.IgnorePatternWhitespace) != 0)
                        {
                            --_pos;
                            ScanBlank();
                        }
                        break;
 
                    case '[':
                        ScanCharClass(caseInsensitive: false, scanOnly: true);
                        break;
 
                    case ')':
                        if (_optionsStack.Length != 0)
                        {
                            _options = (RegexOptions)_optionsStack.Pop();
                        }
                        break;
 
                    case '(':
                        if (_pos + 1 < _pattern.Length && _pattern[_pos + 1] == '#' && _pattern[_pos] == '?')
                        {
                            // we have a comment (?#
                            --_pos;
                            ScanBlank();
                        }
                        else
                        {
                            _optionsStack.Append((int)_options);
                            if (_pos < _pattern.Length && _pattern[_pos] == '?')
                            {
                                // we have (?...
                                _pos++;
 
                                if (_pos + 1 < _pattern.Length && (_pattern[_pos] == '<' || _pattern[_pos] == '\''))
                                {
                                    // named group: (?<... or (?'...
 
                                    _pos++;
                                    ch = _pattern[_pos];
 
                                    if (ch != '0' && RegexCharClass.IsBoundaryWordChar(ch))
                                    {
                                        if ((uint)(ch - '1') <= '9' - '1')
                                        {
                                            NoteCaptureSlot(ScanDecimal(), pos);
                                        }
                                        else
                                        {
                                            NoteCaptureName(ScanCapname(), pos);
                                        }
                                    }
                                }
                                else
                                {
                                    // (?...
 
                                    // get the options if it's an option construct (?cimsx-cimsx...)
                                    ScanOptions();
                                    optionsFoundInPattern |= _options;
 
                                    if (_pos < _pattern.Length)
                                    {
                                        if (_pattern[_pos] == ')')
                                        {
                                            // (?cimsx-cimsx)
                                            _pos++;
                                            _optionsStack.Length--;
                                        }
                                        else if (_pattern[_pos] == '(')
                                        {
                                            // alternation construct: (?(foo)yes|no)
                                            // ignore the next paren so we don't capture the condition
                                            _ignoreNextParen = true;
 
                                            // break from here so we don't reset _ignoreNextParen
                                            break;
                                        }
                                    }
                                }
                            }
                            else
                            {
                                // Simple (unnamed) capture group.
                                // Add unnamed parentheses if ExplicitCapture is not set
                                // and the next parentheses is not ignored.
                                if ((_options & RegexOptions.ExplicitCapture) == 0 && !_ignoreNextParen)
                                {
                                    NoteCaptureSlot(_autocap++, pos);
                                }
                            }
                        }
 
                        _ignoreNextParen = false;
                        break;
                }
            }
 
            AssignNameSlots();
        }
 
        /// <summary>Notes a used capture slot</summary>
        private void NoteCaptureSlot(int i, int pos)
        {
            object boxedI = i; // workaround to remove a boxed int when adding to the hashtable
            if (!_caps.ContainsKey(boxedI))
            {
                // the rhs of the hashtable isn't used in the parser
 
                _caps.Add(boxedI, pos);
                _capcount++;
 
                if (_captop <= i)
                {
                    _captop = i == int.MaxValue ? i : i + 1;
                }
            }
        }
 
        /// <summary>Notes a used capture slot</summary>
        private void NoteCaptureName(string name, int pos)
        {
            if (_capnames == null)
            {
                _capnames = new Hashtable();
                _capnamelist = new List<string>();
            }
 
            if (!_capnames.ContainsKey(name))
            {
                _capnames.Add(name, pos);
                _capnamelist!.Add(name);
            }
        }
 
        /// <summary>Assigns unused slot numbers to the capture names.</summary>
        private void AssignNameSlots()
        {
            if (_capnames != null)
            {
                for (int i = 0; i < _capnamelist!.Count; i++)
                {
                    while (IsCaptureSlot(_autocap))
                    {
                        _autocap++;
                    }
 
                    string name = _capnamelist[i];
                    int pos = (int)_capnames[name]!;
                    _capnames[name] = _autocap;
                    NoteCaptureSlot(_autocap, pos);
 
                    _autocap++;
                }
            }
 
            // if the caps array has at least one gap, construct the list of used slots
 
            if (_capcount < _captop)
            {
                _capnumlist = new int[_capcount];
                int i = 0;
 
                // Manual use of IDictionaryEnumerator instead of foreach to avoid DictionaryEntry box allocations.
                IDictionaryEnumerator de = _caps.GetEnumerator();
                while (de.MoveNext())
                {
                    _capnumlist[i++] = (int)de.Key;
                }
 
                Array.Sort(_capnumlist);
            }
 
            // merge capsnumlist into capnamelist
 
            if (_capnames != null || _capnumlist != null)
            {
                List<string>? oldcapnamelist;
                int next;
                int k = 0;
 
                if (_capnames == null)
                {
                    oldcapnamelist = null;
                    _capnames = new Hashtable();
                    _capnamelist = new List<string>();
                    next = -1;
                }
                else
                {
                    oldcapnamelist = _capnamelist;
                    _capnamelist = new List<string>();
                    next = (int)_capnames[oldcapnamelist![0]]!;
                }
 
                for (int i = 0; i < _capcount; i++)
                {
                    int j = (_capnumlist == null) ? i : _capnumlist[i];
 
                    if (next == j)
                    {
                        _capnamelist.Add(oldcapnamelist![k++]);
                        next = (k == oldcapnamelist.Count) ? -1 : (int)_capnames[oldcapnamelist[k]]!;
                    }
                    else
                    {
                        string str = j.ToString(_culture);
                        _capnamelist.Add(str);
                        _capnames[str] = j;
                    }
                }
            }
        }
 
        /// <summary>True if the capture slot was noted</summary>
        private readonly bool IsCaptureSlot(int i)
        {
            if (_caps != null)
            {
                return _caps.ContainsKey(i);
            }
 
            return i >= 0 && i < _capsize;
        }
 
        /// <summary>
        /// When generating code on a regex that uses a sparse set
        /// of capture slots, we hash them to a dense set of indices
        /// for an array of capture slots. Instead of doing the hash
        /// at match time, it's done at compile time, here.
        /// </summary>
        internal static int MapCaptureNumber(int capnum, Hashtable? caps) =>
            capnum == -1 ? -1 :
            caps != null ? (int)caps[capnum]! :
            capnum;
 
        private const byte Q = 4;    // quantifier          * + ? {
        private const byte S = 3;    // stopper             $ ( ) . [ \ ^ |
        private const byte Z = 2;    // # stopper           #
        private const byte W = 1;    // whitespace          \t \n \f \r ' '
 
        /// <summary>For categorizing ASCII characters.</summary>
        private static ReadOnlySpan<byte> Category =>
        [
            // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
               0, 0, 0, 0, 0, 0, 0, 0, 0, W, W, 0, W, W, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            //    !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
               W, 0, 0, Z, S, 0, 0, 0, S, S, Q, Q, 0, 0, S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q,
            // @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, S, 0,
            // '  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0
        ];
 
#if NET8_0_OR_GREATER
        private static readonly SearchValues<char> s_metachars =
            SearchValues.Create("\t\n\f\r #$()*+.?[\\^{|");
 
        private static int IndexOfMetachar(ReadOnlySpan<char> input) =>
            input.IndexOfAny(s_metachars);
#else
        private static int IndexOfMetachar(ReadOnlySpan<char> input)
        {
            for (int i = 0; i < input.Length; i++)
            {
                if (input[i] <= '|' && Category[input[i]] > 0)
                {
                    return i;
                }
            }
 
            return -1;
        }
#endif
 
        /// <summary>Returns true for those characters that terminate a string of ordinary chars.</summary>
        private static bool IsSpecial(char ch) => ch <= '|' && Category[ch] >= S;
 
        /// <summary>Returns true for those characters including whitespace that terminate a string of ordinary chars.</summary>
        private static bool IsSpecialOrSpace(char ch) => ch <= '|' && Category[ch] >= W;
 
        /// <summary>Returns true for those characters that begin a quantifier.</summary>
        private static bool IsQuantifier(char ch) => ch <= '{' && Category[ch] == Q;
 
        /// <summary>Returns true for whitespace.</summary>
        private static bool IsSpace(char ch) => ch <= ' ' && Category[ch] == W;
 
        private readonly bool IsTrueQuantifier()
        {
            Debug.Assert(_pos < _pattern.Length, "The current reading position must not be at the end of the pattern");
 
            int startpos = _pos;
            char ch = _pattern[startpos];
            if (ch != '{')
            {
                return ch <= '{' && Category[ch] >= Q;
            }
 
            int pos = startpos;
            int nChars = _pattern.Length - _pos;
            while (--nChars > 0 && (uint)((ch = _pattern[++pos]) - '0') <= 9) ;
 
            if (nChars == 0 || pos - startpos == 1)
            {
                return false;
            }
 
            if (ch == '}')
            {
                return true;
            }
 
            if (ch != ',')
            {
                return false;
            }
 
            while (--nChars > 0 && (uint)((ch = _pattern[++pos]) - '0') <= 9) ;
 
            return nChars > 0 && ch == '}';
        }
 
        /// <summary>Add a string to the last concatenate.</summary>
        private void AddToConcatenate(int pos, int cch, bool isReplacement)
        {
            switch (cch)
            {
                case 0:
                    return;
 
                case 1:
                    _concatenation!.AddChild(RegexNode.CreateOneWithCaseConversion(_pattern[pos], isReplacement ? _options & ~RegexOptions.IgnoreCase : _options, _culture, ref _caseBehavior));
                    break;
 
                case > 1 when (_options & RegexOptions.IgnoreCase) == 0 || isReplacement || !RegexCharClass.ParticipatesInCaseConversion(_pattern.AsSpan(pos, cch)):
                    _concatenation!.AddChild(new RegexNode(RegexNodeKind.Multi, _options & ~RegexOptions.IgnoreCase, _pattern.Substring(pos, cch)));
                    break;
 
                default:
                    foreach (char c in _pattern.AsSpan(pos, cch))
                    {
                        _concatenation!.AddChild(RegexNode.CreateOneWithCaseConversion(c, _options, _culture, ref _caseBehavior));
                    }
                    break;
            }
        }
 
        /// <summary>Push the parser state (in response to an open paren)</summary>
        private void PushGroup()
        {
            _group!.Parent = _stack;
            _alternation!.Parent = _group;
            _concatenation!.Parent = _alternation;
            _stack = _concatenation;
        }
 
        /// <summary>Remember the pushed state (in response to a ')')</summary>
        private void PopGroup()
        {
            _concatenation = _stack;
            _alternation = _concatenation!.Parent;
            _group = _alternation!.Parent;
            _stack = _group!.Parent;
 
            // The first () inside a Testgroup group goes directly to the group
            if (_group.Kind == RegexNodeKind.ExpressionConditional && _group.ChildCount() == 0)
            {
                if (_unit == null)
                {
                    throw MakeException(RegexParseError.AlternationHasMalformedCondition, SR.AlternationHasMalformedCondition);
                }
 
                _group.AddChild(_unit);
                _unit = null;
            }
        }
 
        /// <summary>Start a new round for the parser state (in response to an open paren or string start)</summary>
        private void StartGroup(RegexNode openGroup)
        {
            _group = openGroup;
            _alternation = new RegexNode(RegexNodeKind.Alternate, _options);
            _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options);
        }
 
        /// <summary>Finish the current concatenation (in response to a |)</summary>
        private void AddAlternate()
        {
            // The | parts inside a Testgroup group go directly to the group
 
            if (_group!.Kind is RegexNodeKind.ExpressionConditional or RegexNodeKind.BackreferenceConditional)
            {
                _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
            }
            else
            {
                _alternation!.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
            }
 
            _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options);
        }
 
        /// <summary>Finish the current group (in response to a ')' or end)</summary>
        private void AddGroup()
        {
            if (_group!.Kind is RegexNodeKind.ExpressionConditional or RegexNodeKind.BackreferenceConditional)
            {
                _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
 
                if (_group.Kind == RegexNodeKind.BackreferenceConditional && _group.ChildCount() > 2 || _group.ChildCount() > 3)
                {
                    throw MakeException(RegexParseError.AlternationHasTooManyConditions, SR.AlternationHasTooManyConditions);
                }
            }
            else
            {
                _alternation!.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
                _group.AddChild(_alternation);
            }
 
            _unit = _group;
        }
 
        /// <summary>Fills in a RegexParseException</summary>
        private readonly RegexParseException MakeException(RegexParseError error, string message) =>
            new RegexParseException(error, _pos, SR.Format(SR.MakeException, _pattern, _pos, message));
 
        /// <summary>Gets group name from its number.</summary>
        internal static string GroupNameFromNumber(Hashtable? caps, string[]? capslist, int capsize, int i)
        {
            if (capslist is null)
            {
                if ((uint)i < (uint)capsize)
                {
                    return ((uint)i).ToString();
                }
            }
            else
            {
                if ((caps is null || caps.TryGetValue(i, out i)) &&
                    (uint)i < (uint)capslist.Length)
                {
                    return capslist[i];
                }
            }
 
            return string.Empty;
        }
    }
}