RegexCharClass.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Threading;
 
namespace System.Text.RegularExpressions
{
    // The main function of RegexCharClass is as a builder to turn ranges, characters and
    // Unicode categories into a single string.  This string is used as a black box
    // representation of a character class by the rest of Regex.  The format is as follows.
    //
    // Char index   Use
    //      0       Flags - currently this only holds the "negate" flag
    //      1       length of the string representing the "set" portion, e.g. [a-z0-9] only has a "set"
    //      2       length of the string representing the "category" portion, e.g. [\p{Lu}] only has a "category"
    //      3...m   The set.  These are a series of ranges which define the characters included in the set.
    //              To determine if a given character is in the set, we binary search over this set of ranges
    //              and see where the character should go.  Based on whether the ending index is odd or even,
    //              we know if the character is in the set.
    //      m+1...n The categories.  This is a list of UnicodeCategory enum values which describe categories
    //              included in this class. These can either be individual values (either UnicodeCategory - 1
    //              for inclusive values, or -1 - UnicodeCategory for exclusive values), or a "group", which
    //              is a contiguous sequence of such values surrounded by \0 values; all values in the group
    //              have the same positive/negative orientation.
 
    /// <summary>Provides the "set of Unicode chars" functionality used by the regexp engine.</summary>
    internal sealed partial class RegexCharClass
    {
        // Constants
        internal const int FlagsIndex = 0;
        internal const int SetLengthIndex = 1;
        internal const int CategoryLengthIndex = 2;
        internal const int SetStartIndex = 3; // must be odd for subsequent logic to work
 
        internal const char LastChar = '\uFFFF';
 
        internal const short SpaceConst = 100;
        private const short NotSpaceConst = -100;
 
        private const string InternalRegexIgnoreCase = "__InternalRegexIgnoreCase__";
        private const string SpaceCategories = "\x64";
        private const string NotSpaceCategories = "\uFF9C";
        private const string WordCategories = "\u0000\u0002\u0004\u0005\u0003\u0001\u0006\u0009\u0013\u0000";
        private const string NotWordCategories = "\u0000\uFFFE\uFFFC\uFFFB\uFFFD\uFFFF\uFFFA\uFFF7\uFFED\u0000";
 
        internal const string SpaceClass = "\u0000\u0000\u0001\u0064"; // \s
        internal const string NotSpaceClass = "\u0000\u0000\u0001\uFF9C"; // \S
        internal const string WordClass = "\u0000\u0000\u000A\u0000\u0002\u0004\u0005\u0003\u0001\u0006\u0009\u0013\u0000"; // \w
        internal const string NotWordClass = "\u0000\u0000\u000A\u0000\uFFFE\uFFFC\uFFFB\uFFFD\uFFFF\uFFFA\uFFF7\uFFED\u0000"; // \W
        internal const string DigitClass = "\u0000\u0000\u0001\u0009"; // \d
        internal const string NotDigitClass = "\u0000\u0000\u0001\uFFF7"; // \D
        internal const string ControlClass = "\0\0\u0001\u000f"; // \p{Cc}
        internal const string NotControlClass = "\0\0\u0001\ufff1"; // \P{Cc}
        internal const string LetterClass = "\0\0\a\0\u0002\u0004\u0005\u0003\u0001\0"; // \p{L}
        internal const string NotLetterClass = "\0\0\u0007\0\ufffe\ufffc\ufffb\ufffd\uffff\0"; // \P{L}
        internal const string LetterOrDigitClass = "\0\0\b\0\u0002\u0004\u0005\u0003\u0001\0\t"; // [\p{L}\d]
        internal const string NotLetterOrDigitClass = "\u0001\0\b\0\u0002\u0004\u0005\u0003\u0001\0\t"; // [^\p{L}\d]
        internal const string LowerClass = "\0\0\u0001\u0002"; // \p{Ll}
        internal const string NotLowerClass = "\0\0\u0001\ufffe"; // \P{Ll}
        internal const string UpperClass = "\0\0\u0001\u0001"; // \p{Lu}
        internal const string NotUpperClass = "\0\0\u0001\uffff"; // \P{Lu}
        internal const string NumberClass = "\0\0\u0005\0\t\n\v\0"; // \p{N}
        internal const string NotNumberClass = "\0\0\u0005\0\ufff7\ufff6\ufff5\0"; // \P{N}
        internal const string PunctuationClass = "\0\0\t\0\u0013\u0014\u0016\u0019\u0015\u0018\u0017\0"; // \p{P}
        internal const string NotPunctuationClass = "\0\0\u0009\0\uffed\uffec\uffea\uffe7\uffeb\uffe8\uffe9\0"; // \P{P}
        internal const string SeparatorClass = "\0\0\u0005\0\r\u000e\f\0"; // \p{Z}
        internal const string NotSeparatorClass = "\0\0\u0005\0\ufff3\ufff2\ufff4\0"; // \P{Z}
        internal const string SymbolClass = "\0\0\u0006\0\u001b\u001c\u001a\u001d\0"; // \p{S}
        internal const string NotSymbolClass = "\0\0\u0006\0\uffe5\uffe4\uffe6\uffe3\0"; // \P{S}
        internal const string AsciiLetterClass = "\0\u0004\0A[a{"; // [A-Za-z]
        internal const string NotAsciiLetterClass = "\u0001\u0004\0A[a{"; // [^A-Za-z]
        internal const string AsciiLetterOrDigitClass = "\0\u0006\00:A[a{"; // [A-Za-z0-9]
        internal const string NotAsciiLetterOrDigitClass = "\u0001\u0006\00:A[a{"; // [^A-Za-z0-9]
        internal const string HexDigitClass = "\0\u0006\00:AGag"; // [A-Fa-f0-9]
        internal const string NotHexDigitClass = "\u0001\u0006\00:AGag"; // [^A-Fa-f0-9]
        internal const string HexDigitUpperClass = "\0\u0004\00:AG"; // [A-F0-9]
        internal const string NotHexDigitUpperClass = "\u0001\u0004\00:AG"; // [A-F0-9]
        internal const string HexDigitLowerClass = "\0\u0004\00:ag"; // [a-f0-9]
        internal const string NotHexDigitLowerClass = "\u0001\u0004\00:ag"; // [a-f0-9]
 
        private const string ECMASpaceRanges = "\u0009\u000E\u0020\u0021";
        private const string NotECMASpaceRanges = "\0\u0009\u000E\u0020\u0021";
        private const string ECMAWordRanges = "\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131";
        private const string NotECMAWordRanges = "\0\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131";
        private const string ECMADigitRanges = "\u0030\u003A";
        private const string NotECMADigitRanges = "\0\u0030\u003A";
 
        internal const string ECMASpaceClass = "\x00\x04\x00" + ECMASpaceRanges;
        internal const string NotECMASpaceClass = "\x01\x04\x00" + ECMASpaceRanges;
        internal const string ECMAWordClass = "\x00\x0A\x00" + ECMAWordRanges;
        internal const string NotECMAWordClass = "\x01\x0A\x00" + ECMAWordRanges;
        internal const string ECMADigitClass = "\x00\x02\x00" + ECMADigitRanges;
        internal const string NotECMADigitClass = "\x01\x02\x00" + ECMADigitRanges;
 
        internal const string NotNewLineClass = "\x01\x02\x00\x0A\x0B";
 
        internal const string AnyClass = "\x00\x01\x00\x00";
        private const string EmptyClass = "\x00\x00\x00";
 
        // Sets regularly used as a canonical way to express the equivalent of '.' with Singleline when Singleline isn't in use.
        internal const string WordNotWordClass = "\u0000\u0000\u0014\u0000\u0002\u0004\u0005\u0003\u0001\u0006\u0009\u0013\u0000\u0000\uFFFE\uFFFC\uFFFB\uFFFD\uFFFF\uFFFA\uFFF7\uFFED\u0000";
        internal const string NotWordWordClass = "\u0000\u0000\u0014\u0000\uFFFE\uFFFC\uFFFB\uFFFD\uFFFF\uFFFA\uFFF7\uFFED\u0000\u0000\u0002\u0004\u0005\u0003\u0001\u0006\u0009\u0013\u0000";
        internal const string DigitNotDigitClass = "\u0000\u0000\u0002\u0009\uFFF7";
        internal const string NotDigitDigitClass = "\u0000\u0000\u0002\uFFF7\u0009";
        internal const string SpaceNotSpaceClass = "\u0000\u0000\u0002\u0064\uFF9C";
        internal const string NotSpaceSpaceClass = "\u0000\u0000\u0002\uFF9C\u0064";
 
        // UnicodeCategory is zero based, so we add one to each value and subtract it off later
        private const int DefinedCategoriesCapacity = 38;
        private static readonly Dictionary<string, string> s_definedCategories = new Dictionary<string, string>(DefinedCategoriesCapacity)
        {
            // Others
            { "Cc", "\u000F" }, // UnicodeCategory.Control + 1
            { "Cf", "\u0010" }, // UnicodeCategory.Format + 1
            { "Cn", "\u001E" }, // UnicodeCategory.OtherNotAssigned + 1
            { "Co", "\u0012" }, // UnicodeCategory.PrivateUse + 1
            { "Cs", "\u0011" }, // UnicodeCategory.Surrogate + 1
            { "C", "\u0000\u000F\u0010\u001E\u0012\u0011\u0000" },
 
            // Letters
            { "Ll", "\u0002" }, // UnicodeCategory.LowercaseLetter + 1
            { "Lm", "\u0004" }, // UnicodeCategory.ModifierLetter + 1
            { "Lo", "\u0005" }, // UnicodeCategory.OtherLetter + 1
            { "Lt", "\u0003" }, // UnicodeCategory.TitlecaseLetter + 1
            { "Lu", "\u0001" }, // UnicodeCategory.UppercaseLetter + 1
            { "L", "\u0000\u0002\u0004\u0005\u0003\u0001\u0000" },
 
            // InternalRegexIgnoreCase = {LowercaseLetter} OR {TitlecaseLetter} OR {UppercaseLetter}
            // !!!This category should only ever be used in conjunction with RegexOptions.IgnoreCase code paths!!!
            { InternalRegexIgnoreCase, "\u0000\u0002\u0003\u0001\u0000" },
 
            // Marks
            { "Mc", "\u0007" }, // UnicodeCategory.SpacingCombiningMark + 1
            { "Me", "\u0008" }, // UnicodeCategory.EnclosingMark + 1
            { "Mn", "\u0006" }, // UnicodeCategory.NonSpacingMark + 1
            { "M", "\u0000\u0007\u0008\u0006\u0000" },
 
            // Numbers
            { "Nd", "\u0009" }, // UnicodeCategory.DecimalDigitNumber + 1
            { "Nl", "\u000A" }, // UnicodeCategory.LetterNumber + 1
            { "No", "\u000B" }, // UnicodeCategory.OtherNumber + 1
            { "N", "\u0000\u0009\u000A\u000B\u0000" },
 
            // Punctuation
            { "Pc", "\u0013" }, // UnicodeCategory.ConnectorPunctuation + 1
            { "Pd", "\u0014" }, // UnicodeCategory.DashPunctuation + 1
            { "Pe", "\u0016" }, // UnicodeCategory.ClosePunctuation + 1
            { "Po", "\u0019" }, // UnicodeCategory.OtherPunctuation + 1
            { "Ps", "\u0015" }, // UnicodeCategory.OpenPunctuation + 1
            { "Pf", "\u0018" }, // UnicodeCategory.FinalQuotePunctuation + 1
            { "Pi", "\u0017" }, // UnicodeCategory.InitialQuotePunctuation + 1
            { "P", "\u0000\u0013\u0014\u0016\u0019\u0015\u0018\u0017\u0000" },
 
            // Symbols
            { "Sc", "\u001B" }, // UnicodeCategory.CurrencySymbol + 1
            { "Sk", "\u001C" }, // UnicodeCategory.ModifierSymbol + 1
            { "Sm", "\u001A" }, // UnicodeCategory.MathSymbol + 1
            { "So", "\u001D" }, // UnicodeCategory.OtherSymbol + 1
            { "S", "\u0000\u001B\u001C\u001A\u001D\u0000" },
 
            // Separators
            { "Zl", "\u000D" }, // UnicodeCategory.LineSeparator + 1
            { "Zp", "\u000E" }, // UnicodeCategory.ParagraphSeparator + 1
            { "Zs", "\u000C" }, // UnicodeCategory.SpaceSeparator + 1
            { "Z", "\u0000\u000D\u000E\u000C\u0000" },
        };
 
        /*
         *   The property table contains all the block definitions defined in the
         *   XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org),
         *   and Perl 5.6 (see Programming Perl, 3rd edition page 167).   Three blocks defined by Perl (and here) may
         *   not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates.
         *
        **/
        // Has to be sorted by the first column
        private static readonly string[][] s_propTable =
        [
            ["IsAlphabeticPresentationForms",         "\uFB00\uFB50"],
            ["IsArabic",                              "\u0600\u0700"],
            ["IsArabicPresentationForms-A",           "\uFB50\uFE00"],
            ["IsArabicPresentationForms-B",           "\uFE70\uFF00"],
            ["IsArmenian",                            "\u0530\u0590"],
            ["IsArrows",                              "\u2190\u2200"],
            ["IsBasicLatin",                          "\u0000\u0080"],
            ["IsBengali",                             "\u0980\u0A00"],
            ["IsBlockElements",                       "\u2580\u25A0"],
            ["IsBopomofo",                            "\u3100\u3130"],
            ["IsBopomofoExtended",                    "\u31A0\u31C0"],
            ["IsBoxDrawing",                          "\u2500\u2580"],
            ["IsBraillePatterns",                     "\u2800\u2900"],
            ["IsBuhid",                               "\u1740\u1760"],
            ["IsCJKCompatibility",                    "\u3300\u3400"],
            ["IsCJKCompatibilityForms",               "\uFE30\uFE50"],
            ["IsCJKCompatibilityIdeographs",          "\uF900\uFB00"],
            ["IsCJKRadicalsSupplement",               "\u2E80\u2F00"],
            ["IsCJKSymbolsandPunctuation",            "\u3000\u3040"],
            ["IsCJKUnifiedIdeographs",                "\u4E00\uA000"],
            ["IsCJKUnifiedIdeographsExtensionA",      "\u3400\u4DC0"],
            ["IsCherokee",                            "\u13A0\u1400"],
            ["IsCombiningDiacriticalMarks",           "\u0300\u0370"],
            ["IsCombiningDiacriticalMarksforSymbols", "\u20D0\u2100"],
            ["IsCombiningHalfMarks",                  "\uFE20\uFE30"],
            ["IsCombiningMarksforSymbols",            "\u20D0\u2100"],
            ["IsControlPictures",                     "\u2400\u2440"],
            ["IsCurrencySymbols",                     "\u20A0\u20D0"],
            ["IsCyrillic",                            "\u0400\u0500"],
            ["IsCyrillicSupplement",                  "\u0500\u0530"],
            ["IsDevanagari",                          "\u0900\u0980"],
            ["IsDingbats",                            "\u2700\u27C0"],
            ["IsEnclosedAlphanumerics",               "\u2460\u2500"],
            ["IsEnclosedCJKLettersandMonths",         "\u3200\u3300"],
            ["IsEthiopic",                            "\u1200\u1380"],
            ["IsGeneralPunctuation",                  "\u2000\u2070"],
            ["IsGeometricShapes",                     "\u25A0\u2600"],
            ["IsGeorgian",                            "\u10A0\u1100"],
            ["IsGreek",                               "\u0370\u0400"],
            ["IsGreekExtended",                       "\u1F00\u2000"],
            ["IsGreekandCoptic",                      "\u0370\u0400"],
            ["IsGujarati",                            "\u0A80\u0B00"],
            ["IsGurmukhi",                            "\u0A00\u0A80"],
            ["IsHalfwidthandFullwidthForms",          "\uFF00\uFFF0"],
            ["IsHangulCompatibilityJamo",             "\u3130\u3190"],
            ["IsHangulJamo",                          "\u1100\u1200"],
            ["IsHangulSyllables",                     "\uAC00\uD7B0"],
            ["IsHanunoo",                             "\u1720\u1740"],
            ["IsHebrew",                              "\u0590\u0600"],
            ["IsHighPrivateUseSurrogates",            "\uDB80\uDC00"],
            ["IsHighSurrogates",                      "\uD800\uDB80"],
            ["IsHiragana",                            "\u3040\u30A0"],
            ["IsIPAExtensions",                       "\u0250\u02B0"],
            ["IsIdeographicDescriptionCharacters",    "\u2FF0\u3000"],
            ["IsKanbun",                              "\u3190\u31A0"],
            ["IsKangxiRadicals",                      "\u2F00\u2FE0"],
            ["IsKannada",                             "\u0C80\u0D00"],
            ["IsKatakana",                            "\u30A0\u3100"],
            ["IsKatakanaPhoneticExtensions",          "\u31F0\u3200"],
            ["IsKhmer",                               "\u1780\u1800"],
            ["IsKhmerSymbols",                        "\u19E0\u1A00"],
            ["IsLao",                                 "\u0E80\u0F00"],
            ["IsLatin-1Supplement",                   "\u0080\u0100"],
            ["IsLatinExtended-A",                     "\u0100\u0180"],
            ["IsLatinExtended-B",                     "\u0180\u0250"],
            ["IsLatinExtendedAdditional",             "\u1E00\u1F00"],
            ["IsLetterlikeSymbols",                   "\u2100\u2150"],
            ["IsLimbu",                               "\u1900\u1950"],
            ["IsLowSurrogates",                       "\uDC00\uE000"],
            ["IsMalayalam",                           "\u0D00\u0D80"],
            ["IsMathematicalOperators",               "\u2200\u2300"],
            ["IsMiscellaneousMathematicalSymbols-A",  "\u27C0\u27F0"],
            ["IsMiscellaneousMathematicalSymbols-B",  "\u2980\u2A00"],
            ["IsMiscellaneousSymbols",                "\u2600\u2700"],
            ["IsMiscellaneousSymbolsandArrows",       "\u2B00\u2C00"],
            ["IsMiscellaneousTechnical",              "\u2300\u2400"],
            ["IsMongolian",                           "\u1800\u18B0"],
            ["IsMyanmar",                             "\u1000\u10A0"],
            ["IsNumberForms",                         "\u2150\u2190"],
            ["IsOgham",                               "\u1680\u16A0"],
            ["IsOpticalCharacterRecognition",         "\u2440\u2460"],
            ["IsOriya",                               "\u0B00\u0B80"],
            ["IsPhoneticExtensions",                  "\u1D00\u1D80"],
            ["IsPrivateUse",                          "\uE000\uF900"],
            ["IsPrivateUseArea",                      "\uE000\uF900"],
            ["IsRunic",                               "\u16A0\u1700"],
            ["IsSinhala",                             "\u0D80\u0E00"],
            ["IsSmallFormVariants",                   "\uFE50\uFE70"],
            ["IsSpacingModifierLetters",              "\u02B0\u0300"],
            ["IsSpecials",                            "\uFFF0"],
            ["IsSuperscriptsandSubscripts",           "\u2070\u20A0"],
            ["IsSupplementalArrows-A",                "\u27F0\u2800"],
            ["IsSupplementalArrows-B",                "\u2900\u2980"],
            ["IsSupplementalMathematicalOperators",   "\u2A00\u2B00"],
            ["IsSyriac",                              "\u0700\u0750"],
            ["IsTagalog",                             "\u1700\u1720"],
            ["IsTagbanwa",                            "\u1760\u1780"],
            ["IsTaiLe",                               "\u1950\u1980"],
            ["IsTamil",                               "\u0B80\u0C00"],
            ["IsTelugu",                              "\u0C00\u0C80"],
            ["IsThaana",                              "\u0780\u07C0"],
            ["IsThai",                                "\u0E00\u0E80"],
            ["IsTibetan",                             "\u0F00\u1000"],
            ["IsUnifiedCanadianAboriginalSyllabics",  "\u1400\u1680"],
            ["IsVariationSelectors",                  "\uFE00\uFE10"],
            ["IsYiRadicals",                          "\uA490\uA4D0"],
            ["IsYiSyllables",                         "\uA000\uA490"],
            ["IsYijingHexagramSymbols",               "\u4DC0\u4E00"],
            ["_xmlC", /* Name Char              */    "\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00B7\u00B8\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u02D0\u02D2\u0300\u0346\u0360\u0362\u0386\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0483\u0487\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0653\u0660\u066A\u0670\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06E9\u06EA\u06EE\u06F0\u06FA\u0901\u0904\u0905\u093A\u093C\u094E\u0951\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC"
                +"\u09DE\u09DF\u09E4\u09E6\u09F2\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B70\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF0\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2"
                +"\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0E01\u0E2F\u0E30\u0E3B\u0E40\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0F18\u0F1A\u0F20\u0F2A\u0F35\u0F36\u0F37\u0F38\u0F39\u0F3A\u0F3E\u0F48\u0F49\u0F6A\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F96\u0F97\u0F98\u0F99\u0FAE\u0FB1\u0FB8\u0FB9\u0FBA\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00"
                +"\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u20D0\u20DD\u20E1\u20E2\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3005\u3006\u3007\u3008\u3021\u3030\u3031\u3036\u3041\u3095\u3099\u309B\u309D\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"],
            ["_xmlD",                                 "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u1040\u104A\u1369\u1372\u17E0\u17EA\u1810\u181A\uFF10\uFF1A"],
            ["_xmlI", /* Start Name Char       */     "\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00C0\u00D7\u00D8\u00F7\u00F8\u0132\u0134\u013F\u0141\u0149\u014A\u017F\u0180\u01C4\u01CD\u01F1\u01F4\u01F6\u01FA\u0218\u0250\u02A9\u02BB\u02C2\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0587\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0641\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u093D\u093E\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A72\u0A75\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABD\u0ABE\u0AE0\u0AE1\u0B05\u0B0D\u0B0F"
                +"\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3D\u0B3E\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E2F\u0E30\u0E31\u0E32\u0E34\u0E40\u0E46\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u1101\u1102\u1104\u1105\u1108\u1109\u110A\u110B\u110D\u110E\u1113\u113C\u113D\u113E\u113F\u1140\u1141\u114C\u114D\u114E\u114F\u1150\u1151\u1154\u1156\u1159\u115A\u115F\u1162\u1163\u1164\u1165\u1166\u1167\u1168\u1169\u116A\u116D\u116F\u1172\u1174\u1175\u1176\u119E\u119F\u11A8\u11A9\u11AB\u11AC"
                +"\u11AE\u11B0\u11B7\u11B9\u11BA\u11BB\u11BC\u11C3\u11EB\u11EC\u11F0\u11F1\u11F9\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2126\u2127\u212A\u212C\u212E\u212F\u2180\u2183\u3007\u3008\u3021\u302A\u3041\u3095\u30A1\u30FB\u3105\u312D\u4E00\u9FA6\uAC00\uD7A4"],
            ["_xmlW",                                 "\u0024\u0025\u002B\u002C\u0030\u003A\u003C\u003F\u0041\u005B\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u0221\u0222\u0234\u0250\u02AE\u02B0\u02EF\u0300\u0350\u0360\u0370\u0374\u0376\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03F7\u0400\u0487\u0488\u04CF\u04D0\u04F6\u04F8\u04FA\u0500\u0510\u0531\u0557\u0559\u055A\u0561\u0588\u0591\u05A2\u05A3\u05BA\u05BB\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05C5\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u0656\u0660\u066A\u066E\u06D4\u06D5\u06DD\u06DE\u06EE\u06F0\u06FF\u0710\u072D\u0730\u074B\u0780\u07B2\u0901\u0904\u0905\u093A\u093C\u094E\u0950\u0955\u0958\u0964\u0966\u0970\u0981\u0984\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09BC\u09BD\u09BE\u09C5\u09C7\u09C9\u09CB\u09CE\u09D7\u09D8\u09DC\u09DE\u09DF\u09E4\u09E6\u09FB\u0A02\u0A03\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35"
                +"\u0A37\u0A38\u0A3A\u0A3C\u0A3D\u0A3E\u0A43\u0A47\u0A49\u0A4B\u0A4E\u0A59\u0A5D\u0A5E\u0A5F\u0A66\u0A75\u0A81\u0A84\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0ABC\u0AC6\u0AC7\u0ACA\u0ACB\u0ACE\u0AD0\u0AD1\u0AE0\u0AE1\u0AE6\u0AF0\u0B01\u0B04\u0B05\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B3C\u0B44\u0B47\u0B49\u0B4B\u0B4E\u0B56\u0B58\u0B5C\u0B5E\u0B5F\u0B62\u0B66\u0B71\u0B82\u0B84\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0BBE\u0BC3\u0BC6\u0BC9\u0BCA\u0BCE\u0BD7\u0BD8\u0BE7\u0BF3\u0C01\u0C04\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C3E\u0C45\u0C46\u0C49\u0C4A\u0C4E\u0C55\u0C57\u0C60\u0C62\u0C66\u0C70\u0C82\u0C84\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CBE\u0CC5\u0CC6\u0CC9\u0CCA\u0CCE\u0CD5\u0CD7\u0CDE\u0CDF\u0CE0\u0CE2\u0CE6\u0CF0\u0D02\u0D04\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D3E\u0D44\u0D46\u0D49"
                +"\u0D4A\u0D4E\u0D57\u0D58\u0D60\u0D62\u0D66\u0D70\u0D82\u0D84\u0D85\u0D97\u0D9A\u0DB2\u0DB3\u0DBC\u0DBD\u0DBE\u0DC0\u0DC7\u0DCA\u0DCB\u0DCF\u0DD5\u0DD6\u0DD7\u0DD8\u0DE0\u0DF2\u0DF4\u0E01\u0E3B\u0E3F\u0E4F\u0E50\u0E5A\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EBA\u0EBB\u0EBE\u0EC0\u0EC5\u0EC6\u0EC7\u0EC8\u0ECE\u0ED0\u0EDA\u0EDC\u0EDE\u0F00\u0F04\u0F13\u0F3A\u0F3E\u0F48\u0F49\u0F6B\u0F71\u0F85\u0F86\u0F8C\u0F90\u0F98\u0F99\u0FBD\u0FBE\u0FCD\u0FCF\u0FD0\u1000\u1022\u1023\u1028\u1029\u102B\u102C\u1033\u1036\u103A\u1040\u104A\u1050\u105A\u10A0\u10C6\u10D0\u10F9\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1200\u1207\u1208\u1247\u1248\u1249\u124A\u124E\u1250\u1257\u1258\u1259\u125A\u125E\u1260\u1287\u1288\u1289\u128A\u128E\u1290\u12AF\u12B0\u12B1\u12B2\u12B6\u12B8\u12BF\u12C0\u12C1\u12C2\u12C6\u12C8\u12CF\u12D0\u12D7\u12D8\u12EF\u12F0\u130F\u1310\u1311\u1312\u1316\u1318\u131F\u1320\u1347\u1348\u135B\u1369\u137D\u13A0"
                +"\u13F5\u1401\u166D\u166F\u1677\u1681\u169B\u16A0\u16EB\u16EE\u16F1\u1700\u170D\u170E\u1715\u1720\u1735\u1740\u1754\u1760\u176D\u176E\u1771\u1772\u1774\u1780\u17D4\u17D7\u17D8\u17DB\u17DD\u17E0\u17EA\u180B\u180E\u1810\u181A\u1820\u1878\u1880\u18AA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0\u1FF2\u1FF5\u1FF6\u1FFF\u2044\u2045\u2052\u2053\u2070\u2072\u2074\u207D\u207F\u208D\u20A0\u20B2\u20D0\u20EB\u2100\u213B\u213D\u214C\u2153\u2184\u2190\u2329\u232B\u23B4\u23B7\u23CF\u2400\u2427\u2440\u244B\u2460\u24FF\u2500\u2614\u2616\u2618\u2619\u267E\u2680\u268A\u2701\u2705\u2706\u270A\u270C\u2728\u2729\u274C\u274D\u274E\u274F\u2753\u2756\u2757\u2758\u275F\u2761\u2768\u2776\u2795\u2798\u27B0\u27B1\u27BF\u27D0\u27E6\u27F0\u2983\u2999\u29D8\u29DC\u29FC\u29FE\u2B00\u2E80\u2E9A\u2E9B\u2EF4\u2F00\u2FD6\u2FF0\u2FFC\u3004\u3008\u3012\u3014\u3020\u3030\u3031\u303D\u303E\u3040"
                +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"],
        ];
 
        private static readonly char[] s_whitespaceChars =
            ['\u0009', '\u000A', '\u000B', '\u000C', '\u000D',
             '\u0020', '\u0085', '\u00A0', '\u1680', '\u2000',
             '\u2001', '\u2002', '\u2003', '\u2004', '\u2005',
             '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
             '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
 
        private List<(char First, char Last)>? _rangelist;
        private StringBuilder? _categories;
        private RegexCharClass? _subtractor;
        private bool _negate;
        private RegexCaseBehavior _caseBehavior;
 
#if DEBUG
        static RegexCharClass()
        {
            // Make sure the initial capacity for s_definedCategories is correct
            Debug.Assert(
                s_definedCategories.Count == DefinedCategoriesCapacity,
                $"Expected (s_definedCategories.Count): {s_definedCategories.Count}, Actual (DefinedCategoriesCapacity): {DefinedCategoriesCapacity}");
 
            // Make sure the s_propTable is correctly ordered
            int len = s_propTable.Length;
            for (int i = 0; i < len - 1; i++)
                Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, $"RegexCharClass s_propTable is out of order at ({s_propTable[i][0]}, {s_propTable[i + 1][0]})");
 
            // Make sure character information is in sync with Unicode data.
            var whitespaceSet = new HashSet<char>(s_whitespaceChars);
            for (int i = 0; i <= char.MaxValue; i++)
            {
                Debug.Assert(whitespaceSet.Contains((char)i) == char.IsWhiteSpace((char)i));
            }
        }
#endif
 
        /// <summary>
        /// Creates an empty character class.
        /// </summary>
        public RegexCharClass()
        {
        }
 
        private RegexCharClass(bool negate, List<(char First, char Last)>? ranges, StringBuilder? categories, RegexCharClass? subtraction)
        {
            _rangelist = ranges;
            _categories = categories;
            _negate = negate;
            _subtractor = subtraction;
        }
 
        public bool CanMerge => !_negate && _subtractor == null;
 
        public bool Negate
        {
            set { _negate = value; }
        }
 
        public void AddChar(char c) => AddRange(c, c);
 
        /// <summary>
        /// Adds a regex char class
        /// </summary>
        public void AddCharClass(RegexCharClass cc)
        {
            Debug.Assert(cc.CanMerge && CanMerge, "Both character classes added together must be able to merge");
 
            int ccRangeCount = cc._rangelist?.Count ?? 0;
 
            if (ccRangeCount != 0)
            {
                EnsureRangeList().AddRange(cc._rangelist!);
            }
 
            if (cc._categories != null)
            {
                EnsureCategories().Append(cc._categories);
            }
        }
 
        /// <summary>Adds a regex char class if the classes are mergeable.</summary>
        public bool TryAddCharClass(RegexCharClass cc)
        {
            if (cc.CanMerge && CanMerge)
            {
                AddCharClass(cc);
                return true;
            }
 
            return false;
        }
 
        private StringBuilder EnsureCategories() =>
            _categories ??= new StringBuilder();
 
        private List<(char First, char Last)> EnsureRangeList() =>
            _rangelist ??= new List<(char First, char Last)>(6);
 
        /// <summary>
        /// Adds ranges (specified by their range string representation) to the class.
        /// </summary>
        private void AddRanges(ReadOnlySpan<char> set)
        {
            Debug.Assert(!set.IsEmpty);
 
            List<(char First, char Last)> rangeList = EnsureRangeList();
 
            int i;
            for (i = 0; i < set.Length - 1; i += 2)
            {
                rangeList.Add((set[i], (char)(set[i + 1] - 1)));
            }
 
            if (i < set.Length)
            {
                rangeList.Add((set[i], LastChar));
            }
        }
 
        public void AddSubtraction(RegexCharClass sub)
        {
            Debug.Assert(_subtractor == null, "Can't add two subtractions to a char class. ");
            _subtractor = sub;
        }
 
        /// <summary>
        /// Adds a single range of characters to the class.
        /// </summary>
        public void AddRange(char first, char last) =>
            EnsureRangeList().Add((first, last));
 
        public void AddCategoryFromName(string categoryName, bool invert, bool caseInsensitive, string pattern, int currentPos)
        {
            if (s_definedCategories.TryGetValue(categoryName, out string? category) &&
                !categoryName.Equals(InternalRegexIgnoreCase))
            {
                if (caseInsensitive && (categoryName.Equals("Ll") || categoryName.Equals("Lu") || categoryName.Equals("Lt")))
                {
                    // when RegexOptions.IgnoreCase is specified then {Ll}, {Lu}, and {Lt} cases should all match
                    category = s_definedCategories[InternalRegexIgnoreCase];
                }
 
                StringBuilder categories = EnsureCategories();
                if (invert)
                {
                    // Negate category
                    for (int i = 0; i < category.Length; i++)
                    {
                        short ch = (short)category[i];
                        categories.Append((char)-ch);
                    }
                }
                else
                {
                    categories.Append(category);
                }
            }
            else
            {
                AddRanges(RangesFromProperty(categoryName, invert, pattern, currentPos));
            }
        }
 
        private void AddCategory(string category) => EnsureCategories().Append(category);
 
        /// <summary>
        /// Adds to the class any case-equivalence versions of characters already
        /// in the class. Used for case-insensitivity.
        /// </summary>
        public void AddCaseEquivalences(CultureInfo culture)
        {
            List<(char First, char Last)>? rangeList = _rangelist;
            if (rangeList != null)
            {
                int count = rangeList.Count;
                for (int i = 0; i < count; i++)
                {
                    (char First, char Last) range = rangeList[i];
                    if (range.First == range.Last)
                    {
                        if (RegexCaseEquivalences.TryFindCaseEquivalencesForCharWithIBehavior(range.First, culture, ref _caseBehavior, out ReadOnlySpan<char> equivalences))
                        {
                            foreach (char equivalence in equivalences)
                            {
                                AddChar(equivalence);
                            }
                        }
                    }
                    else
                    {
                        AddCaseEquivalenceRange(range.First, range.Last, culture);
                    }
                }
            }
        }
 
        /// <summary>
        /// For a single range that's in the set, adds any additional ranges
        /// necessary to ensure that lowercase equivalents are also included.
        /// </summary>
        private void AddCaseEquivalenceRange(char chMin, char chMax, CultureInfo culture)
        {
            for (int i = chMin; i <= chMax; i++)
            {
                if (RegexCaseEquivalences.TryFindCaseEquivalencesForCharWithIBehavior((char)i, culture, ref _caseBehavior, out ReadOnlySpan<char> equivalences))
                {
                    foreach (char equivalence in equivalences)
                    {
                        AddChar(equivalence);
                    }
                }
            }
        }
 
        public void AddWord(bool ecma, bool negate)
        {
            if (ecma)
            {
                AddRanges((negate ? NotECMAWordRanges : ECMAWordRanges).AsSpan());
            }
            else
            {
                AddCategory(negate ? NotWordCategories : WordCategories);
            }
        }
 
        public void AddSpace(bool ecma, bool negate)
        {
            if (ecma)
            {
                AddRanges((negate ? NotECMASpaceRanges : ECMASpaceRanges).AsSpan());
            }
            else
            {
                AddCategory(negate ? NotSpaceCategories : SpaceCategories);
            }
        }
 
        public void AddDigit(bool ecma, bool negate, string pattern, int currentPos)
        {
            if (ecma)
            {
                AddRanges((negate ? NotECMADigitRanges : ECMADigitRanges).AsSpan());
            }
            else
            {
                AddCategoryFromName("Nd", negate, caseInsensitive: false, pattern, currentPos);
            }
        }
 
        public static string ConvertOldStringsToClass(string set, string category)
        {
            bool startsWithNulls = set.Length >= 2 && set[0] == '\0' && set[1] == '\0';
            int strLength = SetStartIndex + set.Length + category.Length;
            if (startsWithNulls)
            {
                strLength -= 2;
            }
 
            return
#if NET
                string
#else
                StringExtensions
#endif
                .Create(strLength, (set, category, startsWithNulls), static (span, state) =>
            {
                int index;
 
                if (state.startsWithNulls)
                {
                    span[FlagsIndex] = (char)0x1;
                    span[SetLengthIndex] = (char)(state.set.Length - 2);
                    span[CategoryLengthIndex] = (char)state.category.Length;
                    state.set.AsSpan(2).CopyTo(span.Slice(SetStartIndex));
                    index = SetStartIndex + state.set.Length - 2;
                }
                else
                {
                    span[FlagsIndex] = '\0';
                    span[SetLengthIndex] = (char)state.set.Length;
                    span[CategoryLengthIndex] = (char)state.category.Length;
                    state.set.AsSpan().CopyTo(span.Slice(SetStartIndex));
                    index = SetStartIndex + state.set.Length;
                }
 
                state.category.AsSpan().CopyTo(span.Slice(index));
            });
        }
 
        /// <summary>
        /// Returns the char
        /// </summary>
        public static char SingletonChar(string set)
        {
            Debug.Assert(IsSingleton(set) || IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class");
            return set[SetStartIndex];
        }
 
        public static bool IsMergeable(string charClass) =>
            charClass != null &&
            !IsNegated(charClass) &&
            !IsSubtraction(charClass);
 
        public static bool IsEmpty(string charClass) =>
            charClass[CategoryLengthIndex] == 0 &&
            charClass[SetLengthIndex] == 0 &&
            !IsNegated(charClass) &&
            !IsSubtraction(charClass);
 
        /// <summary><c>true</c> if the set contains a single character only</summary>
        /// <remarks>
        /// This will happen not only from character classes manually written to contain a single character,
        /// but much more frequently by the implementation/parser itself, e.g. when looking for \n as part of
        /// finding the end of a line, when processing an alternation like "hello|hithere" where the first
        /// character of both options is the same, etc.
        /// </remarks>
        public static bool IsSingleton(string set) =>
            set[CategoryLengthIndex] == 0 &&
            set[SetLengthIndex] == 2 &&
            !IsNegated(set) &&
            !IsSubtraction(set) &&
            (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]);
 
        public static bool IsSingletonInverse(string set) =>
            set[CategoryLengthIndex] == 0 &&
            set[SetLengthIndex] == 2 &&
            IsNegated(set) &&
            !IsSubtraction(set) &&
            (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]);
 
        /// <summary>
        /// Gets the categories from a set if the set is only categories (no ranges, no subtraction),
        /// they all share the same negation status (not doing so is rare), and they all fit in the destination span.
        /// </summary>
        /// <param name="set">The character class to examine.</param>
        /// <param name="categories">The destination span into which the categories should be written.</param>
        /// <param name="numCategories">The number of categories written to <paramref name="categories"/>.</param>
        /// <param name="negated">false if the categories written to <paramref name="categories"/> represent inclusions; true if they represent exclusions.</param>
        /// <returns>true if the categories could be retrieved; otherwise, false.</returns>
        public static bool TryGetOnlyCategories(string set, Span<UnicodeCategory> categories, out int numCategories, out bool negated)
        {
            negated = false;
            numCategories = 0;
            bool sawFirstCategory = false;
 
            // Require that the character class has no ranges, has no subtraction, and has categories.
            int categoryLength = set[CategoryLengthIndex];
            if (categoryLength == 0 || set[SetLengthIndex] != 0 || IsSubtraction(set))
            {
                return false;
            }
 
            // Loop through all categories, storing them into the categories span.
            int categoryEnd = SetStartIndex + set[CategoryLengthIndex];
            for (int pos = SetStartIndex; pos < categoryEnd; pos++)
            {
                // Get the next category value.
                short c = (short)set[pos];
                if (c > 0)
                {
                    // It's a positive (inclusive) value.  Make sure all previous categories seen are also positive.
                    // Also make sure it's not the fake space category, which consumers don't handle as it's
                    // not a real UnicodeCategory.
                    if ((sawFirstCategory && negated) ||
                        c == SpaceConst ||
                        numCategories == categories.Length)
                    {
                        return false;
                    }
 
                    sawFirstCategory = true;
                    categories[numCategories++] = (UnicodeCategory)(c - 1);
                }
                else if (c < 0)
                {
                    // It's a negative (exclusive) value.  Make sure all previous categories seen are also negative.
                    // Also make sure it's not the fake non-space category, which consumers don't handle as it's
                    // not a real UnicodeCategory.
                    if ((sawFirstCategory && !negated) ||
                        c == NotSpaceConst ||
                        numCategories == categories.Length)
                    {
                        return false;
                    }
 
                    sawFirstCategory = true;
                    negated = true;
                    categories[numCategories++] = (UnicodeCategory)(-1 - c);
                }
                else // c == 0
                {
                    // It's the start of a group. Every value in the group needs to have the same orientation.
                    // We stop when we hit the next 0.
                    c = (short)set[++pos];
                    Debug.Assert(c != 0);
                    if (c > 0)
                    {
                        if (sawFirstCategory && negated)
                        {
                            return false;
                        }
                        sawFirstCategory = true;
 
                        do
                        {
                            if (numCategories == categories.Length)
                            {
                                return false;
                            }
 
                            categories[numCategories++] = (UnicodeCategory)(c - 1);
                            c = (short)set[++pos];
                        }
                        while (c != 0);
                    }
                    else
                    {
                        if (sawFirstCategory && !negated)
                        {
                            return false;
                        }
                        negated = true;
                        sawFirstCategory = true;
 
                        do
                        {
                            if (numCategories == categories.Length)
                            {
                                return false;
                            }
 
                            categories[numCategories++] = (UnicodeCategory)(-1 - c);
                            c = (short)set[++pos];
                        }
                        while (c != 0);
                    }
                }
            }
 
            // Factor in whether the entire character class is itself negated.
            negated ^= IsNegated(set);
            return true;
        }
 
        /// <summary>Attempts to get a single range stored in the set.</summary>
        /// <param name="set">The set.</param>
        /// <param name="lowInclusive">The inclusive lower-bound of the range, if available.</param>
        /// <param name="highInclusive">The inclusive upper-bound of the range, if available.</param>
        /// <returns>true if the set contained a single range; otherwise, false.</returns>
        /// <remarks>
        /// <paramref name="lowInclusive"/> and <paramref name="highInclusive"/> will be equal if the
        /// range is a singleton or singleton inverse. The range will need to be negated by the caller
        /// if <see cref="IsNegated(string)"/> is true.
        /// </remarks>
        public static bool TryGetSingleRange(string set, out char lowInclusive, out char highInclusive)
        {
            if (set[CategoryLengthIndex] == 0 && // must not have any categories
                set.Length == SetStartIndex + set[SetLengthIndex]) // and no subtraction
            {
                switch ((int)set[SetLengthIndex])
                {
                    case 1:
                        lowInclusive = set[SetStartIndex];
                        highInclusive = LastChar;
                        return true;
 
                    case 2:
                        lowInclusive = set[SetStartIndex];
                        highInclusive = (char)(set[SetStartIndex + 1] - 1);
                        return true;
                }
            }
 
            lowInclusive = highInclusive = '\0';
            return false;
        }
 
        /// <summary>Attempts to get two ranges stored in the set.  The set may be negated.</summary>
        /// <param name="set">The set.</param>
        /// <param name="range0">The first result range.</param>
        /// <param name="range1">The second result range.</param>
        /// <returns>true if the set contained exactly two ranges; otherwise, false.</returns>
        public static bool TryGetDoubleRange(
            string set,
            out (char LowInclusive, char HighInclusive) range0,
            out (char LowInclusive, char HighInclusive) range1)
        {
            if (set[CategoryLengthIndex] == 0 && // must not have any categories
                set.Length == SetStartIndex + set[SetLengthIndex]) // and no subtraction
            {
                int setLength = set[SetLengthIndex];
                if (setLength is 3 or 4)
                {
                    range0 = (set[SetStartIndex], (char)(set[SetStartIndex + 1] - 1));
                    range1 = (set[SetStartIndex + 2], setLength == 3 ? LastChar : (char)(set[SetStartIndex + 3] - 1));
                    return true;
                }
            }
 
            range0 = range1 = ('\0', '\0');
            return false;
        }
 
        /// <summary>Gets all of the characters in the specified set, storing them into the provided span.</summary>
        /// <param name="set">The character class.</param>
        /// <param name="chars">The span into which the chars should be stored.</param>
        /// <returns>
        /// The number of stored chars.  If they won't all fit, 0 is returned.
        /// If 0 is returned, no assumptions can be made about the characters.
        /// </returns>
        /// <remarks>
        /// Only considers character classes that only contain sets (no categories),
        /// just simple sets containing starting/ending pairs (subtraction from those pairs
        /// is factored in, however).The returned characters may be negated: if IsNegated(set)
        /// is false, then the returned characters are the only ones that match; if it returns
        /// true, then the returned characters are the only ones that don't match.
        /// </remarks>
        public static int GetSetChars(string set, Span<char> chars)
        {
            // We get the characters by enumerating the set portion, so we validate that it's
            // set up to enable that, e.g. no categories.
            if (!CanEasilyEnumerateSetContents(set, out bool hasSubtraction))
            {
                return 0;
            }
 
            // Negation with subtraction is too cumbersome to reason about efficiently.
            if (hasSubtraction && IsNegated(set))
            {
                return 0;
            }
 
            // Iterate through the pairs of ranges, storing each value in each range
            // into the supplied span.  If they all won't fit, we give up and return 0.
            // Otherwise we return the number found.  Note that we don't bother to handle
            // the corner case where the last range's upper bound is LastChar (\uFFFF),
            // based on it a) complicating things, and b) it being really unlikely to
            // be part of a small set.
            int setLength = set[SetLengthIndex];
            int count = 0, evaluated = 0;
            for (int i = SetStartIndex; i < SetStartIndex + setLength; i += 2)
            {
                int curSetEnd = set[i + 1];
                for (int c = set[i]; c < curSetEnd; c++)
                {
                    // Keep track of how many characters we've checked. This could work
                    // just comparing count rather than evaluated, but we also want to
                    // limit how much work is done here, which we can do by constraining
                    // the number of checks to the size of the storage provided.
                    if (++evaluated > chars.Length)
                    {
                        return 0;
                    }
 
                    // If the set is all ranges but has a subtracted class,
                    // validate the char is actually in the set prior to storing it:
                    // it might be in the subtracted range.
                    if (hasSubtraction && !CharInClass((char)c, set))
                    {
                        continue;
                    }
 
                    Debug.Assert(count <= evaluated);
                    chars[count++] = (char)c;
                }
            }
 
            return count;
        }
 
        /// <summary>
        /// Determines whether two sets may overlap.
        /// </summary>
        /// <returns>false if the two sets do not overlap; true if they may.</returns>
        /// <remarks>
        /// If the method returns false, the caller can be sure the sets do not overlap.
        /// If the method returns true, it's still possible the sets don't overlap.
        /// </remarks>
        public static bool MayOverlap(string set1, string set2)
        {
            // If the sets are identical, there's obviously overlap.
            if (set1 == set2)
            {
                return true;
            }
 
            // If either set is all-inclusive, there's overlap by definition (unless
            // the other set is empty, but that's so rare it's not worth checking.)
            if (set1 == AnyClass || set2 == AnyClass)
            {
                return true;
            }
 
            // If one set is negated and the other one isn't, we're in one of two situations:
            // - The remainder of the sets are identical, in which case these are inverses of
            //   each other, and they don't overlap.
            // - The remainder of the sets aren't identical, in which case there's very likely
            //   overlap, and it's not worth spending more time investigating.
            bool set1Negated = IsNegated(set1);
            bool set2Negated = IsNegated(set2);
            if (set1Negated != set2Negated)
            {
                return !set1.AsSpan(1).SequenceEqual(set2.AsSpan(1));
            }
 
            // If the sets are negated, since they're not equal, there's almost certainly overlap.
            Debug.Assert(set1Negated == set2Negated);
            if (set1Negated)
            {
                return true;
            }
 
            // Special-case some known, common classes that don't overlap.
            if (KnownDistinctSets(set1, set2) ||
                KnownDistinctSets(set2, set1))
            {
                return false;
            }
 
            // If set2 can be easily enumerated (e.g. no unicode categories), then enumerate it and
            // check if any of its members are in set1.  Otherwise, the same for set1.
            if (CanEasilyEnumerateSetContents(set2))
            {
                return MayOverlapByEnumeration(set1, set2);
            }
            else if (CanEasilyEnumerateSetContents(set1))
            {
                return MayOverlapByEnumeration(set2, set1);
            }
 
            // Assume that everything else might overlap.  In the future if it proved impactful, we could be more accurate here,
            // at the expense of more computation time.
            return true;
 
            static bool KnownDistinctSets(string set1, string set2) =>
                (set1 == SpaceClass || set1 == ECMASpaceClass) &&
                (set2 == DigitClass || set2 == WordClass || set2 == ECMADigitClass || set2 == ECMAWordClass);
 
            static bool MayOverlapByEnumeration(string set1, string set2)
            {
                Debug.Assert(!IsNegated(set1) && !IsNegated(set2));
                for (int i = SetStartIndex; i < SetStartIndex + set2[SetLengthIndex]; i += 2)
                {
                    int curSetEnd = set2[i + 1];
                    for (int c = set2[i]; c < curSetEnd; c++)
                    {
                        if (CharInClass((char)c, set1))
                        {
                            return true;
                        }
                    }
                }
 
                return false;
            }
        }
 
        /// <summary>
        /// Gets whether the specified set is a named set with a reasonably small count
        /// of Unicode characters.
        /// </summary>
        /// <param name="set">The set description.</param>
        /// <param name="chars">The chars that make up the known set.</param>
        /// <param name="negated">Whether the <paramref name="chars"/> need to be negated.</param>
        /// <param name="description">A description suitable for use in C# code as a variable name.</param>
        public static bool IsUnicodeCategoryOfSmallCharCount(string set, [NotNullWhen(true)] out char[]? chars, out bool negated, [NotNullWhen(true)] out string? description)
        {
            switch (set)
            {
                case SpaceClass:
                case NotSpaceClass:
                    chars = s_whitespaceChars;
                    negated = set == NotSpaceClass;
                    description = "whitespace";
                    return true;
            }
 
            chars = default;
            negated = false;
            description = null;
            return false;
        }
 
        /// <summary>Gets whether the specified character participates in case conversion.</summary>
        /// <remarks>
        /// This method is used to perform operations as if they were case-sensitive even if they're
        /// specified as being case-insensitive.  Such a reduction can be applied when the only character
        /// that would lower-case to the one being searched for / compared against is that character itself.
        /// </remarks>
        public static bool ParticipatesInCaseConversion(int comparison)
        {
            Debug.Assert((uint)comparison <= char.MaxValue);
 
            switch (char.GetUnicodeCategory((char)comparison))
            {
                case UnicodeCategory.ClosePunctuation:
                case UnicodeCategory.ConnectorPunctuation:
                case UnicodeCategory.Control:
                case UnicodeCategory.DashPunctuation:
                case UnicodeCategory.DecimalDigitNumber:
                case UnicodeCategory.FinalQuotePunctuation:
                case UnicodeCategory.InitialQuotePunctuation:
                case UnicodeCategory.LineSeparator:
                case UnicodeCategory.OpenPunctuation:
                case UnicodeCategory.OtherNumber:
                case UnicodeCategory.OtherPunctuation:
                case UnicodeCategory.ParagraphSeparator:
                case UnicodeCategory.SpaceSeparator:
                    // All chars in these categories meet the criteria that the only way
                    // `char.ToLower(toTest, AnyCulture) == charInAboveCategory` is when
                    // toTest == charInAboveCategory.
                    return false;
 
                default:
                    // We don't know (without testing the character against every other
                    // character), so assume it does.
                    return true;
            }
        }
 
        /// <summary>Gets whether the specified span participates in case conversion.</summary>
        /// <remarks>The span participates in case conversion if any of its characters do.</remarks>
        public static bool ParticipatesInCaseConversion(ReadOnlySpan<char> s)
        {
            foreach (char c in s)
            {
                if (ParticipatesInCaseConversion(c))
                {
                    return true;
                }
            }
 
            return false;
        }
 
        /// <summary>Gets whether the specified span contains only ASCII.</summary>
        public static bool IsAscii(ReadOnlySpan<char> s)
        {
#if NET
            return Ascii.IsValid(s);
#else
            foreach (char c in s)
            {
                if (c >= 128)
                {
                    return false;
                }
            }
 
            return true;
#endif
        }
 
        /// <summary>Gets whether the set description string is for two ASCII letters that case to each other under OrdinalIgnoreCase rules.</summary>
        public static bool SetContainsAsciiOrdinalIgnoreCaseCharacter(string set, Span<char> twoChars)
        {
            Debug.Assert(twoChars.Length >= 2);
            return
                !IsNegated(set) &&
                GetSetChars(set, twoChars) == 2 &&
                twoChars[0] < 128 &&
                twoChars[1] < 128 &&
                twoChars[0] != twoChars[1] &&
                char.IsLetter(twoChars[0]) &&
                char.IsLetter(twoChars[1]) &&
                (twoChars[0] | 0x20) == (twoChars[1] | 0x20);
        }
 
        /// <summary>Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents.</summary>
        /// <remarks>This may enumerate negated characters if the set is negated.  This will return false if the set has subtraction.</remarks>
        private static bool CanEasilyEnumerateSetContents(string set) =>
            CanEasilyEnumerateSetContents(set, out bool hasSubtraction) &&
            !hasSubtraction;
 
        /// <summary>Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents.</summary>
        /// <remarks>This may enumerate negated characters if the set is negated, and it may be an overestimate if the set contains subtraction.</remarks>
        private static bool CanEasilyEnumerateSetContents(string set, out bool hasSubtraction)
        {
            hasSubtraction = IsSubtraction(set);
            return
                set.Length > SetStartIndex &&
                set[SetLengthIndex] > 0 &&
                set[SetLengthIndex] % 2 == 0 &&
                set[CategoryLengthIndex] == 0;
        }
 
        /// <summary>Provides results from <see cref="Analyze"/>.</summary>
        internal struct CharClassAnalysisResults
        {
            /// <summary>true if the set contains only ranges; false if it contains Unicode categories and/or subtraction.</summary>
            public bool OnlyRanges;
            /// <summary>true if we know for sure that the set contains only ASCII values; otherwise, false.</summary>
            /// <remarks>This can only be true if <see cref="OnlyRanges"/> is true.</remarks>
            public bool ContainsOnlyAscii;
            /// <summary>true if we know for sure that the set doesn't contain any ASCII values; otherwise, false.</summary>
            /// <remarks>This can only be true if <see cref="OnlyRanges"/> is true.</remarks>
            public bool ContainsNoAscii;
            /// <summary>true if we know for sure that all ASCII values are in the set; otherwise, false.</summary>
            /// <remarks>This can only be true if <see cref="OnlyRanges"/> is true.</remarks>
            public bool AllAsciiContained;
            /// <summary>true if we know for sure that all non-ASCII values are in the set; otherwise, false.</summary>
            /// <remarks>This can only be true if <see cref="OnlyRanges"/> is true.</remarks>
            public bool AllNonAsciiContained;
            /// <summary>The inclusive lower bound.</summary>
            /// <remarks>This is only valid if <see cref="OnlyRanges"/> is true.</remarks>
            public int LowerBoundInclusiveIfOnlyRanges;
            /// <summary>The exclusive upper bound.</summary>
            /// <remarks>This is only valid if <see cref="OnlyRanges"/> is true.</remarks>
            public int UpperBoundExclusiveIfOnlyRanges;
        }
 
        /// <summary>Analyzes the set to determine some basic properties that can be used to optimize usage.</summary>
        internal static CharClassAnalysisResults Analyze(string set)
        {
            bool isNegated = IsNegated(set);
 
            // The analysis is performed based entirely on ranges contained within the set.
            // Thus, we require that it can be "easily enumerated", meaning it contains only
            // ranges (and more specifically those with both the lower inclusive and upper
            // exclusive bounds specified). We also permit the set to contain a subtracted
            // character class, as for non-negated sets, that can only narrow what's permitted,
            // and the analysis can be performed on the overestimate of the set prior to subtraction.
            // However, negation is performed before subtraction, which means we can't trust
            // the ranges to inform AllNonAsciiContained and AllAsciiContained, as the subtraction
            // could create holes in those.  As such, while we can permit subtraction for non-negated
            // sets, for negated sets, we need to bail.
            if (!CanEasilyEnumerateSetContents(set, out bool hasSubtraction) ||
                (isNegated && hasSubtraction))
            {
                // We can't make any strong claims about the set.
                return default;
            }
 
            char firstValueInclusive = set[SetStartIndex];
            char lastValueExclusive = set[SetStartIndex + set[SetLengthIndex] - 1];
 
            if (isNegated)
            {
                // We're negated: if the upper bound of the range is ASCII, that means everything
                // above it is actually included, meaning all non-ASCII are in the class.
                // Similarly if the lower bound is non-ASCII, that means in a negated world
                // everything ASCII is included.
                Debug.Assert(!hasSubtraction);
                return new CharClassAnalysisResults
                {
                    OnlyRanges = true,
                    AllNonAsciiContained = lastValueExclusive <= 128,
                    AllAsciiContained = firstValueInclusive >= 128,
                    ContainsNoAscii = firstValueInclusive == 0 && set[SetStartIndex + 1] >= 128,
                    ContainsOnlyAscii = false,
                    LowerBoundInclusiveIfOnlyRanges = firstValueInclusive,
                    UpperBoundExclusiveIfOnlyRanges = lastValueExclusive,
                };
            }
 
            // If the upper bound is ASCII, that means everything included in the class is ASCII.
            // Similarly if the lower bound is non-ASCII, that means no ASCII is in the class.
            return new CharClassAnalysisResults
            {
                OnlyRanges = true,
                AllNonAsciiContained = false,
                AllAsciiContained = firstValueInclusive == 0 && set[SetStartIndex + 1] >= 128 && !hasSubtraction,
                ContainsOnlyAscii = lastValueExclusive <= 128,
                ContainsNoAscii = firstValueInclusive >= 128,
                LowerBoundInclusiveIfOnlyRanges = firstValueInclusive,
                UpperBoundExclusiveIfOnlyRanges = lastValueExclusive,
            };
        }
 
        internal static bool IsSubtraction(string charClass) =>
            charClass.Length > SetStartIndex +
            charClass[CategoryLengthIndex] +
            charClass[SetLengthIndex];
 
        internal static bool IsNegated(string set) => set[FlagsIndex] == 1;
 
        internal static bool IsNegated(string set, int setOffset) => set[FlagsIndex + setOffset] == 1;
 
        public static bool IsECMAWordChar(char ch) =>
            // According to ECMA-262, \s, \S, ., ^, and $ use Unicode-based interpretations of
            // whitespace and newline, while \d, \D\, \w, \W, \b, and \B use ASCII-only
            // interpretations of digit, word character, and word boundary.  In other words,
            // no special treatment of Unicode ZERO WIDTH NON-JOINER (ZWNJ U+200C) and
            // ZERO WIDTH JOINER (ZWJ U+200D) is required for ECMA word boundaries.
            ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter
            (((uint)ch - '0') < 10) || // digit
            ch == '_' || // underscore
            ch == '\u0130'; // latin capital letter I with dot above
 
        /// <summary>16 bytes, representing the chars 0 through 127, with a 1 for a bit where that char is a word char.</summary>
        private static ReadOnlySpan<byte> WordCharAsciiLookup =>
        [
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
            0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
        ];
 
        /// <summary>Mask of Unicode categories that combine to form [\\w]</summary>
        private const int WordCategoriesMask =
            1 << (int)UnicodeCategory.UppercaseLetter |
            1 << (int)UnicodeCategory.LowercaseLetter |
            1 << (int)UnicodeCategory.TitlecaseLetter |
            1 << (int)UnicodeCategory.ModifierLetter |
            1 << (int)UnicodeCategory.OtherLetter |
            1 << (int)UnicodeCategory.NonSpacingMark |
            1 << (int)UnicodeCategory.DecimalDigitNumber |
            1 << (int)UnicodeCategory.ConnectorPunctuation;
 
        /// <summary>Determines whether a character is considered a word character for the purposes of testing the \w set.</summary>
        public static bool IsWordChar(char ch)
        {
            // This is the same as IsBoundaryWordChar, except that IsBoundaryWordChar also
            // returns true for \u200c and \u200d.
 
            // Bitmap for whether each character 0 through 127 is in [\\w]
            ReadOnlySpan<byte> ascii = WordCharAsciiLookup;
 
            // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
            int chDiv8 = ch >> 3;
            return (uint)chDiv8 < (uint)ascii.Length ?
                (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
                (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
        }
 
        /// <summary>Determines whether a character is considered a word character for the purposes of testing a word character boundary.</summary>
        public static bool IsBoundaryWordChar(char ch)
        {
            // According to UTS#18 Unicode Regular Expressions (http://www.unicode.org/reports/tr18/)
            // RL 1.4 Simple Word Boundaries  The class of <word_character> includes all Alphabetic
            // values from the Unicode character database, from UnicodeData.txt [UData], plus the U+200C
            // ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.
            const char ZeroWidthNonJoiner = '\u200C', ZeroWidthJoiner = '\u200D';
 
            // Bitmap for whether each character 0 through 127 is in [\\w]
            ReadOnlySpan<byte> ascii = WordCharAsciiLookup;
 
            // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
            int chDiv8 = ch >> 3;
            return (uint)chDiv8 < (uint)ascii.Length ?
                (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
                ((WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0 ||
                 (ch == ZeroWidthJoiner | ch == ZeroWidthNonJoiner));
        }
 
        /// <summary>Determines whether the 'a' and 'b' values differ by only a single bit, setting that bit in 'mask'.</summary>
        /// <remarks>This isn't specific to RegexCharClass; it's just a convenient place to host it.</remarks>
        public static bool DifferByOneBit(char a, char b, out int mask)
        {
            mask = a ^ b;
            return BitOperations.IsPow2(mask);
        }
 
        /// <summary>Determines a character's membership in a character class (via the string representation of the class).</summary>
        /// <param name="ch">The character.</param>
        /// <param name="set">The string representation of the character class.</param>
        /// <param name="asciiLazyCache">A lazily-populated cache for ASCII results stored in a 256-bit array.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool CharInClass(char ch, string set, ref uint[]? asciiLazyCache)
        {
            // The uint[] contains 8 ints, or 256 bits.  These are laid out as pairs, where the first bit in the pair
            // says whether the second bit in the pair has already been computed.  Once a value is computed, it's never
            // changed, so since Int32s are written/read atomically, we can trust the value bit if we see that the known bit
            // has been set.  If the known bit hasn't been set, then we proceed to look it up, and then swap in the result.
            const int CacheArrayLength = 8;
            Debug.Assert(asciiLazyCache is null || asciiLazyCache.Length == CacheArrayLength, "set lookup should be able to store two bits for each of the first 128 characters");
 
            // If the value is ASCII and already has an answer for this value, use it.
            if (asciiLazyCache is uint[] cache)
            {
                int index = ch >> 4;
                if ((uint)index < (uint)cache.Length)
                {
                    Debug.Assert(ch < 128);
                    uint current = cache[index];
                    uint bit = 1u << ((ch & 0xF) << 1);
                    if ((current & bit) != 0)
                    {
                        return (current & (bit << 1)) != 0;
                    }
                }
            }
 
            // For ASCII, lazily initialize. For non-ASCII, just compute the value.
            return ch < 128 ?
                InitializeValue(ch, set, ref asciiLazyCache) :
                CharInClassRecursive(ch, set, 0);
 
            static bool InitializeValue(char ch, string set, ref uint[]? asciiLazyCache)
            {
                // (After warm-up, we should find ourselves rarely getting here.)
                Debug.Assert(ch < 128);
 
                // Compute the result and determine which bits to write back to the array and "or" the bits back in a thread-safe manner.
                bool isInClass = CharInClass(ch, set);
                uint bitsToSet = 1u << ((ch & 0xF) << 1);
                if (isInClass)
                {
                    bitsToSet |= bitsToSet << 1;
                }
 
                uint[]? cache = asciiLazyCache ?? Interlocked.CompareExchange(ref asciiLazyCache, new uint[CacheArrayLength], null) ?? asciiLazyCache;
#if NET
                Interlocked
#else
                InterlockedExtensions
#endif
                    .Or(ref cache[ch >> 4], bitsToSet);
 
                // Return the computed value.
                return isInClass;
            }
        }
 
        /// <summary>
        /// Determines a character's membership in a character class (via the string representation of the class).
        /// </summary>
        public static bool CharInClass(char ch, string set) =>
            CharInClassRecursive(ch, set, 0);
 
        private static bool CharInClassRecursive(char ch, string set, int start)
        {
            int setLength = set[start + SetLengthIndex];
            int categoryLength = set[start + CategoryLengthIndex];
            int endPosition = start + SetStartIndex + setLength + categoryLength;
 
            bool inClass = CharInClassInternal(ch, set, start, setLength, categoryLength);
 
            // Note that we apply the negation *before* performing the subtraction.  This is because
            // the negation only applies to the first char class, not the entire subtraction.
            if (IsNegated(set, start))
            {
                inClass = !inClass;
            }
 
            // Subtract if necessary
            if (inClass && set.Length > endPosition)
            {
                inClass = !CharInClassRecursive(ch, set, endPosition);
            }
 
            return inClass;
        }
 
        /// <summary>
        /// Determines a character's membership in a character class (via the
        /// string representation of the class).
        /// </summary>
        private static bool CharInClassInternal(char ch, string set, int start, int setLength, int categoryLength)
        {
            int min = start + SetStartIndex;
            int max = min + setLength;
 
            while (min != max)
            {
                int mid = (min + max) >> 1;
                if (ch < set[mid])
                {
                    max = mid;
                }
                else
                {
                    min = mid + 1;
                }
            }
 
            // The starting position of the set within the character class determines
            // whether what an odd or even ending position means.  If the start is odd,
            // an *even* ending position means the character was in the set.  With recursive
            // subtractions in the mix, the starting position = start+SetStartIndex.  Since we know that
            // SetStartIndex is odd, we can simplify it out of the equation.  But if it changes we need to
            // reverse this check.
            Debug.Assert((SetStartIndex & 0x1) == 1, "If SetStartIndex is not odd, the calculation below this will be reversed");
            if ((min & 0x1) == (start & 0x1))
            {
                return true;
            }
 
            if (categoryLength == 0)
            {
                return false;
            }
 
            return CharInCategory(ch, set.AsSpan(SetStartIndex + start + setLength, categoryLength));
        }
 
        private static bool CharInCategory(char ch, ReadOnlySpan<char> categorySetSegment)
        {
            UnicodeCategory chcategory = char.GetUnicodeCategory(ch);
 
            for (int i = 0; i < categorySetSegment.Length; i++)
            {
                int curcat = (short)categorySetSegment[i];
 
                if (curcat == 0)
                {
                    // zero is our marker for a group of categories - treated as a unit
                    if (CharInCategoryGroup(chcategory, categorySetSegment, ref i))
                    {
                        return true;
                    }
                }
                else if (curcat > 0)
                {
                    // greater than zero is a positive case
 
                    if (curcat == SpaceConst)
                    {
                        if (char.IsWhiteSpace(ch))
                        {
                            return true;
                        }
                    }
                    else if (chcategory == (UnicodeCategory)(curcat - 1))
                    {
                        return true;
                    }
                }
                else
                {
                    // less than zero is a negative case
                    if (curcat == NotSpaceConst)
                    {
                        if (!char.IsWhiteSpace(ch))
                        {
                            return true;
                        }
                    }
                    else if (chcategory != (UnicodeCategory)(-1 - curcat))
                    {
                        return true;
                    }
                }
            }
 
            return false;
        }
 
        /// <summary>
        /// This is used for categories which are composed of other categories - L, N, Z, W...
        /// These groups need special treatment when they are negated
        /// </summary>
        private static bool CharInCategoryGroup(UnicodeCategory chcategory, ReadOnlySpan<char> category, ref int i)
        {
            int pos = i + 1;
            int curcat = (short)category[pos];
            bool result;
 
            if (curcat > 0)
            {
                // positive case - the character must be in ANY of the categories in the group
                result = false;
                do
                {
                    result |= chcategory == (UnicodeCategory)(curcat - 1);
                    curcat = (short)category[++pos];
                }
                while (curcat != 0);
            }
            else
            {
                // negative case - the character must be in NONE of the categories in the group
                Debug.Assert(curcat < 0);
                result = true;
                do
                {
                    result &= chcategory != (UnicodeCategory)(-1 - curcat);
                    curcat = (short)category[++pos];
                }
                while (curcat != 0);
            }
 
            i = pos;
            return result;
        }
 
        public static RegexCharClass Parse(string charClass) => ParseRecursive(charClass, 0);
 
        private static RegexCharClass ParseRecursive(string charClass, int start)
        {
            int setLength = charClass[start + SetLengthIndex];
            int categoryLength = charClass[start + CategoryLengthIndex];
            int endPosition = start + SetStartIndex + setLength + categoryLength;
 
            int i = start + SetStartIndex;
            int end = i + setLength;
 
            List<(char First, char Last)>? ranges = ComputeRanges(charClass.AsSpan(start));
 
            RegexCharClass? sub = null;
            if (charClass.Length > endPosition)
            {
                sub = ParseRecursive(charClass, endPosition);
            }
 
            StringBuilder? categoriesBuilder = null;
            if (categoryLength > 0)
            {
                categoriesBuilder = new StringBuilder().Append(charClass.AsSpan(end, categoryLength));
            }
 
            return new RegexCharClass(IsNegated(charClass, start), ranges, categoriesBuilder, sub);
        }
 
        /// <summary>Computes a list of all of the character ranges in the set string.</summary>
        public static List<(char First, char Last)>? ComputeRanges(ReadOnlySpan<char> set)
        {
            int setLength = set[SetLengthIndex];
            int i = SetStartIndex;
            int end = i + setLength;
 
            List<(char First, char Last)>? ranges = null;
            if (setLength > 0)
            {
                ranges = new List<(char First, char Last)>(setLength);
                while (i < end)
                {
                    char first = set[i];
                    i++;
 
                    char last = i < end ? (char)(set[i] - 1) : LastChar;
                    i++;
 
                    ranges.Add((first, last));
                }
            }
 
            return ranges;
        }
 
        /// <summary>Cache of character class strings for single ASCII characters.</summary>
        private static readonly string[] s_asciiStrings = new string[128];
        /// <summary>Cache of character class strings for pairs of upper/lower-case ASCII letters.</summary>
        private static readonly string[] s_asciiLetterPairStrings = new string[26];
 
        /// <summary>Creates a set string for a single character.</summary>
        /// <param name="c">The character for which to create the set.</param>
        /// <returns>The create set string.</returns>
        public static string OneToStringClass(char c)
            => CharsToStringClass([c]);
 
        internal static string CharsToStringClass(ReadOnlySpan<char> chars)
        {
#if DEBUG
            // Make sure they're all sorted with no duplicates
            for (int index = 0; index < chars.Length - 1; index++)
            {
                Debug.Assert(chars[index] < chars[index + 1]);
            }
#endif
 
            switch (chars.Length)
            {
                case 0:
                    // If there aren't any chars, just return an empty class.
                    return EmptyClass;
 
                case 1:
                    // Special-case ASCII characters to avoid the computation/allocation in this very common case.
                    if (chars[0] < 128)
                    {
                        string[] asciiStrings = s_asciiStrings;
                        if (chars[0] < asciiStrings.Length)
                        {
                            return asciiStrings[chars[0]] ??= $"\0\u0002\0{chars[0]}{(char)(chars[0] + 1)}";
                        }
                    }
                    break;
 
                case 2:
                    // Special-case cased ASCII letter pairs to avoid the computation/allocation in this very common case.
                    int masked0 = chars[0] | 0x20;
                    if ((uint)(masked0 - 'a') <= 'z' - 'a' && masked0 == (chars[1] | 0x20))
                    {
                        return s_asciiLetterPairStrings[masked0 - 'a'] ??= $"\0\u0004\0{(char)(masked0 & ~0x20)}{(char)((masked0 & ~0x20) + 1)}{(char)masked0}{(char)(masked0 + 1)}";
                    }
                    break;
            }
 
            // Count how many characters there actually are.  All but the very last possible
            // char value will have two characters, one for the inclusive beginning of range
            // and one for the exclusive end of range.
            int count = chars.Length * 2;
            if (chars[chars.Length - 1] == LastChar)
            {
                count--;
            }
 
            // Get the pointer/length of the span to be able to pass it into string.Create.
            ReadOnlySpan<char> tmpChars = chars; // avoid address exposing the span and impacting the other code in the method that uses it
#if NET
            return string.Create(SetStartIndex + count, tmpChars, static (span, chars) =>
            {
                // Fill in the set string
                span[FlagsIndex] = (char)0;
                span[SetLengthIndex] = (char)(span.Length - SetStartIndex);
                span[CategoryLengthIndex] = (char)0;
                int i = SetStartIndex;
                foreach (char c in chars)
                {
                    span[i++] = c;
                    if (c != LastChar)
                    {
                        span[i++] = (char)(c + 1);
                    }
                }
                Debug.Assert(i == span.Length);
            });
#else
            unsafe
            {
                return StringExtensions.Create(SetStartIndex + count, (IntPtr)(&tmpChars), static (span, charsPtr) =>
                {
                    // Fill in the set string
                    span[FlagsIndex] = (char)0;
                    span[SetLengthIndex] = (char)(span.Length - SetStartIndex);
                    span[CategoryLengthIndex] = (char)0;
                    int i = SetStartIndex;
                    ReadOnlySpan<char> chars = *(ReadOnlySpan<char>*)charsPtr;
                    foreach (char c in chars)
                    {
                        span[i++] = c;
                        if (c != LastChar)
                        {
                            span[i++] = (char)(c + 1);
                        }
                    }
                    Debug.Assert(i == span.Length);
                });
            }
#endif
        }
 
 
        /// <summary>
        /// Constructs the string representation of the class.
        /// </summary>
        public string ToStringClass()
        {
            var vsb = new ValueStringBuilder(stackalloc char[256]);
            ToStringClass(ref vsb);
            return vsb.ToString();
        }
 
        private void ToStringClass(ref ValueStringBuilder vsb)
        {
            Canonicalize();
 
            int initialLength = vsb.Length;
            int categoriesLength = _categories?.Length ?? 0;
            Span<char> headerSpan = vsb.AppendSpan(SetStartIndex);
            headerSpan[FlagsIndex] = (char)(_negate ? 1 : 0);
            headerSpan[SetLengthIndex] = '\0'; // (will be replaced once we know how long a range we've added)
            headerSpan[CategoryLengthIndex] = (char)categoriesLength;
 
            // Append ranges
            List<(char First, char Last)>? rangelist = _rangelist;
            if (rangelist != null)
            {
                for (int i = 0; i < rangelist.Count; i++)
                {
                    (char First, char Last) currentRange = rangelist[i];
                    vsb.Append(currentRange.First);
                    if (currentRange.Last != LastChar)
                    {
                        vsb.Append((char)(currentRange.Last + 1));
                    }
                }
            }
 
            // Update the range length.  The ValueStringBuilder may have already had some
            // contents (if this is a subtactor), so we need to offset by the initial length.
            vsb[initialLength + SetLengthIndex] = (char)(vsb.Length - initialLength - SetStartIndex);
 
            // Append categories
            if (categoriesLength != 0)
            {
                foreach (ReadOnlyMemory<char> chunk in _categories!.GetChunks())
                {
                    vsb.Append(chunk.Span);
                }
            }
 
            // Append a subtractor if there is one.
            _subtractor?.ToStringClass(ref vsb);
        }
 
        /// <summary>
        /// Logic to reduce a character class to a unique, sorted form.
        /// </summary>
        private void Canonicalize()
        {
            List<(char First, char Last)>? rangelist = _rangelist;
            if (rangelist != null)
            {
                // Find and eliminate overlapping or abutting ranges.
                if (rangelist.Count > 1)
                {
                    rangelist.Sort((x, y) => x.First.CompareTo(y.First));
 
                    bool done = false;
                    int j = 0;
 
                    for (int i = 1; ; i++)
                    {
                        char last;
                        for (last = rangelist[j].Last; ; i++)
                        {
                            if (i == rangelist.Count || last == LastChar)
                            {
                                done = true;
                                break;
                            }
 
                            (char First, char Last) currentRange;
                            if ((currentRange = rangelist[i]).First > last + 1)
                            {
                                break;
                            }
 
                            if (last < currentRange.Last)
                            {
                                last = currentRange.Last;
                            }
                        }
 
                        rangelist[j] = (rangelist[j].First, last);
 
                        j++;
 
                        if (done)
                        {
                            break;
                        }
 
                        if (j < i)
                        {
                            rangelist[j] = rangelist[i];
                        }
                    }
 
                    rangelist.RemoveRange(j, rangelist.Count - j);
                }
 
                // If the class now represents a single negated range, but does so by including every
                // other character, invert it to produce a normalized form with a single range.  This
                // is valuable for subsequent optimizations in most of the engines.
                if (!_negate &&
                    _subtractor is null &&
                    (_categories is null || _categories.Length == 0))
                {
                    if (rangelist.Count == 2)
                    {
                        // There are two ranges in the list.  See if there's one missing range between them.
                        // Such a range might be as small as a single character.
                        if (rangelist[0].First == 0 &&
                            rangelist[1].Last == LastChar &&
                            rangelist[0].Last < rangelist[1].First - 1)
                        {
                            rangelist[0] = ((char)(rangelist[0].Last + 1), (char)(rangelist[1].First - 1));
                            rangelist.RemoveAt(1);
                            _negate = true;
                        }
                    }
                    else if (rangelist.Count == 1)
                    {
                        if (rangelist[0].First == 0)
                        {
                            // There's only one range in the list.  Does it include everything but the last char?
                            if (rangelist[0].Last == LastChar - 1)
                            {
                                rangelist[0] = (LastChar, LastChar);
                                _negate = true;
                            }
                        }
                        else if (rangelist[0].First == 1)
                        {
                            // Or everything but the first char?
                            if (rangelist[0].Last == LastChar)
                            {
                                rangelist[0] = ('\0', '\0');
                                _negate = true;
                            }
                        }
                    }
                }
 
                // If the class now has a range that includes everything, and if it doesn't have subtraction,
                // we can remove all of its categories, as they're duplicative (the set already includes everything).
                if (!_negate &&
                    _subtractor is null &&
                    _categories?.Length > 0 &&
                    rangelist.Count == 1 && rangelist[0].First == 0 && rangelist[0].Last == LastChar)
                {
                    _categories.Clear();
                }
 
                // If there's only a single character omitted from ranges, if there's no subtractor, and if there are categories,
                // see if that character is in the categories.  If it is, then we can replace whole thing with a complete "any" range.
                // If it's not, then we can remove the categories, as they're only duplicating the rest of the range, turning the set
                // into a "not one". This primarily helps in the case of a synthesized set from analysis that ends up combining '.' with
                // categories, as we want to reduce that set down to either [^\n] or [\0-\uFFFF]. (This can be extrapolated to any number
                // of missing characters; in fact, categories in general are superfluous and the entire set can be represented as ranges.
                // But categories serve as a space optimization, and we strike a balance between testing many characters and the time/complexity
                // it takes to do so.  Thus, we limit this to the common case of a single missing character.)
                if (!_negate &&
                    _subtractor is null &&
                    _categories?.Length > 0 &&
                    rangelist.Count == 2 && rangelist[0].First == 0 && rangelist[0].Last + 2 == rangelist[1].First && rangelist[1].Last == LastChar)
                {
                    var vsb = new ValueStringBuilder(stackalloc char[256]);
                    foreach (ReadOnlyMemory<char> chunk in _categories!.GetChunks())
                    {
                        vsb.Append(chunk.Span);
                    }
 
                    if (CharInCategory((char)(rangelist[0].Last + 1), vsb.AsSpan()))
                    {
                        rangelist.RemoveAt(1);
                        rangelist[0] = ('\0', LastChar);
                    }
                    else
                    {
                        _negate = true;
                        rangelist.RemoveAt(1);
                        char notOne = (char)(rangelist[0].Last + 1);
                        rangelist[0] = (notOne, notOne);
                    }
                    _categories.Clear();
 
                    vsb.Dispose();
                }
            }
        }
 
        private static ReadOnlySpan<char> RangesFromProperty(string capname, bool invert, string pattern, int currentPos)
        {
            int min = 0;
            int max = s_propTable.Length;
            while (min != max)
            {
                int mid = (min + max) / 2;
                int res = string.Compare(capname, s_propTable[mid][0], StringComparison.Ordinal);
                if (res < 0)
                {
                    max = mid;
                }
                else if (res > 0)
                {
                    min = mid + 1;
                }
                else
                {
                    string set = s_propTable[mid][1];
                    Debug.Assert(!string.IsNullOrEmpty(set), "Found a null/empty element in RegexCharClass prop table");
                    return
                        !invert ? set.AsSpan() :
                        set[0] == '\0' ? set.AsSpan(1) :
                        ("\0" + set).AsSpan();
                }
            }
 
            throw new RegexParseException(RegexParseError.UnrecognizedUnicodeProperty, currentPos,
                SR.Format(SR.MakeException, pattern, currentPos, SR.Format(SR.UnrecognizedUnicodeProperty, capname)));
        }
 
#if DEBUG || !SYSTEM_TEXT_REGULAREXPRESSIONS
        private static readonly string[] CategoryIdToName = PopulateCategoryIdToName();
 
        private static string[] PopulateCategoryIdToName()
        {
            // Populate category reverse lookup used for diagnostic output
 
            var temp = new List<KeyValuePair<string, string>>(s_definedCategories);
            temp.RemoveAll(kvp => kvp.Value.Length != 1);
            temp.Sort((kvp1, kvp2) => ((short)kvp1.Value[0]).CompareTo((short)kvp2.Value[0]));
            return temp.ConvertAll(kvp => kvp.Key).ToArray();
        }
 
        /// <summary>
        /// Produces a human-readable description for a set string.
        /// </summary>
        public static string DescribeSet(string set)
        {
            int setLength = set[SetLengthIndex];
            int categoryLength = set[CategoryLengthIndex];
            int endPosition = SetStartIndex + setLength + categoryLength;
            bool negated = IsNegated(set);
            Span<char> scratch = stackalloc char[32];
 
            // Special-case set of a single character to output that character without set square brackets.
            if (!negated && // no negation
                categoryLength == 0 && // no categories
                endPosition >= set.Length && // no subtraction
                setLength == 2 && // don't bother handling the case of the single character being 0xFFFF, in which case setLength would be 1
                set[SetStartIndex] + 1 == set[SetStartIndex + 1])
            {
                return DescribeChar(set[SetStartIndex]);
            }
 
            int index = SetStartIndex;
            char ch1;
            char ch2;
            StringBuilder desc = new StringBuilder().Append('[');
 
            void RenderRanges()
            {
                int rangesEnd = SetStartIndex + set[SetLengthIndex];
                while (index < rangesEnd)
                {
                    ch1 = set[index];
                    if (index + 1 < rangesEnd)
                    {
                        ch2 = (char)(set[index + 1] - 1);
                        index += 2;
                    }
                    else
                    {
                        ch2 = LastChar;
                        index++;
                    }
 
                    desc.Append(DescribeChar(ch1));
 
                    if (ch2 != ch1)
                    {
                        if (ch1 + 1 != ch2)
                        {
                            desc.Append('-');
                        }
 
                        desc.Append(DescribeChar(ch2));
                    }
                }
            }
 
            // Special-case sets where the description will be more succinct by rendering it as negated, e.g. where
            // there are fewer gaps between ranges than there are ranges.  This is the case when the first range
            // includes \0 and the last range includes 0xFFFF, and typically occurs for sets that were actually
            // initially negated but ended up as non-negated from various transforms along the way.
            if (categoryLength == 0 && // no categories
                endPosition >= set.Length && // no subtraction
                setLength % 2 == 1 && // odd number of values because the last range won't include an upper bound
                set[index] == 0)
            {
                // We now have an odd number of values structures as:
                //     0,end0,start1,end1,start2,end2,...,startN
                // Rather than walking the pairs starting from index 0, we walk pairs starting from index 1 (creating a range from end0 to start1),
                // since we're creating ranges from the gaps.
                index++;
                desc.Append('^');
                RenderRanges();
                return desc.Append(']').ToString();
            }
 
            if (negated)
            {
                desc.Append('^');
            }
 
            RenderRanges();
 
            while (index < SetStartIndex + set[SetLengthIndex] + set[CategoryLengthIndex])
            {
                ch1 = set[index];
                if (ch1 == 0)
                {
                    const char GroupChar = '\0';
                    int lastindex = set.IndexOf(GroupChar, index + 1);
                    if (lastindex != -1)
                    {
                        ReadOnlySpan<char> group = set.AsSpan(index, lastindex - index + 1);
                        switch (group)
                        {
                            case WordCategories:
                                desc.Append(@"\w");
                                break;
 
                            case NotWordCategories:
                                desc.Append(@"\W");
                                break;
 
                            default:
                                // The inverse of a group as created by AddCategoryFromName simply negates every character as a 16-bit value.
                                Span<char> invertedGroup = group.Length <= scratch.Length ? scratch.Slice(0, group.Length) : new char[group.Length];
                                for (int i = 0; i < group.Length; i++)
                                {
                                    invertedGroup[i] = (char)-(short)group[i];
                                }
 
                                // Determine whether the group is a known Unicode category, e.g. \p{Mc}, or group of categories, e.g. \p{L},
                                // or the inverse of those.
                                foreach (KeyValuePair<string, string> kvp in s_definedCategories)
                                {
                                    bool equalsGroup = group.SequenceEqual(kvp.Value.AsSpan());
                                    if (equalsGroup || invertedGroup.SequenceEqual(kvp.Value.AsSpan()))
                                    {
                                        desc.Append(equalsGroup ? @"\p{" : @"\P{").Append(kvp.Key).Append('}');
                                        break;
                                    }
                                }
                                break;
                        }
 
                        index = lastindex;
                    }
                }
                else
                {
                    desc.Append(DescribeCategory(ch1));
                }
 
                index++;
            }
 
            if (set.Length > endPosition)
            {
                desc.Append('-').Append(DescribeSet(set.Substring(endPosition)));
            }
 
            return desc.Append(']').ToString();
        }
 
        /// <summary>Produces a human-readable description for a single character.</summary>
        public static string DescribeChar(char ch) =>
            ch switch
            {
                '\0' => @"\0",
                '\a' => "\\a",
                '\b' => "\\b",
                '\t' => "\\t",
                '\r' => "\\r",
                '\v' => "\\v",
                '\f' => "\\f",
                '\n' => "\\n",
                '\\' => "\\\\",
                >= ' ' and <= '~' => ch.ToString(),
                _ => $"\\u{(uint)ch:X4}"
            };
 
        private static string DescribeCategory(char ch) =>
            (short)ch switch
            {
                SpaceConst => @"\s",
                NotSpaceConst => @"\S",
                (short)(UnicodeCategory.DecimalDigitNumber + 1) => @"\d",
                -(short)(UnicodeCategory.DecimalDigitNumber + 1) => @"\D",
                < 0 => $"\\P{{{CategoryIdToName[-(short)ch - 1]}}}",
                _ => $"\\p{{{CategoryIdToName[ch - 1]}}}",
            };
#endif
    }
}
File: src\libraries\System.Text.RegularExpressions\src\System\Text\RegularExpressions\RegexCharClass.cs	Web Access
Project: src\src\libraries\System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj (System.Text.RegularExpressions.Generator)