// File: System\Text\RegularExpressions\RegexInterpreterCode.cs
// Web Access
// Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
 
namespace System.Text.RegularExpressions
{
    /// <summary>
    /// Holds the program emitted by <see cref="RegexWriter"/> that <see cref="RegexInterpreter"/> runs to evaluate a regular expression.
    /// </summary>
    internal sealed class RegexInterpreterCode(RegexFindOptimizations findOptimizations, RegexOptions options, int[] codes, string[] strings, int trackcount)
    {
        /// <summary>Find logic used to locate the next possible location for a match.</summary>
        public readonly RegexFindOptimizations FindOptimizations = findOptimizations;

        /// <summary>The options associated with the regex.</summary>
        public readonly RegexOptions Options = options;

        /// <summary>The <see cref="RegexOpcode"/> values and their arguments, as written by <see cref="RegexWriter"/>.</summary>
        public readonly int[] Codes = codes;

        /// <summary>The string / set table. String and set arguments in <see cref="Codes"/> are offsets into this table.</summary>
        public readonly string[] Strings = strings;

        /// <summary>
        /// ASCII lookup table optimization for sets in <see cref="Strings"/>.
        /// Created with one slot per <see cref="Strings"/> entry; every slot is initially <see langword="null"/>.
        /// </summary>
        public readonly uint[]?[] StringsAsciiLookup = new uint[strings.Length][];

        /// <summary>The number of instructions in <see cref="Codes"/> that use backtracking.</summary>
        public readonly int TrackCount = trackcount;

        /// <summary>Determines whether the specified opcode may incur backtracking.</summary>
        /// <param name="opcode">The opcode to classify. Bits outside <see cref="RegexOpcode.OperatorMask"/> are ignored.</param>
        /// <returns><see langword="true"/> if the operator is one of those listed in the body; otherwise, <see langword="false"/>.</returns>
        public static bool OpcodeBacktracks(RegexOpcode opcode)
        {
            // Only the operator itself matters here; modifier flags are stripped before classification.
            switch (opcode & RegexOpcode.OperatorMask)
            {
                case RegexOpcode.Oneloop:
                case RegexOpcode.Onelazy:
                case RegexOpcode.Notoneloop:
                case RegexOpcode.Notonelazy:
                case RegexOpcode.Setloop:
                case RegexOpcode.Setlazy:
                case RegexOpcode.Lazybranch:
                case RegexOpcode.Branchmark:
                case RegexOpcode.Lazybranchmark:
                case RegexOpcode.Nullcount:
                case RegexOpcode.Setcount:
                case RegexOpcode.Branchcount:
                case RegexOpcode.Lazybranchcount:
                case RegexOpcode.Setmark:
                case RegexOpcode.Capturemark:
                case RegexOpcode.Getmark:
                case RegexOpcode.Setjump:
                case RegexOpcode.Backjump:
                case RegexOpcode.Forejump:
                case RegexOpcode.Goto:
                    return true;

                default:
                    return false;
            }
        }

#if DEBUG
        /// <summary>
        /// Gets how many integers an operation with the specified opcode occupies in <see cref="Codes"/>, counting the opcode itself.
        /// </summary>
        /// <param name="opcode">The opcode to measure. Bits outside <see cref="RegexOpcode.OperatorMask"/> are ignored.</param>
        /// <returns>A value from 1 (just the opcode) to 3 (the opcode plus up to two operands).</returns>
        [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
        public static int OpcodeSize(RegexOpcode opcode)
        {
            RegexOpcode op = opcode & RegexOpcode.OperatorMask;

            return op switch
            {
                // Operators without operands.
                RegexOpcode.Nothing or
                RegexOpcode.Bol or
                RegexOpcode.Eol or
                RegexOpcode.Boundary or
                RegexOpcode.NonBoundary or
                RegexOpcode.ECMABoundary or
                RegexOpcode.NonECMABoundary or
                RegexOpcode.Beginning or
                RegexOpcode.Start or
                RegexOpcode.EndZ or
                RegexOpcode.End or
                RegexOpcode.Nullmark or
                RegexOpcode.Setmark or
                RegexOpcode.Getmark or
                RegexOpcode.Setjump or
                RegexOpcode.Backjump or
                RegexOpcode.Forejump or
                RegexOpcode.Stop or
                RegexOpcode.UpdateBumpalong => 1,

                // Operators with a single operand.
                RegexOpcode.One or
                RegexOpcode.Notone or
                RegexOpcode.Multi or
                RegexOpcode.Backreference or
                RegexOpcode.TestBackreference or
                RegexOpcode.Goto or
                RegexOpcode.Nullcount or
                RegexOpcode.Setcount or
                RegexOpcode.Lazybranch or
                RegexOpcode.Branchmark or
                RegexOpcode.Lazybranchmark or
                RegexOpcode.Set => 2,

                // Operators with two operands.
                RegexOpcode.Capturemark or
                RegexOpcode.Branchcount or
                RegexOpcode.Lazybranchcount or
                RegexOpcode.Onerep or
                RegexOpcode.Notonerep or
                RegexOpcode.Oneloop or
                RegexOpcode.Oneloopatomic or
                RegexOpcode.Notoneloop or
                RegexOpcode.Notoneloopatomic or
                RegexOpcode.Onelazy or
                RegexOpcode.Notonelazy or
                RegexOpcode.Setlazy or
                RegexOpcode.Setrep or
                RegexOpcode.Setloop or
                RegexOpcode.Setloopatomic => 3,

                _ => UnknownOpcode(op),
            };

            // Reports the unexpected operator and then sizes it like an operand-less opcode.
            [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
            static int UnknownOpcode(RegexOpcode unknown)
            {
                Debug.Fail($"Unknown opcode: {unknown}");
                return 1;
            }
        }

        /// <summary>
        /// Produces a listing of the program: the match direction, a blank line, and then one line per instruction in <see cref="Codes"/>.
        /// </summary>
        [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
        public override string ToString()
        {
            bool rightToLeft = (Options & RegexOptions.RightToLeft) != 0;

            var listing = new StringBuilder();
            listing.Append("Direction: ").AppendLine(rightToLeft ? "right-to-left" : "left-to-right");
            listing.AppendLine();

            // Instructions vary in length, so step by the size of whichever instruction was just described.
            int offset = 0;
            while (offset < Codes.Length)
            {
                listing.AppendLine(DescribeInstruction(offset));
                offset += OpcodeSize((RegexOpcode)Codes[offset]);
            }

            return listing.ToString();
        }

        /// <summary>
        /// Formats the instruction that begins at <paramref name="opcodeOffset"/> in <see cref="Codes"/> as a single line of text:
        /// the six-digit offset, a '~' marker if the operator may backtrack, the operator name, a suffix for each modifier flag
        /// that is set, and a description of the operands (if any).
        /// </summary>
        /// <param name="opcodeOffset">The index into <see cref="Codes"/> of the instruction's opcode.</param>
        /// <returns>The textual description of the instruction.</returns>
        [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
        internal string DescribeInstruction(int opcodeOffset)
        {
            RegexOpcode encoded = (RegexOpcode)Codes[opcodeOffset];
            RegexOpcode op = encoded & RegexOpcode.OperatorMask;

            var line = new StringBuilder();

            // Offset, backtracking marker, operator name.
            line.Append($"{opcodeOffset:D6} ");
            line.Append(OpcodeBacktracks(op) ? '~' : ' ');
            line.Append(op);

            // Modifier flags carried alongside the operator.
            AppendFlag(RegexOpcode.CaseInsensitive, "-Ci");
            AppendFlag(RegexOpcode.RightToLeft, "-Rtl");
            AppendFlag(RegexOpcode.Backtracking, "-Back");
            AppendFlag(RegexOpcode.BacktrackingSecond, "-Back2");

            // First operand (plus the optional second operand of Capturemark).
            if (op is RegexOpcode.One or
                      RegexOpcode.Onerep or
                      RegexOpcode.Oneloop or
                      RegexOpcode.Oneloopatomic or
                      RegexOpcode.Onelazy or
                      RegexOpcode.Notone or
                      RegexOpcode.Notonerep or
                      RegexOpcode.Notoneloop or
                      RegexOpcode.Notoneloopatomic or
                      RegexOpcode.Notonelazy)
            {
                Pad().Append('\'').Append(RegexCharClass.DescribeChar((char)Codes[opcodeOffset + 1])).Append('\'');
            }
            else if (op is RegexOpcode.Set or
                           RegexOpcode.Setrep or
                           RegexOpcode.Setloop or
                           RegexOpcode.Setloopatomic or
                           RegexOpcode.Setlazy)
            {
                Pad().Append(RegexCharClass.DescribeSet(Strings[Codes[opcodeOffset + 1]]));
            }
            else if (op is RegexOpcode.Multi)
            {
                Pad().Append('"').Append(Strings[Codes[opcodeOffset + 1]]).Append('"');
            }
            else if (op is RegexOpcode.Backreference or RegexOpcode.TestBackreference)
            {
                Pad().Append("index = ").Append(Codes[opcodeOffset + 1]);
            }
            else if (op is RegexOpcode.Capturemark)
            {
                Pad().Append("index = ").Append(Codes[opcodeOffset + 1]);

                // -1 in the second operand means there is no "unindex" to show.
                int unindex = Codes[opcodeOffset + 2];
                if (unindex != -1)
                {
                    line.Append(", unindex = ").Append(unindex);
                }
            }
            else if (op is RegexOpcode.Nullcount or RegexOpcode.Setcount)
            {
                Pad().Append("value = ").Append(Codes[opcodeOffset + 1]);
            }
            else if (op is RegexOpcode.Goto or
                           RegexOpcode.Lazybranch or
                           RegexOpcode.Branchmark or
                           RegexOpcode.Lazybranchmark or
                           RegexOpcode.Branchcount or
                           RegexOpcode.Lazybranchcount)
            {
                Pad().Append("addr = ").Append(Codes[opcodeOffset + 1]);
            }

            // Second operand: a repetition count or an iteration limit.
            if (op is RegexOpcode.Onerep or
                      RegexOpcode.Oneloop or
                      RegexOpcode.Oneloopatomic or
                      RegexOpcode.Onelazy or
                      RegexOpcode.Notonerep or
                      RegexOpcode.Notoneloop or
                      RegexOpcode.Notoneloopatomic or
                      RegexOpcode.Notonelazy or
                      RegexOpcode.Setrep or
                      RegexOpcode.Setloop or
                      RegexOpcode.Setloopatomic or
                      RegexOpcode.Setlazy)
            {
                AppendBound(", rep = ");
            }
            else if (op is RegexOpcode.Branchcount or RegexOpcode.Lazybranchcount)
            {
                AppendBound(", limit = ");
            }

            return line.ToString();

            // Appends the suffix when the given modifier flag is present on the encoded opcode.
            [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
            void AppendFlag(RegexOpcode flag, string suffix)
            {
                if ((encoded & flag) != 0)
                {
                    line.Append(suffix);
                }
            }

            // Pads the line with spaces out to column 25, always adding at least one space, and returns the builder for chaining.
            [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
            StringBuilder Pad() => line.Append(' ', Math.Max(1, 25 - line.Length));

            // Appends the label followed by the second operand, rendering int.MaxValue as "inf".
            [ExcludeFromCodeCoverage(Justification = "Used only for debugging assistance")]
            void AppendBound(string label)
            {
                line.Append(label);

                int bound = Codes[opcodeOffset + 2];
                if (bound == int.MaxValue)
                {
                    line.Append("inf");
                }
                else
                {
                    line.Append(bound);
                }
            }
        }
#endif
    }
}