// File: System\Text\RegularExpressions\RegexAssemblyCompiler.cs
// Web Access
// Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
using System.Reflection.Emit;
using System.Threading;
 
// WARNING:
// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later.
// It accesses internal and private members of System.Text.RegularExpressions, which may change
// at any point in the future, and relies on details about the current machine/process, e.g. is
// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists
// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits.
 
#if DEBUG
namespace System.Text.RegularExpressions
{
    /// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary>
    [RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")]
    internal sealed class RegexAssemblyCompiler : RegexCompiler
    {
        /// <summary>Type count used to augment generated type names to create unique names.</summary>
        /// <remarks>Advanced with <see cref="Interlocked.Increment(ref int)"/> each time a Regex type is generated.</remarks>
        private static int s_typeCount;

        /// <summary>The assembly builder created in the constructor; generated types end up in it and <see cref="Save"/> writes it out.</summary>
        private readonly PersistedAssemblyBuilder _assembly;
        /// <summary>The module defined on <see cref="_assembly"/> in which every generated type is defined.</summary>
        private readonly ModuleBuilder _module;
 
        /// <summary>Creates the persisted assembly and the single module that will hold the generated types.</summary>
        /// <param name="an">The name of the assembly to create; its <see cref="AssemblyName.Name"/> plus ".dll" is used as the module name.</param>
        /// <param name="attribs">Optional custom attributes for the assembly; copied into a list before being handed to the builder.</param>
        /// <param name="resourceFile">Must be <see langword="null"/>; unmanaged resources are not supported.</param>
        /// <exception cref="PlatformNotSupportedException"><paramref name="resourceFile"/> is not <see langword="null"/>.</exception>
        internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
        {
            if (resourceFile != null)
            {
                // Unmanaged resources are not supported (no call to _assembly.DefineUnmanagedResource(resourceFile) is made).
                throw new PlatformNotSupportedException();
            }

            // A constructor call never evaluates to null, so the result needs no null check.
            _assembly = new PersistedAssemblyBuilder(an, typeof(object).Assembly, attribs is not null ? new List<CustomAttributeBuilder>(attribs) : null);

            _module = _assembly.DefineDynamicModule(an.Name + ".dll");
        }
 
        /// <summary>Emits the three types that make up one compiled regex: a runner, a runner factory, and the Regex-derived type itself.</summary>
        /// <param name="pattern">The pattern text baked into the generated Regex-derived type's constructor.</param>
        /// <param name="options">The options used for code generation and baked into the generated constructor.</param>
        /// <param name="name">The name of the generated Regex-derived type; also the prefix of the runner and factory type names.</param>
        /// <param name="isPublic">Whether the generated Regex-derived type is public. The runner and factory types are always non-public.</param>
        /// <param name="tree">The parsed regex tree that code is generated from.</param>
        /// <param name="code">Passed through to the parameterless-constructor generator.</param>
        /// <param name="matchTimeout">The default match timeout; a timeout-accepting constructor is generated only when this is not infinite.</param>
        internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
        {
            // Store arguments into the base type's fields.
            _options = options;
            _regexTree = tree;

            // Suffix taken from the shared type counter so the helper type names are unique.
            uint typeNumber = (uint)Interlocked.Increment(ref s_typeCount);
            string uniqueSuffix = typeNumber.ToString();

            // RegexRunner-derived type: define and emit its three methods, then bake it.
            TypeBuilder runnerBuilder = DefineType(_module, name + "Runner" + uniqueSuffix, isPublic: false, isSealed: true, typeof(RegexRunner));

            _ilg = DefineMethod(runnerBuilder, "TryFindNextPossibleStartingPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder findStartMethod);
            EmitTryFindNextPossibleStartingPosition();

            _ilg = DefineMethod(runnerBuilder, "TryMatchAtCurrentPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder matchAtPositionMethod);
            EmitTryMatchAtCurrentPosition();

            _ilg = DefineMethod(runnerBuilder, "Scan", [typeof(ReadOnlySpan<char>)], null, out _);
            EmitScan(options, findStartMethod, matchAtPositionMethod);

            Type runnerType = runnerBuilder.CreateType()!;

            // RegexRunnerFactory-derived type: a single CreateInstance method that news up the runner.
            TypeBuilder factoryBuilder = DefineType(_module, name + "Factory" + uniqueSuffix, isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
            _ilg = DefineMethod(factoryBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
            GenerateCreateInstance(runnerType);
            Type factoryType = factoryBuilder.CreateType()!;

            // Regex-derived type: always gets a parameterless constructor.
            TypeBuilder regexBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
            ConstructorBuilder defaultCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
            _ilg = defaultCtor.GetILGenerator();
            GenerateRegexDefaultCtor(pattern, options, factoryType, tree, code, matchTimeout);

            if (matchTimeout != Regex.InfiniteMatchTimeout)
            {
                // We only generate a constructor with a timeout parameter if the regex information supplied has a non-infinite timeout.
                // If it has an infinite timeout, then the generated code is not going to respect the timeout. This is a difference from netfx,
                // due to the fact that we now special-case an infinite timeout in the code generator to avoid spitting unnecessary code
                // and paying for the checks at run time.
                ConstructorBuilder timeoutCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(TimeSpan)]);
                _ilg = timeoutCtor.GetILGenerator();
                GenerateRegexTimeoutCtor(defaultCtor, regexBuilder);
            }

            regexBuilder.CreateType();
        }
 
        /// <summary>Emits a factory method body that constructs <paramref name="type"/> via its parameterless constructor and returns it.</summary>
        /// <param name="type">The type to instantiate in the emitted code.</param>
        private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
        {
            // Emitted code is equivalent to: return new Type();
            ConstructorInfo parameterlessCtor = type.GetConstructor(Type.EmptyTypes)!;
            _ilg!.Emit(OpCodes.Newobj, parameterlessCtor);
            Ret();
        }
 
        /// <summary>Emits the body of the generated Regex-derived type's parameterless constructor.</summary>
        /// <remarks>
        /// The emitted code calls the base <see cref="Regex"/> constructor and then stores the pattern, options,
        /// runner factory, match timeout, and capture information into the base type's fields.
        /// </remarks>
        /// <param name="pattern">The pattern string stored into the pattern field.</param>
        /// <param name="options">The options, stored as an int into the roptions field.</param>
        /// <param name="regexRunnerFactoryType">The generated factory type; the emitted code instantiates it and stores it into the factory field.</param>
        /// <param name="tree">Supplies the capture count and the optional capture mappings and names.</param>
        /// <param name="code">Not read by this method.</param>
        /// <param name="matchTimeout">The timeout stored into the internalMatchTimeout field.</param>
        private void GenerateRegexDefaultCtor(
            string pattern,
            RegexOptions options,
            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
            RegexTree tree,
            RegexInterpreterCode code,
            TimeSpan matchTimeout)
        {
            ILGenerator il = _ilg!;

            // base.ctor();
            Ldthis();
            ConstructorInfo regexCtor = typeof(Regex).GetConstructor(
                BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
                null,
                Type.EmptyTypes,
                Array.Empty<ParameterModifier>())!;
            il.Emit(OpCodes.Call, regexCtor);

            // base.pattern = pattern;
            Ldthis();
            Ldstr(pattern);
            Stfld(RegexField(nameof(Regex.pattern)));

            // base.roptions = options;
            Ldthis();
            Ldc((int)options);
            Stfld(RegexField(nameof(Regex.roptions)));

            // base.factory = new DerivedRegexRunnerFactory();
            Ldthis();
            il.Emit(OpCodes.Newobj, regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!);
            Stfld(RegexField(nameof(Regex.factory)));

            // Store the timeout (no need to validate as it should have happened in RegexCompilationInfo).
            Ldthis();
            if (matchTimeout != Regex.InfiniteMatchTimeout)
            {
                // base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
                LdcI8(matchTimeout.Ticks);
                Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
            }
            else
            {
                // base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
                il.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
            }
            Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

            // base.capsize = tree.CaptureCount;
            Ldthis();
            Ldc(tree.CaptureCount);
            Stfld(RegexField(nameof(Regex.capsize)));

            if (tree.CaptureNumberSparseMapping is { } sparseNumberMap)
            {
                // base.caps = new Hashtable {{0, 0}, {1, 1}, ... };
                GenerateCreateHashtable(RegexField(nameof(Regex.caps)), sparseNumberMap);
            }

            if (tree.CaptureNameToNumberMapping is { } nameToNumberMap)
            {
                // base.capnames = new Hashtable {{"0", 0}, {"1", 1}, ...};
                GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), nameToNumberMap);
            }

            if (tree.CaptureNames is { } captureNames)
            {
                // base.capslist = new string[captureNames.Length];
                Ldthis();
                Ldc(captureNames.Length);
                il.Emit(OpCodes.Newarr, typeof(string));
                FieldInfo capslist = RegexField(nameof(Regex.capslist));
                Stfld(capslist);

                // base.capslist[0] = "0"; base.capslist[1] = "1"; ...
                int index = 0;
                foreach (string captureName in captureNames)
                {
                    Ldthisfld(capslist);
                    Ldc(index++);
                    Ldstr(captureName);
                    il.Emit(OpCodes.Stelem_Ref);
                }
            }

            // return;
            Ret();
        }
 
        /// <summary>Emits the body of the generated Regex-derived type's constructor that takes a match timeout.</summary>
        /// <param name="defaultCtorBuilder">The generated parameterless constructor, which the emitted code calls first.</param>
        /// <param name="regexTypeBuilder">The builder of the generated Regex-derived type. Not read by this method.</param>
        private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
        {
            ILGenerator il = _ilg!;

            // Chain to the generated parameterless constructor.
            Ldthis();
            il.Emit(OpCodes.Call, defaultCtorBuilder);

            // Regex.ValidateMatchTimeout(timeSpan);
            il.Emit(OpCodes.Ldarg_1);
            MethodInfo validateTimeout = typeof(Regex).GetMethod(nameof(Regex.ValidateMatchTimeout), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!;
            Call(validateTimeout);

            // base.internalMatchTimeout = timeSpan;
            Ldthis();
            il.Emit(OpCodes.Ldarg_1);
            Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

            Ret();
        }
 
        /// <summary>
        /// Emits code that stores a new <see cref="Hashtable"/> into <paramref name="field"/> on <c>this</c> and then
        /// adds one entry to it for every entry in <paramref name="ht"/>.
        /// </summary>
        /// <param name="field">The field of the object under construction that receives the new hashtable.</param>
        /// <param name="ht">The entries to reproduce; keys are treated as either int or string, and values as int.</param>
        internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
        {
            ILGenerator il = _ilg!;

            // this.field = new Hashtable();
            Ldthis();
            il.Emit(OpCodes.Newobj, typeof(Hashtable).GetConstructor(Type.EmptyTypes)!);
            Stfld(field);

            MethodInfo hashtableAdd = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance)!;

            // this.field.Add(key, value); once per entry
            foreach (DictionaryEntry entry in ht)
            {
                Ldthisfld(field);

                switch (entry.Key)
                {
                    case int intKey:
                        // Int keys are passed to Add as object, so box them.
                        Ldc(intKey);
                        il.Emit(OpCodes.Box, typeof(int));
                        break;

                    default:
                        Ldstr((string)entry.Key);
                        break;
                }

                // Values are always ints and likewise need boxing.
                Ldc((int)entry.Value!);
                il.Emit(OpCodes.Box, typeof(int));
                Callvirt(hashtableAdd);
            }
        }
 
        /// <summary>Gets the named field of the <see cref="Regex"/> type, whether instance or static and regardless of accessibility.</summary>
        /// <param name="fieldname">The name of the field to look up.</param>
        /// <returns>The <see cref="FieldInfo"/> for the requested field.</returns>
        /// <exception cref="InvalidOperationException">No field named <paramref name="fieldname"/> was found on <see cref="Regex"/>.</exception>
        private static FieldInfo RegexField(string fieldname) =>
            // Static is included because this helper is also used to look up the static Regex.InfiniteMatchTimeout field.
            // Fail here, with the field name, rather than handing a null FieldInfo to a later emit call.
            typeof(Regex).GetField(fieldname, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static) ??
                throw new InvalidOperationException($"Field '{fieldname}' was not found on {nameof(Regex)}.");
 
        /// <summary>Saves the assembly to <paramref name="fileName"/>, appending ".dll" when the name does not already end with it.</summary>
        /// <param name="fileName">The file name to save to; the ".dll" suffix check is ordinal and case-sensitive.</param>
        internal void Save(string fileName)
        {
            const string DllExtension = ".dll";

            string target = fileName.EndsWith(DllExtension, StringComparison.Ordinal)
                ? fileName
                : fileName + DllExtension;

            _assembly.Save(target);
        }
 
        /// <summary>Begins the definition of a new class that derives from the specified base class.</summary>
        /// <param name="moduleBuilder">The module in which the type is defined.</param>
        /// <param name="typeName">The name of the new type.</param>
        /// <param name="isPublic"><see langword="true"/> for a public type; <see langword="false"/> for a non-public one.</param>
        /// <param name="isSealed"><see langword="true"/> to mark the type sealed.</param>
        /// <param name="inheritFromClass">The base class of the new type.</param>
        /// <returns>The builder for the new type.</returns>
        private static TypeBuilder DefineType(
            ModuleBuilder moduleBuilder,
            string typeName,
            bool isPublic,
            bool isSealed,
            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
        {
            TypeAttributes visibility = isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic;
            TypeAttributes sealing = isSealed ? TypeAttributes.Sealed : default;

            return moduleBuilder.DefineType(
                typeName,
                TypeAttributes.Class | TypeAttributes.BeforeFieldInit | visibility | sealing,
                inheritFromClass);
        }
 
        /// <summary>Begins the definition of a new family (protected) virtual method with the specified parameter and return types.</summary>
        /// <param name="typeBuilder">The type on which the method is defined.</param>
        /// <param name="methname">The name of the method.</param>
        /// <param name="parameterTypes">The method's parameter types, or <see langword="null"/>.</param>
        /// <param name="returnType">The method's return type, or <see langword="null"/>.</param>
        /// <param name="builder">Receives the builder for the newly defined method.</param>
        /// <returns>The IL generator for the new method's body.</returns>
        private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
        {
            const MethodAttributes FamilyVirtual = MethodAttributes.Family | MethodAttributes.Virtual;

            MethodBuilder method = typeBuilder.DefineMethod(methname, FamilyVirtual, returnType, parameterTypes);
            builder = method;
            return method.GetILGenerator();
        }
    }
}
#endif