File: System\Text\RegularExpressions\RegexReplacement.cs
Web Access
Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
 
namespace System.Text.RegularExpressions
{
    /// <summary>
    /// The RegexReplacement class represents a substitution string for
    /// use when using regexes to search/replace, etc. It's logically
    /// a sequence intermixed (1) constant strings and (2) group numbers.
    /// </summary>
    internal sealed class RegexReplacement
    {
        // Constants for special insertion patterns
        private const int Specials = 4;
        public const int LeftPortion = -1;
        public const int RightPortion = -2;
        public const int LastGroup = -3;
        public const int WholeString = -4;
 
        private readonly string[] _strings; // table of string constants
        private readonly int[] _rules;      // negative -> group #, positive -> string #
        private readonly bool _hasBackreferences;    // true if the replacement has any backreferences; otherwise, false
 
        /// <summary>
        /// Since RegexReplacement shares the same parser as Regex,
        /// the constructor takes a RegexNode which is a concatenation
        /// of constant strings and backreferences.
        /// </summary>
        public RegexReplacement(string rep, RegexNode concat, Hashtable _caps)
        {
            Debug.Assert(concat.Kind == RegexNodeKind.Concatenate, $"Expected Concatenate, got {concat.Kind}");
 
            var vsb = new ValueStringBuilder(stackalloc char[256]);
            FourStackStrings stackStrings = default;
            var strings = new ValueListBuilder<string>(stackStrings);
            var rules = new ValueListBuilder<int>(stackalloc int[64]);
 
            int childCount = concat.ChildCount();
            for (int i = 0; i < childCount; i++)
            {
                RegexNode child = concat.Child(i);
 
                switch (child.Kind)
                {
                    case RegexNodeKind.Multi:
                        vsb.Append(child.Str!);
                        break;
 
                    case RegexNodeKind.One:
                        vsb.Append(child.Ch);
                        break;
 
                    case RegexNodeKind.Backreference:
                        if (vsb.Length > 0)
                        {
                            rules.Append(strings.Length);
                            strings.Append(vsb.AsSpan().ToString());
                            vsb.Length = 0;
                        }
                        int slot = child.M;
 
                        if (_caps != null && slot >= 0)
                        {
                            slot = (int)_caps[slot]!;
                        }
 
                        rules.Append(-Specials - 1 - slot);
                        _hasBackreferences = true;
                        break;
 
                    default:
                        Debug.Fail($"Unexpected child kind {child.Kind}");
                        break;
                }
            }
 
            if (vsb.Length > 0)
            {
                rules.Append(strings.Length);
                strings.Append(vsb.ToString());
            }
            vsb.Dispose();
 
            Pattern = rep;
            _strings = strings.AsSpan().ToArray();
            _rules = rules.AsSpan().ToArray();
 
            rules.Dispose();
        }
 
        /// <summary>Simple struct of four strings.</summary>
        [InlineArray(4)]
        private struct FourStackStrings // used to do the equivalent of: Span<string> strings = stackalloc string[4];
        {
            private string? _item1;
        }
 
        /// <summary>
        /// Either returns a weakly cached RegexReplacement helper or creates one and caches it.
        /// </summary>
        /// <returns></returns>
        public static RegexReplacement GetOrCreate(WeakReference<RegexReplacement?> replRef, string replacement, Hashtable caps,
            int capsize, Hashtable capnames, RegexOptions roptions)
        {
            if (!replRef.TryGetTarget(out RegexReplacement? repl) || !repl.Pattern.Equals(replacement))
            {
                repl = RegexParser.ParseReplacement(replacement, roptions, caps, capsize, capnames);
                replRef.SetTarget(repl);
            }
 
            return repl;
        }
 
        /// <summary>The original pattern string</summary>
        public string Pattern { get; }
 
        /// <summary>
        /// Given a Match, emits into the StringBuilder the evaluated
        /// substitution pattern.
        /// </summary>
        public void ReplacementImpl(ref StructListBuilder<ReadOnlyMemory<char>> segments, Match match)
        {
            foreach (int rule in _rules)
            {
                // Get the segment to add.
                ReadOnlyMemory<char> segment =
                    rule >= 0 ? _strings[rule].AsMemory() : // string lookup
                    rule < -Specials ? match.GroupToStringImpl(-Specials - 1 - rule) : // group lookup
                    (-Specials - 1 - rule) switch // special insertion patterns
                    {
                        LeftPortion => match.GetLeftSubstring(),
                        RightPortion => match.GetRightSubstring(),
                        LastGroup => match.LastGroupToStringImpl(),
                        WholeString => match.Text.AsMemory(),
                        _ => default
                    };
 
                // Add the segment if it's not empty.  A common case for it being empty
                // is if the developer is using Regex.Replace as a way to implement
                // Regex.Remove, where the replacement string is empty.
                if (segment.Length != 0)
                {
                    segments.Add(segment);
                }
            }
        }
 
        /// <summary>
        /// Given a Match, emits into the builder the evaluated
        /// Right-to-Left substitution pattern.
        /// </summary>
        public void ReplacementImplRTL(ref StructListBuilder<ReadOnlyMemory<char>> segments, Match match)
        {
            for (int i = _rules.Length - 1; i >= 0; i--)
            {
                int rule = _rules[i];
 
                ReadOnlyMemory<char> segment =
                    rule >= 0 ? _strings[rule].AsMemory() : // string lookup
                    rule < -Specials ? match.GroupToStringImpl(-Specials - 1 - rule) : // group lookup
                    (-Specials - 1 - rule) switch // special insertion patterns
                    {
                        LeftPortion => match.GetLeftSubstring(),
                        RightPortion => match.GetRightSubstring(),
                        LastGroup => match.LastGroupToStringImpl(),
                        WholeString => match.Text.AsMemory(),
                        _ => default
                    };
 
                // Add the segment to the list if it's not empty.  A common case for it being
                // empty is if the developer is using Regex.Replace as a way to implement
                // Regex.Remove, where the replacement string is empty.
                if (segment.Length != 0)
                {
                    segments.Add(segment);
                }
            }
        }
 
        /// <summary>
        /// Replaces all occurrences of the regex in the string with the
        /// replacement pattern.
        ///
        /// Note that the special case of no matches is handled on its own:
        /// with no matches, the input string is returned unchanged.
        /// The right-to-left case is split out because StringBuilder
        /// doesn't handle right-to-left string building directly very well.
        /// </summary>
        public string Replace(Regex regex, string input, int count, int startat)
        {
            if (count < -1)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.CountTooSmall);
            }
            if ((uint)startat > (uint)input.Length)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative);
            }
 
            if (count == 0)
            {
                return input;
            }
 
            // Handle the common case of a left-to-right pattern with no backreferences in the replacement pattern such that the replacement is just a string of text.
            if (!regex.RightToLeft && !_hasBackreferences)
            {
                // With no backreferences, there should either be no rules (in the case of an empty replacement)
                // or one rule (in the case of a single text string).
                Debug.Assert(_rules.Length <= 1);
                Debug.Assert(_rules.Length == 0 || (_rules[0] == 0 && _strings.Length == 1));
 
                return ReplaceSimpleText(regex, input, _rules.Length != 0 ? _strings[0] : "", count, startat);
            }
            else
            {
                return ReplaceNonSimpleText(regex, input, count, startat);
            }
        }
 
        private static unsafe string ReplaceSimpleText(Regex regex, string input, string replacement, int count, int startat)
        {
            // As the replacement text is the same for every match, for every match we can simply store the offset/count for the match.
            // As we only split the input when there's a replacement, we know that there's then replacement text to be inserted between
            // every offset/count pair in the list.
 
            var state = (input, replacement, offsetAndCounts: new StructListBuilder<int>(), inputMemory: input.AsMemory(), prevat: 0, count);
            string result = input;
 
            regex.RunAllMatchesWithCallback(input, startat, ref state, (ref (string input, string replacement, StructListBuilder<int> segments, ReadOnlyMemory<char> inputMemory, int prevat, int count) state, Match match) =>
            {
                // Store the offset/count pair for the match.
                state.segments.Add(state.prevat);
                state.segments.Add(match.Index - state.prevat);
 
                // Update the previous offset to be the end of the match.
                state.prevat = match.Index + match.Length;
 
                // Update the number of matches and return whether to continue.
                return --state.count != 0;
            }, RegexRunnerMode.BoundsRequired, reuseMatchObject: true);
 
            // If the list is empty, there were no matches and we can just return the input string.
            // If the list isn't empty, we need to compose the result string.
            if (state.offsetAndCounts.Count != 0)
            {
                // Add the final offset/count pair for the text after the last match.
                state.offsetAndCounts.Add(state.prevat);
                state.offsetAndCounts.Add(input.Length - state.prevat);
 
                // There should now be an even number of items in the list, as each offset and count is its
                // own entry and they're added in pairs.  And there should be at least four entries, one for
                // the first segment and one for the last.
                Debug.Assert(state.offsetAndCounts.Count % 2 == 0, $"{state.offsetAndCounts.Count}");
                Debug.Assert(state.offsetAndCounts.Count >= 4, $"{state.offsetAndCounts.Count}");
 
                Span<int> span = state.offsetAndCounts.AsSpan();
 
                // Determine the final string length.
                int length = ((span.Length / 2) - 1) * replacement.Length;
                for (int i = 1; i < span.Length; i += 2) // the count of each pair is the second item
                {
                    length += span[i];
                }
 
                ReadOnlySpan<int> tmpSpan = span; // avoid address exposing the span and impacting the other code in the method that uses it
                result = string.Create(length, ((IntPtr)(&tmpSpan), input, replacement), static (dest, state) =>
                {
                    ReadOnlySpan<int> span = *(ReadOnlySpan<int>*)state.Item1;
                    for (int i = 0; i < span.Length; i += 2)
                    {
                        if (i != 0)
                        {
                            state.replacement.CopyTo(dest);
                            dest = dest.Slice(state.replacement.Length);
                        }
 
                        (int offset, int count) = (span[i], span[i + 1]);
                        state.input.AsSpan(offset, count).CopyTo(dest);
                        dest = dest.Slice(count);
                    }
                });
            }
 
            state.offsetAndCounts.Dispose();
 
            return result;
        }
 
        /// <summary>Handles cases other than left-to-right with a simple replacement string.</summary>
        private string ReplaceNonSimpleText(Regex regex, string input, int count, int startat)
        {
            var state = (replacement: this, segments: new StructListBuilder<ReadOnlyMemory<char>>(), inputMemory: input.AsMemory(), prevat: 0, count);
 
            if (!regex.RightToLeft)
            {
                regex.RunAllMatchesWithCallback(input, startat, ref state, (ref (RegexReplacement thisRef, StructListBuilder<ReadOnlyMemory<char>> segments, ReadOnlyMemory<char> inputMemory, int prevat, int count) state, Match match) =>
                {
                    state.segments.Add(state.inputMemory.Slice(state.prevat, match.Index - state.prevat));
                    state.prevat = match.Index + match.Length;
                    state.thisRef.ReplacementImpl(ref state.segments, match);
                    return --state.count != 0;
                }, _hasBackreferences ? RegexRunnerMode.FullMatchRequired : RegexRunnerMode.BoundsRequired, reuseMatchObject: true);
 
                if (state.segments.Count == 0)
                {
                    return input;
                }
 
                // Final segment of the input string after the last match.
                state.segments.Add(state.inputMemory.Slice(state.prevat));
            }
            else
            {
                state.prevat = input.Length;
 
                regex.RunAllMatchesWithCallback(input, startat, ref state, (ref (RegexReplacement thisRef, StructListBuilder<ReadOnlyMemory<char>> segments, ReadOnlyMemory<char> inputMemory, int prevat, int count) state, Match match) =>
                {
                    state.segments.Add(state.inputMemory.Slice(match.Index + match.Length, state.prevat - match.Index - match.Length));
                    state.prevat = match.Index;
                    state.thisRef.ReplacementImplRTL(ref state.segments, match);
                    return --state.count != 0;
                }, _hasBackreferences ? RegexRunnerMode.FullMatchRequired : RegexRunnerMode.BoundsRequired, reuseMatchObject: true);
 
                if (state.segments.Count == 0)
                {
                    return input;
                }
 
                // Final segment of the input string after the last match.
                state.segments.Add(state.inputMemory.Slice(0, state.prevat));
 
                // Reverse the segments as we're dealing with right-to-left handling.
                state.segments.AsSpan().Reverse();
            }
 
            // Compose the final string from the built up segments.
            return Regex.SegmentsToStringAndDispose(ref state.segments);
        }
    }
}