File: EmbeddedLanguages\RegularExpressions\RegexParser.CaptureInfoAnalyzer.cs
Web Access
Project: src\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
#nullable disable
using System.Collections.Immutable;
using System.Diagnostics;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions;
using static EmbeddedSyntaxHelpers;
using static RegexHelpers;
using RegexToken = EmbeddedSyntaxToken<RegexKind>;
internal partial struct RegexParser
{
    /// <summary>
    /// Analyzes the first parsed tree to determine the set of capture numbers and names.  These are
    /// then used to do the second parsing pass as they can change how the regex engine interprets
    /// some parts of the pattern (though not the groups themselves).
    /// </summary>
    private struct CaptureInfoAnalyzer
    {
        /// <summary>The full text of the pattern being analyzed.</summary>
        private readonly VirtualCharSequence _text;

        /// <summary>
        /// Maps each capture number to the span of the grouping that introduced it.  Seeded with
        /// entry <c>0</c> by the constructor.
        /// </summary>
        private readonly ImmutableDictionary<int, TextSpan>.Builder _captureNumberToSpan;

        /// <summary>Maps each capture name to the span of the grouping that introduced it.</summary>
        private readonly ImmutableDictionary<string, TextSpan>.Builder _captureNameToSpan;

        /// <summary>
        /// The capture names in the order they were first recorded.  Walked by
        /// <see cref="AssignNumbersToCaptureNames"/> once the whole tree has been visited.
        /// </summary>
        private readonly ArrayBuilder<string> _captureNames;

        /// <summary>The next number to try handing out to a capture without an explicit number.</summary>
        private int _autoNumber;

        /// <summary>Current nesting depth of <see cref="CollectCaptures"/> calls.</summary>
        private int _recursionDepth;

        /// <summary>
        /// Creates an analyzer for <paramref name="text"/> and registers capture <c>0</c>, whose
        /// span is the whole text (or the default span when the text is empty).
        /// </summary>
        private CaptureInfoAnalyzer(VirtualCharSequence text)
        {
            _text = text;
            _captureNumberToSpan = ImmutableDictionary.CreateBuilder<int, TextSpan>();
            _captureNameToSpan = ImmutableDictionary.CreateBuilder<string, TextSpan>();
            _captureNames = ArrayBuilder<string>.GetInstance();
            _autoNumber = 1;
            _recursionDepth = 0;

            _captureNumberToSpan.Add(0, text.IsEmpty ? default : GetSpan(text));
        }

        /// <summary>
        /// Computes the capture-name and capture-number tables for the tree rooted at
        /// <paramref name="root"/>.
        /// </summary>
        /// <param name="text">The full pattern text that <paramref name="root"/> was parsed from.</param>
        /// <param name="root">The tree produced by the first parsing pass.</param>
        /// <param name="options">The options in effect at the start of the pattern.</param>
        /// <returns>
        /// A pair of (capture name to span, capture number to span) dictionaries.
        /// </returns>
        public static (ImmutableDictionary<string, TextSpan>, ImmutableDictionary<int, TextSpan>) Analyze(
            VirtualCharSequence text, RegexCompilationUnit root, RegexOptions options)
        {
            // Must be a local variable (not a temporary): the instance method mutates the struct.
            var analyzer = new CaptureInfoAnalyzer(text);
            return analyzer.Analyze(root, options);
        }

        /// <summary>
        /// Walks the tree to collect every capture, then gives each named capture a number, and
        /// finally snapshots both tables.
        /// </summary>
        private (ImmutableDictionary<string, TextSpan>, ImmutableDictionary<int, TextSpan>) Analyze(
            RegexCompilationUnit root, RegexOptions options)
        {
            try
            {
                CollectCaptures(root, options);

                // Named captures can only be numbered once every explicitly/automatically numbered
                // group has been seen, so this has to run after the full walk.
                AssignNumbersToCaptureNames();

                return (_captureNameToSpan.ToImmutable(), _captureNumberToSpan.ToImmutable());
            }
            finally
            {
                // Hand the pooled builder back, including when the walk throws.
                _captureNames.Free();
            }
        }

        /// <summary>
        /// Recursion entry point for the tree walk.  Tracks the nesting depth and probes the
        /// remaining stack before descending so a pathologically nested pattern fails with an
        /// <see cref="System.InsufficientExecutionStackException"/> instead of overflowing the stack.
        /// </summary>
        private void CollectCaptures(RegexNode node, RegexOptions options)
        {
            try
            {
                _recursionDepth++;

                // NOTE(review): if the project has its own depth-aware stack guard helper, prefer
                // passing _recursionDepth to it here instead of probing on every call.
                System.Runtime.CompilerServices.RuntimeHelpers.EnsureSufficientExecutionStack();

                CollectCapturesWorker(node, options);
            }
            finally
            {
                _recursionDepth--;
            }
        }

        /// <summary>
        /// Records the capture (if any) introduced by <paramref name="node"/> and then visits its
        /// children.  Grouping kinds that need special handling of their children recurse
        /// explicitly and return without falling through to the generic child walk.
        /// </summary>
        private void CollectCapturesWorker(RegexNode node, RegexOptions options)
        {
            switch (node.Kind)
            {
                case RegexKind.CaptureGrouping:
                    var captureGrouping = (RegexCaptureGroupingNode)node;
                    RecordCapture(captureGrouping.CaptureToken, GetGroupingSpan(captureGrouping));
                    break;

                case RegexKind.BalancingGrouping:
                    var balancingGroup = (RegexBalancingGroupingNode)node;
                    RecordCapture(balancingGroup.FirstCaptureToken, GetGroupingSpan(balancingGroup));
                    break;

                case RegexKind.ConditionalExpressionGrouping:
                    // Explicitly recurse into conditionalGrouping.Grouping.  That grouping
                    // itself does not create a capture group, but nested groupings inside of it
                    // will.
                    var conditionalGrouping = (RegexConditionalExpressionGroupingNode)node;
                    RecurseIntoChildren(conditionalGrouping.Grouping, options);
                    CollectCaptures(conditionalGrouping.Result, options);
                    return;

                case RegexKind.SimpleGrouping:
                    RecordSimpleGroupingCapture((RegexSimpleGroupingNode)node, options);
                    break;

                case RegexKind.NestedOptionsGrouping:
                    // When we see (?opts:...)
                    // Recurse explicitly, setting the new options as we process the inner expression.
                    // When this pops out we'll be back to these options we're currently at now.
                    var nestedOptions = (RegexNestedOptionsGroupingNode)node;
                    CollectCaptures(nestedOptions.Expression, GetNewOptionsFromToken(options, nestedOptions.OptionsToken));
                    return;
            }

            RecurseIntoChildren(node, options);
        }

        /// <summary>
        /// Visits every child node of <paramref name="node"/> in order.  <paramref name="options"/>
        /// is updated as inline option groups are encountered, so later siblings see the new options.
        /// </summary>
        private void RecurseIntoChildren(RegexNode node, RegexOptions options)
        {
            foreach (var child in node)
            {
                if (child.IsNode)
                {
                    // When we see a SimpleOptionsGroup ```(?opts)``` then determine what the options will
                    // be for successive nodes in the sequence.
                    var childNode = child.Node;
                    if (childNode is RegexSimpleOptionsGroupingNode simpleOptions)
                    {
                        options = GetNewOptionsFromToken(options, simpleOptions.OptionsToken);
                    }

                    CollectCaptures(childNode, options);
                }
            }
        }

        /// <summary>
        /// Gets the span from the grouping's open paren through its close paren.  When the close
        /// paren is missing, the span runs to the last character of the pattern text instead.
        /// </summary>
        private readonly TextSpan GetGroupingSpan(RegexGroupingNode grouping)
        {
            var lastChar = grouping.CloseParenToken.IsMissing
                ? _text.Last()
                : grouping.CloseParenToken.VirtualChars.Last();
            return GetSpan(grouping.OpenParenToken.VirtualChars[0], lastChar);
        }

        /// <summary>
        /// Records an automatically numbered capture for a plain <c>(...)</c> grouping, unless
        /// explicit-capture mode is on or the grouping is a bogus <c>(?</c> construct.
        /// </summary>
        private void RecordSimpleGroupingCapture(RegexSimpleGroupingNode node, RegexOptions options)
        {
            if (HasOption(options, RegexOptions.ExplicitCapture))
            {
                // Don't automatically add simple groups if the explicit capture option is on.
                // Only add captures for 'CaptureGrouping' and 'BalancingGrouping' nodes.
                return;
            }

            // Don't count a bogus (? node as a capture node.  We only have this to keep our error
            // messages in line with the native parser.  i.e. even though the bogus (? code would
            // cause an exception, we might get an earlier exception if there's a reference to
            // this grouping.  So if we note this grouping we'll end up not causing that error
            // to happen, bringing our behavior out of sync with the native system.
            var expr = node.Expression;
            if (expr is RegexAlternationNode alternation)
            {
                // Only the left-most alternative can start immediately after the open paren.
                expr = alternation.SequenceList[0];
            }

            if (expr is RegexSequenceNode sequence &&
                sequence.ChildCount > 0)
            {
                var leftMost = sequence.ChildAt(0);
                if (leftMost.Node is RegexTextNode textNode &&
                    IsTextChar(textNode.TextToken, '?'))
                {
                    return;
                }
            }

            AddIfMissing(_captureNumberToSpan, list: null, _autoNumber++, GetGroupingSpan(node));
        }

        /// <summary>
        /// Records a capture identified by <paramref name="token"/>: into the number table when the
        /// token is a number, otherwise into the name table.  Missing tokens are ignored.
        /// </summary>
        private readonly void RecordCapture(RegexToken token, TextSpan span)
        {
            if (!token.IsMissing)
            {
                if (token.Kind == RegexKind.NumberToken)
                {
                    AddIfMissing(_captureNumberToSpan, list: null, (int)token.Value, span);
                }
                else
                {
                    AddIfMissing(_captureNameToSpan, list: _captureNames, (string)token.Value, span);
                }
            }
        }

        /// <summary>
        /// Adds <paramref name="val"/> to <paramref name="mapping"/> if it is not already present,
        /// so the first grouping to claim a number or name wins.  When the entry is newly added and
        /// <paramref name="list"/> is non-null, <paramref name="val"/> is also appended to it to
        /// remember the order of first occurrence.
        /// </summary>
        private static void AddIfMissing<T>(
            ImmutableDictionary<T, TextSpan>.Builder mapping,
            ArrayBuilder<T> list,
            T val, TextSpan span)
        {
            if (!mapping.ContainsKey(val))
            {
                mapping.Add(val, span);
                list?.Add(val);
            }
        }

        /// <summary>
        /// Give numbers to all named captures.  They will get successive <see
        /// cref="_autoNumber"/> values that have not already been handed out to existing
        /// numbered capture groups.
        /// </summary>
        private void AssignNumbersToCaptureNames()
        {
            foreach (var name in _captureNames)
            {
                // Skip past any number already claimed by an explicitly numbered group.
                while (_captureNumberToSpan.ContainsKey(_autoNumber))
                {
                    _autoNumber++;
                }

                _captureNumberToSpan.Add(_autoNumber, _captureNameToSpan[name]);
                _autoNumber++;
            }
        }
    }
}