// SectionMatcher_TryParseSection.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.PooledObjects;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.EditorConfig.Parsing;
 
internal readonly partial struct SectionMatcher
{
    /// <summary>
    /// Compiles an editorconfig section name into a <see cref="SectionMatcher"/> backed by a regular expression.
    /// </summary>
    /// <param name="headerText">The section name text to compile.</param>
    /// <param name="matcher">
    /// On success, the matcher built from the compiled pattern, the original <paramref name="headerText"/> and
    /// the numeric ranges found in it; otherwise <see langword="default"/>.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the section name was compiled; <see langword="false"/> if it could not be
    /// parsed or if the pattern it produced is not a valid regular expression.
    /// </returns>
    public static bool TryParseSection(string headerText, out SectionMatcher matcher)
    {
        // An editorconfig section name is a language for recognizing file paths
        // defined by the following grammar:
        //
        // <path> ::= <path-list>
        // <path-list> ::= <path-item> | <path-item> <path-list>
        // <path-item> ::= "*"  | "**" | "?" | <char> | <choice> | <range>
        // <char> ::= any unicode character
        // <choice> ::= "{" <choice-list> "}"
        // <choice-list> ::= <path-list> | <path-list> "," <choice-list>
        // <range> ::= "{" <integer> ".." <integer> "}"
        // <integer> ::= "-" <digit-list> | <digit-list>
        // <digit-list> ::= <digit> | <digit> <digit-list>
        // <digit> ::= 0-9

        matcher = default;
        using var _0 = PooledStringBuilder.GetInstance(out var sb);
        sb.Append('^');

        // EditorConfig matching depends on whether or not there are
        // directory separators and where they are located in the section
        // name. Specifically, the editorconfig core parser says:
        // https://github.com/editorconfig/editorconfig-core-c/blob/5d3996811e962a717a7d7fdd0a941192382241a7/src/lib/editorconfig.c#L231
        //
        //     Pattern would be:
        //     /dir/of/editorconfig/file[double_star]/[section] if section does not contain '/',
        //     /dir/of/editorconfig/file[section] if section starts with a '/', or
        //     /dir/of/editorconfig/file/[section] if section contains '/' but does not start with '/'.

        if (!headerText.Contains("/"))
        {
            // No separator at all: the section may match at any directory depth.
            sb.Append(".*/");
        }
        else if (headerText[0] != '/')
        {
            // Contains a separator but is not rooted: anchor it directly below the root.
            sb.Append('/');
        }

        var lexer = new Lexer(headerText);
        using var _1 = ArrayBuilder<(int minValue, int maxValue)>.GetInstance(out var numberRangePairs);
        if (!TryCompilePathList(ref lexer, sb, parsingChoice: false, numberRangePairs))
        {
            return false;
        }

        sb.Append('$');

        Regex regex;
        try
        {
            regex = new Regex(sb.ToString());
        }
        catch (ArgumentException)
        {
            // Character classes are copied into the pattern largely verbatim, so a
            // section name can compile to text the regex parser rejects (for example a
            // reversed range such as "[z-a]"). Report that as a failed parse rather
            // than letting the exception escape a Try* method.
            return false;
        }

        matcher = new SectionMatcher(regex, headerText, numberRangePairs.ToImmutableArray());
        return true;
    }
 
    /// <summary>
    /// Compiles a &lt;path-list&gt; read from <paramref name="lexer"/> into regular-expression text that is
    /// appended to <paramref name="sb"/>.
    /// </summary>
    /// <param name="lexer">Token source; it is advanced past everything that is consumed.</param>
    /// <param name="sb">Receives the regular-expression fragment produced for each token.</param>
    /// <param name="parsingChoice">
    /// <see langword="true"/> when compiling one alternative inside a <c>{a,b}</c> choice, in which case the
    /// list must end with ',' or '}'; <see langword="false"/> at the top level, where those tokens are errors.
    /// </param>
    /// <param name="numberRangePairs">
    /// Receives one (min, max) pair, in order of appearance, for every <c>{num..num}</c> range encountered.
    /// </param>
    /// <returns><see langword="true"/> if the list compiled; otherwise <see langword="false"/>.</returns>
    private static bool TryCompilePathList(
        ref Lexer lexer,
        StringBuilder sb,
        bool parsingChoice,
        ArrayBuilder<(int minValue, int maxValue)> numberRangePairs)
    {
        while (!lexer.IsDone)
        {
            var tokenKind = lexer.Lex();
            switch (tokenKind)
            {
                case TokenKind.BadToken:
                    // Parsing failure
                    return false;
                case TokenKind.SimpleCharacter:
                    // Matches just this character
                    sb.Append(Regex.Escape(lexer.EatCurrentCharacter().ToString()));
                    break;
                case TokenKind.Question:
                    // '?' matches any single character
                    sb.Append('.');
                    break;
                case TokenKind.Star:
                    // Matches any string of characters except directory separator
                    // Directory separator is defined in editorconfig spec as '/'
                    sb.Append("[^/]*");
                    break;
                case TokenKind.StarStar:
                    // Matches any string of characters
                    sb.Append(".*");
                    break;
                case TokenKind.OpenCurly:
                    // Back up token stream. The following helpers all expect a '{'
                    lexer.Position--;
                    // This is ambiguous between {num..num} and {item1,item2}
                    // We need to look ahead to disambiguate. Looking for {num..num}
                    // is easier because it can't be recursive.
                    var numberRange = TryParseNumberRange(ref lexer);
                    if (numberRange is null)
                    {
                        // Not a number range. Try a choice expression
                        if (!TryCompileChoice(ref lexer, sb, numberRangePairs))
                        {
                            return false;
                        }
                        // Keep looping. There may be more after the '}'.
                        break;
                    }
                    else
                    {
                        var (numStart, numEnd) = numberRange.Value;

                        // The bounds come from a machine-readable file, so parse them with the
                        // invariant culture instead of whatever culture the current thread has.
                        // Parsing also fails (and so does the section) when a bound does not fit in an int.
                        if (int.TryParse(numStart, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intStart) &&
                            int.TryParse(numEnd, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intEnd))
                        {
                            // Store the bounds as (min, max) regardless of the order they were written in
                            var pair = intStart < intEnd ? (intStart, intEnd) : (intEnd, intStart);
                            numberRangePairs.Add(pair);
                            // Group allowing any digit sequence. The validity will be checked outside of the regex
                            sb.Append("(-?[0-9]+)");
                            // Keep looping
                            break;
                        }

                        return false;
                    }
                case TokenKind.CloseCurly:
                    // Either the end of a choice, or a failed parse
                    return parsingChoice;
                case TokenKind.Comma:
                    // The end of a choice section, or a failed parse
                    return parsingChoice;
                case TokenKind.OpenBracket:
                    sb.Append('[');
                    if (!TryCompileCharacterClass(ref lexer, sb))
                    {
                        return false;
                    }

                    break;
                default:
                    throw ExceptionUtilities.UnexpectedValue(tokenKind);
            }
        }
        // If we're parsing a choice we should not exit without a closing '}'
        return !parsingChoice;
    }
 
    /// <summary>
    /// Attempts to read a <c>{number..number}</c> range starting at the lexer's current position.
    /// </summary>
    /// <param name="lexer">
    /// Token source. On success it is left just after the closing '}'; on failure its position is restored
    /// to where it was on entry.
    /// </param>
    /// <returns>
    /// The text of the two numbers in the order they were written, or <see langword="null"/> if the input
    /// at this position is not a number range.
    /// </returns>
    private static (string numStart, string numEnd)? TryParseNumberRange(ref Lexer lexer)
    {
        var entryPosition = lexer.Position;

        if (lexer.Lex() == TokenKind.OpenCurly)
        {
            var lowerText = lexer.TryLexNumber();

            // After the first number the next two characters must be ".."
            if (lowerText is not null
                && lexer.TryEatCurrentCharacter(out var firstDot) && firstDot == '.'
                && lexer.TryEatCurrentCharacter(out var secondDot) && secondDot == '.')
            {
                // Then a second number, immediately followed by the closing '}'
                var upperText = lexer.TryLexNumber();
                if (upperText is not null && !lexer.IsDone && lexer.Lex() == TokenKind.CloseCurly)
                {
                    return (lowerText, upperText);
                }
            }
        }

        // Not a number range: rewind so the caller can try a different interpretation
        lexer.Position = entryPosition;
        return null;
    }
 
    /// <summary>
    /// Compiles the remainder of a <c>[...]</c> character class into <paramref name="sb"/>. The caller has
    /// already consumed the opening '[' and appended it to <paramref name="sb"/>.
    /// </summary>
    /// <param name="lexer">Character source, positioned just after the opening '['.</param>
    /// <param name="sb">Receives the regular-expression text of the class, including the closing ']'.</param>
    /// <returns>
    /// <see langword="true"/> once the closing ']' has been emitted; <see langword="false"/> if the input ends
    /// before the class is closed or directly after a '\'.
    /// </returns>
    private static bool TryCompileCharacterClass(ref Lexer lexer, StringBuilder sb)
    {
        // Inside the brackets four characters get special treatment:
        //   '!'  directly after '[' negates the class
        //   '-'  denotes a range between the previous and next characters
        //   '\'  escapes the following character
        //   ']'  ends the class
        // Everything else matches itself.
        if (!lexer.IsDone && lexer.CurrentCharacter == '!')
        {
            // Regex spells negation '^' where the glob spells it '!'
            lexer.Position++;
            sb.Append('^');
        }

        while (!lexer.IsDone)
        {
            var ch = lexer.EatCurrentCharacter();

            if (ch == ']')
            {
                // End of the class
                sb.Append(ch);
                return true;
            }

            if (ch == '-')
            {
                // A range is written the same way in regex, so copy it as-is
                sb.Append(ch);
                continue;
            }

            if (ch == '\\')
            {
                if (lexer.IsDone)
                {
                    // A trailing backslash has nothing to escape
                    return false;
                }

                // NOTE(review): the escaped character is copied verbatim after the backslash, so an
                // escaped letter such as 'd' reaches the regex as "\d" - confirm this is intended.
                sb.Append('\\');
                sb.Append(lexer.EatCurrentCharacter());
                continue;
            }

            // Ordinary character: escape anything the regex syntax would treat specially
            sb.Append(Regex.Escape(ch.ToString()));
        }

        // Ran out of input before finding the closing bracket
        return false;
    }
 
    /// <summary>
    /// Compiles a <c>{a,b,...}</c> choice into a non-capturing regex alternation appended to
    /// <paramref name="sb"/>.
    /// </summary>
    /// <param name="lexer">Token source, positioned at the opening '{'.</param>
    /// <param name="sb">Receives <c>(?:</c>, the alternatives separated by '|', and the closing ')'.</param>
    /// <param name="numberRangePairs">Passed through to collect number ranges nested in the alternatives.</param>
    /// <returns>
    /// <see langword="true"/> if the choice was compiled through its closing '}'; otherwise
    /// <see langword="false"/>.
    /// </returns>
    private static bool TryCompileChoice(
        ref Lexer lexer,
        StringBuilder sb,
        ArrayBuilder<(int, int)> numberRangePairs)
    {
        if (lexer.Lex() != TokenKind.OpenCurly)
        {
            return false;
        }

        // The alternatives are wrapped in a non-capturing group
        sb.Append("(?:");

        // The lexer now sits just past the '{'. Compile one nested <path-list> per
        // iteration; each successful one stops right after a ',' or a '}'.
        while (TryCompilePathList(ref lexer, sb, parsingChoice: true, numberRangePairs))
        {
            var terminator = lexer[lexer.Position - 1];
            switch (terminator)
            {
                case ',':
                    // Another alternative follows
                    sb.Append('|');
                    break;

                case '}':
                    // End of the choice: close the non-capturing group
                    sb.Append(')');
                    return true;

                default:
                    throw ExceptionUtilities.UnexpectedValue(terminator);
            }
        }

        // A nested path list failed to compile, so the whole choice fails
        return false;
    }
}
// File: src\Workspaces\SharedUtilitiesAndExtensions\Compiler\Core\EditorConfig\Parsing\Sections\SectionMatcher_TryParseSection.cs
// Project: src\src\CodeStyle\Core\Analyzers\Microsoft.CodeAnalysis.CodeStyle.csproj (Microsoft.CodeAnalysis.CodeStyle)