// File: EmbeddedLanguages\StackFrame\StackFrameLexer.cs
// Web Access
// Project: src\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Utilities;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.StackFrame;
 
using StackFrameToken = EmbeddedSyntaxToken<StackFrameKind>;
using StackFrameTrivia = EmbeddedSyntaxTrivia<StackFrameKind>;
 
internal struct StackFrameLexer
{
    /// <summary>
    /// The complete sequence of characters being lexed. Assigned once, at construction.
    /// </summary>
    public readonly VirtualCharSequence Text;

    /// <summary>
    /// Index into <see cref="Text"/> of the next character to scan. It can only be
    /// changed from inside the lexer.
    /// </summary>
    public int Position { get; private set; }

    private StackFrameLexer(string text) : this(VirtualCharSequence.Create(0, text))
    {
    }

    private StackFrameLexer(VirtualCharSequence text) : this()
    {
        Text = text;
    }
 
    /// <summary>
    /// Creates a lexer over <paramref name="text"/>.
    /// </summary>
    /// <returns>
    /// The lexer, or <see langword="null"/> if the text contains a carriage return or line feed.
    /// </returns>
    public static StackFrameLexer? TryCreate(string text)
    {
        for (var index = 0; index < text.Length; index++)
        {
            if (text[index] is '\r' or '\n')
            {
                return null;
            }
        }

        return new StackFrameLexer(text);
    }
 
    /// <summary>
    /// Creates a lexer over an existing character sequence.
    /// </summary>
    /// <returns>
    /// The lexer, or <see langword="null"/> if any character is a carriage return or line feed.
    /// </returns>
    public static StackFrameLexer? TryCreate(VirtualCharSequence text)
    {
        foreach (var virtualChar in text)
        {
            if (virtualChar.Value is '\r' or '\n')
            {
                return null;
            }
        }

        return new StackFrameLexer(text);
    }
 
    /// <summary>
    /// The character at <see cref="Position"/>, or the default <see cref="VirtualChar"/>
    /// when <see cref="Position"/> is not below the length of <see cref="Text"/>.
    /// </summary>
    public readonly VirtualChar CurrentChar
    {
        get
        {
            if (Position >= Text.Length)
            {
                return default;
            }

            return Text[Position];
        }
    }
 
    /// <summary>
    /// Returns the characters from <paramref name="start"/> up to, but not including, <see cref="Position"/>.
    /// </summary>
    public readonly VirtualCharSequence GetSubSequenceToCurrentPos(int start)
    {
        return Text.GetSubSequence(TextSpan.FromBounds(start, Position));
    }
 
    /// <summary>
    /// Returns the characters of <see cref="Text"/> covered by the span built from the
    /// bounds <paramref name="start"/> and <paramref name="end"/>.
    /// </summary>
    public readonly VirtualCharSequence GetSubSequence(int start, int end)
    {
        var span = TextSpan.FromBounds(start, end);
        return Text.GetSubSequence(span);
    }
 
    /// <summary>
    /// Consumes everything from the current position to the end of the text as a single
    /// <see cref="StackFrameKind.SkippedTextTrivia"/>.
    /// </summary>
    /// <returns>
    /// The trivia, or <see langword="null"/> if the lexer is already at the end of the text.
    /// </returns>
    public StackFrameTrivia? TryScanRemainingTrivia()
    {
        var remainderStart = Position;
        if (remainderStart == Text.Length)
        {
            return null;
        }

        // Jump straight to the end; whatever was left becomes one piece of skipped text.
        Position = Text.Length;

        var remainder = GetSubSequenceToCurrentPos(remainderStart);
        return CreateTrivia(StackFrameKind.SkippedTextTrivia, remainder);
    }
 
    /// <summary>
    /// Scans an identifier at the current position without scanning any surrounding trivia.
    /// </summary>
    public StackFrameToken? TryScanIdentifier()
    {
        return TryScanIdentifier(
            scanAtTrivia: false,
            scanLeadingWhitespace: false,
            scanTrailingWhitespace: false);
    }
 
    /// <summary>
    /// Scans an identifier, or a <c>.ctor</c>/<c>.cctor</c> constructor name, at the current
    /// position, optionally scanning trivia around it.
    /// </summary>
    /// <param name="scanAtTrivia">
    /// When <see langword="true"/>, tries to scan <see cref="StackFrameKind.AtTrivia"/> before the name.
    /// </param>
    /// <param name="scanLeadingWhitespace">
    /// When <see langword="true"/>, scans whitespace before the name as leading trivia.
    /// </param>
    /// <param name="scanTrailingWhitespace">
    /// When <see langword="true"/>, scans whitespace after an identifier as trailing trivia.
    /// </param>
    /// <returns>
    /// An <see cref="StackFrameKind.IdentifierToken"/> or <see cref="StackFrameKind.ConstructorToken"/>
    /// carrying any leading trivia that was scanned, or <see langword="null"/> (with
    /// <see cref="Position"/> restored) when neither is found.
    /// </returns>
    public StackFrameToken? TryScanIdentifier(bool scanAtTrivia, bool scanLeadingWhitespace, bool scanTrailingWhitespace)
    {
        var originalPosition = Position;
        var atTrivia = scanAtTrivia ? TryScanAtTrivia() : null;
        var leadingWhitespace = scanLeadingWhitespace ? TryScanWhiteSpace() : null;

        var startPosition = Position;
        var ch = CurrentChar;
        if (!UnicodeCharacterUtilities.IsIdentifierStartCharacter((char)ch.Value))
        {
            var ctor = TryScanConstructor();
            if (ctor.HasValue)
            {
                // The trivia scanned above has already been consumed, so it must travel with the
                // constructor token; otherwise that text would be missing from the result.
                // Trailing whitespace is deliberately left unscanned here, as before.
                return ctor.Value.With(leadingTrivia: CreateTrivia(atTrivia, leadingWhitespace));
            }

            // If we scan only trivia but don't get an identifier, we want to make sure
            // to reset back to this original position to let the trivia be consumed
            // in some other fashion if necessary
            Position = originalPosition;
            return null;
        }

        // CurrentChar yields a default character at the end of the text, which ends the loop.
        while (UnicodeCharacterUtilities.IsIdentifierPartCharacter((char)ch.Value))
        {
            Position++;
            ch = CurrentChar;
        }

        var identifierSequence = GetSubSequenceToCurrentPos(startPosition);
        var trailingWhitespace = scanTrailingWhitespace ? TryScanWhiteSpace() : null;

        return CreateToken(
            StackFrameKind.IdentifierToken,
            leadingTrivia: CreateTrivia(atTrivia, leadingWhitespace),
            identifierSequence,
            trailingTrivia: CreateTrivia(trailingWhitespace));
    }
 
    /// <summary>
    /// Builds a single-character token for the character at <see cref="Position"/> without
    /// advancing. At the end of the text an empty <see cref="StackFrameKind.EndOfFrame"/>
    /// token is returned instead.
    /// </summary>
    public readonly StackFrameToken CurrentCharAsToken()
    {
        return Position == Text.Length
            ? CreateToken(StackFrameKind.EndOfFrame, VirtualCharSequence.Empty)
            : CreateToken(GetKind(Text[Position]), Text.GetSubSequence(new TextSpan(Position, 1)));
    }
 
    /// <summary>
    /// Advances the position by one when the current character is of the given
    /// <paramref name="kind"/>. Trailing whitespace is not scanned.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> if the position was incremented
    /// </returns>
    public bool ScanCurrentCharAsTokenIfMatch(StackFrameKind kind, out StackFrameToken token)
    {
        return ScanCurrentCharAsTokenIfMatch(kind, scanTrailingWhitespace: false, out token);
    }
 
    /// <summary>
    /// Progress the position by one if the current character
    /// matches the kind.
    /// </summary>
    /// <param name="kind">The kind the current character must map to.</param>
    /// <param name="scanTrailingWhitespace">
    /// When <see langword="true"/>, whitespace following the matched character is scanned and
    /// attached to the token as trailing trivia.
    /// </param>
    /// <param name="token">
    /// The single-character token on success; <see langword="default"/> otherwise.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the position was incremented
    /// </returns>
    public bool ScanCurrentCharAsTokenIfMatch(StackFrameKind kind, bool scanTrailingWhitespace, out StackFrameToken token)
    {
        // At the end of the text there is no current character. Without this guard the default
        // character maps to SkippedTextTrivia, and a match on that kind would move Position
        // past the end of the text.
        if (Position >= Text.Length || GetKind(CurrentChar) != kind)
        {
            token = default;
            return false;
        }

        token = CurrentCharAsToken();
        Position++;

        if (scanTrailingWhitespace)
        {
            token = token.With(trailingTrivia: CreateTrivia(TryScanWhiteSpace()));
        }

        return true;
    }
 
    /// <summary>
    /// Progress the position by one if the kind of the current character
    /// satisfies <paramref name="isMatch"/>.
    /// </summary>
    /// <param name="isMatch">Predicate applied to the kind of the current character.</param>
    /// <param name="token">
    /// The single-character token on success; <see langword="default"/> otherwise.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the position was incremented
    /// </returns>
    public bool ScanCurrentCharAsTokenIfMatch(Func<StackFrameKind, bool> isMatch, out StackFrameToken token)
    {
        // At the end of the text there is no current character to match. Without this guard a
        // predicate accepting SkippedTextTrivia (the kind of the default character) would move
        // Position past the end of the text.
        if (Position >= Text.Length || !isMatch(GetKind(CurrentChar)))
        {
            token = default;
            return false;
        }

        token = CurrentCharAsToken();
        Position++;
        return true;
    }
 
    /// <summary>
    /// Tries the "at" word of each entry in <c>s_languages</c>, in order, at the current
    /// position and consumes the first one that matches.
    /// </summary>
    /// <returns>
    /// The matched <see cref="StackFrameKind.AtTrivia"/>, or <see langword="null"/> if none matched.
    /// </returns>
    public StackFrameTrivia? TryScanAtTrivia()
    {
        foreach (var language in s_languages)
        {
            if (TryScanStringTrivia(language.At, StackFrameKind.AtTrivia) is { } atTrivia)
            {
                return atTrivia;
            }
        }

        return null;
    }
 
    /// <summary>
    /// Tries the "in" word of each entry in <c>s_languages</c>, in order, at the current
    /// position and consumes the first one that matches.
    /// </summary>
    /// <returns>
    /// The matched <see cref="StackFrameKind.InTrivia"/>, or <see langword="null"/> if none matched.
    /// </returns>
    public StackFrameTrivia? TryScanInTrivia()
    {
        foreach (var language in s_languages)
        {
            if (TryScanStringTrivia(language.In, StackFrameKind.InTrivia) is { } inTrivia)
            {
                return inTrivia;
            }
        }

        return null;
    }
 
    /// <summary>
    /// Tries the "line" word of each entry in <c>s_languages</c>, in order, at the current
    /// position and consumes the first one that matches.
    /// </summary>
    /// <returns>
    /// The matched <see cref="StackFrameKind.LineTrivia"/>, or <see langword="null"/> if none matched.
    /// </returns>
    public StackFrameTrivia? TryScanLineTrivia()
    {
        foreach (var language in s_languages)
        {
            if (TryScanStringTrivia(language.Line, StackFrameKind.LineTrivia) is { } lineTrivia)
            {
                return lineTrivia;
            }
        }

        return null;
    }
 
    /// <summary>
    /// Attempts to parse <see cref="StackFrameKind.InTrivia"/> and a path following https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
    /// Uses <see cref="FileInfo"/> as a tool to decide how far the path extends.
    /// </summary>
    /// <returns>
    /// <c>Empty</c> when no "in" trivia is present; <c>Abort</c> when the trivia was scanned but no
    /// character after it was accepted as a path (the position is not restored in that case);
    /// otherwise a <see cref="StackFrameKind.PathToken"/> with the "in" trivia as leading trivia.
    /// </returns>
    public Result<StackFrameToken> TryScanPath()
    {
        var inTrivia = TryScanInTrivia();
        if (!inTrivia.HasValue)
        {
            return Result<StackFrameToken>.Empty;
        }

        var pathStart = Position;

        // Path needs to do a look ahead to determine if adding the next character
        // invalidates the path. Stop as soon as it does.
        //
        // This keeps the complex rules for path validation inside FileInfo by calling it directly.
        // We can't simply check all invalid characters for a path because location in the path is
        // important, and we're not in the business of validating if something is correctly a file.
        // It's cheap enough to let the FileInfo constructor do that. The downside is that we are
        // constructing a new object with every pass. If this becomes problematic we can revisit
        // and find a more optimized pattern to handle all of the edge cases.
        //
        // Example: C:\my\path \:other
        //                      ^-- the ":" breaks the path, but we can't simply break on all ":" (which is included in the invalid characters for Path)
        //
        for (; Position < Text.Length; Position++)
        {
            var candidate = GetSubSequence(pathStart, Position + 1).CreateString();

            // Only construction matters; the instance itself is never used.
            // NOTE(review): presumably PerformIO yields the fallback value (false) when the
            // delegate throws - confirm against IOUtilities.
            var accepted = IOUtilities.PerformIO(() =>
            {
                _ = new FileInfo(candidate);
                return true;
            }, false);

            if (!accepted)
            {
                break;
            }
        }

        if (Position == pathStart)
        {
            return Result<StackFrameToken>.Abort;
        }

        return CreateToken(StackFrameKind.PathToken, inTrivia.ToImmutableArray(), GetSubSequenceToCurrentPos(pathStart));
    }
 
    /// <summary>
    /// Scans the <see cref="StackFrameKind.LineTrivia"/>, the digits after it, and then
    /// everything left in the text as <see cref="StackFrameKind.SkippedTextTrivia"/>.
    /// </summary>
    /// <returns>
    /// The number token with the line trivia leading and the remaining text trailing, or
    /// <see langword="null"/> when either the line trivia or the digits are missing. The
    /// position is not restored when the digits are missing.
    /// </returns>
    public StackFrameToken? TryScanRequiredLineNumber()
    {
        var lineTrivia = TryScanLineTrivia();
        if (lineTrivia is null)
        {
            return null;
        }

        if (TryScanNumbers() is not { } lineNumber)
        {
            return null;
        }

        var skippedRemainder = TryScanRemainingTrivia();

        return lineNumber.With(
            leadingTrivia: lineTrivia.ToImmutableArray(),
            trailingTrivia: skippedRemainder.ToImmutableArray());
    }
 
    /// <summary>
    /// Scans a run of one or more ASCII digits at the current position.
    /// </summary>
    /// <returns>
    /// A <see cref="StackFrameKind.NumberToken"/> covering the digits, or <see langword="null"/>
    /// if the current character is not a digit.
    /// </returns>
    public StackFrameToken? TryScanNumbers()
    {
        var firstDigit = Position;
        while (IsNumber(CurrentChar))
        {
            Position++;
        }

        // Position only moves forward, so "did not move" means no digit was found.
        return Position > firstDigit
            ? CreateToken(StackFrameKind.NumberToken, GetSubSequenceToCurrentPos(firstDigit))
            : null;
    }
 
    /// <summary>
    /// Scans a separator of the form <c>g__</c>, where <c>g</c> is a single ASCII letter
    /// (a GeneratedNameKind character) followed by two underscores.
    /// </summary>
    /// <returns>
    /// A <see cref="StackFrameKind.GeneratedNameSeparatorToken"/> covering all three characters,
    /// or <c>Abort</c> when the form does not match. The position is not restored on abort.
    /// </returns>
    public Result<StackFrameToken> TryScanRequiredGeneratedNameSeparator()
    {
        var separatorStart = Position;

        if (!IsAsciiAlphaCharacter(CurrentChar))
        {
            return Result<StackFrameToken>.Abort;
        }

        Position++;

        if (!IsStringAtPosition("__"))
        {
            return Result<StackFrameToken>.Abort;
        }

        Position += 2;

        return CreateToken(StackFrameKind.GeneratedNameSeparatorToken, GetSubSequenceToCurrentPos(separatorStart));
    }
 
    /// <summary>
    /// In a generated name for local functions the final portion is {numeric}_{numeric}
    /// </summary>
    /// <returns>
    /// A <see cref="StackFrameKind.GeneratedNameSuffixToken"/> covering both numbers and the
    /// underscore, or <c>Abort</c> when any of the three parts is missing. The position is not
    /// restored on abort.
    /// </returns>
    public Result<StackFrameToken> TryScanRequiredGeneratedNameSuffix()
    {
        var suffixStart = Position;

        if (!TryScanNumbers().HasValue)
        {
            return Result<StackFrameToken>.Abort;
        }

        if (!IsStringAtPosition("_"))
        {
            return Result<StackFrameToken>.Abort;
        }

        Position++;

        if (!TryScanNumbers().HasValue)
        {
            return Result<StackFrameToken>.Abort;
        }

        return CreateToken(StackFrameKind.GeneratedNameSuffixToken, GetSubSequenceToCurrentPos(suffixStart));
    }
 
    /// <summary>
    /// Whether <paramref name="ch"/> is one of the blank characters: tab, line feed,
    /// form feed, carriage return or space.
    /// </summary>
    public static bool IsBlank(VirtualChar ch)
    {
        // List taken from the native regex parser.
        return ch.Value is '\u0009' or '\u000A' or '\u000C' or '\u000D' or ' ';
    }
 
    /// <summary>
    /// Whether <paramref name="ch"/> is an ASCII letter, <c>a</c>-<c>z</c> or <c>A</c>-<c>Z</c>.
    /// </summary>
    private static bool IsAsciiAlphaCharacter(VirtualChar ch)
    {
        var value = ch.Value;
        var isLower = value >= 'a' && value <= 'z';
        var isUpper = value >= 'A' && value <= 'Z';
        return isLower || isUpper;
    }
 
    /// <summary>
    /// Creates a token of the given kind with no trivia.
    /// </summary>
    public static StackFrameToken CreateToken(StackFrameKind kind, VirtualCharSequence virtualChars)
    {
        return CreateToken(kind, [], virtualChars, []);
    }

    /// <summary>
    /// Creates a token of the given kind with leading trivia only.
    /// </summary>
    public static StackFrameToken CreateToken(StackFrameKind kind, ImmutableArray<StackFrameTrivia> leadingTrivia, VirtualCharSequence virtualChars)
    {
        return CreateToken(kind, leadingTrivia, virtualChars, []);
    }

    /// <summary>
    /// Creates a token of the given kind with leading and trailing trivia. The token is
    /// created with no diagnostics and a null value.
    /// </summary>
    public static StackFrameToken CreateToken(StackFrameKind kind, ImmutableArray<StackFrameTrivia> leadingTrivia, VirtualCharSequence virtualChars, ImmutableArray<StackFrameTrivia> trailingTrivia)
    {
        return new StackFrameToken(kind, leadingTrivia, virtualChars, trailingTrivia, [], value: null!);
    }
 
    /// <summary>
    /// Creates trivia of the given kind with no diagnostics.
    /// </summary>
    private static StackFrameTrivia CreateTrivia(StackFrameKind kind, VirtualCharSequence virtualChars)
    {
        return CreateTrivia(kind, virtualChars, []);
    }

    /// <summary>
    /// Creates trivia of the given kind carrying the supplied diagnostics.
    /// </summary>
    private static StackFrameTrivia CreateTrivia(StackFrameKind kind, VirtualCharSequence virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
    {
        // Empty trivia is not supported in StackFrames
        Debug.Assert(virtualChars.Length > 0);

        return new StackFrameTrivia(kind, virtualChars, diagnostics);
    }
 
    /// <summary>
    /// Collects the trivia that have a value into an immutable array, keeping their order.
    /// Entries that are <see langword="null"/> are left out.
    /// </summary>
    private static ImmutableArray<StackFrameTrivia> CreateTrivia(params StackFrameTrivia?[] triviaArray)
    {
        using var _ = ArrayBuilder<StackFrameTrivia>.GetInstance(out var present);

        foreach (var candidate in triviaArray)
        {
            if (candidate is { } trivia)
            {
                present.Add(trivia);
            }
        }

        return present.ToImmutableAndClear();
    }
 
    /// <summary>
    /// Whether the text at <see cref="Position"/> begins with <paramref name="val"/>.
    /// </summary>
    private readonly bool IsStringAtPosition(string val)
    {
        return IsAtStartOfText(Position, val);
    }
 
    /// <summary>
    /// Whether the characters of <see cref="Text"/> starting at <paramref name="position"/>
    /// match <paramref name="val"/> character by character. Returns <see langword="false"/>
    /// if the text ends before all of <paramref name="val"/> has been compared.
    /// </summary>
    private readonly bool IsAtStartOfText(int position, string val)
    {
        var offset = 0;
        foreach (var expected in val)
        {
            var index = position + offset;

            // Check the bound first so the indexer is never read past the end.
            if (index >= Text.Length || Text[index] != expected)
            {
                return false;
            }

            offset++;
        }

        return true;
    }
 
    /// <summary>
    /// Consumes <paramref name="valueToLookFor"/> as trivia of kind <paramref name="triviaKind"/>
    /// when the text at the current position begins with it.
    /// </summary>
    /// <returns>
    /// The trivia, or <see langword="null"/> (position unchanged) when the text does not match.
    /// </returns>
    private StackFrameTrivia? TryScanStringTrivia(string valueToLookFor, StackFrameKind triviaKind)
    {
        if (!IsStringAtPosition(valueToLookFor))
        {
            return null;
        }

        var matchStart = Position;
        Position = matchStart + valueToLookFor.Length;

        return CreateTrivia(triviaKind, GetSubSequenceToCurrentPos(matchStart));
    }
 
    /// <summary>
    /// Consumes <paramref name="valueToLookFor"/> as a token of kind <paramref name="tokenKind"/>
    /// when the text at the current position begins with it.
    /// </summary>
    /// <returns>
    /// The token, or <see langword="null"/> (position unchanged) when the text does not match.
    /// </returns>
    private StackFrameToken? TryScanStringToken(string valueToLookFor, StackFrameKind tokenKind)
    {
        if (!IsStringAtPosition(valueToLookFor))
        {
            return null;
        }

        var matchStart = Position;
        Position = matchStart + valueToLookFor.Length;

        return CreateToken(tokenKind, GetSubSequenceToCurrentPos(matchStart));
    }
 
    /// <summary>
    /// Scans a run of one or more blank characters (see <see cref="IsBlank"/>) at the current position.
    /// </summary>
    /// <returns>
    /// A <see cref="StackFrameKind.WhitespaceTrivia"/> covering the run, or <see langword="null"/>
    /// if the current character is not blank.
    /// </returns>
    private StackFrameTrivia? TryScanWhiteSpace()
    {
        var firstBlank = Position;

        while (IsBlank(CurrentChar))
        {
            Position++;
        }

        // Position only moves forward, so "did not move" means nothing blank was found.
        return Position > firstBlank
            ? CreateTrivia(StackFrameKind.WhitespaceTrivia, GetSubSequenceToCurrentPos(firstBlank))
            : null;
    }
 
    /// <summary>
    /// Scans for .ctor or .cctor as a ConstructorToken. <c>.ctor</c> is tried first.
    /// </summary>
    private StackFrameToken? TryScanConstructor()
    {
        var ctor = TryScanStringToken(".ctor", StackFrameKind.ConstructorToken);
        if (ctor.HasValue)
        {
            return ctor;
        }

        return TryScanStringToken(".cctor", StackFrameKind.ConstructorToken);
    }
 
    /// <summary>
    /// Maps a single character to the <see cref="StackFrameKind"/> it is lexed as. Punctuation
    /// maps to its own token kind, blank characters to whitespace trivia, ASCII digits to
    /// <see cref="StackFrameKind.NumberToken"/>, and anything else to
    /// <see cref="StackFrameKind.SkippedTextTrivia"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="ch"/> is a line feed or carriage return.
    /// </exception>
    private static StackFrameKind GetKind(VirtualChar ch)
        => ch.Value switch
        {
            // TryCreate refuses text containing new lines, so these are not expected here.
            '\n' or '\r' => throw new InvalidOperationException(),

            '&' => StackFrameKind.AmpersandToken,
            '[' => StackFrameKind.OpenBracketToken,
            ']' => StackFrameKind.CloseBracketToken,
            '(' => StackFrameKind.OpenParenToken,
            ')' => StackFrameKind.CloseParenToken,
            '.' => StackFrameKind.DotToken,
            '+' => StackFrameKind.PlusToken,
            ',' => StackFrameKind.CommaToken,
            ':' => StackFrameKind.ColonToken,
            '=' => StackFrameKind.EqualsToken,
            '>' => StackFrameKind.GreaterThanToken,
            '<' => StackFrameKind.LessThanToken,
            '-' => StackFrameKind.MinusToken,
            '\'' => StackFrameKind.SingleQuoteToken,
            '`' => StackFrameKind.GraveAccentToken,
            '\\' => StackFrameKind.BackslashToken,
            '/' => StackFrameKind.ForwardSlashToken,
            '$' => StackFrameKind.DollarToken,
            '|' => StackFrameKind.PipeToken,

            _ when IsBlank(ch) => StackFrameKind.WhitespaceTrivia,
            _ when IsNumber(ch) => StackFrameKind.NumberToken,
            _ => StackFrameKind.SkippedTextTrivia,
        };
 
    /// <summary>
    /// Whether <paramref name="ch"/> is an ASCII digit, <c>0</c>-<c>9</c>.
    /// </summary>
    private static bool IsNumber(VirtualChar ch)
    {
        var value = ch.Value;
        return value >= '0' && value <= '9';
    }
 
    /// <summary>
    /// The literal words, including their surrounding spaces, that one display language uses
    /// for the "at", "in" and "line" parts of a stack frame.
    /// </summary>
    private readonly record struct Language(string At, string In, string Line);

    /// <summary>
    /// The word sets the lexer recognises. The scan methods try the entries in this order
    /// and stop at the first match.
    /// </summary>
    private static readonly ImmutableArray<Language> s_languages =
    [
        new Language(At: "at ", In: " in ", Line: "line "),
        new Language(At: "v ", In: " v ", Line: "řádek "),
        new Language(At: "bei ", In: " in ", Line: "Zeile "),
        new Language(At: "en ", In: " en ", Line: "línea "),
        new Language(At: "à ", In: " dans ", Line: "ligne "),
        new Language(At: "in ", In: " in ", Line: "riga "),
        new Language(At: "場所 ", In: " 場所 ", Line: "行 "),
        new Language(At: "위치: ", In: " 파일 ", Line: "줄 "),
        new Language(At: "w ", In: " w ", Line: "wiersz "),
        new Language(At: "em ", In: " na ", Line: "linha "),
        new Language(At: "в ", In: " в ", Line: "строка "),
        new Language(At: "在 ", In: " 位置 ", Line: "行号 "),
        new Language(At: "於 ", In: " 於 ", Line: " 行 "), // zh-Hant
    ];
}