// File: src\Workspaces\SharedUtilitiesAndExtensions\Compiler\Core\EmbeddedLanguages\VirtualChars\AbstractVirtualCharService.cs
// Web Access
// Project: src\src\RoslynAnalyzers\PerformanceSensitiveAnalyzers\Core\Microsoft.CodeAnalysis.PerformanceSensitiveAnalyzers.csproj (Microsoft.CodeAnalysis.PerformanceSensitiveAnalyzers)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Diagnostics;
using System.Text;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.LanguageService;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
 
internal abstract partial class AbstractVirtualCharService : IVirtualCharService
{
    public abstract bool TryGetEscapeCharacter(VirtualChar ch, out char escapedChar);

    /// <summary>
    /// Language-specific syntax facts.  Used by the debug-only invariant checks to look up the raw kinds of the
    /// string, UTF-8 string and character literal tokens.
    /// </summary>
    protected abstract ISyntaxFacts SyntaxFacts { get; }

    /// <summary>
    /// Language-specific conversion of <paramref name="token"/> into its virtual characters.  Called by
    /// <see cref="TryConvertToVirtualChars"/> once it has established that the token contains no diagnostics.
    /// </summary>
    protected abstract VirtualCharGreenSequence TryConvertToVirtualCharsWorker(SyntaxToken token);

    /// <summary>
    /// Consulted by the debug-only invariant checks.  When this returns <see langword="true"/> consecutive virtual
    /// char spans only have to be ordered; otherwise they have to be touching.
    /// </summary>
    protected abstract bool IsMultiLineRawStringToken(SyntaxToken token);

    /// <summary>
    /// Returns <see langword="true"/> if the next two characters at <c>tokenText[index]</c> are <c>{{</c> or
    /// <c>}}</c>.
    /// </summary>
    /// <param name="tokenText">The text being examined.</param>
    /// <param name="index">Index of the first of the two characters to examine.</param>
    /// <param name="width">Set to <c>2</c> when a doubled brace was found, and to <c>0</c> otherwise (including when
    /// there is no character after <paramref name="index"/>).</param>
    protected static bool IsLegalBraceEscape(
        string tokenText, int index, out int width)
    {
        if (index + 1 < tokenText.Length)
        {
            var ch = tokenText[index];
            var next = tokenText[index + 1];
            if ((ch == '{' && next == '{') ||
                (ch == '}' && next == '}'))
            {
                width = 2;
                return true;
            }
        }

        width = 0;
        return false;
    }

    /// <summary>
    /// Converts <paramref name="token"/> into a <see cref="VirtualCharSequence"/> positioned at the token's span
    /// start.  Returns <see langword="default"/> without doing any work if the token contains diagnostics.
    /// </summary>
    public VirtualCharSequence TryConvertToVirtualChars(SyntaxToken token)
    {
        // We don't process any strings that contain diagnostics in it.  That means that we can
        // trust that all the string's contents (most importantly, the escape sequences) are well
        // formed.
        if (token.ContainsDiagnostics)
            return default;

        var greenSequence = TryConvertToVirtualCharsWorker(token);
        var result = new VirtualCharSequence(token.SpanStart, greenSequence);
        CheckInvariants(token, result);

        return result;
    }

    /// <summary>
    /// Debug-only sanity checks that <paramref name="result"/> is consistent with <paramref name="token"/>: the
    /// produced string matches the token's value text (for the token kinds where that applies), and the virtual
    /// char spans start, follow each other and end where expected.
    /// </summary>
    [Conditional("DEBUG")]
    private void CheckInvariants(SyntaxToken token, VirtualCharSequence result)
    {
        // Do some invariant checking to make sure we processed the string token the same
        // way the C# and VB compilers did.
        if (result.IsDefault)
            return;

        var syntaxKinds = this.SyntaxFacts.SyntaxKinds;
        var isStringOrCharLiteral =
            token.RawKind == syntaxKinds.StringLiteralToken ||
            token.RawKind == syntaxKinds.CharacterLiteralToken;
        var isUtf8StringLiteral = token.RawKind == syntaxKinds.Utf8StringLiteralToken;

        // Ensure that we properly broke up the token into a sequence of characters that matches what the compiler did.
        // Note: we don't do this for all syntaxKinds.  For example an InterpolatedStringTextToken does not do the
        // ValueText processing that a StringLiteralToken does.  So, for example, $"{{" will have a ValueText of "{{"
        // not "{" which might otherwise be expected.
        if (isStringOrCharLiteral || isUtf8StringLiteral)
        {
            var expectedValueText = token.ValueText;
            var actualValueText = result.CreateString();
            Debug.Assert(expectedValueText == actualValueText);
        }

        if (result.Length == 0)
            return;

        var currentVC = result[0];
        Debug.Assert(currentVC.Span.Start >= token.SpanStart, "First span has to start after the start of the string token");
        if (isStringOrCharLiteral)
        {
            Debug.Assert(currentVC.Span.Start == token.SpanStart + 1 ||
                            currentVC.Span.Start == token.SpanStart + 2, "First span should start on the second or third char of the string.");
        }

        // For multi-line raw strings the spans only need to be ordered.  For every other token the spans must be
        // exactly adjacent to one another.
        var spansOnlyNeedOrdering = IsMultiLineRawStringToken(token);
        for (var i = 1; i < result.Length; i++)
        {
            var nextVC = result[i];
            if (spansOnlyNeedOrdering)
                Debug.Assert(currentVC.Span.End <= nextVC.Span.Start, "Virtual character spans have to be ordered.");
            else
                Debug.Assert(currentVC.Span.End == nextVC.Span.Start, "Virtual character spans have to be touching.");

            currentVC = nextVC;
        }

        var lastVC = result[^1];

        if (isStringOrCharLiteral)
        {
            Debug.Assert(lastVC.Span.End == token.Span.End - "\"".Length, "Last span has to end right before the end of the string token.");
        }
        else if (isUtf8StringLiteral)
        {
            Debug.Assert(lastVC.Span.End == token.Span.End - "\"u8".Length, "Last span has to end right before the end of the string token.");
        }
    }

    /// <summary>
    /// Helper to convert simple string literals that escape quotes by doubling them.  This is
    /// how normal VB literals and c# verbatim string literals work.
    /// </summary>
    /// <param name="token">The token to convert.  Must not contain diagnostics.</param>
    /// <param name="startDelimiter">The start characters string.  " in VB and @" in C#</param>
    /// <param name="endDelimiter">The characters that end the token's text.  They are excluded from the
    /// result.</param>
    /// <param name="escapeBraces">When <see langword="true"/>, <c>{{</c> and <c>}}</c> are each converted to a single
    /// brace character, and a brace that is not doubled causes <see langword="default"/> to be returned.  Both
    /// delimiters are expected to be empty in this mode.</param>
    /// <returns>The virtual characters between the delimiters, or <see langword="default"/> if the token text does
    /// not have the expected shape.</returns>
    protected static VirtualCharGreenSequence TryConvertSimpleDoubleQuoteString(
        SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
    {
        Debug.Assert(!token.ContainsDiagnostics);

        if (escapeBraces)
        {
            Debug.Assert(startDelimiter == "");
            Debug.Assert(endDelimiter == "");
        }

        var tokenText = token.Text;

        if (startDelimiter.Length > 0 && !tokenText.StartsWith(startDelimiter, StringComparison.Ordinal))
        {
            Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
            return default;
        }

        if (endDelimiter.Length > 0 && !tokenText.EndsWith(endDelimiter, StringComparison.Ordinal))
        {
            Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
            return default;
        }

        var startIndexInclusive = startDelimiter.Length;
        var endIndexExclusive = tokenText.Length - endDelimiter.Length;

        var result = ImmutableSegmentedList.CreateBuilder<VirtualCharGreen>();

        for (var index = startIndexInclusive; index < endIndexExclusive;)
        {
            var ch = tokenText[index];

            // A doubled quote is a single escaped quote character.  The lookahead is bounds-checked because, when
            // there is no end delimiter, 'index' can be the last character of the text.  A lone trailing quote is
            // then handled as an ordinary character below instead of reading past the end of the text.
            if (ch == '"' && index + 1 < tokenText.Length && tokenText[index + 1] == '"')
            {
                result.Add(new VirtualCharGreen('"', offset: index, width: 2));
                index += 2;
                continue;
            }

            if (escapeBraces && IsOpenOrCloseBrace(ch))
            {
                // A brace that is not doubled is not something we know how to convert.
                if (!IsLegalBraceEscape(tokenText, index, out var width))
                    return default;

                result.Add(new VirtualCharGreen(ch, offset: index, width: width));
                index += width;
                continue;
            }

            index += ConvertTextAtIndexToVirtualChar(tokenText, index, result);
        }

        return CreateVirtualCharSequence(
            tokenText, startIndexInclusive, endIndexExclusive, result);
    }

    /// <summary>
    /// Adds the character at <paramref name="index"/> of <paramref name="tokenText"/> to <paramref name="result"/>
    /// as a virtual char of width 1.  Returns the number of characters to jump forward, which is always 1.
    /// </summary>
    protected static int ConvertTextAtIndexToVirtualChar(string tokenText, int index, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
        => ConvertTextAtIndexToVirtualChar(tokenText, index, new StringTextInfo(), result);

    /// <summary>
    /// Adds the character at <paramref name="index"/> of <paramref name="tokenText"/> to <paramref name="result"/>
    /// as a virtual char of width 1.  Returns the number of characters to jump forward, which is always 1.
    /// </summary>
    protected static int ConvertTextAtIndexToVirtualChar(SourceText tokenText, int index, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
        => ConvertTextAtIndexToVirtualChar(tokenText, index, new SourceTextTextInfo(), result);

    /// <summary>
    /// Shared implementation of the overloads above.  <paramref name="info"/> supplies the way to read a character
    /// out of <paramref name="tokenText"/>.
    /// </summary>
    private static int ConvertTextAtIndexToVirtualChar<T, TTextInfo>(
        T tokenText, int index, TTextInfo info, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
        where TTextInfo : struct, ITextInfo<T>
    {
        var ch = info.Get(tokenText, index);
        result.Add(new VirtualCharGreen(ch, offset: index, width: 1));
        return 1;
    }

    /// <summary>
    /// Returns <see langword="true"/> if <paramref name="ch"/> is <c>{</c> or <c>}</c>.
    /// </summary>
    protected static bool IsOpenOrCloseBrace(char ch)
        => ch is '{' or '}';

    /// <summary>
    /// Produces the final sequence for the characters of <paramref name="tokenText"/> in the range
    /// [<paramref name="startIndexInclusive"/>, <paramref name="endIndexExclusive"/>).
    /// </summary>
    /// <param name="tokenText">The full text of the token.</param>
    /// <param name="startIndexInclusive">Index of the first character that was converted.</param>
    /// <param name="endIndexExclusive">Index just past the last character that was converted.</param>
    /// <param name="result">The virtual chars that were built for that range.</param>
    protected static VirtualCharGreenSequence CreateVirtualCharSequence(
        string tokenText,
        int startIndexInclusive,
        int endIndexExclusive,
        ImmutableSegmentedList<VirtualCharGreen>.Builder result)
    {
        // Check if we actually needed to create any special virtual chars.
        // if not, we can avoid the entire array allocation and just wrap
        // the text of the token and pass that back.

        // One virtual char per source character means nothing in the range was an escape.
        var textLength = endIndexExclusive - startIndexInclusive;
        if (textLength == result.Count)
        {
            var sequence = VirtualCharGreenSequence.Create(tokenText);
            return sequence[startIndexInclusive..endIndexExclusive];
        }

        return VirtualCharGreenSequence.Create(result.ToImmutable());
    }
}