|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Diagnostics;
using System.Linq;
using System.Text;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.CSharp.LanguageService;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.LanguageService;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars;
internal class CSharpVirtualCharService : AbstractVirtualCharService
{
/// <summary>
/// Shared instance of the C# virtual-char service.
/// </summary>
public static readonly IVirtualCharService Instance = new CSharpVirtualCharService();
/// <summary>
/// Pool of list builders reused by <see cref="TryConvertStringToVirtualChars"/> while it unescapes a token, so
/// that a new builder does not have to be created for every token that contains escapes. Users of a pooled
/// builder are expected to clear it before it goes back to the pool.
/// </summary>
private static readonly ObjectPool<ImmutableSegmentedList<VirtualCharGreen>.Builder> s_pooledBuilders = new(() => ImmutableSegmentedList.CreateBuilder<VirtualCharGreen>());
/// <summary>
/// Protected constructor: only this class (see <see cref="Instance"/>) and derived classes can create
/// instances.
/// </summary>
protected CSharpVirtualCharService()
{
}
/// <summary>
/// The syntax facts used by the base service; always the C# implementation.
/// </summary>
protected override ISyntaxFacts SyntaxFacts => CSharpSyntaxFacts.Instance;
/// <summary>
/// Determines whether <paramref name="token"/> belongs to a multi-line raw string: either it is itself a
/// (UTF-8) multi-line raw string literal token, or its grandparent is an interpolated string expression whose
/// start token is a multi-line raw string start token.
/// </summary>
protected override bool IsMultiLineRawStringToken(SyntaxToken token)
    => (token.Kind() is SyntaxKind.MultiLineRawStringLiteralToken or SyntaxKind.Utf8MultiLineRawStringLiteralToken)
        || (token.Parent?.Parent is InterpolatedStringExpressionSyntax { StringStartToken.RawKind: (int)SyntaxKind.InterpolatedMultiLineRawStringStartToken });
/// <summary>
/// Converts a C# string-like <paramref name="token"/> into its virtual-char sequence by dispatching on the
/// token kind to the matching conversion routine.
/// </summary>
/// <param name="token">The token to convert. Expected to carry no diagnostics.</param>
/// <returns>The converted sequence, or <see langword="default"/> when the token is inside a preprocessor
/// directive, is of a kind that is not handled here, or could not be converted.</returns>
protected override VirtualCharGreenSequence TryConvertToVirtualCharsWorker(SyntaxToken token)
{
    // String literals can appear inside C# preprocessor directives (for example as the path in a #line
    // directive), but they are not ordinary literals: the language performs no escaping in them, so a \ is
    // just a \. They are not verbatim strings either; they cannot contain a double quote or a newline.
    //
    // Mapping such a literal to virtual chars would be a trivial 1:1 mapping, but we don't even bother.
    // With no escapes there is nothing to classify, and these strings are never Regex/Json snippets, so
    // there is no feature that would benefit. Bail out and return nothing.
    if (IsInDirective(token.Parent))
        return default;

    Debug.Assert(!token.ContainsDiagnostics);

    var kind = token.Kind();

    if (kind is SyntaxKind.CharacterLiteralToken)
        return TryConvertStringToVirtualChars(token, "'", "'", escapeBraces: false);

    if (kind is SyntaxKind.StringLiteralToken or SyntaxKind.Utf8StringLiteralToken)
    {
        // The two kinds differ only in the closing delimiter (the UTF-8 form carries a u8 suffix).
        var closingDelimiter = kind is SyntaxKind.StringLiteralToken ? "\"" : "\"u8";

        if (token.IsVerbatimStringLiteral())
            return TryConvertVerbatimStringToVirtualChars(token, "@\"", closingDelimiter, escapeBraces: false);

        return TryConvertStringToVirtualChars(token, "\"", closingDelimiter, escapeBraces: false);
    }

    if (kind is SyntaxKind.SingleLineRawStringLiteralToken or SyntaxKind.Utf8SingleLineRawStringLiteralToken)
        return TryConvertSingleLineRawStringToVirtualChars(token);

    if (kind is SyntaxKind.MultiLineRawStringLiteralToken or SyntaxKind.Utf8MultiLineRawStringLiteralToken)
    {
        if (token.GetRequiredParent() is LiteralExpressionSyntax literalExpression)
            return TryConvertMultiLineRawStringToVirtualChars(token, literalExpression, tokenIncludeDelimiters: true);

        return default;
    }

    if (kind is not SyntaxKind.InterpolatedStringTextToken)
        return default;

    // Text inside an interpolated string. Walk up to the owning interpolated string expression; a token in a
    // format clause sits one level deeper than ordinary text content.
    var container = token.GetRequiredParent();
    var inFormatClause = container is InterpolationFormatClauseSyntax;
    if (inFormatClause)
        container = container.GetRequiredParent();

    var interpolatedString = (InterpolatedStringExpressionSyntax)container.GetRequiredParent();
    switch (interpolatedString.StringStartToken.Kind())
    {
        case SyntaxKind.InterpolatedStringStartToken:
            return TryConvertStringToVirtualChars(token, "", "", escapeBraces: true);

        case SyntaxKind.InterpolatedVerbatimStringStartToken:
            return TryConvertVerbatimStringToVirtualChars(token, "", "", escapeBraces: true);

        case SyntaxKind.InterpolatedSingleLineRawStringStartToken:
            return TryConvertSingleLineRawStringToVirtualChars(token);

        case SyntaxKind.InterpolatedMultiLineRawStringStartToken:
            // Format clauses must be single line, even when in a multi-line interpolation.
            if (inFormatClause)
                return TryConvertSingleLineRawStringToVirtualChars(token);

            return TryConvertMultiLineRawStringToVirtualChars(token, interpolatedString, tokenIncludeDelimiters: false);

        default:
            return default;
    }
}
/// <summary>
/// Returns <see langword="true"/> if <paramref name="node"/> or any of its ancestors (ascending out of
/// structured trivia) is a preprocessor directive.
/// </summary>
private static bool IsInDirective(SyntaxNode? node)
{
    for (var current = node; current != null; current = current.GetParent(ascendOutOfTrivia: true))
    {
        if (current is DirectiveTriviaSyntax)
            return true;
    }

    return false;
}
/// <summary>
/// Converts a verbatim string token (or verbatim interpolated-string text) to virtual chars by forwarding all
/// arguments unchanged to <c>TryConvertSimpleDoubleQuoteString</c>.
/// </summary>
private static VirtualCharGreenSequence TryConvertVerbatimStringToVirtualChars(SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
{
    return TryConvertSimpleDoubleQuoteString(token, startDelimiter, endDelimiter, escapeBraces);
}
/// <summary>
/// Converts a single-line raw string token, or a text token inside a raw interpolated string, to virtual
/// chars. For raw string literal tokens the surrounding quote run (and the <c>u8</c> suffix of the UTF-8
/// form) is excluded from the content; for any other token kind the whole token text is the content.
/// </summary>
private static VirtualCharGreenSequence TryConvertSingleLineRawStringToVirtualChars(SyntaxToken token)
{
    var text = token.Text;
    var kind = token.Kind();
    var builder = ImmutableSegmentedList.CreateBuilder<VirtualCharGreen>();

    var contentStart = 0;
    var contentEnd = text.Length;

    // Drop the UTF-8 suffix first so that the closing quotes become the last characters.
    if (kind is SyntaxKind.Utf8SingleLineRawStringLiteralToken)
    {
        Contract.ThrowIfFalse(text is [.., 'u' or 'U', '8']);
        contentEnd -= "u8".Length;
    }

    if (kind is SyntaxKind.SingleLineRawStringLiteralToken or SyntaxKind.Utf8SingleLineRawStringLiteralToken)
    {
        Contract.ThrowIfFalse(text[0] == '"');

        // Peel one quote off each end for every opening quote.
        for (; text[contentStart] == '"'; contentStart++, contentEnd--)
        {
            // All quotes should be paired at the end
            Contract.ThrowIfFalse(text[contentEnd - 1] == '"');
        }
    }

    var position = contentStart;
    while (position < contentEnd)
        position += ConvertTextAtIndexToVirtualChar(text, position, builder);

    return CreateVirtualCharSequence(text, contentStart, contentEnd, builder);
}
/// <summary>
/// Builds the sequence for the <b>content</b> characters of <paramref name="token"/>. Indentation whitespace
/// that the language specifies is not part of the content is left out.
/// </summary>
/// <param name="token">The multi-line raw string token, or a text chunk of a multi-line raw interpolated
/// string.</param>
/// <param name="parentExpression">The expression containing this token. Its last line determines how much
/// indentation whitespace is stripped from each content line.</param>
/// <param name="tokenIncludeDelimiters">Whether the token text itself contains the delimiter quote
/// (<c>"</c>) characters. When it does, the first and last lines of the token (which hold the delimiters) are
/// skipped.</param>
/// <returns>The content sequence, or <see langword="default"/> if <paramref name="parentExpression"/> has
/// any error diagnostics.</returns>
private static VirtualCharGreenSequence TryConvertMultiLineRawStringToVirtualChars(
    SyntaxToken token, ExpressionSyntax parentExpression, bool tokenIncludeDelimiters)
{
    // Whether this is the first text chunk of the multi-line literal. The first chunk starts with the leading
    // indentation of its line (which has to be trimmed); later chunks do not, because they begin right after
    // some `{...}` interpolation.
    var isFirstChunk = parentExpression switch
    {
        LiteralExpressionSyntax => true,
        InterpolatedStringExpressionSyntax { Contents: [var firstContent, ..] } => firstContent == token.GetRequiredParent(),
        _ => false,
    };

    foreach (var diagnostic in parentExpression.GetDiagnostics())
    {
        if (diagnostic.Severity == DiagnosticSeverity.Error)
            return default;
    }

    // The last line of the parent multi-line expression tells us how much whitespace to remove from the start
    // of each line.
    var parentSourceText = parentExpression.SyntaxTree.GetText();
    var closingLine = parentSourceText.Lines.GetLineFromPosition(parentExpression.Span.End);
    var indentationLength = closingLine.GetFirstNonWhitespaceOffset() ?? 0;

    // View the token through a source-text so it can be processed line by line.
    var tokenSourceText = SourceText.From(token.Text);
    var tokenLines = tokenSourceText.Lines;

    // When the token carries the delimiters, skip line 0 (the opening `"""` and what follows it on that line)
    // and the final line (the one holding the closing `"""`).
    int firstLineInclusive;
    int lineLimitExclusive;
    if (tokenIncludeDelimiters)
    {
        firstLineInclusive = 1;
        lineLimitExclusive = tokenLines.Count - 1;
    }
    else
    {
        firstLineInclusive = 0;
        lineLimitExclusive = tokenLines.Count;
    }

    var builder = ImmutableSegmentedList.CreateBuilder<VirtualCharGreen>();
    for (var lineNumber = firstLineInclusive; lineNumber < lineLimitExclusive; lineNumber++)
    {
        var line = tokenLines[lineNumber];
        var span = line.Span;
        var contentStart = span.Start;

        // Indentation is trimmed from the second processed line onwards, and also from the first processed
        // line when this is the first chunk (that line then begins with the leading whitespace).
        var trimIndentation = isFirstChunk || lineNumber > firstLineInclusive;
        if (trimIndentation)
        {
            if (span.Length > indentationLength)
                contentStart = span.Start + indentationLength;
            else
                contentStart = span.End;
        }

        // The last processed line contributes no trailing line break.
        int contentEnd;
        if (lineNumber == lineLimitExclusive - 1)
            contentEnd = line.End;
        else
            contentEnd = line.EndIncludingLineBreak;

        // Convert every character between the computed bounds.
        var position = contentStart;
        while (position < contentEnd)
            position += ConvertTextAtIndexToVirtualChar(tokenSourceText, position, builder);
    }

    return VirtualCharGreenSequence.Create(builder.ToImmutable());
}
/// <summary>
/// Converts the text of <paramref name="token"/> between the given delimiters into virtual chars, processing
/// backslash escapes and, when <paramref name="escapeBraces"/> is set, brace escapes.
/// </summary>
/// <param name="token">The token whose text is converted.</param>
/// <param name="startDelimiter">Text the token must start with; excluded from the result. May be empty, in
/// which case no check is made.</param>
/// <param name="endDelimiter">Text the token must end with (compared ignoring case); excluded from the
/// result. May be empty, in which case no check is made.</param>
/// <param name="escapeBraces">Whether <c>{</c> and <c>}</c> characters are treated as brace escapes.</param>
/// <returns>The converted sequence, or <see langword="default"/> if a delimiter does not match or an escape
/// could not be processed.</returns>
private static VirtualCharGreenSequence TryConvertStringToVirtualChars(
    SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
{
    var tokenText = token.Text;

    // Delimiters are compared ordinally. A culture-sensitive comparison applies linguistic rules and can
    // disagree with a plain char-by-char check of the token text.
    if (startDelimiter.Length > 0 && !tokenText.StartsWith(startDelimiter, StringComparison.Ordinal))
    {
        Debug.Fail("This should not be reachable as long as the compiler added no diagnostics.");
        return default;
    }

    if (endDelimiter.Length > 0 && !tokenText.EndsWith(endDelimiter, StringComparison.OrdinalIgnoreCase))
    {
        Debug.Fail("This should not be reachable as long as the compiler added no diagnostics.");
        return default;
    }

    var startIndexInclusive = startDelimiter.Length;
    var endIndexExclusive = tokenText.Length - endDelimiter.Length;

    // Fast path: when the content has no escapes, build the sequence directly over the token text and slice
    // off the delimiters instead of producing one entry per character.
    if (!ContainsEscape(tokenText.AsSpan(startIndexInclusive, endIndexExclusive - startIndexInclusive), escapeBraces))
    {
        var sequence = VirtualCharGreenSequence.Create(tokenText);
        return sequence[startIndexInclusive..endIndexExclusive];
    }

    using var pooledRuneResults = s_pooledBuilders.GetPooledObject();
    var charResults = pooledRuneResults.Object;
    try
    {
        for (var index = startIndexInclusive; index < endIndexExclusive;)
        {
            var ch = tokenText[index];
            if (ch == '\\')
            {
                if (TryAddEscape(charResults, tokenText, index) is not int escapeWidth)
                    return default;

                index += escapeWidth;
            }
            else if (escapeBraces && IsOpenOrCloseBrace(ch))
            {
                if (!IsLegalBraceEscape(tokenText, index, out var braceWidth))
                    return default;

                charResults.Add(new VirtualCharGreen(ch, index, braceWidth));
                index += braceWidth;
            }
            else
            {
                charResults.Add(new VirtualCharGreen(ch, index, width: 1));
                index++;
            }
        }

        return CreateVirtualCharSequence(tokenText, startIndexInclusive, endIndexExclusive, charResults);
    }
    finally
    {
        // Empty the builder on every exit path (including the early 'return default' ones and exceptions)
        // before it is handed back to the pool, so a later user never sees leftover entries.
        charResults.Clear();
    }
}
/// <summary>
/// Returns <see langword="true"/> if <paramref name="tokenText"/> contains a backslash, or, when
/// <paramref name="escapeBraces"/> is set, an open or close brace.
/// </summary>
private static bool ContainsEscape(ReadOnlySpan<char> tokenText, bool escapeBraces)
{
    for (var i = 0; i < tokenText.Length; i++)
    {
        var current = tokenText[i];
        if (current == '\\' || (escapeBraces && IsOpenOrCloseBrace(current)))
            return true;
    }

    return false;
}
/// <summary>
/// Processes the backslash escape starting at <paramref name="index"/>, trying the single-character form
/// first and the multi-character (hex-digit) form second.
/// </summary>
/// <returns>The number of characters consumed, or <see langword="null"/> if neither form applies.</returns>
private static int? TryAddEscape(
    ImmutableSegmentedList<VirtualCharGreen>.Builder result,
    string tokenText,
    int index)
{
    // Copied from Lexer.ScanEscapeSequence.
    Debug.Assert(tokenText[index] == '\\');

    var consumed = TryAddSingleCharacterEscape(result, tokenText, index);
    if (consumed is not null)
        return consumed;

    return TryAddMultiCharacterEscape(result, tokenText, index);
}
/// <summary>
/// Asks <paramref name="ch"/> for its escape character; the result and <paramref name="escapedChar"/> come
/// straight from <c>VirtualChar.TryGetEscapeCharacter</c>.
/// </summary>
public override bool TryGetEscapeCharacter(VirtualChar ch, out char escapedChar)
{
    return ch.TryGetEscapeCharacter(out escapedChar);
}
/// <summary>
/// Processes a simple two-character escape (a backslash followed by one selector character such as
/// <c>n</c> or <c>t</c>) starting at <paramref name="index"/>, adding the character it denotes to
/// <paramref name="result"/>.
/// </summary>
/// <param name="result">Builder that receives the unescaped character.</param>
/// <param name="tokenText">The full token text.</param>
/// <param name="index">Index of the backslash within <paramref name="tokenText"/>.</param>
/// <returns>The number of characters consumed (always 2), or <see langword="null"/> if the character after
/// the backslash is not a simple escape selector; nothing is added in that case.</returns>
private static int? TryAddSingleCharacterEscape(
    ImmutableSegmentedList<VirtualCharGreen>.Builder result, string tokenText, int index)
{
    // A simple escape always spans exactly the backslash and the selector character.
    const int escapeWidth = 2;

    // Copied from Lexer.ScanEscapeSequence.
    Debug.Assert(tokenText[index] == '\\');

    var ch = tokenText[index + 1];

    // Keep in sync with EscapeForRegularString
    switch (ch)
    {
        // escaped characters that translate to themselves
        case '\'':
        case '"':
        case '\\':
            break;
        // translate escapes as per C# spec 2.4.4.4
        case '0': ch = '\0'; break;
        case 'a': ch = '\a'; break;
        case 'b': ch = '\b'; break;
        case 'e': ch = '\u001b'; break;
        case 'f': ch = '\f'; break;
        case 'n': ch = '\n'; break;
        case 'r': ch = '\r'; break;
        case 't': ch = '\t'; break;
        case 'v': ch = '\v'; break;
        default:
            return null;
    }

    result.Add(new VirtualCharGreen(ch, offset: index, width: escapeWidth));

    // Return the known width directly (as the multi-character overload does) rather than reading it back
    // from the element that was just added.
    return escapeWidth;
}
/// <summary>
/// Processes a hex-digit escape (<c>\x</c>, <c>\u</c> or <c>\U</c>) starting at <paramref name="index"/> by
/// handing it to the overload that takes the escape selector.
/// </summary>
/// <returns>The number of characters consumed, or <see langword="null"/> if the character after the
/// backslash is not one of the three selectors or the escape could not be processed.</returns>
private static int? TryAddMultiCharacterEscape(
    ImmutableSegmentedList<VirtualCharGreen>.Builder result, string tokenText, int index)
{
    // Copied from Lexer.ScanEscapeSequence.
    Debug.Assert(tokenText[index] == '\\');

    var selector = tokenText[index + 1];
    if (selector is 'x' or 'u' or 'U')
        return TryAddMultiCharacterEscape(result, tokenText, index, selector);

    Debug.Fail("This should not be reachable as long as the compiler added no diagnostics.");
    return null;
}
/// <summary>
/// Processes the hex-digit escape starting at <paramref name="index"/> and adds the character(s) it denotes
/// to <paramref name="result"/>:
/// <list type="bullet">
/// <item><c>\U</c> followed by exactly 8 hex digits; adds one char, or a surrogate pair for values of
/// 0x10000 and above.</item>
/// <item><c>\u</c> followed by exactly 4 hex digits; adds one char.</item>
/// <item><c>\x</c> followed by 1 to 4 hex digits; adds one char.</item>
/// </list>
/// </summary>
/// <param name="result">Builder that receives the unescaped character(s).</param>
/// <param name="tokenText">The full token text.</param>
/// <param name="index">Index of the backslash within <paramref name="tokenText"/>.</param>
/// <param name="character">The escape selector that follows the backslash.</param>
/// <returns>The number of characters consumed, or <see langword="null"/> if a required hex digit is missing
/// or a <c>\U</c> value is above 0x10FFFF.</returns>
private static int? TryAddMultiCharacterEscape(
    ImmutableSegmentedList<VirtualCharGreen>.Builder result,
    string tokenText,
    int index,
    char character)
{
    const string unreachableMessage = "This should not be reachable as long as the compiler added no diagnostics.";

    Debug.Assert(tokenText[index] == '\\');

    // The hex digits begin right after the backslash and the escape selector.
    var escapeStart = index;
    var digitsStart = index + 2;

    if (character == 'U')
    {
        // 8 character escape. May represent 1 or 2 actual chars.
        if (!TryReadFixedLengthHex(digitCount: 8, out var codePoint))
            return null;

        // Copied from Lexer.cs and SlidingTextWindow.cs
        if (codePoint > 0x0010FFFF)
        {
            Debug.Fail(unreachableMessage);
            return null;
        }

        if (codePoint >= 0x00010000)
        {
            Debug.Assert(codePoint is > 0x0000FFFF and <= 0x0010FFFF);

            var supplementaryOffset = codePoint - 0x00010000;
            var highSurrogate = (supplementaryOffset / 0x0400) + 0xD800;
            var lowSurrogate = (supplementaryOffset % 0x0400) + 0xDC00;

            // Emit a surrogate pair. For mapping purposes the high surrogate covers the first 6 source chars
            // (the \UAAAA of \UAAAABBBB) and the low surrogate covers the remaining 4 (the BBBB).
            const string highPortion = @"\UAAAA";
            result.Add(new VirtualCharGreen((char)highSurrogate, offset: escapeStart, width: highPortion.Length));
            result.Add(new VirtualCharGreen((char)lowSurrogate, offset: escapeStart + highPortion.Length, width: 4));
        }
        else
        {
            // Something like \U0000000A: a single char value.
            result.Add(new VirtualCharGreen((char)codePoint, offset: escapeStart, width: 2 + 8));
        }

        return @"\UAAAABBBB".Length;
    }

    if (character == 'u')
    {
        // 4 character escape representing one char.
        if (!TryReadFixedLengthHex(digitCount: 4, out var codeUnit))
            return null;

        var consumed = @"\uAAAA".Length;
        result.Add(new VirtualCharGreen((char)codeUnit, offset: escapeStart, width: consumed));
        return consumed;
    }

    Debug.Assert(character == 'x');

    // Variable length (1 to 4 digits) hexadecimal escape. At least one digit is required.
    if (!IsHexDigit(tokenText[digitsStart]))
    {
        Debug.Fail(unreachableMessage);
        return null;
    }

    var hexValue = 0;
    var cursor = digitsStart;
    var digitLimit = Math.Min(digitsStart + 4, tokenText.Length);

    // Stopping at the first non-hex character is expected here: these escapes are variable length.
    while (cursor < digitLimit && IsHexDigit(tokenText[cursor]))
    {
        hexValue = (hexValue << 4) + HexValue(tokenText[cursor]);
        cursor++;
    }

    var hexEscapeWidth = cursor - escapeStart;
    result.Add(new VirtualCharGreen((char)hexValue, offset: escapeStart, width: hexEscapeWidth));
    return hexEscapeWidth;

    // Reads exactly 'digitCount' hex digits starting at 'digitsStart'. Fails (after Debug.Fail) on the first
    // character that is not a hex digit.
    bool TryReadFixedLengthHex(int digitCount, out uint value)
    {
        value = 0;
        for (var i = 0; i < digitCount; i++)
        {
            var digit = tokenText[digitsStart + i];
            if (!IsHexDigit(digit))
            {
                Debug.Fail(unreachableMessage);
                return false;
            }

            value = (value << 4) + (uint)HexValue(digit);
        }

        return true;
    }
}
/// <summary>
/// Returns the numeric value (0-15) of the hex digit <paramref name="c"/>. The caller must pass a valid hex
/// digit.
/// </summary>
private static int HexValue(char c)
{
    Debug.Assert(IsHexDigit(c));

    if (c is >= '0' and <= '9')
        return c - '0';

    // Masking with 0xdf clears the 0x20 bit, folding 'a'-'f' onto 'A'-'F'.
    return (c & 0xdf) - 'A' + 10;
}
/// <summary>
/// Returns <see langword="true"/> if <paramref name="c"/> is an ASCII hexadecimal digit (<c>0-9</c>,
/// <c>A-F</c> or <c>a-f</c>).
/// </summary>
private static bool IsHexDigit(char c)
    => c switch
    {
        >= '0' and <= '9' => true,
        >= 'A' and <= 'F' => true,
        >= 'a' and <= 'f' => true,
        _ => false,
    };
}
|