|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Diagnostics;
using System.Text;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.LanguageService;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
internal abstract partial class AbstractVirtualCharService : IVirtualCharService
{
/// <summary>
/// Language-specific; implemented by subclasses. Not called from this part of the class.
/// NOTE(review): presumably reports the character used to write <paramref name="ch"/> as an escape sequence
/// through <paramref name="escapedChar"/> — confirm against the implementations.
/// </summary>
public abstract bool TryGetEscapeCharacter(VirtualChar ch, out char escapedChar);
/// <summary>
/// Language-specific syntax facts. In this file they are used to obtain the raw syntax kinds of string and
/// character literal tokens while checking invariants in debug builds.
/// </summary>
protected abstract ISyntaxFacts SyntaxFacts { get; }
/// <summary>
/// Language-specific conversion of <paramref name="token"/> into its virtual characters. Called by
/// <see cref="TryConvertToVirtualChars"/> only for tokens that do not contain diagnostics.
/// </summary>
protected abstract VirtualCharGreenSequence TryConvertToVirtualCharsWorker(SyntaxToken token);
/// <summary>
/// Returns whether <paramref name="token"/> is a multi-line raw string token. For such tokens the debug-only
/// invariant checks require the virtual character spans to be ordered rather than touching.
/// </summary>
protected abstract bool IsMultiLineRawStringToken(SyntaxToken token);
/// <summary>
/// Determines whether a doubled brace (<c>{{</c> or <c>}}</c>) starts at <paramref name="index"/> in
/// <paramref name="tokenText"/>.
/// </summary>
/// <param name="tokenText">The text to inspect.</param>
/// <param name="index">Position of the first character of the candidate pair.</param>
/// <param name="width">Set to <c>2</c> when a doubled brace is found; otherwise <c>0</c>.</param>
/// <returns><see langword="true"/> if a doubled brace starts at <paramref name="index"/>.</returns>
protected static bool IsLegalBraceEscape(
    string tokenText, int index, out int width)
{
    width = 0;

    // Two characters are needed to form an escape.
    if (index + 1 >= tokenText.Length)
        return false;

    // The pair is legal only when the same brace character appears twice in a row.
    var first = tokenText[index];
    if (first is not ('{' or '}') || tokenText[index + 1] != first)
        return false;

    width = 2;
    return true;
}
/// <summary>
/// Converts <paramref name="token"/> into a <see cref="VirtualCharSequence"/> positioned at the token's start.
/// Returns <see langword="default"/> when the token contains diagnostics.
/// </summary>
public VirtualCharSequence TryConvertToVirtualChars(SyntaxToken token)
{
    // Tokens with diagnostics are never processed. Skipping them lets the rest of the conversion trust
    // that the string's contents (most importantly its escape sequences) are well formed.
    if (token.ContainsDiagnostics)
    {
        return default;
    }

    var green = TryConvertToVirtualCharsWorker(token);
    var sequence = new VirtualCharSequence(token.SpanStart, green);

    // Debug-only verification; calls to it are removed from non-DEBUG builds.
    CheckInvariants(token, sequence);
    return sequence;
}
/// <summary>
/// Debug-only sanity checks verifying that <paramref name="result"/> is consistent with
/// <paramref name="token"/>: the produced text matches the token's value text (for the token kinds where that
/// is expected) and the virtual character spans are positioned and ordered correctly.
/// </summary>
[Conditional("DEBUG")]
private void CheckInvariants(SyntaxToken token, VirtualCharSequence result)
{
    // Make sure we processed the string token the same way the C# and VB compilers did.
    if (result.IsDefault)
        return;

    var kinds = this.SyntaxFacts.SyntaxKinds;
    var rawKind = token.RawKind;
    var isStringOrCharLiteral = rawKind == kinds.StringLiteralToken || rawKind == kinds.CharacterLiteralToken;
    var isUtf8StringLiteral = rawKind == kinds.Utf8StringLiteralToken;

    // The characters we produced must match what the compiler computed for the token. This is not checked
    // for every kind: an InterpolatedStringTextToken, for example, does not get the ValueText processing a
    // StringLiteralToken does, so $"{{" has a ValueText of "{{" rather than the "{" one might expect.
    if (isStringOrCharLiteral || isUtf8StringLiteral)
    {
        var compilerText = token.ValueText;
        var producedText = result.CreateString();
        Debug.Assert(compilerText == producedText);
    }

    if (result.Length == 0)
        return;

    var previous = result[0];
    Debug.Assert(previous.Span.Start >= token.SpanStart, "First span has to start after the start of the string token");

    if (isStringOrCharLiteral)
    {
        var offsetFromTokenStart = previous.Span.Start - token.SpanStart;
        Debug.Assert(offsetFromTokenStart is 1 or 2, "First span should start on the second or third char of the string.");
    }

    // Multi-line raw string tokens only need ordered spans; every other token needs touching spans.
    var spansMayHaveGaps = IsMultiLineRawStringToken(token);
    for (var i = 1; i < result.Length; i++)
    {
        var current = result[i];
        if (spansMayHaveGaps)
        {
            Debug.Assert(previous.Span.End <= current.Span.Start, "Virtual character spans have to be ordered.");
        }
        else
        {
            Debug.Assert(previous.Span.End == current.Span.Start, "Virtual character spans have to be touching.");
        }

        previous = current;
    }

    // The last character must end right before the closing delimiter (including a u8 suffix when present).
    var last = result[^1];
    if (isStringOrCharLiteral)
    {
        Debug.Assert(last.Span.End == token.Span.End - "\"".Length, "Last span has to end right before the end of the string token.");
    }
    else if (isUtf8StringLiteral)
    {
        Debug.Assert(last.Span.End == token.Span.End - "\"u8".Length, "Last span has to end right before the end of the string token.");
    }
}
/// <summary>
/// Helper to convert simple string literals that escape quotes by doubling them.  This is
/// how normal VB literals and C# verbatim string literals work.
/// </summary>
/// <param name="token">The token to convert. It must not contain diagnostics.</param>
/// <param name="startDelimiter">The start characters string.  " in VB and @" in C#</param>
/// <param name="endDelimiter">The characters the token text must end with. They are excluded from the
/// converted content. May be empty.</param>
/// <param name="escapeBraces">When <see langword="true"/>, <c>{{</c> and <c>}}</c> are each converted to a
/// single brace character, and any other brace makes the conversion fail. Both delimiters are expected to be
/// empty in this mode.</param>
/// <returns>The converted characters, or <see langword="default"/> if the token text does not have the
/// expected delimiters or contains an illegal brace.</returns>
protected static VirtualCharGreenSequence TryConvertSimpleDoubleQuoteString(
    SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
{
    Debug.Assert(!token.ContainsDiagnostics);

    if (escapeBraces)
    {
        Debug.Assert(startDelimiter == "");
        Debug.Assert(endDelimiter == "");
    }

    var tokenText = token.Text;

    if (startDelimiter.Length > 0 && !tokenText.StartsWith(startDelimiter, StringComparison.Ordinal))
    {
        Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
        return default;
    }

    if (endDelimiter.Length > 0 && !tokenText.EndsWith(endDelimiter, StringComparison.Ordinal))
    {
        Debug.Assert(false, "This should not be reachable as long as the compiler added no diagnostics.");
        return default;
    }

    var startIndexInclusive = startDelimiter.Length;
    var endIndexExclusive = tokenText.Length - endDelimiter.Length;

    var result = ImmutableSegmentedList.CreateBuilder<VirtualCharGreen>();
    for (var index = startIndexInclusive; index < endIndexExclusive;)
    {
        var ch = tokenText[index];

        // Only look ahead while the next character is still part of the content. Without this bound a quote
        // in the last content position would either read past the end of the text (empty end delimiter) or
        // be paired with the first character of the end delimiter. An unpaired quote falls through and is
        // treated like any other character.
        if (ch == '"' && index + 1 < endIndexExclusive && tokenText[index + 1] == '"')
        {
            // A doubled quote represents a single quote character that is two source characters wide.
            result.Add(new VirtualCharGreen('"', offset: index, width: 2));
            index += 2;
            continue;
        }

        if (escapeBraces && IsOpenOrCloseBrace(ch))
        {
            // Braces are only legal when doubled; anything else means the text cannot be converted.
            if (!IsLegalBraceEscape(tokenText, index, out var width))
                return default;

            result.Add(new VirtualCharGreen(ch, offset: index, width: width));
            index += width;
            continue;
        }

        index += ConvertTextAtIndexToVirtualChar(tokenText, index, result);
    }

    return CreateVirtualCharSequence(
        tokenText, startIndexInclusive, endIndexExclusive, result);
}
/// <summary>
/// Adds the character at <paramref name="index"/> of <paramref name="tokenText"/> to
/// <paramref name="result"/> and returns the number of characters to jump forward. The shared worker adds a
/// single virtual char of width 1 and returns 1.
/// </summary>
protected static int ConvertTextAtIndexToVirtualChar(string tokenText, int index, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
{
    var info = new StringTextInfo();
    return ConvertTextAtIndexToVirtualChar(tokenText, index, info, result);
}

/// <summary>
/// Same as the <see cref="string"/> overload, but reads the character from a <see cref="SourceText"/>.
/// </summary>
protected static int ConvertTextAtIndexToVirtualChar(SourceText tokenText, int index, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
{
    var info = new SourceTextTextInfo();
    return ConvertTextAtIndexToVirtualChar(tokenText, index, info, result);
}
/// <summary>
/// Shared worker for the public overloads: reads the character at <paramref name="index"/> through
/// <paramref name="info"/>, appends it to <paramref name="result"/> as a virtual char of width 1, and returns
/// that width.
/// </summary>
private static int ConvertTextAtIndexToVirtualChar<T, TTextInfo>(
    T tokenText, int index, TTextInfo info, ImmutableSegmentedList<VirtualCharGreen>.Builder result)
    where TTextInfo : struct, ITextInfo<T>
{
    // Each source character maps to exactly one virtual char here.
    const int Width = 1;

    result.Add(new VirtualCharGreen(info.Get(tokenText, index), offset: index, width: Width));
    return Width;
}
/// <summary>
/// Returns <see langword="true"/> if <paramref name="ch"/> is <c>{</c> or <c>}</c>.
/// </summary>
protected static bool IsOpenOrCloseBrace(char ch)
{
    return ch == '{' || ch == '}';
}
/// <summary>
/// Produces the final sequence for the content of <paramref name="tokenText"/> between
/// <paramref name="startIndexInclusive"/> and <paramref name="endIndexExclusive"/>.
/// </summary>
/// <param name="tokenText">The full text of the token.</param>
/// <param name="startIndexInclusive">Index of the first content character.</param>
/// <param name="endIndexExclusive">Index just past the last content character.</param>
/// <param name="result">The virtual chars collected while scanning the content.</param>
/// <returns>A slice over <paramref name="tokenText"/> when the number of collected virtual chars equals the
/// content length; otherwise a sequence built from <paramref name="result"/>.</returns>
protected static VirtualCharGreenSequence CreateVirtualCharSequence(
    string tokenText,
    int startIndexInclusive,
    int endIndexExclusive,
    ImmutableSegmentedList<VirtualCharGreen>.Builder result)
{
    var contentLength = endIndexExclusive - startIndexInclusive;

    // A different count means at least one special virtual char was created, so the collected
    // characters have to be materialized.
    if (result.Count != contentLength)
        return VirtualCharGreenSequence.Create(result.ToImmutable());

    // Otherwise no special virtual chars were needed: skip the allocation entirely and hand back
    // a view over the token's own text.
    return VirtualCharGreenSequence.Create(tokenText)[startIndexInclusive..endIndexExclusive];
}
}
|