|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
{
internal partial class LanguageParser
{
private ExpressionSyntax ParseInterpolatedStringToken()
{
// We don't want to make the scanner stateful (between tokens) if we can possibly avoid it.
// The approach implemented here is
//
// (1) Scan the whole interpolated string literal as a single token. Now the statefulness of
// the scanner (to match { }'s) is limited to its behavior while scanning a single token.
//
// (2) When the parser gets such a token, here, it spins up another scanner / parser on each of
// the holes and builds a tree for the whole thing (resulting in an InterpolatedStringExpressionSyntax).
//
// (3) The parser discards the original token and replaces it with this tree. (In other words,
// it replaces one token with a different set of tokens that have already been parsed)
//
// (4) On an incremental change, we widen the invalidated region to include any enclosing interpolated
// string nonterminal so that we never reuse tokens inside a changed interpolated string.
//
// This has the secondary advantage that it can reasonably be specified.
//
// The substitution will end up being invisible to external APIs and clients such as the IDE, as
// they have no way to ask for the stream of tokens before parsing.
Debug.Assert(this.CurrentToken.Kind == SyntaxKind.InterpolatedStringToken);
var originalToken = this.EatToken();
var originalText = originalToken.ValueText; // this is actually the source text
var originalTextSpan = originalText.AsSpan();
Debug.Assert(originalText[0] == '$' || originalText[0] == '@');
// compute the positions of the interpolations in the original string literal, if there was an error or not,
// and where the open and close quotes can be found.
var interpolations = ArrayBuilder<Lexer.Interpolation>.GetInstance();
rescanInterpolation(out var kind, out var error, out var openQuoteRange, interpolations, out var closeQuoteRange);
// Only bother trying to do dedentation if we have a multiline literal without errors. There's no point
// trying in the presence of errors as we may not even be able to determine what the dedentation should be.
var needsDedentation = kind == Lexer.InterpolatedStringKind.MultiLineRaw && error == null;
var result = SyntaxFactory.InterpolatedStringExpression(getOpenQuote(), getContent(originalTextSpan), getCloseQuote());
interpolations.Free();
if (error != null)
{
// Errors are positioned relative to the start of the token that was lexed. Specifically relative to
// the starting `$` or `@`. However, when placed on a node like this, it will be relative to the node's
// full start. So we have to adjust the diagnostics taking that into account.
result = result.WithDiagnosticsGreen(MoveDiagnostics(new[] { error }, originalToken.GetLeadingTrivia()?.FullWidth ?? 0));
}
Debug.Assert(originalToken.ToFullString() == result.ToFullString()); // yield from text equals yield from node
return result;
void rescanInterpolation(out Lexer.InterpolatedStringKind kind, out SyntaxDiagnosticInfo? error, out Range openQuoteRange, ArrayBuilder<Lexer.Interpolation> interpolations, out Range closeQuoteRange)
{
using var tempLexer = new Lexer(SourceText.From(originalText), this.Options, allowPreprocessorDirectives: false);
var info = default(Lexer.TokenInfo);
tempLexer.ScanInterpolatedStringLiteralTop(ref info, out error, out kind, out openQuoteRange, interpolations, out closeQuoteRange);
}
SyntaxToken getOpenQuote()
{
return SyntaxFactory.Token(
originalToken.GetLeadingTrivia(),
kind switch
{
Lexer.InterpolatedStringKind.Normal => SyntaxKind.InterpolatedStringStartToken,
Lexer.InterpolatedStringKind.Verbatim => SyntaxKind.InterpolatedVerbatimStringStartToken,
Lexer.InterpolatedStringKind.SingleLineRaw => SyntaxKind.InterpolatedSingleLineRawStringStartToken,
Lexer.InterpolatedStringKind.MultiLineRaw => SyntaxKind.InterpolatedMultiLineRawStringStartToken,
_ => throw ExceptionUtilities.UnexpectedValue(kind),
},
originalText[openQuoteRange],
trailing: null);
}
CodeAnalysis.Syntax.InternalSyntax.SyntaxList<InterpolatedStringContentSyntax> getContent(ReadOnlySpan<char> originalTextSpan)
{
var content = PooledStringBuilder.GetInstance();
var builder = _pool.Allocate<InterpolatedStringContentSyntax>();
var indentationWhitespace = needsDedentation ? getIndentationWhitespace(originalTextSpan) : default;
var currentContentStart = openQuoteRange.End;
for (var i = 0; i < interpolations.Count; i++)
{
var interpolation = interpolations[i];
// Add a token for text preceding the interpolation
builder.Add(makeContent(
indentationWhitespace, content, isFirst: i == 0, isLast: false,
originalTextSpan[currentContentStart..interpolation.OpenBraceRange.Start]));
// Now parse the interpolation itself.
var interpolationNode = ParseInterpolation(this.Options, originalText, interpolation, kind, IsInFieldKeywordContext);
// Make sure the interpolation starts at the right location.
var indentationError = getInterpolationIndentationError(indentationWhitespace, interpolation);
if (indentationError != null)
interpolationNode = interpolationNode.WithDiagnosticsGreen(new[] { indentationError });
builder.Add(interpolationNode);
currentContentStart = interpolation.CloseBraceRange.End;
}
// Add a token for text following the last interpolation
builder.Add(makeContent(
indentationWhitespace, content, isFirst: interpolations.Count == 0, isLast: true,
originalTextSpan[currentContentStart..closeQuoteRange.Start]));
CodeAnalysis.Syntax.InternalSyntax.SyntaxList<InterpolatedStringContentSyntax> result = builder;
_pool.Free(builder);
content.Free();
return result;
}
// Gets the indentation whitespace from the last line of a multi-line raw literal.
ReadOnlySpan<char> getIndentationWhitespace(ReadOnlySpan<char> originalTextSpan)
{
// The content we want to create text token out of. Effectively, what is in the text sections
// minus leading whitespace.
var closeQuoteText = originalTextSpan[closeQuoteRange];
// A multi-line raw interpolation without errors always ends with a new-line, some number of spaces, and
// the quotes. So it's safe to just pull off the first two characters here to find where the
// newline-ends.
var afterNewLine = SlidingTextWindow.GetNewLineWidth(closeQuoteText[0], closeQuoteText[1]);
var afterWhitespace = SkipWhitespace(closeQuoteText, afterNewLine);
Debug.Assert(closeQuoteText[afterWhitespace] == '"');
return closeQuoteText[afterNewLine..afterWhitespace];
}
InterpolatedStringContentSyntax? makeContent(
ReadOnlySpan<char> indentationWhitespace, StringBuilder content, bool isFirst, bool isLast, ReadOnlySpan<char> text)
{
if (text.Length == 0)
return null;
// If we're not dedenting then just make a standard interpolated text token. Also, we can short-circuit
// if the indentation whitespace is empty (nothing to dedent in that case).
if (!needsDedentation || indentationWhitespace.IsEmpty)
return SyntaxFactory.InterpolatedStringText(MakeInterpolatedStringTextToken(kind, text.ToString()));
content.Clear();
var currentIndex = 0;
// If we're not processing the first content chunk, then we must be processing a chunk that came after
// an interpolation. In that case, we need to consume up through the next newline of that chunk as
// content that is not subject to dedentation.
if (!isFirst)
currentIndex = ConsumeRemainingContentThroughNewLine(content, text, currentIndex);
// We're either the first item, or we consumed up through a newline from the previous line. We're
// definitely at the start of a new line (or at the end). Regardless, we want to consume each
// successive line, making sure its indentation is correct.
// Consume one line at a time.
SyntaxDiagnosticInfo? indentationError = null;
while (currentIndex < text.Length)
{
var lineStartPosition = currentIndex;
// Only bother reporting a single indentation error on a text chunk.
if (indentationError == null)
{
currentIndex = SkipWhitespace(text, currentIndex);
var currentLineWhitespace = text[lineStartPosition..currentIndex];
if (!currentLineWhitespace.StartsWith(indentationWhitespace))
{
// We have a line where the indentation of that line isn't a prefix of indentation
// whitespace.
//
// If we're not on a blank line then this is bad. That's a content line that doesn't start
// with the indentation whitespace. If we are on a blank line then it's ok if the whitespace
// we do have is a prefix of the indentation whitespace.
var isBlankLine = (currentIndex == text.Length && isLast) || (currentIndex < text.Length && SyntaxFacts.IsNewLine(text[currentIndex]));
var isLegalBlankLine = isBlankLine && indentationWhitespace.StartsWith(currentLineWhitespace);
if (!isLegalBlankLine)
{
// Specialized error message if this is a spacing difference.
if (CheckForSpaceDifference(
currentLineWhitespace, indentationWhitespace,
out var currentLineWhitespaceChar, out var indentationWhitespaceChar))
{
indentationError ??= MakeError(
lineStartPosition,
width: currentIndex - lineStartPosition,
ErrorCode.ERR_LineContainsDifferentWhitespace,
currentLineWhitespaceChar, indentationWhitespaceChar);
}
else
{
indentationError ??= MakeError(
lineStartPosition,
width: currentIndex - lineStartPosition,
ErrorCode.ERR_LineDoesNotStartWithSameWhitespace);
}
}
}
}
// Skip the leading whitespace that matches the terminator line and add any text after that to our content.
currentIndex = Math.Min(currentIndex, lineStartPosition + indentationWhitespace.Length);
currentIndex = ConsumeRemainingContentThroughNewLine(content, text, currentIndex);
}
// if we ran into any errors, don't give this item any special value. It just has the value of our actual text.
var textString = text.ToString();
var valueString = indentationError != null ? textString : content.ToString();
var node = SyntaxFactory.InterpolatedStringText(
SyntaxFactory.Literal(leading: null, textString, SyntaxKind.InterpolatedStringTextToken, valueString, trailing: null));
return indentationError != null
? node.WithDiagnosticsGreen(new[] { indentationError })
: node;
}
SyntaxToken getCloseQuote()
{
// Make a token for the close quote " (even if it was missing)
return TokenOrMissingToken(
leading: null,
kind switch
{
Lexer.InterpolatedStringKind.Normal => SyntaxKind.InterpolatedStringEndToken,
Lexer.InterpolatedStringKind.Verbatim => SyntaxKind.InterpolatedStringEndToken,
Lexer.InterpolatedStringKind.SingleLineRaw => SyntaxKind.InterpolatedRawStringEndToken,
Lexer.InterpolatedStringKind.MultiLineRaw => SyntaxKind.InterpolatedRawStringEndToken,
_ => throw ExceptionUtilities.UnexpectedValue(kind),
},
originalText[closeQuoteRange],
originalToken.GetTrailingTrivia());
}
// if the interpolation starts on its own line, then it has to have correct indentation whitespace
// before it. e.g.:
//
// var x = """
// {1 + 1}
// """
//
// Not:
//
// var x = """
// {1 + 1}
// """
//
// Note: We don't need to check
//
// var x = """
// <space>{1 + 1}
// """
//
// as initial whitespace in text will already be checked in makeContent. This is only for the case where
// the interpolation is at the start of a line.
SyntaxDiagnosticInfo? getInterpolationIndentationError(
ReadOnlySpan<char> indentationWhitespace,
Lexer.Interpolation interpolation)
{
if (needsDedentation && !indentationWhitespace.IsEmpty)
{
var openBracePosition = interpolation.OpenBraceRange.Start.Value;
if (openBracePosition > 0 && SyntaxFacts.IsNewLine(originalText[openBracePosition - 1]))
// Pass 0 as the offset to give the error on the interpolation brace.
return MakeError(offset: 0, width: 1, ErrorCode.ERR_LineDoesNotStartWithSameWhitespace);
}
return null;
}
}
private static bool CheckForSpaceDifference(
ReadOnlySpan<char> currentLineWhitespace,
ReadOnlySpan<char> indentationLineWhitespace,
[NotNullWhen(true)] out string? currentLineMessage,
[NotNullWhen(true)] out string? indentationLineMessage)
{
for (int i = 0, n = Math.Min(currentLineWhitespace.Length, indentationLineWhitespace.Length); i < n; i++)
{
var currentLineChar = currentLineWhitespace[i];
var indentationLineChar = indentationLineWhitespace[i];
if (currentLineChar != indentationLineChar &&
SyntaxFacts.IsWhitespace(currentLineChar) &&
SyntaxFacts.IsWhitespace(indentationLineChar))
{
currentLineMessage = Lexer.CharToString(currentLineChar);
indentationLineMessage = Lexer.CharToString(indentationLineChar);
return true;
}
}
currentLineMessage = null;
indentationLineMessage = null;
return false;
}
private static SyntaxToken TokenOrMissingToken(GreenNode? leading, SyntaxKind kind, string text, GreenNode? trailing)
=> text == ""
? SyntaxFactory.MissingToken(leading, kind, trailing)
: SyntaxFactory.Token(leading, kind, text, trailing);
private static int SkipWhitespace(ReadOnlySpan<char> text, int currentIndex)
{
while (currentIndex < text.Length && SyntaxFacts.IsWhitespace(text[currentIndex]))
currentIndex++;
return currentIndex;
}
private static int ConsumeRemainingContentThroughNewLine(StringBuilder content, ReadOnlySpan<char> text, int currentIndex)
{
var start = currentIndex;
while (currentIndex < text.Length)
{
var ch = text[currentIndex];
if (!SyntaxFacts.IsNewLine(ch))
{
currentIndex++;
continue;
}
currentIndex += SlidingTextWindow.GetNewLineWidth(ch, currentIndex + 1 < text.Length ? text[currentIndex + 1] : '\0');
break;
}
var slice = text[start..currentIndex];
#if NET
content.Append(slice);
#else
unsafe
{
fixed (char* pointer = slice)
content.Append(pointer, slice.Length);
}
#endif
return currentIndex;
}
private static InterpolationSyntax ParseInterpolation(
CSharpParseOptions options,
string text,
Lexer.Interpolation interpolation,
Lexer.InterpolatedStringKind kind,
bool isInFieldKeywordContext)
{
// Grab the text from after the { all the way to the start of the } (or the start of the : if present). This
// will be used to parse out the expression of the interpolation.
//
// The parsing of the open brace, close brace and colon is specially handled in ParseInterpolation below.
var followingRange = interpolation.HasColon ? interpolation.ColonRange : interpolation.CloseBraceRange;
var expressionText = text[interpolation.OpenBraceRange.End..followingRange.Start];
using var tempLexer = new Lexer(SourceText.From(expressionText), options, allowPreprocessorDirectives: false, interpolationFollowedByColon: interpolation.HasColon);
// First grab any trivia right after the {, it will be trailing trivia for the { token.
var openTokenTrailingTrivia = tempLexer.LexSyntaxTrailingTrivia().Node;
// Now create a parser to actually handle the expression portion of the interpolation
using var tempParser = new LanguageParser(tempLexer, oldTree: null, changes: null);
using var _ = new FieldKeywordContext(tempParser, isInFieldKeywordContext);
var result = tempParser.ParseInterpolation(
text, interpolation, kind,
SyntaxFactory.Token(leading: null, SyntaxKind.OpenBraceToken, text[interpolation.OpenBraceRange], openTokenTrailingTrivia));
Debug.Assert(text[interpolation.OpenBraceRange.Start..interpolation.CloseBraceRange.End] == result.ToFullString()); // yield from text equals yield from node
return result;
}
private InterpolationSyntax ParseInterpolation(
string text,
Lexer.Interpolation interpolation,
Lexer.InterpolatedStringKind kind,
SyntaxToken openBraceToken)
{
var (expression, alignment) = getExpressionAndAlignment();
var (format, closeBraceToken) = getFormatAndCloseBrace();
var result = SyntaxFactory.Interpolation(openBraceToken, expression, alignment, format, closeBraceToken);
#if DEBUG
Debug.Assert(text[interpolation.OpenBraceRange.Start..interpolation.CloseBraceRange.End] == result.ToFullString()); // yield from text equals yield from node
#endif
return result;
(ExpressionSyntax expression, InterpolationAlignmentClauseSyntax? alignment) getExpressionAndAlignment()
{
var expression = this.ParseExpressionCore();
if (this.CurrentToken.Kind != SyntaxKind.CommaToken)
{
return (this.ConsumeUnexpectedTokens(expression), alignment: null);
}
var alignment = SyntaxFactory.InterpolationAlignmentClause(
this.EatToken(SyntaxKind.CommaToken),
this.ConsumeUnexpectedTokens(this.ParseExpressionCore()));
return (expression, alignment);
}
(InterpolationFormatClauseSyntax? format, SyntaxToken closeBraceToken) getFormatAndCloseBrace()
{
var leading = this.CurrentToken.GetLeadingTrivia();
if (interpolation.HasColon)
{
var format = SyntaxFactory.InterpolationFormatClause(
SyntaxFactory.Token(leading, SyntaxKind.ColonToken, text[interpolation.ColonRange], trailing: null),
MakeInterpolatedStringTextToken(kind, text[interpolation.ColonRange.End..interpolation.CloseBraceRange.Start]));
return (format, getInterpolationCloseToken(leading: null));
}
else
{
return (format: null, getInterpolationCloseToken(leading));
}
}
SyntaxToken getInterpolationCloseToken(GreenNode? leading)
{
return TokenOrMissingToken(
leading,
SyntaxKind.CloseBraceToken,
text[interpolation.CloseBraceRange],
trailing: null);
}
}
/// <summary>
/// Interpret the given raw text from source as an InterpolatedStringTextToken.
/// </summary>
/// <param name="text">The text for the full string literal, including the quotes and contents</param>
/// <param name="kind">The kind of the interpolated string we were processing</param>
private SyntaxToken MakeInterpolatedStringTextToken(Lexer.InterpolatedStringKind kind, string text)
{
// with a raw string, we don't do any interpretation of the content. Note: removal of indentation is
// handled already in splitContent
if (kind is Lexer.InterpolatedStringKind.SingleLineRaw or Lexer.InterpolatedStringKind.MultiLineRaw)
return SyntaxFactory.Literal(leading: null, text, SyntaxKind.InterpolatedStringTextToken, text, trailing: null);
Debug.Assert(kind is Lexer.InterpolatedStringKind.Normal or Lexer.InterpolatedStringKind.Verbatim);
// For a normal/verbatim piece of content, process the inner content as if it was in a corresponding
// *non*-interpolated string to get the correct meaning of all the escapes/diagnostics within.
var prefix = kind is Lexer.InterpolatedStringKind.Verbatim ? "@\"" : "\"";
var fakeString = prefix + text + "\"";
using var tempLexer = new Lexer(SourceText.From(fakeString), this.Options, allowPreprocessorDirectives: false);
var mode = LexerMode.Syntax;
var token = tempLexer.Lex(ref mode);
Debug.Assert(token.Kind == SyntaxKind.StringLiteralToken);
var result = SyntaxFactory.Literal(leading: null, text, SyntaxKind.InterpolatedStringTextToken, token.ValueText, trailing: null);
if (token.ContainsDiagnostics)
result = result.WithDiagnosticsGreen(MoveDiagnostics(token.GetDiagnostics(), -prefix.Length));
return result;
}
private static DiagnosticInfo[] MoveDiagnostics(DiagnosticInfo[] infos, int offset)
{
Debug.Assert(infos.Length > 0);
var builder = ArrayBuilder<DiagnosticInfo>.GetInstance(infos.Length);
foreach (var info in infos)
{
// This cast should always be safe. We are only moving diagnostics produced on syntax nodes and tokens.
var sd = (SyntaxDiagnosticInfo)info;
builder.Add(sd.WithOffset(sd.Offset + offset));
}
return builder.ToArrayAndFree();
}
}
}
|