|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Immutable;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Editor.Shared.Extensions;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.Text.Shared.Extensions;
using Microsoft.VisualStudio.Text;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Editor.CSharp.StringCopyPaste;
internal static class StringCopyPasteHelpers
{
public static bool HasNewLine(TextLine line)
=> line.Span.End != line.SpanIncludingLineBreak.End;
/// <summary>
/// Gets the character at the requested position, or <c>\0</c> if out of bounds.
/// </summary>
public static char SafeCharAt(SourceText text, int index)
=> index >= 0 && index < text.Length ? text[index] : '\0';
/// <summary>
/// True if the string literal contains an error diagnostic that indicates a parsing problem with it. For
/// interpolated strings, this only includes the text sections, and not any interpolation holes in the literal.
/// </summary>
public static bool ContainsError(ExpressionSyntax stringExpression)
{
if (stringExpression is LiteralExpressionSyntax)
return NodeOrTokenContainsError(stringExpression);
if (stringExpression is InterpolatedStringExpressionSyntax interpolatedString)
{
using var _ = PooledHashSet<Diagnostic>.GetInstance(out var errors);
foreach (var diagnostic in interpolatedString.GetDiagnostics())
{
if (diagnostic.Severity == DiagnosticSeverity.Error)
errors.Add(diagnostic);
}
// we don't care about errors in holes. Only errors in the content portions of the string.
for (int i = 0, n = interpolatedString.Contents.Count; i < n && errors.Count > 0; i++)
{
if (interpolatedString.Contents[i] is InterpolatedStringTextSyntax text)
{
foreach (var diagnostic in text.GetDiagnostics())
errors.Remove(diagnostic);
}
}
return errors.Count > 0;
}
throw ExceptionUtilities.UnexpectedValue(stringExpression);
}
public static bool NodeOrTokenContainsError(SyntaxNodeOrToken nodeOrToken)
{
foreach (var diagnostic in nodeOrToken.GetDiagnostics())
{
if (diagnostic.Severity == DiagnosticSeverity.Error)
return true;
}
return false;
}
public static bool AllWhitespace(INormalizedTextChangeCollection changes)
{
foreach (var change in changes)
{
if (!AllWhitespace(change.NewText))
return false;
}
return true;
}
private static bool AllWhitespace(string text)
{
foreach (var ch in text)
{
if (!SyntaxFacts.IsWhitespace(ch))
return false;
}
return true;
}
/// <summary>
/// Given a TextLine, returns the index (in the SourceText) of the first character of it that is not a
/// Whitespace character. The LineBreak parts of the line are not considered here. If the line is empty/blank
/// (again, not counting LineBreak characters) then -1 is returned.
/// </summary>
public static int GetFirstNonWhitespaceIndex(SourceText text, TextLine line)
{
for (int i = line.Start, n = line.End; i < n; i++)
{
if (!SyntaxFacts.IsWhitespace(text[i]))
return i;
}
return -1;
}
public static bool ContainsControlCharacter(INormalizedTextChangeCollection changes)
{
foreach (var change in changes)
{
if (ContainsControlCharacter(change.NewText))
return true;
}
return false;
}
public static bool ContainsControlCharacter(string newText)
{
foreach (var c in newText)
{
if (char.IsControl(c))
return true;
}
return false;
}
/// <summary>
/// Removes all characters matching <see cref="SyntaxFacts.IsWhitespace(char)"/> from the start of <paramref
/// name="value"/>.
/// </summary>
public static (string whitespace, string contents) ExtractWhitespace(string value)
{
var start = 0;
while (start < value.Length && SyntaxFacts.IsWhitespace(value[start]))
start++;
return (value[..start], value[start..]);
}
public static bool IsVerbatimStringExpression(ExpressionSyntax stringExpression)
=> stringExpression is LiteralExpressionSyntax literalExpression && literalExpression.Token.IsVerbatimStringLiteral() ||
stringExpression is InterpolatedStringExpressionSyntax { StringStartToken.RawKind: (int)SyntaxKind.InterpolatedVerbatimStringStartToken };
public static bool IsAnyRawStringExpression(ExpressionSyntax expression)
=> expression is LiteralExpressionSyntax literal
? IsRawStringLiteral(literal)
: IsRawStringLiteral((InterpolatedStringExpressionSyntax)expression);
public static bool IsAnyMultiLineRawStringExpression(ExpressionSyntax expression)
=> expression is LiteralExpressionSyntax { Token.RawKind: (int)SyntaxKind.MultiLineRawStringLiteralToken } or
InterpolatedStringExpressionSyntax { StringStartToken.RawKind: (int)SyntaxKind.InterpolatedMultiLineRawStringStartToken };
public static bool IsRawStringLiteral(InterpolatedStringExpressionSyntax interpolatedString)
=> interpolatedString.StringStartToken.Kind() is SyntaxKind.InterpolatedSingleLineRawStringStartToken or SyntaxKind.InterpolatedMultiLineRawStringStartToken;
public static bool IsRawStringLiteral(LiteralExpressionSyntax literal)
=> literal.Token.Kind() is SyntaxKind.SingleLineRawStringLiteralToken or SyntaxKind.MultiLineRawStringLiteralToken;
public static int SkipU8Suffix(SourceText text, int end)
{
if (SafeCharAt(text, end - 1) == '8')
end--;
if (SafeCharAt(text, end - 1) is 'u' or 'U')
end--;
return end;
}
/// <summary>
/// Given a section of a document, finds the longest sequence of quote (<c>"</c>) characters in it. Used to
/// determine if a raw string literal needs to grow its delimiters to ensure that the quote sequence will no
/// longer be a problem.
/// </summary>
public static int GetLongestQuoteSequence(SourceText text, TextSpan span)
=> GetLongestCharacterSequence(text, span, '"');
public static int GetLongestOpenBraceSequence(SourceText text, TextSpan span)
=> GetLongestCharacterSequence(text, span, '{');
public static int GetLongestCloseBraceSequence(SourceText text, TextSpan span)
=> GetLongestCharacterSequence(text, span, '}');
/// <summary>
/// Given a section of a document, finds the longest sequence of of a given <paramref name="character"/> in it.
/// Used to determine if a raw string literal needs to grow its delimiters to ensure that the sequence
/// will no longer be a problem.
/// </summary>
private static int GetLongestCharacterSequence(SourceText text, TextSpan span, char character)
{
var longestCount = 0;
for (int currentIndex = span.Start, contentEnd = span.End; currentIndex < contentEnd;)
{
if (text[currentIndex] == character)
{
var endQuoteIndex = currentIndex;
while (endQuoteIndex < contentEnd && text[endQuoteIndex] == character)
endQuoteIndex++;
longestCount = Math.Max(longestCount, endQuoteIndex - currentIndex);
currentIndex = endQuoteIndex;
}
else
{
currentIndex++;
}
}
return longestCount;
}
/// <summary>
/// Given a set of selections, finds the innermost string-literal/interpolation that they are all contained in.
/// If no such literal/interpolation exists, this returns null.
/// </summary>
public static ExpressionSyntax? FindCommonContainingStringExpression(
SyntaxNode root, NormalizedSnapshotSpanCollection selectionsBeforePaste)
{
ExpressionSyntax? expression = null;
foreach (var snapshotSpan in selectionsBeforePaste)
{
var container = FindContainingSupportedStringExpression(root, snapshotSpan.Start.Position);
if (container == null)
return null;
expression ??= container;
if (expression != container)
return null;
}
return expression;
}
public static ExpressionSyntax? FindContainingSupportedStringExpression(SyntaxNode root, int position)
{
var node = root.FindToken(position).Parent;
for (var current = node; current != null; current = current.Parent)
{
if (current is LiteralExpressionSyntax literalExpression)
return IsSupportedStringExpression(literalExpression) ? literalExpression : null;
if (current is InterpolatedStringExpressionSyntax interpolatedString)
return IsSupportedStringExpression(interpolatedString) ? interpolatedString : null;
}
return null;
}
public static bool IsSupportedStringExpression(ExpressionSyntax expression)
{
// When new string forms are added, support for them can be introduced here. However, by checking the exact
// types of strings supported, downstream code can know exactly what forms they should be looking for and
// that nothing else may flow down to them.
if (expression is LiteralExpressionSyntax
{
RawKind: (int)SyntaxKind.StringLiteralExpression,
Token.RawKind: (int)SyntaxKind.StringLiteralToken or
(int)SyntaxKind.SingleLineRawStringLiteralToken or
(int)SyntaxKind.MultiLineRawStringLiteralToken,
})
{
return true;
}
if (expression is InterpolatedStringExpressionSyntax
{
StringStartToken.RawKind: (int)SyntaxKind.InterpolatedStringStartToken or
(int)SyntaxKind.InterpolatedVerbatimStringStartToken or
(int)SyntaxKind.InterpolatedSingleLineRawStringStartToken or
(int)SyntaxKind.InterpolatedMultiLineRawStringStartToken,
})
{
return true;
}
return false;
}
public static string EscapeForNonRawStringLiteral_DoNotCallDirectly(bool isVerbatim, bool isInterpolated, bool trySkipExistingEscapes, string value)
{
if (isVerbatim)
return EscapeForNonRawVerbatimStringLiteral(isInterpolated, trySkipExistingEscapes, value);
// Standard strings have a much larger set of cases to consider.
using var _ = PooledStringBuilder.GetInstance(out var builder);
// taken from object-display
for (var i = 0; i < value.Length; i++)
{
var ch = value[i];
var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.Surrogate)
{
var category = CharUnicodeInfo.GetUnicodeCategory(value, i);
if (category == UnicodeCategory.Surrogate)
{
// an unpaired surrogate
builder.Append("\\u" + ((int)ch).ToString("x4"));
}
else if (NeedsEscaping(category))
{
// a surrogate pair that needs to be escaped
var unicode = char.ConvertToUtf32(value, i);
builder.Append("\\U" + unicode.ToString("x8"));
i++; // skip the already-encoded second surrogate of the pair
}
else
{
// copy a printable surrogate pair directly
builder.Append(ch);
builder.Append(value[++i]);
}
}
else if (TryReplaceChar(ch, out var replaceWith))
{
builder.Append(replaceWith);
}
else
{
builder.Append(ch);
// if we see a special character then skip the following one if the following one already escapes it.
// Otherwise, if it's not already escaped, then escape it.
if (isInterpolated && ch is '{' or '}')
{
if (trySkipExistingEscapes && nextCh == ch)
i++;
else
builder.Append(ch);
}
}
}
return builder.ToString();
static bool TryReplaceChar(char c, [NotNullWhen(true)] out string? replaceWith)
{
replaceWith = null;
switch (c)
{
case '\\':
replaceWith = "\\\\";
break;
case '\0':
replaceWith = "\\0";
break;
case '\a':
replaceWith = "\\a";
break;
case '\b':
replaceWith = "\\b";
break;
case '\f':
replaceWith = "\\f";
break;
case '\n':
replaceWith = "\\n";
break;
case '\r':
replaceWith = "\\r";
break;
case '\t':
replaceWith = "\\t";
break;
case '\v':
replaceWith = "\\v";
break;
case '"':
replaceWith = "\\\"";
break;
}
if (replaceWith != null)
return true;
if (NeedsEscaping(CharUnicodeInfo.GetUnicodeCategory(c)))
{
replaceWith = "\\u" + ((int)c).ToString("x4");
return true;
}
return false;
}
static bool NeedsEscaping(UnicodeCategory category)
{
switch (category)
{
case UnicodeCategory.Control:
case UnicodeCategory.OtherNotAssigned:
case UnicodeCategory.ParagraphSeparator:
case UnicodeCategory.LineSeparator:
case UnicodeCategory.Surrogate:
return true;
default:
return false;
}
}
}
private static string EscapeForNonRawVerbatimStringLiteral(bool isInterpolated, bool trySkipExistingEscapes, string value)
{
using var _ = PooledStringBuilder.GetInstance(out var builder);
// First, go through and see if we're escaping *anything* in the original. If so, then we'll escape
// everything. In other words, say we're pasting `[SuppressMessage("", "CA2013")]`. We technically don't
// need to escape the `""` (since that is legal in a verbatim string). However, we will be escaping the
// quotes in teh `"CA2013"` to become `""CA2013""`. Once we decide we're escaping some quotes, we should
// then realize that we *should* escape the `""` to `""""` to be consistent.
// So if we determine that we will be escaping all code, then just recurse, this time setting
// trySkipExistingEscapes to false. That will prevent calling back into this check and it means whatever we
// run into we will escape.
if (trySkipExistingEscapes && WillEscapeAnyCharacters(isInterpolated, value))
return EscapeForNonRawVerbatimStringLiteral(isInterpolated, trySkipExistingEscapes: false, value);
for (var i = 0; i < value.Length; i++)
{
var ch = value[i];
var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
builder.Append(ch);
// if we see a special character then skip the following one if the following one already escapes it.
// Otherwise, if it's not already escaped, then escape it.
if (ch == '"')
{
builder.Append(ch);
if (trySkipExistingEscapes && nextCh == ch)
i++;
}
else if (isInterpolated && ch is '{' or '}')
{
builder.Append(ch);
if (trySkipExistingEscapes && nextCh == ch)
i++;
}
}
return builder.ToString();
static bool WillEscapeAnyCharacters(bool isInterpolated, string value)
{
for (var i = 0; i < value.Length; i++)
{
var ch = value[i];
var nextCh = i == value.Length - 1 ? 0 : value[i + 1];
if (ch == '"')
{
// we have an isolated quote. we will need to escape it (and thus should escape everything in
// the string.
if (nextCh != ch)
return true;
// Quotes are paired. This is already escaped fine. Skip both quotes.
i++;
}
else if (isInterpolated && ch is '{' or '}')
{
// we have an isolated brace. we will need to escape it (and thus should escape everything in
// the string.
if (nextCh != ch)
return true;
// Braces are paired. This is already escaped fine. Skip both braces.
i++;
}
// continue looking forward.
}
return false;
}
}
/// <summary>
/// Given a set of source text lines, determines what common whitespace prefix each line has. Note that this
/// does *not* include the first line as it's super common for someone to copy a set of lines while only
/// starting the selection at the start of the content on the first line. This also does not include empty
/// lines as they're also very common, but are clearly not a way of indicating indentation indent for the normal
/// lines.
/// </summary>
public static string? GetCommonIndentationPrefix(INormalizedTextChangeCollection textChanges)
{
string? commonIndentPrefix = null;
var first = true;
foreach (var change in textChanges)
{
var text = SourceText.From(change.NewText);
foreach (var line in text.Lines)
{
if (first)
{
first = false;
continue;
}
var nonWhitespaceIndex = GetFirstNonWhitespaceIndex(text, line);
if (nonWhitespaceIndex >= 0)
commonIndentPrefix = GetCommonIndentationPrefix(commonIndentPrefix, text, TextSpan.FromBounds(line.Start, nonWhitespaceIndex));
}
}
return commonIndentPrefix;
}
private static string? GetCommonIndentationPrefix(string? commonIndentPrefix, SourceText text, TextSpan lineWhitespaceSpan)
{
// first line with indentation whitespace we're seeing. Just keep track of that.
if (commonIndentPrefix == null)
return text.ToString(lineWhitespaceSpan);
// we have indentation whitespace from a previous line. Figure out the max commonality between it and the
// line we're currently looking at.
var commonPrefixLength = 0;
for (var n = Math.Min(commonIndentPrefix.Length, lineWhitespaceSpan.Length); commonPrefixLength < n; commonPrefixLength++)
{
if (commonIndentPrefix[commonPrefixLength] != text[lineWhitespaceSpan.Start + commonPrefixLength])
break;
}
return commonIndentPrefix[..commonPrefixLength];
}
public static TextSpan MapSpan(TextSpan span, ITextSnapshot from, ITextSnapshot to)
=> from.CreateTrackingSpan(span.ToSpan(), SpanTrackingMode.EdgeInclusive).GetSpan(to).Span.ToTextSpan();
public static bool RawContentMustBeMultiLine(SourceText text, ImmutableArray<TextSpan> spans)
{
Contract.ThrowIfTrue(spans.Length == 0);
// Empty raw string must be multiline.
if (spans is [{ IsEmpty: true }])
return true;
// Or if it starts/ends with a quote
if (spans.First().Length > 0 && text[spans.First().Start] == '"')
return true;
if (spans.Last().Length > 0 && text[spans.Last().End - 1] == '"')
return true;
// or contains a newline
foreach (var span in spans)
{
for (var i = span.Start; i < span.End; i++)
{
if (SyntaxFacts.IsNewLine(text[i]))
return true;
}
}
return false;
}
}
|