// AbstractSpellCheckSpanService.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Classification;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.LanguageService;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.SpellCheck;
 
/// <summary>
/// Base implementation of <see cref="ISpellCheckSpanService"/> that walks a document's syntax tree and collects
/// the spans of identifiers, string literals and comments that should be handed to a spell checker.
/// </summary>
/// <param name="escapeCharacter">
/// The character that introduces an escape sequence inside a string literal, or <see langword="null"/> if string
/// tokens never need to be searched for escapes (in which case every string is reported as a single span).
/// </param>
internal abstract class AbstractSpellCheckSpanService(char? escapeCharacter) : ISpellCheckSpanService
{
    private readonly char? _escapeCharacter = escapeCharacter;

    /// <summary>
    /// Computes all spell-checkable spans in <paramref name="document"/>.
    /// </summary>
    /// <param name="document">The document whose syntax root is walked.</param>
    /// <param name="cancellationToken">Token observed while fetching the root and while walking the tree.</param>
    /// <returns>The non-empty spans found, in the order they were encountered during the walk.</returns>
    public async Task<ImmutableArray<SpellCheckSpan>> GetSpansAsync(Document document, CancellationToken cancellationToken)
    {
        var root = await document.GetRequiredSyntaxRootAsync(cancellationToken).ConfigureAwait(false);
        return GetSpans();

        // Broken into its own method as it uses a ref-struct, which isn't allowed with the async call above.
        ImmutableArray<SpellCheckSpan> GetSpans()
        {
            var syntaxFacts = document.GetRequiredLanguageService<ISyntaxFactsService>();
            var classifier = document.GetRequiredLanguageService<ISyntaxClassificationService>();
            var virtualCharService = document.GetRequiredLanguageService<IVirtualCharLanguageService>();

            using var _ = ArrayBuilder<SpellCheckSpan>.GetInstance(out var spans);

            var worker = new Worker(this, syntaxFacts, classifier, virtualCharService, spans);
            worker.Recurse(root, cancellationToken);

            return spans.ToImmutableAndClear();
        }
    }

    /// <summary>
    /// Performs the actual tree walk, appending every span it finds to the builder it was constructed with.
    /// </summary>
    private readonly ref struct Worker(
        AbstractSpellCheckSpanService spellCheckSpanService,
        ISyntaxFactsService syntaxFacts,
        ISyntaxClassificationService classifier,
        IVirtualCharLanguageService virtualCharService,
        ArrayBuilder<SpellCheckSpan> spans)
    {
        private readonly AbstractSpellCheckSpanService _spellCheckSpanService = spellCheckSpanService;
        private readonly ISyntaxFactsService _syntaxFacts = syntaxFacts;
        private readonly ISyntaxKinds _syntaxKinds = syntaxFacts.SyntaxKinds;
        private readonly ISyntaxClassificationService _classifier = classifier;
        private readonly IVirtualCharLanguageService _virtualCharService = virtualCharService;
        private readonly ArrayBuilder<SpellCheckSpan> _spans = spans;

        /// <summary>
        /// Records <paramref name="span"/>, silently dropping it when its text span is empty.
        /// </summary>
        private void AddSpan(SpellCheckSpan span)
        {
            if (span.TextSpan.Length > 0)
                _spans.Add(span);
        }

        /// <summary>
        /// Walks <paramref name="root"/> and all of its descendants using an explicit stack (rather than
        /// recursion), processing every token that is reached.
        /// </summary>
        /// <param name="root">The node to start walking from.</param>
        /// <param name="cancellationToken">Checked before each node or token is visited.</param>
        public void Recurse(SyntaxNode root, CancellationToken cancellationToken)
        {
            using var _ = ArrayBuilder<SyntaxNodeOrToken>.GetInstance(out var stack);
            stack.Push(root);

            while (stack.TryPop(out var current))
            {
                // Check on every iteration so that walking a very large tree can be abandoned promptly.  The
                // stack always starts with the root, so this also covers a token that is cancelled on entry.
                cancellationToken.ThrowIfCancellationRequested();

                if (current.IsToken)
                {
                    ProcessToken(current.AsToken(), cancellationToken);
                }
                else if (current.IsNode)
                {
                    // Push children in reverse so that they are popped (and thus processed) in source order.
                    foreach (var child in current.ChildNodesAndTokens().Reverse())
                        stack.Push(child);
                }
            }
        }

        /// <summary>
        /// Processes the leading trivia, the token itself (if it is a string-like or identifier token), and then
        /// the trailing trivia of <paramref name="token"/>.
        /// </summary>
        private void ProcessToken(
            SyntaxToken token,
            CancellationToken cancellationToken)
        {
            ProcessTriviaList(token.LeadingTrivia, cancellationToken);

            if (_syntaxFacts.IsStringLiteral(token))
            {
                // Verbatim strings are treated as not containing escapes.
                AddStringSpans(token, canContainEscapes: !_syntaxFacts.IsVerbatimStringLiteral(token));
            }
            else if (
                token.RawKind == _syntaxKinds.SingleLineRawStringLiteralToken ||
                token.RawKind == _syntaxKinds.MultiLineRawStringLiteralToken)
            {
                AddStringSpans(token, canContainEscapes: false);
            }
            else if (token.RawKind == _syntaxKinds.InterpolatedStringTextToken &&
                     token.Parent?.RawKind == _syntaxKinds.InterpolatedStringText)
            {
                // Whether the text section can contain escapes is decided by the interpolated string expression
                // that contains it.
                AddStringSpans(token, canContainEscapes: !_syntaxFacts.IsVerbatimInterpolatedStringExpression(token.Parent.Parent));
            }
            else if (token.RawKind == _syntaxKinds.IdentifierToken)
            {
                TryAddSpanForIdentifier(token);
            }

            ProcessTriviaList(token.TrailingTrivia, cancellationToken);
        }

        /// <summary>
        /// Adds the span(s) to spell check for a string-like token.
        /// </summary>
        /// <param name="token">The string token to examine.</param>
        /// <param name="canContainEscapes">
        /// <see langword="false"/> when the caller knows this kind of string is not searched for escape sequences,
        /// in which case the whole token is always reported as one span.
        /// </param>
        private void AddStringSpans(SyntaxToken token, bool canContainEscapes)
        {
            // Don't bother with strings that are in error.  This is both because we can't properly break them into
            // pieces, and also because a string in error often may be grabbing more of the file than intended, and
            // we don't want to start spell checking normal code that is caught up in the middle of being edited.
            if (token.ContainsDiagnostics)
                return;

            // First, see if there's actually the presence of an escape character in the string token.  If not, we
            // can just provide the entire string as-is to the caller to spell check since there's no escapes for
            // them to be confused by.
            //
            // Note: .Text on a string token is non-allocating.  It is captured at the time of token creation and
            // held by the token.
            if (canContainEscapes &&
                _spellCheckSpanService._escapeCharacter is char escapeChar &&
                token.Text.AsSpan().IndexOf(escapeChar) >= 0)
            {
                AddStringSubSpans(token);
            }
            else
            {
                // Just add the full string span as is and let the client handle it.
                AddSpan(new SpellCheckSpan(token.Span, SpellCheckKind.String));
            }
        }

        /// <summary>
        /// Breaks a string token that contains an escape character into runs of word characters (optionally
        /// separated by plain spaces) and reports each run that does not itself contain an escaped character.
        /// Nothing is reported if the token cannot be converted to virtual characters.
        /// </summary>
        private void AddStringSubSpans(SyntaxToken token)
        {
            var virtualChars = _virtualCharService.TryConvertToVirtualChars(token);
            if (virtualChars.IsDefaultOrEmpty)
                return;

            // find the sequences of letters in a row that should be spell checked. if any part of that sequence is
            // an escaped character (like `\u0065`) then filter that out.  The platform won't be able to understand
            // this word and will report bogus spell checking mistakes.
            var currentCharIndex = 0;
            while (currentCharIndex < virtualChars.Length)
            {
                var currentChar = virtualChars[currentCharIndex];
                if (!IsWordPart(currentChar))
                {
                    // Skip anything that cannot start a word.
                    currentCharIndex++;
                    continue;
                }

                var spanStart = currentChar.Span.Start;
                var spanEnd = currentChar.Span.End;

                // Extend the run for as long as we keep seeing word characters or plain spaces.  The first
                // iteration re-examines currentChar, so it is included in the escape check below.
                var seenEscape = false;
                while (currentCharIndex < virtualChars.Length)
                {
                    var endChar = virtualChars[currentCharIndex];
                    if (IsWordPart(endChar))
                    {
                        // A word character that occupies more than one actual character in the source is
                        // treated as having been written as an escape sequence.
                        seenEscape = seenEscape || endChar.Span.Length > 1;
                        spanEnd = endChar.Span.End;
                        currentCharIndex++;
                    }
                    else if (endChar == ' ' && endChar.Span.Length == 1)
                    {
                        // Consume a regular space (common between words) to keep the number of spans we report low.
                        spanEnd = endChar.Span.End;
                        currentCharIndex++;
                    }
                    else
                    {
                        break;
                    }
                }

                // A run containing an escaped word character is dropped in its entirety.
                if (!seenEscape)
                    AddSpan(new SpellCheckSpan(TextSpan.FromBounds(spanStart, spanEnd), SpellCheckKind.String));
            }

            return;

            // Determines whether 'ch' can be part of a word that should be spell checked.
            static bool IsWordPart(VirtualChar ch)
            {
                if (ch.IsLetter)
                    return true;

                // Add more cases here as necessary.
                return ch.Value switch
                {
                    // Apostrophe is a totally reasonable word character (for example, in an abbreviation).
                    '\'' => true,
                    _ => false,
                };
            }
        }

        /// <summary>
        /// Reports <paramref name="token"/> as an identifier span if the syntactic classifier assigns it one of
        /// the name classifications listed below; otherwise does nothing.
        /// </summary>
        private void TryAddSpanForIdentifier(SyntaxToken token)
        {
            // Leverage syntactic classification which already has to determine if an identifier token is the name of
            // some construct.
            var classification = _classifier.GetSyntacticClassificationForIdentifier(token);
            switch (classification)
            {
                case ClassificationTypeNames.ClassName:
                case ClassificationTypeNames.RecordClassName:
                case ClassificationTypeNames.DelegateName:
                case ClassificationTypeNames.EnumName:
                case ClassificationTypeNames.InterfaceName:
                case ClassificationTypeNames.ModuleName:
                case ClassificationTypeNames.StructName:
                case ClassificationTypeNames.RecordStructName:
                case ClassificationTypeNames.TypeParameterName:
                case ClassificationTypeNames.FieldName:
                case ClassificationTypeNames.EnumMemberName:
                case ClassificationTypeNames.ConstantName:
                case ClassificationTypeNames.LocalName:
                case ClassificationTypeNames.ParameterName:
                case ClassificationTypeNames.MethodName:
                case ClassificationTypeNames.ExtensionMethodName:
                case ClassificationTypeNames.PropertyName:
                case ClassificationTypeNames.EventName:
                case ClassificationTypeNames.NamespaceName:
                case ClassificationTypeNames.LabelName:
                    AddSpan(new SpellCheckSpan(token.Span, SpellCheckKind.Identifier));
                    break;
            }
        }

        /// <summary>
        /// Processes each trivia in <paramref name="triviaList"/> in order.
        /// </summary>
        private void ProcessTriviaList(SyntaxTriviaList triviaList, CancellationToken cancellationToken)
        {
            foreach (var trivia in triviaList)
                ProcessTrivia(trivia, cancellationToken);
        }

        /// <summary>
        /// Reports a regular comment as a single comment span, or walks into the structure of a documentation
        /// comment.  All other trivia is ignored.
        /// </summary>
        private void ProcessTrivia(SyntaxTrivia trivia, CancellationToken cancellationToken)
        {
            if (_syntaxFacts.IsRegularComment(trivia))
            {
                AddSpan(new SpellCheckSpan(trivia.Span, SpellCheckKind.Comment));
            }
            else if (_syntaxFacts.IsDocumentationComment(trivia))
            {
                ProcessDocComment(trivia.GetStructure()!, cancellationToken);
            }
        }

        /// <summary>
        /// Walks the structured documentation comment rooted at <paramref name="node"/> using an explicit stack
        /// and reports every XML text literal token in it as a comment span.
        /// </summary>
        /// <param name="node">The root of the documentation comment's structure.</param>
        /// <param name="cancellationToken">Checked before each node or token is visited.</param>
        private void ProcessDocComment(SyntaxNode node, CancellationToken cancellationToken)
        {
            using var _ = ArrayBuilder<SyntaxNodeOrToken>.GetInstance(out var stack);
            stack.Push(node);

            while (stack.TryPop(out var current))
            {
                // Checked per iteration so a large doc comment does not delay cancellation.
                cancellationToken.ThrowIfCancellationRequested();

                if (current.IsToken)
                {
                    var token = current.AsToken();
                    if (token.RawKind == _syntaxKinds.XmlTextLiteralToken)
                        AddSpan(new SpellCheckSpan(token.Span, SpellCheckKind.Comment));
                }
                else if (current.IsNode)
                {
                    // Push children in reverse so that they are popped (and thus processed) in source order.
                    foreach (var child in current.ChildNodesAndTokens().Reverse())
                        stack.Push(child);
                }
            }
        }
    }
}
// File: SpellCheck\AbstractSpellCheckSpanService.cs
// Project: src\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)