File: Handler\SemanticTokens\SemanticTokensHelpers.cs
Web Access
Project: src\src\LanguageServer\Protocol\Microsoft.CodeAnalysis.LanguageServer.Protocol.csproj (Microsoft.CodeAnalysis.LanguageServer.Protocol)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Classification;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
using LSP = Roslyn.LanguageServer.Protocol;
 
namespace Microsoft.CodeAnalysis.LanguageServer.Handler.SemanticTokens
{
    internal static class SemanticTokensHelpers
    {
        private static readonly ObjectPool<List<int>> s_tokenListPool = new ObjectPool<List<int>>(() => new List<int>(capacity: 1000));
 
        internal static async Task<int[]> HandleRequestHelperAsync(
            IGlobalOptionService globalOptions,
            SemanticTokensRefreshQueue semanticTokensRefreshQueue,
            LSP.Range[] ranges,
            RequestContext context,
            CancellationToken cancellationToken)
        {
            if (ranges.Length == 0)
            {
                return [];
            }
 
            var contextDocument = context.GetRequiredDocument();
            var project = contextDocument.Project;
            var options = globalOptions.GetClassificationOptions(project.Language);
            var supportsVisualStudioExtensions = context.GetRequiredClientCapabilities().HasVisualStudioLspCapability();
 
            var spans = new FixedSizeArrayBuilder<LinePositionSpan>(ranges.Length);
            foreach (var range in ranges)
                spans.Add(ProtocolConversions.RangeToLinePositionSpan(range));
 
            var tokensData = await HandleRequestHelperAsync(contextDocument, spans.MoveToImmutable(), supportsVisualStudioExtensions, options, cancellationToken).ConfigureAwait(false);
 
            // The above call to get semantic tokens may be inaccurate (because we use frozen partial semantics).  Kick
            // off a request to ensure that the OOP side gets a fully up to compilation for this project.  Once it does
            // we can optionally choose to notify our caller to do a refresh if we computed a compilation for a new
            // solution snapshot.
            await semanticTokensRefreshQueue.TryEnqueueRefreshComputationAsync(project, cancellationToken).ConfigureAwait(false);
            return tokensData;
        }
 
        public static async Task<int[]> HandleRequestHelperAsync(Document document, ImmutableArray<LinePositionSpan> spans, bool supportsVisualStudioExtensions, ClassificationOptions options, CancellationToken cancellationToken)
        {
            // If the full compilation is not yet available, we'll try getting a partial one. It may contain inaccurate
            // results but will speed up how quickly we can respond to the client's request.
            document = document.WithFrozenPartialSemantics(cancellationToken);
            options = options with { FrozenPartialSemantics = true };
 
            // The results from the range handler should not be cached since we don't want to cache
            // partial token results. In addition, a range request is only ever called with a whole
            // document request, so caching range results is unnecessary since the whole document
            // handler will cache the results anyway.
            return await ComputeSemanticTokensDataAsync(
                document,
                spans,
                supportsVisualStudioExtensions,
                options,
                cancellationToken).ConfigureAwait(false);
        }
 
        /// <summary>
        /// Returns the semantic tokens data for a given document with an optional ranges.
        /// </summary>
        /// <param name="spans">Spans to compute tokens for. If empty, the whole document will be used.</param>
        public static async Task<int[]> ComputeSemanticTokensDataAsync(
            Document document,
            ImmutableArray<LinePositionSpan> spans,
            bool supportsVisualStudioExtensions,
            ClassificationOptions options,
            CancellationToken cancellationToken)
        {
            var tokenTypesToIndex = SemanticTokensSchema.GetSchema(supportsVisualStudioExtensions).TokenTypeToIndex;
            var root = await document.GetRequiredSyntaxRootAsync(cancellationToken).ConfigureAwait(false);
            var text = await document.GetValueTextAsync(cancellationToken).ConfigureAwait(false);
            using var _1 = Classifier.GetPooledList(out var classifiedSpans);
            using var _2 = Classifier.GetPooledList(out var updatedClassifiedSpans);
 
            // We either calculate the tokens for the full document span, or the user
            // can pass in a range from the full document if they wish.
            ImmutableArray<TextSpan> textSpans;
            if (spans.Length == 0)
            {
                textSpans = [root.FullSpan];
            }
            else
            {
                var textSpansBuilder = new FixedSizeArrayBuilder<TextSpan>(spans.Length);
                foreach (var span in spans)
                    textSpansBuilder.Add(text.Lines.GetTextSpan(span));
 
                textSpans = textSpansBuilder.MoveToImmutable();
            }
 
            await GetClassifiedSpansForDocumentAsync(
                classifiedSpans, document, textSpans, options, cancellationToken).ConfigureAwait(false);
 
            // Classified spans are not guaranteed to be returned in a certain order so we sort them to be safe.
            classifiedSpans.Sort(ClassifiedSpanComparer.Instance);
 
            // Multi-line tokens are not supported by VS (tracked by https://devdiv.visualstudio.com/DevDiv/_workitems/edit/1265495).
            // Roslyn's classifier however can return multi-line classified spans, so we must break these up into single-line spans.
            ConvertMultiLineToSingleLineSpans(text, classifiedSpans, updatedClassifiedSpans);
 
            // TO-DO: We should implement support for streaming if LSP adds support for it:
            // https://devdiv.visualstudio.com/DevDiv/_workitems/edit/1276300
            return ComputeTokens(text.Lines, updatedClassifiedSpans, supportsVisualStudioExtensions, tokenTypesToIndex);
        }
 
        private static async Task GetClassifiedSpansForDocumentAsync(
            SegmentedList<ClassifiedSpan> classifiedSpans,
            Document document,
            ImmutableArray<TextSpan> textSpans,
            ClassificationOptions options,
            CancellationToken cancellationToken)
        {
            var classificationService = document.GetRequiredLanguageService<IClassificationService>();
 
            // We always return both syntactic and semantic classifications.  If there is a syntactic classifier running on the client
            // then the semantic token classifications will override them.
 
            // `includeAdditiveSpans` will add token modifiers such as 'static', which we want to include in LSP.
            var spans = await ClassifierHelper.GetClassifiedSpansAsync(
                document, textSpans, options, includeAdditiveSpans: true, cancellationToken).ConfigureAwait(false);
 
            // The spans returned to us may include some empty spans, which we don't care about. We also don't care
            // about the 'text' classification.  It's added for everything between real classifications (including
            // whitespace), and just means 'don't classify this'.  No need for us to actually include that in
            // semantic tokens as it just wastes space in the result.
            var nonEmptySpans = spans.Where(s => !s.TextSpan.IsEmpty && s.ClassificationType != ClassificationTypeNames.Text);
            classifiedSpans.AddRange(nonEmptySpans);
        }
 
        private static void ConvertMultiLineToSingleLineSpans(SourceText text, SegmentedList<ClassifiedSpan> classifiedSpans, SegmentedList<ClassifiedSpan> updatedClassifiedSpans)
        {
 
            for (var spanIndex = 0; spanIndex < classifiedSpans.Count; spanIndex++)
            {
                var span = classifiedSpans[spanIndex];
                text.GetLinesAndOffsets(span.TextSpan, out var startLine, out var startOffset, out var endLine, out var endOffSet);
 
                // If the start and end of the classified span are not on the same line, we're dealing with a multi-line span.
                // Since VS doesn't support multi-line spans/tokens, we need to break the span up into single-line spans.
                if (startLine != endLine)
                {
                    ConvertToSingleLineSpan(
                        text, classifiedSpans, updatedClassifiedSpans, ref spanIndex, span.ClassificationType,
                        startLine, startOffset, endLine, endOffSet);
                }
                else
                {
                    // This is already a single-line span, so no modification is necessary.
                    updatedClassifiedSpans.Add(span);
                }
            }
 
            static void ConvertToSingleLineSpan(
                SourceText text,
                SegmentedList<ClassifiedSpan> originalClassifiedSpans,
                SegmentedList<ClassifiedSpan> updatedClassifiedSpans,
                ref int spanIndex,
                string classificationType,
                int startLine,
                int startOffset,
                int endLine,
                int endOffSet)
            {
                var numLinesInSpan = endLine - startLine + 1;
                Contract.ThrowIfTrue(numLinesInSpan < 1);
 
                for (var currentLine = 0; currentLine < numLinesInSpan; currentLine++)
                {
                    TextSpan textSpan;
                    var line = text.Lines[startLine + currentLine];
 
                    // Case 1: First line of span
                    if (currentLine == 0)
                    {
                        var absoluteStart = line.Start + startOffset;
 
                        // This start could be past the regular end of the line if it's within the newline character if we have a CRLF newline. In that case, just skip emitting a span for the LF.
                        // One example where this could happen is an embedded regular expression that we're classifying; regular expression comments contained within a multi-line string
                        // contain the carriage return but not the linefeed, so the linefeed could be the start of the next classification.
                        textSpan = TextSpan.FromBounds(Math.Min(absoluteStart, line.End), line.End);
                    }
                    // Case 2: Any of the span's middle lines
                    else if (currentLine != numLinesInSpan - 1)
                    {
                        textSpan = line.Span;
                    }
                    // Case 3: Last line of span
                    else
                    {
                        textSpan = new TextSpan(line.Start, endOffSet);
                    }
 
                    // Omit 0-length spans created in this fashion.
                    if (textSpan.Length > 0)
                    {
                        var updatedClassifiedSpan = new ClassifiedSpan(textSpan, classificationType);
                        updatedClassifiedSpans.Add(updatedClassifiedSpan);
                    }
 
                    // Since spans are expected to be ordered, when breaking up a multi-line span, we may have to insert
                    // other spans in-between. For example, we may encounter this case when breaking up a multi-line verbatim
                    // string literal containing escape characters:
                    //     var x = @"one ""
                    //               two";
                    // The check below ensures we correctly return the spans in the correct order, i.e. 'one', '""', 'two'.
                    while (spanIndex + 1 < originalClassifiedSpans.Count &&
                        textSpan.Contains(originalClassifiedSpans[spanIndex + 1].TextSpan))
                    {
                        updatedClassifiedSpans.Add(originalClassifiedSpans[spanIndex + 1]);
                        spanIndex++;
                    }
                }
            }
        }
 
        private static int[] ComputeTokens(
            TextLineCollection lines,
            SegmentedList<ClassifiedSpan> classifiedSpans,
            bool supportsVisualStudioExtensions,
            IReadOnlyDictionary<string, int> tokenTypesToIndex)
        {
            // We keep track of the last line number and last start character since tokens are
            // reported relative to each other.
            var lastLineNumber = 0;
            var lastStartCharacter = 0;
 
            var tokenTypeMap = SemanticTokensSchema.GetSchema(supportsVisualStudioExtensions).TokenTypeMap;
 
            using var pooledData = s_tokenListPool.GetPooledObject();
            var data = pooledData.Object;
 
            // Items in the pool may not have been cleared
            data.Clear();
 
            for (var currentClassifiedSpanIndex = 0; currentClassifiedSpanIndex < classifiedSpans.Count; currentClassifiedSpanIndex++)
            {
                currentClassifiedSpanIndex = ComputeNextToken(
                    lines, ref lastLineNumber, ref lastStartCharacter, classifiedSpans,
                    currentClassifiedSpanIndex, tokenTypeMap, tokenTypesToIndex,
                    out var deltaLine, out var startCharacterDelta, out var tokenLength,
                    out var tokenType, out var tokenModifiers);
 
                data.Add(deltaLine);
                data.Add(startCharacterDelta);
                data.Add(tokenLength);
                data.Add(tokenType);
                data.Add(tokenModifiers);
            }
 
            return [.. data];
        }
 
        private static int ComputeNextToken(
            TextLineCollection lines,
            ref int lastLineNumber,
            ref int lastStartCharacter,
            SegmentedList<ClassifiedSpan> classifiedSpans,
            int currentClassifiedSpanIndex,
            IReadOnlyDictionary<string, string> tokenTypeMap,
            IReadOnlyDictionary<string, int> tokenTypesToIndex,
            out int deltaLineOut,
            out int startCharacterDeltaOut,
            out int tokenLengthOut,
            out int tokenTypeOut,
            out int tokenModifiersOut)
        {
            // Each semantic token is represented in LSP by five numbers:
            //     1. Token line number delta, relative to the previous token
            //     2. Token start character delta, relative to the previous token
            //     3. Token length
            //     4. Token type (index) - looked up in SemanticTokensLegend.tokenTypes
            //     5. Token modifiers - each set bit will be looked up in SemanticTokensLegend.tokenModifiers
 
            var classifiedSpan = classifiedSpans[currentClassifiedSpanIndex];
            var originalTextSpan = classifiedSpan.TextSpan;
            var linePosition = lines.GetLinePositionSpan(originalTextSpan).Start;
            var lineNumber = linePosition.Line;
 
            // 1. Token line number delta, relative to the previous token
            var deltaLine = lineNumber - lastLineNumber;
            Contract.ThrowIfTrue(deltaLine < 0, $"deltaLine is less than 0: {deltaLine}");
 
            // 2. Token start character delta, relative to the previous token
            // (Relative to 0 or the previous token’s start if they're on the same line)
            var deltaStartCharacter = linePosition.Character;
            if (lastLineNumber == lineNumber)
            {
                deltaStartCharacter -= lastStartCharacter;
            }
 
            lastLineNumber = lineNumber;
            lastStartCharacter = linePosition.Character;
 
            // 3. Token length
            var tokenLength = originalTextSpan.Length;
            Contract.ThrowIfFalse(tokenLength > 0);
 
            // We currently only have one modifier (static). The logic below will need to change in the future if other
            // modifiers are added in the future.
            var modifierBits = TokenModifiers.None;
            var tokenTypeIndex = 0;
 
            // Classified spans with the same text span should be combined into one token.
            while (classifiedSpans[currentClassifiedSpanIndex].TextSpan == originalTextSpan)
            {
                var classificationType = classifiedSpans[currentClassifiedSpanIndex].ClassificationType;
                if (classificationType == ClassificationTypeNames.StaticSymbol)
                {
                    // 4. Token modifiers - each set bit will be looked up in SemanticTokensLegend.tokenModifiers
                    modifierBits |= TokenModifiers.Static;
                }
                else if (classificationType == ClassificationTypeNames.ReassignedVariable)
                {
                    // 5. Token modifiers - each set bit will be looked up in SemanticTokensLegend.tokenModifiers
                    modifierBits |= TokenModifiers.ReassignedVariable;
                }
                else if (classificationType == ClassificationTypeNames.ObsoleteSymbol)
                {
                    // 6. Token modifiers - each set bit will be looked up in SemanticTokensLegend.tokenModifiers
                    modifierBits |= TokenModifiers.Deprecated;
                }
                else if (classificationType == ClassificationTypeNames.TestCode)
                {
                    // Skip additive types that are not being converted to token modifiers.
                }
                else
                {
                    // 7. Token type - looked up in SemanticTokensLegend.tokenTypes (language server defined mapping
                    // from integer to LSP token types).
                    tokenTypeIndex = GetTokenTypeIndex(classificationType);
                }
 
                // Break out of the loop if we have no more classified spans left, or if the next classified span has
                // a different text span than our current text span.
                if (currentClassifiedSpanIndex + 1 >= classifiedSpans.Count || classifiedSpans[currentClassifiedSpanIndex + 1].TextSpan != originalTextSpan)
                {
                    break;
                }
 
                currentClassifiedSpanIndex++;
            }
 
            deltaLineOut = deltaLine;
            startCharacterDeltaOut = deltaStartCharacter;
            tokenLengthOut = tokenLength;
            tokenTypeOut = tokenTypeIndex;
            tokenModifiersOut = (int)modifierBits;
 
            return currentClassifiedSpanIndex;
 
            int GetTokenTypeIndex(string classificationType)
            {
                if (!tokenTypeMap.TryGetValue(classificationType, out var tokenTypeStr))
                {
                    tokenTypeStr = classificationType;
                }
 
                Contract.ThrowIfFalse(tokenTypesToIndex.TryGetValue(tokenTypeStr, out var tokenTypeIndex), "No matching token type index found.");
                return tokenTypeIndex;
            }
        }
 
        private class ClassifiedSpanComparer : IComparer<ClassifiedSpan>
        {
            public static readonly ClassifiedSpanComparer Instance = new();
 
            public int Compare(ClassifiedSpan x, ClassifiedSpan y) => x.TextSpan.CompareTo(y.TextSpan);
        }
    }
}