File: Classification\ClassifierHelper.cs
Web Access
Project: src\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;
using Microsoft.CodeAnalysis.Utilities;
 
namespace Microsoft.CodeAnalysis.Classification;
 
internal static partial class ClassifierHelper
{
    /// <summary>
    /// Classifies the provided <paramref name="span"/> in the given <paramref name="document"/>. This will do this
    /// using an appropriate <see cref="IClassificationService"/> if that can be found.  <see
    /// cref="ImmutableArray{T}.IsDefault"/> will be returned if this fails.
    /// </summary>
    /// <param name="includeAdditiveSpans">Whether or not 'additive' classification spans are included in the
    /// results or not.  'Additive' spans are things like 'this variable is static' or 'this variable is
    /// overwritten'.  i.e. they add additional information to a previous classification.</param>
    public static async Task<ImmutableArray<ClassifiedSpan>> GetClassifiedSpansAsync(
        Document document,
        TextSpan span,
        ClassificationOptions options,
        bool includeAdditiveSpans,
        CancellationToken cancellationToken)
    {
        return await GetClassifiedSpansAsync(document, [span], options, includeAdditiveSpans, cancellationToken)
            .ConfigureAwait(false);
    }
 
    /// <summary>
    /// Classifies the provided <paramref name="spans"/> in the given <paramref name="document"/>. This will do this
    /// using an appropriate <see cref="IClassificationService"/> if that can be found.  <see
    /// cref="ImmutableArray{T}.IsDefault"/> will be returned if this fails.
    /// </summary>
    /// <param name="document">the current document.</param>
    /// <param name="spans">The non-intersecting portions of the document to get classified spans for.</param>
    /// <param name="options">The options to use when getting classified spans.</param>
    /// <param name="includeAdditiveSpans">Whether or not 'additive' classification spans are included in the
    /// results or not.  'Additive' spans are things like 'this variable is static' or 'this variable is
    /// overwritten'.  i.e. they add additional information to a previous classification.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    public static async Task<ImmutableArray<ClassifiedSpan>> GetClassifiedSpansAsync(
        Document document,
        ImmutableArray<TextSpan> spans,
        ClassificationOptions options,
        bool includeAdditiveSpans,
        CancellationToken cancellationToken)
    {
        var classificationService = document.GetLanguageService<IClassificationService>();
        if (classificationService == null)
            return default;
 
        // Call out to the individual language to classify the chunk of text around the
        // reference. We'll get both the syntactic and semantic spans for this region.
        // Because the semantic tags may override the semantic ones (for example, 
        // "DateTime" might be syntactically an identifier, but semantically a struct
        // name), we'll do a later merging step to get the final correct list of 
        // classifications.  For tagging, normally the editor handles this.  But as
        // we're producing the list of Inlines ourselves, we have to handles this here.
        using var _1 = Classifier.GetPooledList(out var syntaxSpans);
        using var _2 = Classifier.GetPooledList(out var semanticSpans);
 
        await classificationService.AddSyntacticClassificationsAsync(document, spans, syntaxSpans, cancellationToken).ConfigureAwait(false);
 
        // Intentional that we're adding both semantic and embedded lang classifications to the same array.  Both
        // are 'semantic' from the perspective of this helper method.
        await classificationService.AddSemanticClassificationsAsync(document, spans, options, semanticSpans, cancellationToken).ConfigureAwait(false);
        await classificationService.AddEmbeddedLanguageClassificationsAsync(document, spans, options, semanticSpans, cancellationToken).ConfigureAwait(false);
 
        // MergeClassifiedSpans will ultimately filter multiple classifications for the same
        // span down to one. We know that additive classifications are there just to 
        // provide additional information about the true classification. By default, we will
        // remove additive ClassifiedSpans until we have support for additive classifications
        // in classified spans. https://github.com/dotnet/roslyn/issues/32770
        // The exception to this is LSP, which expects the additive spans.
        if (!includeAdditiveSpans)
        {
            RemoveAdditiveSpans(syntaxSpans);
            RemoveAdditiveSpans(semanticSpans);
        }
 
        var widenedSpan = new TextSpan(spans[0].Start, spans[^1].End);
        var classifiedSpans = MergeClassifiedSpans(syntaxSpans, semanticSpans, widenedSpan);
        return classifiedSpans;
    }
 
    private static void RemoveAdditiveSpans(SegmentedList<ClassifiedSpan> spans)
    {
        for (var i = spans.Count - 1; i >= 0; i--)
        {
            var span = spans[i];
            if (ClassificationTypeNames.AdditiveTypeNames.Contains(span.ClassificationType))
                spans.RemoveAt(i);
        }
    }
 
    private static ImmutableArray<ClassifiedSpan> MergeClassifiedSpans(
        SegmentedList<ClassifiedSpan> syntaxSpans,
        SegmentedList<ClassifiedSpan> semanticSpans,
        TextSpan widenedSpan)
    {
        // The spans produced by the language services may not be ordered
        // (indeed, this happens with semantic classification as different
        // providers produce different results in an arbitrary order).  Order
        // them first before proceeding.
        Order(syntaxSpans);
        Order(semanticSpans);
 
        // It's possible for us to get classified spans that occur *before*
        // or after the span we want to present. This happens because the calls to
        // AddSyntacticClassificationsAsync and AddSemanticClassificationsAsync 
        // may return more spans than the range asked for.  While bad form,
        // it's never been a requirement that implementation not do that.
        // For example, the span may be the non-full-span of a node, but the
        // classifiers may still return classifications for leading/trailing
        // trivia even if it's out of the bounds of that span.
        // 
        // To deal with that, we adjust all spans so that they don't go outside
        // of the range we care about.
        AdjustSpans(syntaxSpans, widenedSpan);
        AdjustSpans(semanticSpans, widenedSpan);
 
        using var _1 = Classifier.GetPooledList(out var mergedSpans);
 
        MergeParts(syntaxSpans, semanticSpans, mergedSpans);
        Order(mergedSpans);
 
        // The classification service will only produce classifications for things it knows about.  i.e. there will
        // be gaps in what it produces. Fill in those gaps so we have *all* parts of the span classified properly.
        using var _2 = Classifier.GetPooledList(out var filledInSpans);
        FillInClassifiedSpanGaps(widenedSpan.Start, mergedSpans, filledInSpans);
        return [.. filledInSpans];
    }
 
    private static readonly Comparison<ClassifiedSpan> s_spanComparison = static (s1, s2) => s1.TextSpan.Start - s2.TextSpan.Start;
 
    private static void Order(SegmentedList<ClassifiedSpan> syntaxSpans)
        => syntaxSpans.Sort(s_spanComparison);
 
    /// <summary>
    /// Ensures that all spans in <paramref name="spans"/> do not go beyond the spans in <paramref
    /// name="widenedSpan"/>. Any spans that are entirely outside of <paramref name="widenedSpan"/> are replaced
    /// with <see langword="default"/>.
    /// </summary>
    private static void AdjustSpans(SegmentedList<ClassifiedSpan> spans, TextSpan widenedSpan)
    {
        for (var i = 0; i < spans.Count; i++)
        {
            var span = spans[i];
 
            // Make sure the span actually intersects 'widenedSpan'.  If it 
            // does not, just put in an empty length span.  It will get ignored later
            // when we walk through this list.
            var intersection = span.TextSpan.Intersection(widenedSpan);
 
            if (i > 0 && intersection != null)
            {
                // The additiveType's may appear before or after their modifier due to sorting.
                var previousSpan = spans[i - 1];
                var isAdditiveClassification = previousSpan.TextSpan == span.TextSpan &&
                    ClassificationTypeNames.AdditiveTypeNames.Contains(span.ClassificationType) || ClassificationTypeNames.AdditiveTypeNames.Contains(previousSpan.ClassificationType);
 
                // Additive classifications are intended to overlap so do not ignore it.
                if (!isAdditiveClassification && previousSpan.TextSpan.End > intersection.Value.Start)
                {
                    // This span isn't strictly after the previous span.  Ignore it.
                    intersection = null;
                }
            }
 
            var newSpan = new ClassifiedSpan(span.ClassificationType, intersection.GetValueOrDefault());
            spans[i] = newSpan;
        }
    }
 
    public static void FillInClassifiedSpanGaps(
        int startPosition, SegmentedList<ClassifiedSpan> classifiedSpans, SegmentedList<ClassifiedSpan> result)
    {
        foreach (var span in classifiedSpans)
        {
            // Should not get empty spans.  They are filtered out in MergeParts
            Debug.Assert(!span.TextSpan.IsEmpty);
 
            // Ignore empty spans.  We can get those when the classification service
            // returns spans outside of the range of the span we asked to classify.
            if (span.TextSpan.Length == 0)
                continue;
 
            // If there is space between this span and the last one, then add a space.
            if (startPosition < span.TextSpan.Start)
            {
                result.Add(new ClassifiedSpan(ClassificationTypeNames.Text,
                    TextSpan.FromBounds(
                        startPosition, span.TextSpan.Start)));
            }
 
            result.Add(span);
            startPosition = span.TextSpan.End;
        }
    }
 
    /// <summary>
    /// Adds all semantic parts to final parts, and adds all portions of <paramref name="syntaxParts"/> that do not
    /// overlap with any semantic parts as well.  All final parts will be non-empty.  Both <paramref
    /// name="syntaxParts"/> and <paramref name="semanticParts"/> must be sorted.
    /// </summary>
    private static void MergeParts(
        SegmentedList<ClassifiedSpan> syntaxParts,
        SegmentedList<ClassifiedSpan> semanticParts,
        SegmentedList<ClassifiedSpan> finalParts)
    {
        MergeParts<ClassifiedSpan, ClassifiedSpanIntervalIntrospector>(
            syntaxParts, semanticParts, finalParts,
            static span => span.TextSpan,
            static (original, final) => new ClassifiedSpan(original.ClassificationType, final));
 
        // now that we've added all semantic parts and syntactic-portions, sort the final result.
        finalParts.Sort(s_spanComparison);
    }
 
    /// <inheritdoc cref="MergeParts(SegmentedList{ClassifiedSpan}, SegmentedList{ClassifiedSpan}, SegmentedList{ClassifiedSpan})"/>
    public static void MergeParts<TClassifiedSpan, TClassifiedSpanIntervalIntrospector>(
        SegmentedList<TClassifiedSpan> syntaxParts,
        SegmentedList<TClassifiedSpan> semanticParts,
        SegmentedList<TClassifiedSpan> finalParts,
        Func<TClassifiedSpan, TextSpan> getSpan,
        Func<TClassifiedSpan, TextSpan, TClassifiedSpan> createSpan)
        where TClassifiedSpanIntervalIntrospector : struct, IIntervalIntrospector<TClassifiedSpan>
    {
        // Create an interval tree so we can easily determine which semantic parts intersect with the 
        // syntactic parts we're looking at.
        using var _1 = SegmentedListPool.GetPooledList<TClassifiedSpan>(out var semanticSpans);
 
        // Add all the non-empty semantic parts to the tree.
        foreach (var part in semanticParts)
        {
            if (!getSpan(part).IsEmpty)
            {
                semanticSpans.Add(part);
                finalParts.Add(part);
            }
        }
 
        var semanticPartsTree = ImmutableIntervalTree<TClassifiedSpan>.CreateFromUnsorted(
            default(TClassifiedSpanIntervalIntrospector), semanticSpans);
 
        using var tempBuffer = TemporaryArray<TClassifiedSpan>.Empty;
 
        foreach (var syntacticPart in syntaxParts)
        {
            // ignore empty parts.
            var syntacticPartSpan = getSpan(syntacticPart);
            if (syntacticPartSpan.IsEmpty)
                continue;
 
            tempBuffer.Clear();
            semanticPartsTree.Algorithms.FillWithIntervalsThatOverlapWith(
                syntacticPartSpan.Start, syntacticPartSpan.Length, ref tempBuffer.AsRef(),
                default(TClassifiedSpanIntervalIntrospector));
 
            if (tempBuffer.Count == 0)
            {
                // semantic parts didn't overlap with this syntax part at all.  Just add in directly.
                finalParts.Add(syntacticPart);
                continue;
            }
 
            // One or Multiple semantic parts.
            // Add the syntactic portion before the first semantic part,
            // the syntactic pieces between the semantic parts,
            // and the syntactic portion after the last semantic part.
 
            var firstSemanticPart = tempBuffer[0];
            var lastSemanticPart = tempBuffer[tempBuffer.Count - 1];
 
            var firstSemanticPartSpan = getSpan(firstSemanticPart);
            var lastSemanticPartSpan = getSpan(lastSemanticPart);
 
            Debug.Assert(firstSemanticPartSpan.OverlapsWith(syntacticPartSpan));
            Debug.Assert(lastSemanticPartSpan.OverlapsWith(syntacticPartSpan));
 
            if (syntacticPartSpan.Start < firstSemanticPartSpan.Start)
            {
                finalParts.Add(createSpan(syntacticPart, TextSpan.FromBounds(
                    syntacticPartSpan.Start,
                    firstSemanticPartSpan.Start)));
            }
 
            for (var i = 0; i < tempBuffer.Count - 1; i++)
            {
                var semanticPart1 = tempBuffer[i];
                var semanticPart2 = tempBuffer[i + 1];
 
                var semanticPart1Span = getSpan(semanticPart1);
                var semanticPart2Span = getSpan(semanticPart2);
 
                Debug.Assert(semanticPart1Span.OverlapsWith(syntacticPartSpan));
                Debug.Assert(semanticPart1Span.OverlapsWith(syntacticPartSpan));
 
                if (semanticPart1Span.End < semanticPart2Span.Start)
                {
                    finalParts.Add(createSpan(syntacticPart, TextSpan.FromBounds(
                        semanticPart1Span.End,
                        semanticPart2Span.Start)));
                }
            }
 
            if (lastSemanticPartSpan.End < syntacticPartSpan.End)
            {
                finalParts.Add(createSpan(syntacticPart, TextSpan.FromBounds(
                    lastSemanticPartSpan.End,
                    syntacticPartSpan.End)));
            }
        }
    }
}