File: NavigateToPreFilterBenchmarks.cs
Web Access
Project: src\src\Tools\IdeCoreBenchmarks\IdeCoreBenchmarks.csproj (IdeCoreBenchmarks)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text;
using BenchmarkDotNet.Attributes;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.FindSymbols;
using Microsoft.CodeAnalysis.PatternMatching;
using Roslyn.Utilities;
 
namespace IdeCoreBenchmarks;
 
/// <summary>
/// Measures NavigateTo pre-filter performance across different index sizes and matching algorithms.
/// For each index, benchmarks test both hit (filter says "probably matches") and miss (filter rejects).
/// The full-scan baseline shows the cost we avoid by rejecting early.
/// </summary>
[MemoryDiagnoser]
public class NavigateToPreFilterBenchmarks
{
    private NavigateToSearchIndex _realistic = null!;
    private NavigateToSearchIndex _stressAll = null!;
    private NavigateToSearchIndex _stressHump = null!;
    private NavigateToSearchIndex _stressPrefix = null!;
    private NavigateToSearchIndex _stressTrigram = null!;
    private NavigateToSearchIndex _stressContainer = null!;
    private string[] _realisticSymbolNames = null!;
 
    [GlobalSetup]
    public void GlobalSetup()
    {
        _realistic = CreateRealisticIndex(out _realisticSymbolNames);
        _stressAll = CreateStressAllIndex();
        _stressHump = CreateStressHumpSetIndex();
        _stressPrefix = CreateStressHumpPrefixIndex();
        _stressTrigram = CreateStressTrigramIndex();
        _stressContainer = CreateStressContainerIndex();
    }
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Baseline: full PatternMatcher scan (the cost we're avoiding)
    // ═══════════════════════════════════════════════════════════════════════════
 
    [Benchmark(Baseline = true, Description = "FullScan (10k symbols, no pre-filter)")]
    public int FullScan()
    {
        using var matcher = PatternMatcher.CreatePatternMatcher("FoNa", includeMatchedSpans: false, allowFuzzyMatching: true);
        var count = 0;
        foreach (var name in _realisticSymbolNames)
        {
            using var matches = TemporaryArray<PatternMatch>.Empty;
            if (matcher.AddMatches(name, ref matches.AsRef()))
                count++;
        }
 
        return count;
    }
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Realistic index: 10k symbols from hard-coded CamelCase names + 5 containers
    // ═══════════════════════════════════════════════════════════════════════════
 
    private static NavigateToSearchIndex CreateRealisticIndex(out string[] symbolNames)
    {
        var stringTable = new StringTable();
        const int count = 10_000;
        var infos = new DeclaredSymbolInfo[count];
        symbolNames = new string[count];
 
        // Names like "GetApplicationContext0", "SetDatabaseConnection1", etc.
        // Humps: G,A,C / S,D,C / C,S,P / P,X,D / V,U,I / T,D,R / C,H,C / I,C / D,R,M / S,O,G
        // Containers: System.Collections.Generic, Microsoft.CodeAnalysis.CSharp, etc.
        var namePrefixes = new[]
        {
            "GetApplicationContext",
            "SetDatabaseConnection",
            "CreateServiceProvider",
            "ParseXmlDocument",
            "ValidateUserInput",
            "TransformDataRecord",
            "CalculateHashCode",
            "InitializeComponent",
            "DisposeResourceManager",
            "SerializeObjectGraph",
        };
 
        var containers = new[]
        {
            "System.Collections.Generic",
            "Microsoft.CodeAnalysis.CSharp",
            "Azure.Storage.Blobs",
            "Newtonsoft.Json.Linq",
            "System.Threading.Tasks",
        };
 
        for (var i = 0; i < count; i++)
        {
            var name = $"{namePrefixes[i % namePrefixes.Length]}{i}";
            symbolNames[i] = name;
            infos[i] = MakeInfo(stringTable, name, containers[i % containers.Length]);
        }
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "FoNa" → humps F,N → bigram "FN" not in hump set (no name starts with F+N humps) → miss.
    [Benchmark(Description = "Realistic: CamelCase miss")]
    public bool Realistic_CamelCase_Miss()
        => _realistic.CouldContainNavigateToMatch("FoNa", null, out _);
 
    // "GetApp" → humps G,A → bigram "GA" is stored from "GetApplicationContext" → hit.
    [Benchmark(Description = "Realistic: CamelCase hit")]
    public bool Realistic_CamelCase_Hit()
        => _realistic.CouldContainNavigateToMatch("GetApp", null, out _);
 
    // "getapp" → all-lowercase → DP splits "get"+"app", both are hump prefixes of
    // "Get" and "Application" → hit.
    [Benchmark(Description = "Realistic: lowercase hit (hump-prefix DP)")]
    public bool Realistic_Lowercase_Hit()
        => _realistic.CouldContainNavigateToMatch("getapp", null, out _);
 
    // "foozy" → all-lowercase → DP can't split into any stored hump prefixes (no name starts
    // with "foo...") → hump miss. Trigrams "foo","ooz","ozy" not stored either → miss.
    [Benchmark(Description = "Realistic: lowercase miss (hump-prefix DP)")]
    public bool Realistic_Lowercase_Miss()
        => _realistic.CouldContainNavigateToMatch("foozy", null, out _);
 
    // "context" → all-lowercase, len 7 → trigrams "con","ont","nte","tex","ext" all stored
    // from "GetApplicationContext" → hit.
    [Benchmark(Description = "Realistic: lowercase trigram hit")]
    public bool Realistic_Lowercase_Trigram_Hit()
        => _realistic.CouldContainNavigateToMatch("context", null, out _);
 
    // "Context" → mixed-case → MixedCaseHumpCheckPasses → single hump 'C' in hump set → hit.
    [Benchmark(Description = "Realistic: CamelCase trigram hit")]
    public bool Realistic_CamelCase_Trigram_Hit()
        => _realistic.CouldContainNavigateToMatch("Context", null, out _);
 
    // "zqjxw" → all-lowercase, len 5 → trigrams "zqj","qjx","jxw" none stored → miss.
    [Benchmark(Description = "Realistic: lowercase trigram miss")]
    public bool Realistic_Lowercase_Trigram_Miss()
        => _realistic.CouldContainNavigateToMatch("zqjxw", null, out _);
 
    // "Zqjxw" → mixed-case → MixedCaseHumpCheckPasses → single hump 'Z' not in hump set → miss.
    [Benchmark(Description = "Realistic: CamelCase trigram miss")]
    public bool Realistic_CamelCase_Trigram_Miss()
        => _realistic.CouldContainNavigateToMatch("Zqjxw", null, out _);
 
    // "GetApp" hits name check. "System.Collections" → humps S,C match container chars
    // from "System.Collections.Generic" → hit.
    [Benchmark(Description = "Realistic: container hit")]
    public bool Realistic_Container_Hit()
        => _realistic.CouldContainNavigateToMatch("GetApp", "System.Collections", out _);
 
    // "GetApp" hits name check. "Zebra.Unknown" → hump Z not in any container → miss.
    [Benchmark(Description = "Realistic: container miss")]
    public bool Realistic_Container_Miss()
        => _realistic.CouldContainNavigateToMatch("GetApp", "Zebra.Unknown", out _);
 
    // "@getapp" → strip leading '@' → "getapp" → all-lowercase → hump-prefix DP hit.
    [Benchmark(Description = "Realistic: lowercase verbatim hit")]
    public bool Realistic_Lowercase_Verbatim_Hit()
        => _realistic.CouldContainNavigateToMatch("@getapp", null, out _);
 
    // "@GetApp" → strip leading '@' → "GetApp" → CamelCase → bigram "GA" hit.
    [Benchmark(Description = "Realistic: CamelCase verbatim hit")]
    public bool Realistic_CamelCase_Verbatim_Hit()
        => _realistic.CouldContainNavigateToMatch("@GetApp", null, out _);
 
    // "get context" → split at space → both all-lowercase words checked via hump-prefix DP.
    [Benchmark(Description = "Realistic: lowercase multi-word hit")]
    public bool Realistic_Lowercase_MultiWord_Hit()
        => _realistic.CouldContainNavigateToMatch("get context", null, out _);
 
    // "Get Context" → split at space → both CamelCase words checked via hump set.
    [Benchmark(Description = "Realistic: CamelCase multi-word hit")]
    public bool Realistic_CamelCase_MultiWord_Hit()
        => _realistic.CouldContainNavigateToMatch("Get Context", null, out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Stress-all index: every structure saturated simultaneously
    // ═══════════════════════════════════════════════════════════════════════════
 
    /// <summary>
    /// Saturates every structure at once: all 676 CamelCase bigrams, 500 long-hump names for prefix
    /// bloom, 500 long-word names for trigram bloom, and containers spanning all 26 uppercase initials.
    /// </summary>
    private static NavigateToSearchIndex CreateStressAllIndex()
    {
        var stringTable = new StringTable();
        var infos = new List<DeclaredSymbolInfo>();
        var sb = new StringBuilder();
 
        // 676 two-hump names covering every (A..Z, A..Z) bigram pair.
        for (var c1 = 'A'; c1 <= 'Z'; c1++)
        {
            for (var c2 = 'A'; c2 <= 'Z'; c2++)
            {
                sb.Clear().Append(c1).Append("aa").Append(c2).Append("bb");
                infos.Add(MakeInfo(stringTable, sb.ToString(), "All.Stress"));
            }
        }
 
        // 500 single-hump names with long unique lowercase tails → many hump prefixes.
        for (var i = 0; i < 500; i++)
        {
            sb.Clear().Append((char)('A' + (i % 26)));
            for (var j = 0; j < 20; j++)
                sb.Append((char)('a' + ((i * 7 + j * 13) % 26)));
            infos.Add(MakeInfo(stringTable, sb.ToString(), "All.Stress"));
        }
 
        // 500 single-word names with long varied sequences → many trigrams.
        for (var i = 0; i < 500; i++)
        {
            sb.Clear().Append((char)('A' + (i % 26)));
            for (var j = 0; j < 25; j++)
                sb.Append((char)('a' + ((i * 11 + j * 17) % 26)));
            infos.Add(MakeInfo(stringTable, sb.ToString(), "All.Stress"));
        }
 
        // Containers spanning all 26 uppercase initials.
        for (var c = 'A'; c <= 'Z'; c++)
            infos.Add(MakeInfo(stringTable, "Method" + c, $"{c}lpha.{(char)('A' + (c - 'A' + 13) % 26)}eta"));
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "AaBb" → humps A,B → bigram "AB" is stored (all 676 bigrams present) → hit.
    [Benchmark(Description = "StressAll: hit")]
    public bool StressAll_Hit()
        => _stressAll.CouldContainNavigateToMatch("AaBb", null, out _);
 
    // "FoNa" → humps F,N → bigram "FN" is stored (all bigrams present), so hump check hits.
    // However the name "FoNa" doesn't actually exist — this tests the cost of a false positive
    // from the hump set (it passes but the actual PatternMatcher would reject later).
    [Benchmark(Description = "StressAll: miss")]
    public bool StressAll_Miss()
        => _stressAll.CouldContainNavigateToMatch("FoNa", null, out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Stress _humpSet: large frozen set of bigrams/chars
    // ═══════════════════════════════════════════════════════════════════════════
 
    /// <summary>
    /// 5,000 names with 3 humps each, systematically varying initials across A-Z.
    /// Names like "AabAcdAef", "BabAcdAef", ..., covering many unique bigram pairs.
    /// Word-parts kept short (3 chars each) to minimize trigram/prefix noise.
    /// </summary>
    private static NavigateToSearchIndex CreateStressHumpSetIndex()
    {
        var stringTable = new StringTable();
        var infos = new DeclaredSymbolInfo[5000];
        var sb = new StringBuilder();
 
        for (var i = 0; i < 5000; i++)
        {
            sb.Clear();
            var h1 = (char)('A' + (i % 26));
            var h2 = (char)('A' + ((i / 26) % 26));
            var h3 = (char)('A' + ((i / 676) % 26));
            sb.Append(h1).Append("ab");
            sb.Append(h2).Append("cd");
            sb.Append(h3).Append("ef");
            infos[i] = MakeInfo(stringTable, sb.ToString(), "X.Y");
        }
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "AaCd" → humps A,C → bigram "AC" is stored (name "AabCcdXef" exists) → hit.
    [Benchmark(Description = "StressHumpSet: CamelCase hit")]
    public bool StressHumpSet_Hit()
        => _stressHump.CouldContainNavigateToMatch("AaCd", null, out _);
 
    // "FoNa" → humps F,N → bigram "FN" not stored (h2 varies with h1, not all pairs covered).
    // No names have both F and N as adjacent hump initials → miss.
    [Benchmark(Description = "StressHumpSet: CamelCase miss")]
    public bool StressHumpSet_Miss()
        => _stressHump.CouldContainNavigateToMatch("FoNa", null, out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Stress _humpPrefixFilter: dense bloom of lowercased hump prefixes
    // ═══════════════════════════════════════════════════════════════════════════
 
    /// <summary>
    /// 2,000 names each with a single long hump (20 lowercase chars after the initial).
    /// Each name produces ~20 unique prefixes, flooding the bloom filter with ~40,000
    /// distinct prefix strings. Character sequences use (i*7 + j*13) % 26 for variety.
    /// </summary>
    private static NavigateToSearchIndex CreateStressHumpPrefixIndex()
    {
        var stringTable = new StringTable();
        var infos = new DeclaredSymbolInfo[2000];
        var sb = new StringBuilder();
 
        for (var i = 0; i < 2000; i++)
        {
            sb.Clear().Append((char)('A' + (i % 26)));
            for (var j = 0; j < 20; j++)
                sb.Append((char)('a' + ((i * 7 + j * 13) % 26)));
            infos[i] = MakeInfo(stringTable, sb.ToString(), "X.Y");
        }
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "aahub" → all-lowercase → DP tries to split into hump prefixes. Names starting with 'A'
    // have varied prefixes; "a","aa","aah",... likely stored in the dense bloom → hit.
    [Benchmark(Description = "StressHumpPrefix: lowercase hit")]
    public bool StressHumpPrefix_Hit()
        => _stressPrefix.CouldContainNavigateToMatch("aahub", null, out _);
 
    // "zzzzqqqq" → all-lowercase → DP tries prefixes "z","zz","zzz","zzzz",...
    // No name has these as hump prefixes (no repeated-z sequences stored) → miss.
    [Benchmark(Description = "StressHumpPrefix: lowercase miss")]
    public bool StressHumpPrefix_Miss()
        => _stressPrefix.CouldContainNavigateToMatch("zzzzqqqq", null, out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Stress _trigramFilter: dense bloom of 3-char sliding windows
    // ═══════════════════════════════════════════════════════════════════════════
 
    /// <summary>
    /// 2,000 names each with a long single word-part (25 lowercase chars after the initial).
    /// Each produces ~23 trigrams, flooding the bloom filter with ~46,000 distinct trigram
    /// strings. Character sequences use (i*11 + j*17) % 26 for variety.
    /// </summary>
    private static NavigateToSearchIndex CreateStressTrigramIndex()
    {
        var stringTable = new StringTable();
        var infos = new DeclaredSymbolInfo[2000];
        var sb = new StringBuilder();
 
        for (var i = 0; i < 2000; i++)
        {
            sb.Clear().Append((char)('A' + (i % 26)));
            for (var j = 0; j < 25; j++)
                sb.Append((char)('a' + ((i * 11 + j * 17) % 26)));
            infos[i] = MakeInfo(stringTable, sb.ToString(), "X.Y");
        }
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "aahub" → all-lowercase, len 5 → trigrams "aah","ahu","hub" checked against dense bloom.
    // With 46k trigrams stored, common 3-char sequences are likely present → hit.
    [Benchmark(Description = "StressTrigram: lowercase hit")]
    public bool StressTrigram_Hit()
        => _stressTrigram.CouldContainNavigateToMatch("aahub", null, out _);
 
    // "zqxjw" → all-lowercase, len 5 → trigrams "zqx","qxj","xjw" are rare 3-char sequences
    // unlikely to appear even in a dense bloom → miss.
    [Benchmark(Description = "StressTrigram: lowercase miss")]
    public bool StressTrigram_Miss()
        => _stressTrigram.CouldContainNavigateToMatch("zqxjw", null, out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
    //  Stress _containerCharSet: all 26 A-Z initials in frozen char set
    // ═══════════════════════════════════════════════════════════════════════════
 
    /// <summary>
    /// 26 symbols with containers whose hump initials span all of A-Z (e.g.,
    /// "Aoo.Far.Naz", "Boo.Gar.Oaz", ...), making the frozen char set as full as possible.
    /// All symbols share name "SimpleMethod".
    /// </summary>
    private static NavigateToSearchIndex CreateStressContainerIndex()
    {
        var stringTable = new StringTable();
        var infos = new DeclaredSymbolInfo[26];
 
        for (var i = 0; i < 26; i++)
        {
            var c = (char)('A' + i);
            infos[i] = MakeInfo(stringTable, "SimpleMethod",
                $"{c}oo.{(char)('A' + (i + 5) % 26)}ar.{(char)('A' + (i + 13) % 26)}az");
        }
 
        return NavigateToSearchIndex.TestAccessor.CreateIndex(infos.ToImmutableArray());
    }
 
    // "Simple" hits name check (hump 'S' matches "SimpleMethod"). "Foo.Bar" → humps F,B →
    // both present in full A-Z container char set → hit.
    [Benchmark(Description = "StressContainer: container hit")]
    public bool StressContainer_Hit()
        => _stressContainer.CouldContainNavigateToMatch("Simple", "Foo.Bar", out _);
 
    // "Simple" hits name check. "0Invalid.1Bad" → hump initials '0','1' are digits,
    // not in the A-Z container char set → miss.
    [Benchmark(Description = "StressContainer: container miss")]
    public bool StressContainer_Miss()
        => _stressContainer.CouldContainNavigateToMatch("Simple", "0Invalid.1Bad", out _);
 
    // ═══════════════════════════════════════════════════════════════════════════
 
    private static DeclaredSymbolInfo MakeInfo(StringTable stringTable, string name, string container)
        => DeclaredSymbolInfo.Create(
            stringTable, name, nameSuffix: null, containerDisplayName: null,
            fullyQualifiedContainerName: container,
            isPartial: false, hasAttributes: false,
            DeclaredSymbolInfoKind.Method, Accessibility.Public,
            default, ImmutableArray<string>.Empty);
}