File: Workspace\Solution\SolutionState.CachingFilePathComparer.cs
Web Access
Project: src\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
// To quiet analyzers asking to change code from dotnet/runtime style to dotnet/roslyn style
// <auto-generated/>
 
#nullable enable
 
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Threading;
 
namespace Microsoft.CodeAnalysis;
 
internal sealed partial class SolutionState
{
    /// <summary>
    /// Equality comparer for file path strings.  It remembers (per thread) the hash code of the most recently hashed
    /// string instance, so hashing that same instance again is a reference check instead of a full rehash.
    /// </summary>
    private sealed class CachingFilePathComparer : IEqualityComparer<string>
    {
        /// <summary>
        /// Comparer that defines equality for this type and supplies the fallback hash code.  Note: comparing paths
        /// case-insensitively is wrong on many systems; it is kept for compatibility with the logic this code has
        /// always used on Windows.
        /// </summary>
        private static readonly StringComparer s_comparer = StringComparer.OrdinalIgnoreCase;

        /// <summary>
        /// Shared instance.  The constructor is private, so code outside this type can only use this instance.
        /// </summary>
        public static readonly CachingFilePathComparer Instance = new();

        /// <summary>
        /// The last string hashed on the current thread together with its hash code.  The field is
        /// <see cref="ThreadStaticAttribute"/> so that each thread gets its own copy it can read and write without
        /// expensive, contended locks.  The purpose of this type is to make looking up the same key in N
        /// dictionaries from one thread efficient, and a per-thread cache is enough for that.
        /// </summary>
        [ThreadStatic]
        private static (string? lastString, int lastHashCode) s_data;

        private CachingFilePathComparer()
        {
        }

        /// <summary>
        /// Compares two strings with <see cref="StringComparer.OrdinalIgnoreCase"/>.
        /// </summary>
        public bool Equals(string? x, string? y)
        {
            return s_comparer.Equals(x, y);
        }

        /// <summary>
        /// Returns the hash code for <paramref name="obj"/>.  If <paramref name="obj"/> is the same instance that
        /// was hashed last on this thread, the cached value is returned without rehashing.
        /// </summary>
        public int GetHashCode([DisallowNull] string obj)
        {
            // Bind the thread-static slot to a ref local once.  Per the original author's note, this may be faster on
            // .NET Framework because the runtime then reads and writes through a single location.
            ref var cache = ref s_data;

            if (!ReferenceEquals(cache.lastString, obj))
            {
                // A different instance than last time: compute its hash and remember both.  The hash uses a
                // specialized OrdinalIgnoreCase implementation that is faster for the common all-ASCII string and
                // falls back to the regular OrdinalIgnoreCase hash otherwise.
                cache = (obj, GetNonRandomizedHashCodeOrdinalIgnoreCase(obj));
            }

            return cache.lastHashCode;
        }

        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs#L921

        // ORing with 0x0020 "normalizes to lowercase" each of the two chars packed into a uint.  This casts a very
        // wide net because it also changes non-letters (e.g. '^' becomes '~'), but that is acceptable because such
        // collisions are expected to be very rare in practice.  The mask is valid for both big-endian and
        // little-endian layouts.
        private const uint NormalizeToLowercase = 0x0020_0020u;

        /// <summary>
        /// Case-insensitive hash of <paramref name="obj"/>.  All-ASCII strings are hashed here, two chars per
        /// <see cref="uint"/>; as soon as a non-ASCII char is seen the result of <c>s_comparer.GetHashCode</c> is
        /// returned instead.
        /// </summary>
        private unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase(string obj)
        {
            const uint Seed = (5381 << 16) + 5381;

            uint hash1 = Seed;
            uint hash2 = Seed;

            // Number of chars not yet consumed.  It may end up negative because the main loop always steps by four.
            int remaining = obj.Length;

            fixed (char* chars = obj)
            {
                Debug.Assert(chars[obj.Length] == '\0', "src[this.Length] == '\\0'");
                Debug.Assert(((int)chars) % 4 == 0, "Managed string should start at 4 bytes boundary");

                // View the string as uints, each holding two UTF-16 chars.
                uint* pairs = (uint*)chars;

                // Main loop: four chars (two uints) per iteration.  With exactly three chars left, the second uint
                // also covers the terminating '\0' asserted above.
                for (; remaining > 2; remaining -= 4, pairs += 2)
                {
                    uint first = pairs[0];
                    uint second = pairs[1];

                    if (!AllCharsInUInt32AreAscii(first | second))
                    {
                        // Non-ASCII content: let the regular comparer hash the whole string.
                        return s_comparer.GetHashCode(obj);
                    }

                    hash1 = (RuntimeBitOperations.RotateLeft(hash1, 5) + hash1) ^ (first | NormalizeToLowercase);
                    hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (second | NormalizeToLowercase);
                }

                // Tail: one or two chars are left; they fit in a single uint, which is folded into hash2 only.
                if (remaining > 0)
                {
                    uint tail = pairs[0];

                    if (!AllCharsInUInt32AreAscii(tail))
                    {
                        return s_comparer.GetHashCode(obj);
                    }

                    hash2 = (RuntimeBitOperations.RotateLeft(hash2, 5) + hash2) ^ (tail | NormalizeToLowercase);
                }
            }

            return (int)(hash1 + (hash2 * 1566083941));
        }

        // From https://github.com/dotnet/runtime/blob/5aa9687e110faa19d1165ba680e52585a822464d/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs#L16.

        /// <summary>
        /// Returns true iff both UTF-16 chars packed into <paramref name="value"/> (in machine endianness) are
        /// ASCII, i.e. neither 16-bit half has a bit set above 0x7F.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool AllCharsInUInt32AreAscii(uint value)
            => (value & 0xFF80_FF80u) == 0; // 0xFF80_FF80 == ~0x007F_007F
    }
}