File: Protocol\DocumentUri.cs
Web Access
Project: src\src\LanguageServer\Protocol\Microsoft.CodeAnalysis.LanguageServer.Protocol.csproj (Microsoft.CodeAnalysis.LanguageServer.Protocol)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using Microsoft.CodeAnalysis;
 
namespace Roslyn.LanguageServer.Protocol;
 
/// <summary>
/// Datatype used to hold URI strings for LSP message serialization.  For details on how URIs are communicated in LSP,
/// see https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
/// </summary>
/// <remarks>
/// While .NET has a type represent URIs (System.Uri), we do not want to use this type directly in serialization and
/// deserialization.  System.Uri does full parsing and validation on the URI upfront, so any issues in the uri format
/// will cause deserialization to fail and bypass any of our error recovery.
/// 
/// Compounding this problem, System.Uri will fail to parse various RFC spec valid URIs. In order to gracefully handle
/// these issues, we defer the parsing of the URI until someone actually asks for it (and can handle the failure).
/// </remarks>
internal sealed record class DocumentUri(string UriString)
{
    private Optional<Uri> _parsedUri;
 
    public DocumentUri(Uri parsedUri) : this(parsedUri.AbsoluteUri)
        => _parsedUri = parsedUri;
 
    /// <summary>
    /// Gets the parsed System.Uri for the URI string.
    /// </summary>
    /// <returns>
    /// Null if the URI string is not parse-able with System.Uri.
    /// </returns>
    /// <remarks>
    /// Invalid RFC spec URI strings are not parse-able as so will return null here. However, System.Uri can also fail
    /// to parse certain valid RFC spec URI strings.
    /// 
    /// For example, any URI containing a 'sub-delims' character in the host name is a valid RFC spec URI, but will fail
    /// with System.Uri
    /// </remarks>
    public Uri? ParsedUri
    {
        get
        {
            _parsedUri = _parsedUri.HasValue ? _parsedUri : ParseUri(UriString);
            return _parsedUri.Value;
        }
    }
 
    private static Uri? ParseUri(string uriString)
    {
        try
        {
            return new Uri(uriString);
        }
        catch (UriFormatException)
        {
            // This is not a URI that System.Uri can handle.
            return null;
        }
    }
 
    public override string ToString() => UriString;
 
    public bool Equals(DocumentUri otherUri)
    {
        if (otherUri is null)
            return false;
 
        // 99% of the time the equivalent URIs will have equivalent URI strings, as the client is expected to be
        // consistent in how it sends the URIs to the server, either always encoded or always unencoded. See
        // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
        if (this.UriString == otherUri.UriString)
            return true;
 
        // Bail if we cannot parse either of the URIs.  We already determined the URI strings are not equal and we need
        // to be able to parse the URIs to do deeper equivalency checks.
        if (otherUri.ParsedUri is null || this.ParsedUri is null)
            return false;
 
        // Next we compare the parsed URIs to handle various casing and encoding scenarios (for example - different
        // schemes may handle casing differently).
 
        // Uri.Equals will not always consider a percent encoded URI equal to an unencoded URI, even if they point to
        // the same resource. As above, the client is supposed to be consistent in which kind of URI they send.
        //
        // However, there are rare cases where we are comparing an unencoded URI to an encoded URI and should consider
        // them equivalent if they point to the same file path. For example - say the client generally sends us the
        // unencoded URI.  When we serialize URIs back to the client, we always serialize the AbsoluteUri property (see
        // FromUri). The AbsoluteUri property is *always* percent encoded - if this URI gets sent back to us as part of
        // a data object on a request (e.g. codelens/resolve), the client will leave the URI untouched, even if they
        // generally send unencoded URIs.  In such cases we need to consider the encoded and unencoded URI equivalent.
        //
        // To handle that, we first compare the AbsoluteUri properties on both, which are always percent encoded.
        return (this.ParsedUri.IsAbsoluteUri && otherUri.ParsedUri.IsAbsoluteUri && this.ParsedUri.AbsoluteUri == otherUri.ParsedUri.AbsoluteUri) ||
            Equals(this.ParsedUri, otherUri.ParsedUri);
    }
 
    public override int GetHashCode()
    {
        // We can't do anything better than the uri string hash code if we cannot parse the URI.
        if (this.ParsedUri is null)
            return this.UriString.GetHashCode();
 
        // Since the Uri type does not consider an encoded Uri equal to an unencoded Uri, we need to handle this
        // ourselves. The AbsoluteUri property is always encoded, so we can use this to compare the URIs (see Equals
        // above).
        //
        // However, depending on the kind of URI, case sensitivity in AbsoluteUri should be ignored. Uri.GetHashCode
        // normally handles this internally, but the parameters it uses to determine which comparison to use are not
        // exposed.
        //
        // Instead, we will always create the hash code ignoring case, and will rely on the Equals implementation to
        // handle collisions (between two Uris with different casing).  This should be very rare in practice. Collisions
        // can happen for non UNC URIs (e.g. `git:/blah` vs `git:/Blah`).
        return this.ParsedUri.IsAbsoluteUri
            ? StringComparer.OrdinalIgnoreCase.GetHashCode(this.ParsedUri.AbsoluteUri)
            : this.ParsedUri.GetHashCode();
    }
}