|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Text;
using Microsoft.CodeAnalysis.PooledObjects;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis
{
/// <summary>
/// Case-insensitive operations (mostly comparison) on unicode strings.
/// </summary>
#if COMPILERCORE
public
#else
internal
#endif
static class CaseInsensitiveComparison
{
// PERF: Cache a TextInfo for Unicode ToLower since this will be accessed very frequently
private static readonly TextInfo s_unicodeCultureTextInfo = GetUnicodeCulture().TextInfo;
private static CultureInfo GetUnicodeCulture()
{
try
{
// We use the "en" culture to get the Unicode ToLower mapping, as it implements
// a much more recent Unicode version (6.0+) than the invariant culture (1.0),
// and it matches the Unicode version used for character categorization.
return new CultureInfo("en");
}
catch (ArgumentException) // System.Globalization.CultureNotFoundException not on all platforms
{
// If "en" is not available, fall back to the invariant culture. Although it has bugs
// specific to the invariant culture (e.g. being version-locked to Unicode 1.0), at least
// we can rely on it being present on all platforms.
return CultureInfo.InvariantCulture;
}
}
/// <summary>
/// ToLower implements the Unicode lowercase mapping
/// as described in ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt.
/// VB uses these mappings for case-insensitive comparison.
/// </summary>
/// <param name="c"></param>
/// <returns>If <paramref name="c"/> is upper case, then this returns its Unicode lower case equivalent. Otherwise, <paramref name="c"/> is returned unmodified.</returns>
public static char ToLower(char c)
{
// PERF: This is a very hot code path in VB, optimize for ASCII
// Perform a range check with a single compare by using unsigned arithmetic
if (unchecked((uint)(c - 'A')) <= ('Z' - 'A'))
{
return (char)(c | 0x20);
}
if (c < 0xC0) // Covers ASCII (U+0000 - U+007F) and up to the next upper-case codepoint (Latin Capital Letter A with Grave)
{
return c;
}
return ToLowerNonAscii(c);
}
private static char ToLowerNonAscii(char c)
{
if (c == '\u0130')
{
// Special case Turkish I (LATIN CAPITAL LETTER I WITH DOT ABOVE)
// This corrects for the fact that the invariant culture only supports Unicode 1.0
// and therefore does not "know about" this character.
return 'i';
}
return s_unicodeCultureTextInfo.ToLower(c);
}
/// <summary>
/// This class seeks to perform the lowercase Unicode case mapping.
/// </summary>
private sealed class OneToOneUnicodeComparer : StringComparer
{
private static int CompareLowerUnicode(char c1, char c2)
{
return (c1 == c2) ? 0 : ToLower(c1) - ToLower(c2);
}
public override int Compare(string? str1, string? str2)
{
if ((object?)str1 == str2)
{
return 0;
}
if (str1 is null)
{
return -1;
}
if (str2 is null)
{
return 1;
}
int len = Math.Min(str1.Length, str2.Length);
for (int i = 0; i < len; i++)
{
int ordDiff = CompareLowerUnicode(str1[i], str2[i]);
if (ordDiff != 0)
{
return ordDiff;
}
}
// return the smaller string, or 0 if they are equal in length
return str1.Length - str2.Length;
}
public int Compare(ReadOnlySpan<char> str1, ReadOnlySpan<char> str2)
{
int len = Math.Min(str1.Length, str2.Length);
for (int i = 0; i < len; i++)
{
int ordDiff = CompareLowerUnicode(str1[i], str2[i]);
if (ordDiff != 0)
{
return ordDiff;
}
}
// return the smaller string, or 0 if they are equal in length
return str1.Length - str2.Length;
}
private static bool AreEqualLowerUnicode(char c1, char c2)
{
return c1 == c2 || ToLower(c1) == ToLower(c2);
}
public override bool Equals(string? str1, string? str2)
{
if ((object?)str1 == str2)
{
return true;
}
if (str1 is null || str2 is null)
{
return false;
}
if (str1.Length != str2.Length)
{
return false;
}
for (int i = 0; i < str1.Length; i++)
{
if (!AreEqualLowerUnicode(str1[i], str2[i]))
{
return false;
}
}
return true;
}
public bool Equals(ReadOnlySpan<char> str1, ReadOnlySpan<char> str2)
{
if (str1.Length != str2.Length)
{
return false;
}
for (int i = 0; i < str1.Length; i++)
{
if (!AreEqualLowerUnicode(str1[i], str2[i]))
{
return false;
}
}
return true;
}
public static bool EndsWith(string value, string possibleEnd)
{
if ((object)value == possibleEnd)
{
return true;
}
if ((object)value == null || (object)possibleEnd == null)
{
return false;
}
int i = value.Length - 1;
int j = possibleEnd.Length - 1;
if (i < j)
{
return false;
}
while (j >= 0)
{
if (!AreEqualLowerUnicode(value[i], possibleEnd[j]))
{
return false;
}
i--;
j--;
}
return true;
}
public static bool StartsWith(string value, string possibleStart)
{
if ((object)value == possibleStart)
{
return true;
}
if ((object)value == null || (object)possibleStart == null)
{
return false;
}
if (value.Length < possibleStart.Length)
{
return false;
}
for (int i = 0; i < possibleStart.Length; i++)
{
if (!AreEqualLowerUnicode(value[i], possibleStart[i]))
{
return false;
}
}
return true;
}
public override int GetHashCode(string str)
{
int hashCode = Hash.FnvOffsetBias;
for (int i = 0; i < str.Length; i++)
{
hashCode = Hash.CombineFNVHash(hashCode, ToLower(str[i]));
}
return hashCode;
}
}
/// <summary>
/// Returns a StringComparer that compares strings according to Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
private static readonly OneToOneUnicodeComparer s_comparer = new OneToOneUnicodeComparer();
/// <summary>
/// Returns a StringComparer that compares strings according to Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static StringComparer Comparer => s_comparer;
/// <summary>
/// Determines if two strings are equal according to Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <param name="left">First identifier to compare</param>
/// <param name="right">Second identifier to compare</param>
/// <returns>true if the identifiers should be considered the same.</returns>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static bool Equals(string left, string right) => s_comparer.Equals(left, right);
/// <summary>
/// Determines if two strings are equal according to Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <param name="left">First identifier to compare</param>
/// <param name="right">Second identifier to compare</param>
/// <returns>true if the identifiers should be considered the same.</returns>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static bool Equals(ReadOnlySpan<char> left, ReadOnlySpan<char> right) => s_comparer.Equals(left, right);
/// <summary>
/// Determines if the string 'value' end with string 'possibleEnd'.
/// </summary>
/// <param name="value"></param>
/// <param name="possibleEnd"></param>
/// <returns></returns>
public static bool EndsWith(string value, string possibleEnd) => OneToOneUnicodeComparer.EndsWith(value, possibleEnd);
/// <summary>
/// Determines if the string 'value' starts with string 'possibleStart'.
/// </summary>
/// <param name="value"></param>
/// <param name="possibleStart"></param>
/// <returns></returns>
public static bool StartsWith(string value, string possibleStart) => OneToOneUnicodeComparer.StartsWith(value, possibleStart);
/// <summary>
/// Compares two strings according to the Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <param name="left">First identifier to compare</param>
/// <param name="right">Second identifier to compare</param>
/// <returns>-1 if <paramref name="left"/> < <paramref name="right"/>, 1 if <paramref name="left"/> > <paramref name="right"/>, 0 if they are equal.</returns>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static int Compare(string left, string right) => s_comparer.Compare(left, right);
/// <summary>
/// Compares two strings according to the Unicode rules for case-insensitive
/// identifier comparison (lower-case mapping).
/// </summary>
/// <param name="left">First identifier to compare</param>
/// <param name="right">Second identifier to compare</param>
/// <returns>-1 if <paramref name="left"/> < <paramref name="right"/>, 1 if <paramref name="left"/> > <paramref name="right"/>, 0 if they are equal.</returns>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static int Compare(ReadOnlySpan<char> left, ReadOnlySpan<char> right) => s_comparer.Compare(left, right);
/// <summary>
/// Gets a case-insensitive hash code for Unicode identifiers.
/// </summary>
/// <param name="value">identifier to get the hash code for</param>
/// <returns>The hash code for the given identifier</returns>
/// <remarks>
/// These are also the rules used for VB identifier comparison.
/// </remarks>
public static int GetHashCode(string value)
{
RoslynDebug.Assert(value != null);
return s_comparer.GetHashCode(value);
}
/// <summary>
/// Convert a string to lower case per Unicode
/// </summary>
/// <param name="value"></param>
/// <returns></returns>
[return: NotNullIfNotNull(parameterName: nameof(value))]
public static string? ToLower(string? value)
{
if (value is null)
return null;
if (value.Length == 0)
return value;
var pooledStrbuilder = PooledStringBuilder.GetInstance();
StringBuilder builder = pooledStrbuilder.Builder;
builder.Append(value);
ToLower(builder);
return pooledStrbuilder.ToStringAndFree();
}
/// <summary>
/// In-place convert string in StringBuilder to lower case per Unicode rules
/// </summary>
/// <param name="builder"></param>
public static void ToLower(StringBuilder builder)
{
if (builder == null)
return;
for (int i = 0; i < builder.Length; i++)
{
builder[i] = ToLower(builder[i]);
}
}
}
}
|