File: src\libraries\System.Private.CoreLib\src\System\Globalization\OrdinalCasing.Icu.cs
Web Access
Project: src\src\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj (System.Private.CoreLib)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
 
namespace System.Globalization
{
    internal static partial class OrdinalCasing
    {
        // NoCasingPage is the sentinel stored in s_casingTable for a Unicode page that has no cased
        // characters, i.e. no case translation is needed for any character of that page.
        // ToUpper recognizes the sentinel with a reference comparison (casingTable == NoCasingPage), so this
        // relies on Array.Empty<ushort>() handing back the same cached instance on every call.
        private static ushort[] NoCasingPage => Array.Empty<ushort>();
 
        // s_basicLatin holds the uppercase mapping for page 0 of the BMP, i.e. the 256 characters U+0000..U+00FF
        // (C0 Controls & Basic Latin followed by C1 Controls & Latin-1 Supplement). The entry at index N is the
        // uppercase form of the character U+00NN.
        // We are not lazily initializing this range because it is the most commonly used one and it would be
        // cached very early anyway.
        // Note that two entries map outside of the page: U+00B5 -> U+039C and U+00FF -> U+0178.
        private static readonly ushort[] s_basicLatin =
        {
            // Upper Casing

            /* 0000-000f */  0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
            /* 0010-001f */  0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
            /* 0020-002f */  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
            /* 0030-003f */  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
            /* 0040-004f */  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
            /* 0050-005f */  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
            /* 0060-006f */  0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
            /* 0070-007f */  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
            /* 0080-008f */  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
            /* 0090-009f */  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
            /* 00a0-00af */  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
            /* 00b0-00bf */  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x039c, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
            /* 00c0-00cf */  0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
            /* 00d0-00df */  0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
            /* 00e0-00ef */  0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
            /* 00f0-00ff */  0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00f7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178,
        };
 
        // s_casingTable covers the Unicode BMP only. Surrogate pair casing is handled separately.
        // Every cell in the table covers the casing of one page of 256 characters in the BMP and is indexed
        // by the high byte of the character (c >> 8). A cell holds one of:
        //   - null:         the page has not been populated yet; ToUpper fills it on first use through
        //                   InitOrdinalCasingPage.
        //   - NoCasingPage: the page has no casing at all, characters are returned unchanged.
        //   - otherwise:    an array of 256 entries holding the uppercase mapping of each character of the
        //                   page, indexed by the low byte of the character (c & 0xFF).
        private static readonly ushort[]?[] s_casingTable = InitCasingTable();
 
        /*
         The table is initialized to:
        {
            // 0000-07FF //       s_basicLatin,            null,            null,            null,            null,            null,            null,            null,
            // 0800-0FFF //               null,            null,            null,            null,            null,            null,            null,            null,
            // 1000-17FF //               null,    NoCasingPage,            null,            null,    NoCasingPage,    NoCasingPage,            null,            null,
            // 1800-1FFF //               null,            null,            null,            null,            null,            null,            null,            null,
            // 2000-27FF //               null,            null,    NoCasingPage,    NoCasingPage,            null,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 2800-2FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,            null,            null,            null,            null,            null,
            // 3000-37FF //               null,            null,            null,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 3800-3FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 4000-47FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 4800-4FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 5000-57FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 5800-5FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 6000-67FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 6800-6FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 7000-77FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 7800-7FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 8000-87FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 8800-8FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 9000-97FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // 9800-9FFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,            null,
            // A000-A7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,            null,    NoCasingPage,            null,            null,
            // A800-AFFF //               null,            null,            null,            null,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // B000-B7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // B800-BFFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // C000-C7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // C800-CFFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // D000-D7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,            null,
            // D800-DFFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // E000-E7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // E800-EFFF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // F000-F7FF //       NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,    NoCasingPage,
            // F800-FFFF //       NoCasingPage,    NoCasingPage,            null,            null,    NoCasingPage,            null,            null,            null,
        };
*/
 
        // Compact description of the initial content of s_casingTable, consumed by InitCasingTable.
        // Every bit describes one 256-character page:
        // 0 - null (the page is populated lazily on first use)
        // 1 - NoCasingPage
        // Every byte therefore describes 8 consecutive pages, i.e. a range of 0x800 characters.
        // The bits are in reverse order for readability, i.e. the highest order bit refers to
        // the lowest index.
        // The bit of page 0 is 0 here; InitCasingTable overwrites that cell with s_basicLatin.
        private static ReadOnlySpan<byte> s_casingTableInit => // 32 bytes * 8 bits = 256 pages
        [
            /* 0000-07FF */    0b00000000,
            /* 0800-0FFF */    0b00000000,
            /* 1000-17FF */    0b01001100,
            /* 1800-1FFF */    0b00000000,
            /* 2000-27FF */    0b00110111,
            /* 2800-2FFF */    0b11100000,
            /* 3000-37FF */    0b00011111,
            /* 3800-3FFF */    0b11111111,
            /* 4000-47FF */    0b11111111,
            /* 4800-4FFF */    0b11111111,
            /* 5000-57FF */    0b11111111,
            /* 5800-5FFF */    0b11111111,
            /* 6000-67FF */    0b11111111,
            /* 6800-6FFF */    0b11111111,
            /* 7000-77FF */    0b11111111,
            /* 7800-7FFF */    0b11111111,
            /* 8000-87FF */    0b11111111,
            /* 8800-8FFF */    0b11111111,
            /* 9000-97FF */    0b11111111,
            /* 9800-9FFF */    0b11111110,
            /* A000-A7FF */    0b11110100,
            /* A800-AFFF */    0b00001111,
            /* B000-B7FF */    0b11111111,
            /* B800-BFFF */    0b11111111,
            /* C000-C7FF */    0b11111111,
            /* C800-CFFF */    0b11111111,
            /* D000-D7FF */    0b11111110,
            /* D800-DFFF */    0b11111111,
            /* E000-E7FF */    0b11111111,
            /* E800-EFFF */    0b11111111,
            /* F000-F7FF */    0b11111111,
            /* F800-FFFF */    0b11001000,
        ];
 
        // Returns the ordinal uppercase form of a single UTF-16 code unit.
        // The high byte of the character selects a 256-character page in s_casingTable and the low byte
        // selects the entry inside that page. Pages that were never used before are created on demand.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static char ToUpper(char c)
        {
            int page = c >> 8;

            // Fast path: page 0 (U+0000..U+00FF) is always available in s_basicLatin.
            if (page == 0)
            {
                return (char)s_basicLatin[c];
            }

            ushort[]? mapping = s_casingTable[page];

            // Reference comparison against the sentinel: nothing on this page has casing.
            if (mapping == NoCasingPage)
            {
                return c;
            }

            // A null cell means the page has not been populated yet.
            if (mapping is null)
            {
                mapping = InitOrdinalCasingPage(page);
            }

            return (char)mapping[c & 0xFF];
        }
 
        // Uppercases a single character using only the page 0 table (U+0000..U+00FF).
        // Characters above U+00FF are returned unchanged.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static char ToUpperInvariantMode(char c)
        {
            if (c > '\u00FF')
            {
                return c;
            }

            return (char)s_basicLatin[c];
        }
 
        // Uppercases every character of source with the single-character ToUpperInvariantMode overload and
        // stores the result at the same index in destination.
        // No length check is performed here: destination is indexed with every index of source.
        public static void ToUpperInvariantMode(this ReadOnlySpan<char> source, Span<char> destination)
        {
            int position = 0;

            while (position < source.Length)
            {
                destination[position] = ToUpperInvariantMode(source[position]);
                position++;
            }
        }
 
        // Writes the ordinal uppercase form of source into destination, position by position.
        // A high surrogate immediately followed by a low surrogate is cased as a pair through SurrogateCasing;
        // every other code unit (including a lone surrogate) is cased on its own.
        // Must not be called in invariant or NLS globalization mode (asserted in debug builds).
        internal static void ToUpperOrdinal(ReadOnlySpan<char> source, Span<char> destination)
        {
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);

            int position = 0;

            while (position < source.Length)
            {
                char current = source[position];
                char trailing;

                if (current <= '\u00FF')
                {
                    // Page 0 is served directly from the static table.
                    destination[position] = (char)s_basicLatin[current];
                }
                else if (char.IsHighSurrogate(current)
                    && position + 1 < source.Length
                    && char.IsLowSurrogate(trailing = source[position + 1]))
                {
                    // Well formed surrogate pair: case both halves together.
                    SurrogateCasing.ToUpper(current, trailing, out destination[position], out destination[position + 1]);

                    // Step over the low surrogate that was just consumed.
                    position++;
                }
                else
                {
                    destination[position] = ToUpper(current);
                }

                position++;
            }
        }
 
        // Compares two UTF-16 buffers using ordinal, case-insensitive semantics.
        //
        // strA/lengthA and strB/lengthB describe the two buffers (reference to the first character and the
        // number of characters). Only the first Math.Min(lengthA, lengthB) positions are walked.
        //
        // Returns:
        //   - the difference between the uppercase forms at the first position where they differ;
        //   - -1 / 1 when exactly one side has a well-formed surrogate pair at the current position
        //     (-1 when it is B, 1 when it is A);
        //   - lengthA - lengthB when the walked prefix is equal.
        //
        // A "well-formed surrogate pair" here is a high surrogate that is followed, inside its own buffer,
        // by a low surrogate. A surrogate that is not part of such a pair is compared like any other character.
        // Must not be called in invariant or NLS globalization mode (asserted in debug builds).
        internal static int CompareStringIgnoreCase(ref char strA, int lengthA, ref char strB, int lengthB)
        {
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);

            int length = Math.Min(lengthA, lengthB);

            // Moving references to the current position inside each buffer.
            ref char charA = ref strA;
            ref char charB = ref strB;

            int index = 0;

            while (index < length)
            {
                char a = charA;
                char b = charB;
                char lowSurrogateA = '\0';

                // The (index >= lengthX - 1) guards make sure the character following the current one is only
                // read when it is still inside the buffer. lowSurrogateA is captured as a side effect of the test.
                if (!char.IsHighSurrogate(a) || index >= lengthA - 1 || !char.IsLowSurrogate(lowSurrogateA = Unsafe.Add(ref charA, 1)))
                {
                    if (!char.IsHighSurrogate(b) || index >= lengthB - 1 || !char.IsLowSurrogate(Unsafe.Add(ref charB, 1)))
                    {
                        //
                        // Neither A nor B is at a well-formed surrogate pair
                        //

                        // Cheap exact match first, it avoids the casing table lookups.
                        if (b == a)
                        {
                            index++;
                            charA = ref Unsafe.Add(ref charA, 1);
                            charB = ref Unsafe.Add(ref charB, 1);
                            continue;
                        }

                        char aUpper = ToUpper(a);
                        char bUpper = ToUpper(b);

                        if (aUpper == bUpper)
                        {
                            index++;
                            charA = ref Unsafe.Add(ref charA, 1);
                            charB = ref Unsafe.Add(ref charB, 1);
                            continue;
                        }

                        // First difference: order by the uppercase forms.
                        return aUpper - bUpper;
                    }

                    //
                    // charA is not a surrogate pair and charB is a valid surrogate pair
                    //

                    return -1;
                }

                //
                // A is a well-formed surrogate pair
                //

                char lowSurrogateB = '\0';

                if (!char.IsHighSurrogate(b) || index >= lengthB - 1 || !char.IsLowSurrogate(lowSurrogateB = Unsafe.Add(ref charB, 1)))
                {
                    //
                    // charB is not a surrogate pair and charA is a surrogate pair
                    //

                    return 1;
                }

                //
                // charA and charB are both well-formed surrogate pairs
                //

                Debug.Assert(lowSurrogateA != '\0');
                Debug.Assert(lowSurrogateB != '\0');

                // Exact match of both halves: skip the pair without any casing work.
                if (a == b && lowSurrogateA == lowSurrogateB)
                {
                    index += 2;
                    charA = ref Unsafe.Add(ref charA, 2);
                    charB = ref Unsafe.Add(ref charB, 2);
                    continue;
                }

                // Combine each pair into its scalar value and compare the uppercase scalars.
                uint upperSurrogateA = CharUnicodeInfo.ToUpper(UnicodeUtility.GetScalarFromUtf16SurrogatePair(a, lowSurrogateA));
                uint upperSurrogateB = CharUnicodeInfo.ToUpper(UnicodeUtility.GetScalarFromUtf16SurrogatePair(b, lowSurrogateB));

                if (upperSurrogateA == upperSurrogateB)
                {
                    index += 2;
                    charA = ref Unsafe.Add(ref charA, 2);
                    charB = ref Unsafe.Add(ref charB, 2);
                    continue;
                }

                return (int)upperSurrogateA - (int)upperSurrogateB;
            }

            // The walked prefix is equal: the shorter buffer sorts first.
            return lengthA - lengthB;
        }
 
        // Returns the index of the first occurrence of value inside source using ordinal, case-insensitive
        // matching, or -1 when there is none.
        // Callers are expected to pass a non-empty value that is not longer than source (asserted in debug builds).
        // Must not be called in invariant or NLS globalization mode (asserted in debug builds).
        internal static unsafe int IndexOf(ReadOnlySpan<char> source, ReadOnlySpan<char> value)
        {
            Debug.Assert(value.Length > 0);
            Debug.Assert(value.Length <= source.Length);

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);

            fixed (char* pSource = &MemoryMarshal.GetReference(source))
            fixed (char* pValue = &MemoryMarshal.GetReference(value))
            {
                // Last position in source where value still fits entirely.
                char* pLastCandidate = pSource + (source.Length - value.Length);
                // Last character of value.
                char* pValueLast = pValue + value.Length - 1;

                for (char* pCandidate = pSource; pCandidate <= pLastCandidate; pCandidate++)
                {
                    char* pNeedle = pValue;
                    char* pHay = pCandidate;

                    while (pNeedle <= pValueLast)
                    {
                        char needleChar = *pNeedle;
                        char hayChar = *pHay;
                        int advance;

                        if (pNeedle == pValueLast || !char.IsHighSurrogate(needleChar))
                        {
                            // Ordinary character, or a high surrogate that is the very last character of value.
                            if (needleChar != hayChar && ToUpper(needleChar) != ToUpper(hayChar))
                            {
                                break; // no match
                            }

                            advance = 1;
                        }
                        else if (char.IsHighSurrogate(hayChar) && char.IsLowSurrogate(pHay[1]) && char.IsLowSurrogate(pNeedle[1]))
                        {
                            // Both source and value have a well formed surrogate pair here.
                            if (!SurrogateCasing.Equal(hayChar, pHay[1], needleChar, pNeedle[1]))
                            {
                                break; // no match
                            }

                            advance = 2;
                        }
                        else
                        {
                            // High surrogate in value without a well formed pair on both sides: exact match only.
                            if (needleChar != hayChar)
                            {
                                break; // no match
                            }

                            advance = 1;
                        }

                        pNeedle += advance;
                        pHay += advance;
                    }

                    // The whole value was consumed, so this candidate is a match.
                    if (pNeedle > pValueLast)
                    {
                        return (int)(pCandidate - pSource);
                    }
                }

                return -1;
            }
        }
 
        // Returns the index of the last occurrence of value inside source using ordinal, case-insensitive
        // matching, or -1 when there is none. Candidates are tried from the end of source towards its start.
        // Callers are expected to pass a non-empty value that is not longer than source (asserted in debug builds).
        // Must not be called in invariant or NLS globalization mode (asserted in debug builds).
        internal static unsafe int LastIndexOf(ReadOnlySpan<char> source, ReadOnlySpan<char> value)
        {
            Debug.Assert(value.Length > 0);
            Debug.Assert(value.Length <= source.Length);

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);

            fixed (char* pSource = &MemoryMarshal.GetReference(source))
            fixed (char* pValue = &MemoryMarshal.GetReference(value))
            {
                // Last character of value.
                char* pValueLast = pValue + value.Length - 1;

                // Start at the last position where value still fits entirely and walk backwards.
                for (char* pCandidate = pSource + (source.Length - value.Length); pCandidate >= pSource; pCandidate--)
                {
                    char* pNeedle = pValue;
                    char* pHay = pCandidate;

                    while (pNeedle <= pValueLast)
                    {
                        char needleChar = *pNeedle;
                        char hayChar = *pHay;
                        int advance;

                        if (pNeedle == pValueLast || !char.IsHighSurrogate(needleChar))
                        {
                            // Ordinary character, or a high surrogate that is the very last character of value.
                            if (needleChar != hayChar && ToUpper(needleChar) != ToUpper(hayChar))
                            {
                                break; // no match
                            }

                            advance = 1;
                        }
                        else if (char.IsHighSurrogate(hayChar) && char.IsLowSurrogate(pHay[1]) && char.IsLowSurrogate(pNeedle[1]))
                        {
                            // Both source and value have a well formed surrogate pair here.
                            if (!SurrogateCasing.Equal(hayChar, pHay[1], needleChar, pNeedle[1]))
                            {
                                break; // no match
                            }

                            advance = 2;
                        }
                        else
                        {
                            // High surrogate in value without a well formed pair on both sides: exact match only.
                            if (needleChar != hayChar)
                            {
                                break; // no match
                            }

                            advance = 1;
                        }

                        pNeedle += advance;
                        pHay += advance;
                    }

                    // The whole value was consumed, so this candidate is a match.
                    if (pNeedle > pValueLast)
                    {
                        return (int)(pCandidate - pSource);
                    }
                }

                return -1;
            }
        }
 
        // Builds the initial s_casingTable content from the s_casingTableInit bitmap: a set bit marks the page
        // as NoCasingPage, a cleared bit leaves the cell null. Page 0 is then pointed at s_basicLatin.
        private static ushort[]?[] InitCasingTable()
        {
            ReadOnlySpan<byte> pageFlags = s_casingTableInit;
            ushort[]?[] pages = new ushort[]?[pageFlags.Length * 8];

            for (int group = 0; group < pageFlags.Length; group++)
            {
                int bits = pageFlags[group];

                for (int offset = 0; offset < 8; offset++)
                {
                    // The bits are in reverse order: the highest order bit describes the first page of the group.
                    if ((bits & (0x80 >> offset)) != 0)
                    {
                        pages[(group * 8) + offset] = NoCasingPage;
                    }
                }
            }

            pages[0] = s_basicLatin;
            return pages;
        }
 
        // Creates the 256-entry mapping for the given page, lets the native globalization layer fill it,
        // publishes it in s_casingTable and returns it.
        private static unsafe ushort[] InitOrdinalCasingPage(int pageNumber)
        {
            Debug.Assert(pageNumber >= 0 && pageNumber < 256);

            ushort[] page = new ushort[256];

            fixed (ushort* pPage = page)
            {
                // The interop call takes a char*; the ushort buffer is passed reinterpreted as char*.
                Interop.Globalization.InitOrdinalCasingPage(pageNumber, (char*)pPage);
            }

            // Publish the page only after it has been filled.
            Volatile.Write(ref s_casingTable[pageNumber], page);

            return page;
        }
    }
}