Normalization.Icu.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;
 
namespace System.Globalization
{
    internal static partial class Normalization
    {
        private static unsafe bool IcuIsNormalized(string strInput, NormalizationForm normalizationForm)
        {
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);
 
            ValidateArguments(strInput, normalizationForm);
 
            int ret;
            fixed (char* pInput = strInput)
            {
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
                if (GlobalizationMode.Hybrid)
                    ret = Interop.Globalization.IsNormalizedNative(normalizationForm, pInput, strInput.Length);
                else
                    ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#else
                ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length);
#endif
            }
 
            if (ret == -1)
            {
                throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
            }
 
            return ret == 1;
        }
 
        private static unsafe string IcuNormalize(string strInput, NormalizationForm normalizationForm)
        {
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);
 
            ValidateArguments(strInput, normalizationForm);
 
            char[]? toReturn = null;
            try
            {
                const int StackallocThreshold = 512;
 
                Span<char> buffer = strInput.Length <= StackallocThreshold
                    ? stackalloc char[StackallocThreshold]
                    : (toReturn = ArrayPool<char>.Shared.Rent(strInput.Length));
 
                for (int attempt = 0; attempt < 2; attempt++)
                {
                    int realLen;
                    fixed (char* pInput = strInput)
                    fixed (char* pDest = &MemoryMarshal.GetReference(buffer))
                    {
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
                    if (GlobalizationMode.Hybrid)
                        realLen = Interop.Globalization.NormalizeStringNative(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
                    else
                        realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#else
                        realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length);
#endif
                    }
 
                    if (realLen == -1)
                    {
                        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
                    }
 
                    if (realLen <= buffer.Length)
                    {
                        ReadOnlySpan<char> result = buffer.Slice(0, realLen);
                        return result.SequenceEqual(strInput)
                            ? strInput
                            : new string(result);
                    }
 
                    Debug.Assert(realLen > StackallocThreshold);
 
                    if (attempt == 0)
                    {
                        if (toReturn != null)
                        {
                            // Clear toReturn first to ensure we don't return the same buffer twice
                            char[] temp = toReturn;
                            toReturn = null;
                            ArrayPool<char>.Shared.Return(temp);
                        }
 
                        buffer = toReturn = ArrayPool<char>.Shared.Rent(realLen);
                    }
                }
 
                throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
            }
            finally
            {
                if (toReturn != null)
                {
                    ArrayPool<char>.Shared.Return(toReturn);
                }
            }
        }
 
        private static void ValidateArguments(string strInput, NormalizationForm normalizationForm)
        {
            Debug.Assert(strInput != null);
 
            if (OperatingSystem.IsBrowser() && (normalizationForm == NormalizationForm.FormKC || normalizationForm == NormalizationForm.FormKD))
            {
                // Browser's ICU doesn't contain data needed for FormKC and FormKD
                throw new PlatformNotSupportedException();
            }
 
            if (normalizationForm != NormalizationForm.FormC && normalizationForm != NormalizationForm.FormD &&
                normalizationForm != NormalizationForm.FormKC && normalizationForm != NormalizationForm.FormKD)
            {
                throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm));
            }
 
            if (HasInvalidUnicodeSequence(strInput))
            {
                throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
            }
        }
 
        /// <summary>
        /// ICU does not signal an error during normalization if the input string has invalid unicode,
        /// unlike Windows (which uses the ERROR_NO_UNICODE_TRANSLATION error value to signal an error).
        ///
        /// We walk the string ourselves looking for these bad sequences so we can continue to throw
        /// ArgumentException in these cases.
        /// </summary>
        private static bool HasInvalidUnicodeSequence(string s)
        {
            for (int i = 0; i < s.Length; i++)
            {
                char c = s[i];
 
                if (c < '\ud800')
                {
                    continue;
                }
 
                if (c == '\uFFFE')
                {
                    return true;
                }
 
                // If we see low surrogate before a high one, the string is invalid.
                if (char.IsLowSurrogate(c))
                {
                    return true;
                }
 
                if (char.IsHighSurrogate(c))
                {
                    if (i + 1 >= s.Length || !char.IsLowSurrogate(s[i + 1]))
                    {
                        // A high surrogate at the end of the string or a high surrogate
                        // not followed by a low surrogate
                        return true;
                    }
                    else
                    {
                        i++; // consume the low surrogate.
                        continue;
                    }
                }
            }
 
            return false;
        }
    }
}
File: src\libraries\System.Private.CoreLib\src\System\Globalization\Normalization.Icu.cs	Web Access
Project: src\src\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj (System.Private.CoreLib)