File: src\libraries\System.Private.CoreLib\src\System\Globalization\IdnMapping.cs
Web Access
Project: src\src\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj (System.Private.CoreLib)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
// This file contains the IDN functions and implementation.
//
// This allows encoding of non-ASCII domain names in a "punycode" form,
// for example:
//
//     \u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS
//
// is encoded as:
//
//     xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
//
// Additional options are provided to allow unassigned IDN characters and
// to validate according to the Std3ASCII Rules (like DNS names).
//
// There are also rules regarding bidirectionality of text and the length
// of segments.
//
// For additional rules see also:
//  RFC 3490 - Internationalizing Domain Names in Applications (IDNA)
//  RFC 3491 - Nameprep: A Stringprep Profile for Internationalized Domain Names (IDN)
//  RFC 3492 - Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
 
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
 
namespace System.Globalization
{
    // IdnMapping class used to map names to Punycode
    public sealed partial class IdnMapping
    {
        private bool _allowUnassigned;
        private bool _useStd3AsciiRules;
 
        public IdnMapping()
        {
        }
 
        public bool AllowUnassigned
        {
            get => _allowUnassigned;
            set => _allowUnassigned = value;
        }
 
        public bool UseStd3AsciiRules
        {
            get => _useStd3AsciiRules;
            set => _useStd3AsciiRules = value;
        }
 
        // Gets ASCII (Punycode) version of the string
        public string GetAscii(string unicode) =>
            GetAscii(unicode, 0);
 
        public string GetAscii(string unicode, int index)
        {
            ArgumentNullException.ThrowIfNull(unicode);
 
            return GetAscii(unicode, index, unicode.Length - index);
        }
 
        public string GetAscii(string unicode, int index, int count)
        {
            ArgumentNullException.ThrowIfNull(unicode);
 
            ArgumentOutOfRangeException.ThrowIfNegative(index);
            ArgumentOutOfRangeException.ThrowIfNegative(count);
            if (index > unicode.Length)
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
            if (index > unicode.Length - count)
                throw new ArgumentOutOfRangeException(nameof(unicode), SR.ArgumentOutOfRange_IndexCountBuffer);
 
            if (count == 0)
            {
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
            }
            if (unicode[index + count - 1] == 0)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, index + count - 1), nameof(unicode));
            }
 
            if (GlobalizationMode.Invariant)
            {
                return GetAsciiInvariant(unicode, index, count);
            }
 
            return GlobalizationMode.UseNls ?
                NlsGetAsciiCore(unicode, index, count) :
                IcuGetAsciiCore(unicode, index, count);
        }
 
        /// <summary>
        /// Encodes a Unicode domain name to its ASCII (Punycode) equivalent.
        /// </summary>
        /// <param name="unicode">The Unicode domain name to convert.</param>
        /// <param name="destination">The buffer to write the ASCII result to. This buffer must not overlap with <paramref name="unicode"/>.</param>
        /// <param name="charsWritten">When this method returns, contains the number of characters that were written to <paramref name="destination"/>.</param>
        /// <returns><see langword="true"/> if the conversion was successful and the result was written to <paramref name="destination"/>; otherwise, <see langword="false"/> if <paramref name="destination"/> is too small to contain the result.</returns>
        /// <exception cref="ArgumentException"><paramref name="unicode"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard, or the source and destination buffers overlap.</exception>
        public bool TryGetAscii(ReadOnlySpan<char> unicode, Span<char> destination, out int charsWritten)
        {
            if (unicode.Length == 0)
            {
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
            }
 
            if (unicode[^1] == 0)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode));
            }
 
            if (unicode.Overlaps(destination))
            {
                ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation);
            }
 
            if (GlobalizationMode.Invariant)
            {
                return TryGetAsciiInvariant(unicode, destination, out charsWritten);
            }
 
            return GlobalizationMode.UseNls ?
                NlsTryGetAsciiCore(unicode, destination, out charsWritten) :
                IcuTryGetAsciiCore(unicode, destination, out charsWritten);
        }
 
        // Gets Unicode version of the string.  Normalized and limited to IDNA characters.
        public string GetUnicode(string ascii) =>
            GetUnicode(ascii, 0);
 
        public string GetUnicode(string ascii, int index)
        {
            ArgumentNullException.ThrowIfNull(ascii);
 
            return GetUnicode(ascii, index, ascii.Length - index);
        }
 
        public string GetUnicode(string ascii, int index, int count)
        {
            ArgumentNullException.ThrowIfNull(ascii);
 
            ArgumentOutOfRangeException.ThrowIfNegative(index);
            ArgumentOutOfRangeException.ThrowIfNegative(count);
            if (index > ascii.Length)
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
            if (index > ascii.Length - count)
                throw new ArgumentOutOfRangeException(nameof(ascii), SR.ArgumentOutOfRange_IndexCountBuffer);
 
            // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ.
            // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null.
            // The Win32 APIs fail on an embedded null, but not on a terminating null.
            if (count > 0 && ascii[index + count - 1] == (char)0)
                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
            if (GlobalizationMode.Invariant)
            {
                return GetUnicodeInvariant(ascii, index, count);
            }
 
            return GlobalizationMode.UseNls ?
                NlsGetUnicodeCore(ascii, index, count) :
                IcuGetUnicodeCore(ascii, index, count);
        }
 
        /// <summary>
        /// Decodes one or more encoded domain name labels to a string of Unicode characters.
        /// </summary>
        /// <param name="ascii">The ASCII domain name to convert. The string may contain one or more labels, where each label is prefixed by "xn--".</param>
        /// <param name="destination">The buffer to write the Unicode result to. This buffer must not overlap with <paramref name="ascii"/>.</param>
        /// <param name="charsWritten">When this method returns, contains the number of characters that were written to <paramref name="destination"/>.</param>
        /// <returns><see langword="true"/> if the conversion was successful and the result was written to <paramref name="destination"/>; otherwise, <see langword="false"/> if <paramref name="destination"/> is too small to contain the result.</returns>
        /// <exception cref="ArgumentException"><paramref name="ascii"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard, or the source and destination buffers overlap.</exception>
        public bool TryGetUnicode(ReadOnlySpan<char> ascii, Span<char> destination, out int charsWritten)
        {
            // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ.
            // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null.
            // The Win32 APIs fail on an embedded null, but not on a terminating null.
            if (ascii.Length > 0 && ascii[^1] == (char)0)
            {
                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
            }
 
            if (ascii.Overlaps(destination))
            {
                ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation);
            }
 
            if (GlobalizationMode.Invariant)
            {
                return TryGetUnicodeInvariant(ascii, destination, out charsWritten);
            }
 
            return GlobalizationMode.UseNls ?
                NlsTryGetUnicodeCore(ascii, destination, out charsWritten) :
                IcuTryGetUnicodeCore(ascii, destination, out charsWritten);
        }
 
        public override bool Equals([NotNullWhen(true)] object? obj) =>
            obj is IdnMapping that &&
            _allowUnassigned == that._allowUnassigned &&
            _useStd3AsciiRules == that._useStd3AsciiRules;
 
        public override int GetHashCode() =>
            (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000);
 
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static string GetStringForOutput(string? originalString, ReadOnlySpan<char> input, ReadOnlySpan<char> output)
        {
            Debug.Assert(input.Length > 0);
 
            if (originalString is not null &&
                originalString.Length == input.Length &&
                input.Length == output.Length &&
                Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(input), ref MemoryMarshal.GetReference(output), input.Length))
            {
                return originalString;
            }
 
            return output.ToString();
        }
 
        //
        // Invariant implementation
        //
 
        private const char c_delimiter = '-';
        private const string c_strAcePrefix = "xn--";
        private const int c_labelLimit = 63;          // Not including dots
        private const int c_defaultNameLimit = 255;   // Including dots
        private const int c_initialN = 0x80;
        private const int c_maxint = 0x7ffffff;
        private const int c_initialBias = 72;
        private const int c_punycodeBase = 36;
        private const int c_tmin = 1;
        private const int c_tmax = 26;
        private const int c_skew = 38;
        private const int c_damp = 700;
 
        private string GetAsciiInvariant(string unicodeString, int index, int count)
        {
            ReadOnlySpan<char> unicode = unicodeString.AsSpan(index, count);
 
            // Check for ASCII only string, which will be unchanged
            if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true))
            {
                // Return original string if the entire string was requested and it doesn't need modification
                if (index == 0 && count == unicodeString.Length)
                {
                    return unicodeString;
                }
 
                return unicode.ToString();
            }
 
            // Cannot be null terminated (normalization won't help us with this one, and
            // may have returned false before checking the whole string above)
            Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now.");
            if (unicode[^1] <= 0x1f)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode));
            }
 
            // May need to check Std3 rules again for non-ascii
            if (UseStd3AsciiRules)
            {
                ValidateStd3AndAscii(unicode, true, false);
            }
 
            // Go ahead and encode it
            return PunycodeEncode(unicode);
        }
 
        private bool TryGetAsciiInvariant(ReadOnlySpan<char> unicode, Span<char> destination, out int charsWritten)
        {
            // Check for ASCII only string, which will be unchanged
            if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true))
            {
                if (unicode.Length <= destination.Length)
                {
                    unicode.CopyTo(destination);
                    charsWritten = unicode.Length;
                    return true;
                }
 
                charsWritten = 0;
                return false;
            }
 
            // Cannot be null terminated (normalization won't help us with this one, and
            // may have returned false before checking the whole string above)
            Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now.");
            if (unicode[^1] <= 0x1f)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode));
            }
 
            // May need to check Std3 rules again for non-ascii
            if (UseStd3AsciiRules)
            {
                ValidateStd3AndAscii(unicode, true, false);
            }
 
            // Go ahead and encode it
            string result = PunycodeEncode(unicode);
            if (result.Length <= destination.Length)
            {
                result.CopyTo(destination);
                charsWritten = result.Length;
                return true;
            }
 
            charsWritten = 0;
            return false;
        }
 
        // See if we're only ASCII
        private static bool ValidateStd3AndAscii(ReadOnlySpan<char> unicode, bool bUseStd3, bool bCheckAscii)
        {
            // If its empty, then its too small
            if (unicode.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
            int iLastDot = -1;
 
            // Loop the whole string
            for (int i = 0; i < unicode.Length; i++)
            {
                // Aren't allowing control chars (or 7f, but idn tables catch that, they don't catch \0 at end though)
                if (unicode[i] <= 0x1f)
                {
                    throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, i), nameof(unicode));
                }
 
                // If its Unicode or a control character, return false (non-ascii)
                if (bCheckAscii && unicode[i] >= 0x7f)
                    return false;
 
                // Check for dots
                if (IsDot(unicode[i]))
                {
                    // Can't have 2 dots in a row
                    if (i == iLastDot + 1)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
                    // If its too far between dots then fail
                    if (i - iLastDot > c_labelLimit + 1)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
                    // If validating Std3, then char before dot can't be - char
                    if (bUseStd3 && i > 0)
                        ValidateStd3(unicode[i - 1], true);
 
                    // Remember where the last dot is
                    iLastDot = i;
                    continue;
                }
 
                // If necessary, make sure its a valid std3 character
                if (bUseStd3)
                {
                    ValidateStd3(unicode[i], i == iLastDot + 1);
                }
            }
 
            // If we never had a dot, then we need to be shorter than the label limit
            if (iLastDot == -1 && unicode.Length > c_labelLimit)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
            // Need to validate entire string length, 1 shorter if last char wasn't a dot
            if (unicode.Length > c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                        c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1)), nameof(unicode));
 
            // If last char wasn't a dot we need to check for trailing -
            if (bUseStd3 && !IsDot(unicode[^1]))
                ValidateStd3(unicode[^1], true);
 
            return true;
        }
 
        /* PunycodeEncode() converts Unicode to Punycode.  The input     */
        /* is represented as an array of Unicode code points (not code    */
        /* units; surrogate pairs are not allowed), and the output        */
        /* will be represented as an array of ASCII code points.  The     */
        /* output string is *not* null-terminated; it will contain        */
        /* zeros if and only if the input contains zeros.  (Of course     */
        /* the caller can leave room for a terminator and add one if      */
        /* needed.)  The input_length is the number of code points in     */
        /* the input.  The output_length is an in/out argument: the       */
        /* caller passes in the maximum number of code points that it     */
 
        /* can receive, and on successful return it will contain the      */
        /* number of code points actually output.  The case_flags array   */
        /* holds input_length boolean values, where nonzero suggests that */
        /* the corresponding Unicode character be forced to uppercase     */
        /* after being decoded (if possible), and zero suggests that      */
        /* it be forced to lowercase (if possible).  ASCII code points    */
        /* are encoded literally, except that ASCII letters are forced    */
        /* to uppercase or lowercase according to the corresponding       */
        /* uppercase flags.  If case_flags is a null pointer then ASCII   */
        /* letters are left as they are, and other code points are        */
        /* treated as if their uppercase flags were zero.  The return     */
        /* value can be any of the punycode_status values defined above   */
        /* except punycode_bad_input; if not punycode_success, then       */
        /* output_size and output might contain garbage.                  */
        private static string PunycodeEncode(ReadOnlySpan<char> unicode)
        {
            // 0 length strings aren't allowed
            if (unicode.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
            StringBuilder output = new StringBuilder(unicode.Length);
            int iNextDot = 0;
            int iAfterLastDot = 0;
            int iOutputAfterLastDot = 0;
 
            // Find the next dot
            while (iNextDot < unicode.Length)
            {
                // Legal "dot" separators (i.e: . in www.microsoft.com)
                const string DotSeparators = ".\u3002\uFF0E\uFF61";
 
                // Find end of this segment
                iNextDot = unicode.Slice(iAfterLastDot).IndexOfAny(DotSeparators);
                iNextDot = iNextDot < 0 ? unicode.Length : iNextDot + iAfterLastDot;
 
                // Only allowed to have empty . section at end (www.microsoft.com.)
                if (iNextDot == iAfterLastDot)
                {
                    // Only allowed to have empty sections as trailing .
                    if (iNextDot != unicode.Length)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
                    // Last dot, stop
                    break;
                }
 
                // We'll need an Ace prefix
                output.Append(c_strAcePrefix);
 
                // Everything resets every segment.
                bool bRightToLeft = false;
 
                // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
                StrongBidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iAfterLastDot);
                if (eBidi == StrongBidiCategory.StrongRightToLeft)
                {
                    // It has to be right to left.
                    bRightToLeft = true;
 
                    // Check last char
                    int iTest = iNextDot - 1;
                    if (char.IsLowSurrogate(unicode[iTest]))
                    {
                        iTest--;
                    }
 
                    eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iTest);
                    if (eBidi != StrongBidiCategory.StrongRightToLeft)
                    {
                        // Oops, last wasn't RTL, last should be RTL if first is RTL
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }
                }
 
                // Handle the basic code points
                int basicCount;
                int numProcessed = 0;           // Num code points that have been processed so far (this segment)
                for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++)
                {
                    // Can't be lonely surrogate because it would've thrown in normalization
                    Debug.Assert(!char.IsLowSurrogate(unicode[basicCount]), "[IdnMapping.punycode_encode]Unexpected low surrogate");
 
                    // Double check our bidi rules
                    StrongBidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount);
 
                    // If we're RTL, we can't have LTR chars
                    if (bRightToLeft && testBidi == StrongBidiCategory.StrongLeftToRight)
                    {
                        // Oops, throw error
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }
 
                    // If we're not RTL we can't have RTL chars
                    if (!bRightToLeft && testBidi == StrongBidiCategory.StrongRightToLeft)
                    {
                        // Oops, throw error
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }
 
                    // If its basic then add it
                    if (Basic(unicode[basicCount]))
                    {
                        output.Append(EncodeBasic(unicode[basicCount]));
                        numProcessed++;
                    }
                    // If its a surrogate, skip the next since our bidi category tester doesn't handle it.
                    else if (basicCount + 1 < iNextDot && char.IsSurrogatePair(unicode[basicCount], unicode[basicCount + 1]))
                        basicCount++;
                }
 
                int numBasicCodePoints = numProcessed;     // number of basic code points
 
                // Stop if we ONLY had basic code points
                if (numBasicCodePoints == iNextDot - iAfterLastDot)
                {
                    // Get rid of xn-- and this segments done
                    output.Remove(iOutputAfterLastDot, c_strAcePrefix.Length);
                }
                else
                {
                    // If it has some non-basic code points the input cannot start with xn--
                    if (unicode.Slice(iAfterLastDot).StartsWith(c_strAcePrefix, StringComparison.OrdinalIgnoreCase))
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(unicode));
 
                    // Need to do ACE encoding
                    int numSurrogatePairs = 0;            // number of surrogate pairs so far
 
                    // Add a delimiter (-) if we had any basic code points (between basic and encoded pieces)
                    if (numBasicCodePoints > 0)
                    {
                        output.Append(c_delimiter);
                    }
 
                    // Initialize the state
                    int n = c_initialN;
                    int delta = 0;
                    int bias = c_initialBias;
 
                    // Main loop
                    while (numProcessed < (iNextDot - iAfterLastDot))
                    {
                        /* All non-basic code points < n have been     */
                        /* handled already.  Find the next larger one: */
                        int j;
                        int m;
                        int test;
                        for (m = c_maxint, j = iAfterLastDot;
                             j < iNextDot;
                             j += IsSupplementary(test) ? 2 : 1)
                        {
                            test = GetCodePoint(unicode, j);
                            if (test >= n && test < m) m = test;
                        }
 
                        /* Increase delta enough to advance the decoder's    */
                        /* <n,i> state to <m,0>, but guard against overflow: */
                        delta += (int)((m - n) * ((numProcessed - numSurrogatePairs) + 1));
                        Debug.Assert(delta > 0, "[IdnMapping.cs]1 punycode_encode - delta overflowed int");
                        n = m;
 
                        for (j = iAfterLastDot; j < iNextDot; j += IsSupplementary(test) ? 2 : 1)
                        {
                            // Make sure we're aware of surrogates
                            test = GetCodePoint(unicode, j);
 
                            // Adjust for character position (only the chars in our string already, some
                            // haven't been processed.
 
                            if (test < n)
                            {
                                delta++;
                                Debug.Assert(delta > 0, "[IdnMapping.cs]2 punycode_encode - delta overflowed int");
                            }
 
                            if (test == n)
                            {
                                // Represent delta as a generalized variable-length integer:
                                int q, k;
                                for (q = delta, k = c_punycodeBase; ; k += c_punycodeBase)
                                {
                                    int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                                    if (q < t) break;
                                    Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_encode]Expected c_punycodeBase (36) to be != t");
                                    output.Append(EncodeDigit(t + (q - t) % (c_punycodeBase - t)));
                                    q = (q - t) / (c_punycodeBase - t);
                                }
 
                                output.Append(EncodeDigit(q));
                                bias = Adapt(delta, (numProcessed - numSurrogatePairs) + 1, numProcessed == numBasicCodePoints);
                                delta = 0;
                                numProcessed++;
 
                                if (IsSupplementary(m))
                                {
                                    numProcessed++;
                                    numSurrogatePairs++;
                                }
                            }
                        }
                        ++delta;
                        ++n;
                        Debug.Assert(delta > 0, "[IdnMapping.cs]3 punycode_encode - delta overflowed int");
                    }
                }
 
                // Make sure its not too big
                if (output.Length - iOutputAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
 
                // Done with this segment, add dot if necessary
                if (iNextDot != unicode.Length)
                    output.Append('.');
 
                iAfterLastDot = iNextDot + 1;
                iOutputAfterLastDot = output.Length;
            }
 
            // Throw if we're too long
            if (output.Length > c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1)), nameof(unicode));
            // Return our output string
            return output.ToString();
        }
 
        // Is it a dot?
        // are we U+002E (., full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), or
        // U+FF61 (halfwidth ideographic full stop).
        // Note: IDNA Normalization gets rid of dots now, but testing for last dot is before normalization
        private static bool IsDot(char c) =>
            c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61';
 
        private static bool IsSupplementary(int cTest) =>
            cTest >= 0x10000;
 
        private static bool Basic(uint cp) =>
            // Is it in ASCII range?
            cp < 0x80;
 
        private static int GetCodePoint(ReadOnlySpan<char> s, int index)
        {
            // Check if the character at index is a high surrogate.
            if (char.IsHighSurrogate(s[index]) && index + 1 < s.Length && char.IsLowSurrogate(s[index + 1]))
            {
                return char.ConvertToUtf32(s[index], s[index + 1]);
            }
 
            return s[index];
        }
 
        // Validate Std3 rules for a character
        private static void ValidateStd3(char c, bool bNextToDot)
        {
            // Check for illegal characters
            if (c <= ',' || c == '/' || (c >= ':' && c <= '@') ||      // Lots of characters not allowed
                (c >= '[' && c <= '`') || (c >= '{' && c <= (char)0x7F) ||
                (c == '-' && bNextToDot))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c));
        }
 
        private string GetUnicodeInvariant(string ascii, int index, int count)
        {
            // Convert Punycode to Unicode
            string asciiSlice = ascii.Substring(index, count);
            string strUnicode = PunycodeDecode(asciiSlice);
 
            // Output name MUST obey IDNA rules & round trip (casing differences are allowed)
            string asciiRoundtrip = GetAscii(strUnicode);
            if (!asciiRoundtrip.Equals(asciiSlice, StringComparison.OrdinalIgnoreCase))
            {
                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
            }
 
            // If the ASCII round-trip equals the original string, return it as-is (no allocation)
            if (index == 0 && count == ascii.Length && strUnicode.Equals(ascii, StringComparison.OrdinalIgnoreCase))
            {
                return ascii;
            }
 
            return strUnicode;
        }
 
        private bool TryGetUnicodeInvariant(ReadOnlySpan<char> ascii, Span<char> destination, out int charsWritten)
        {
            // Convert the span to a string for PunycodeDecode since it uses string operations extensively
            string asciiString = ascii.ToString();
 
            // Convert Punycode to Unicode
            string strUnicode = PunycodeDecode(asciiString);
 
            // Output name MUST obey IDNA rules & round trip (casing differences are allowed)
            if (!asciiString.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase))
            {
                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
            }
 
            if (strUnicode.Length <= destination.Length)
            {
                strUnicode.CopyTo(destination);
                charsWritten = strUnicode.Length;
                return true;
            }
 
            charsWritten = 0;
            return false;
        }
 
        /* PunycodeDecode() converts Punycode to Unicode.  The input is  */
        /* represented as an array of ASCII code points, and the output   */
        /* will be represented as an array of Unicode code points.  The   */
        /* input_length is the number of code points in the input.  The   */
        /* output_length is an in/out argument: the caller passes in      */
        /* the maximum number of code points that it can receive, and     */
        /* on successful return it will contain the actual number of      */
        /* code points output.  The case_flags array needs room for at    */
        /* least output_length values, or it can be a null pointer if the */
        /* case information is not needed.  A nonzero flag suggests that  */
        /* the corresponding Unicode character be forced to uppercase     */
        /* by the caller (if possible), while zero suggests that it be    */
        /* forced to lowercase (if possible).  ASCII code points are      */
        /* output already in the proper case, but their flags will be set */
        /* appropriately so that applying the flags would be harmless.    */
        /* The return value can be any of the punycode_status values      */
        /* defined above; if not punycode_success, then output_length,    */
        /* output, and case_flags might contain garbage.  On success, the */
        /* decoder will never need to write an output_length greater than */
        /* input_length, because of how the encoding is defined.          */
 
        private static string PunycodeDecode(string ascii)
        {
            // 0 length strings aren't allowed
            if (ascii.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
 
            // Throw if we're too long
            if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                            c_defaultNameLimit - (IsDot(ascii[^1]) ? 0 : 1)), nameof(ascii));
 
            // output stringbuilder
            StringBuilder output = new StringBuilder(ascii.Length);
 
            // Dot searching
            int iNextDot = 0;
            int iAfterLastDot = 0;
            int iOutputAfterLastDot = 0;
 
            while (iNextDot < ascii.Length)
            {
                // Find end of this segment
                iNextDot = ascii.IndexOf('.', iAfterLastDot);
                if (iNextDot < 0 || iNextDot > ascii.Length)
                    iNextDot = ascii.Length;
 
                // Only allowed to have empty . section at end (www.microsoft.com.)
                if (iNextDot == iAfterLastDot)
                {
                    // Only allowed to have empty sections as trailing .
                    if (iNextDot != ascii.Length)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
 
                    // Last dot, stop
                    break;
                }
 
                // In either case it can't be bigger than segment size
                if (iNextDot - iAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
 
                // See if this section's ASCII or ACE
                if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot ||
                    string.Compare(ascii, iAfterLastDot, c_strAcePrefix, 0, c_strAcePrefix.Length, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    // Its ASCII, copy it
                    output.Append(ascii, iAfterLastDot, iNextDot - iAfterLastDot);
                }
                else
                {
                    // Not ASCII, bump up iAfterLastDot to be after ACE Prefix
                    iAfterLastDot += c_strAcePrefix.Length;
 
                    // Get number of basic code points (where delimiter is)
                    // numBasicCodePoints < 0 if there're no basic code points
                    int iTemp = ascii.LastIndexOf(c_delimiter, iNextDot - 1);
 
                    // Trailing - not allowed
                    if (iTemp == iNextDot - 1)
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
                    int numBasicCodePoints;
                    if (iTemp <= iAfterLastDot)
                        numBasicCodePoints = 0;
                    else
                    {
                        numBasicCodePoints = iTemp - iAfterLastDot;
 
                        // Copy all the basic code points, making sure they're all in the allowed range,
                        // and losing the casing for all of them.
                        for (int copyAscii = iAfterLastDot; copyAscii < iAfterLastDot + numBasicCodePoints; copyAscii++)
                        {
                            // Make sure we don't allow unicode in the ascii part
                            if (ascii[copyAscii] > 0x7f)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
                            // When appending make sure they get lower cased
                            output.Append((char)(char.IsAsciiLetterUpper(ascii[copyAscii]) ? ascii[copyAscii] - 'A' + 'a' : ascii[copyAscii]));
                        }
                    }
 
                    // Get ready for main loop.  Start at beginning if we didn't have any
                    // basic code points, otherwise start after the -.
                    // asciiIndex will be next character to read from ascii
                    int asciiIndex = iAfterLastDot + (numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0);
 
                    // initialize our state
                    int n = c_initialN;
                    int bias = c_initialBias;
                    int i = 0;
 
                    int w, k;
 
                    // no Supplementary characters yet
                    int numSurrogatePairs = 0;
 
                    // Main loop, read rest of ascii
                    while (asciiIndex < iNextDot)
                    {
                        /* Decode a generalized variable-length integer into delta,  */
                        /* which gets added to i.  The overflow checking is easier   */
                        /* if we increase i as we go, then subtract off its starting */
                        /* value at the end to obtain delta.                         */
                        int oldi = i;
 
                        for (w = 1, k = c_punycodeBase; ; k += c_punycodeBase)
                        {
                            // Check to make sure we aren't overrunning our ascii string
                            if (asciiIndex >= iNextDot)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
                            // decode the digit from the next char
                            int digit = DecodeDigit(ascii[asciiIndex++]);
 
                            Debug.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0");
                            if (digit > (c_maxint - i) / w)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
                            i += (int)(digit * w);
                            int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                            if (digit < t)
                                break;
                            Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)");
                            if (w > c_maxint / (c_punycodeBase - t))
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            w *= (c_punycodeBase - t);
                        }
 
                        bias = Adapt(i - oldi, (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1, oldi == 0);
 
                        /* i was supposed to wrap around from output.Length to 0,   */
                        /* incrementing n each time, so we'll fix that now: */
                        Debug.Assert((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1 > 0,
                            "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment");
                        if (i / ((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1) > c_maxint - n)
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                        n += (int)(i / (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1));
                        i %= (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1);
 
                        // Make sure n is legal
                        if (n < 0 || n > 0x10ffff || (n >= 0xD800 && n <= 0xDFFF))
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
 
                        // insert n at position i of the output:  Really tricky if we have surrogates
                        int iUseInsertLocation;
                        string strTemp = char.ConvertFromUtf32(n);
 
                        // If we have supplimentary characters
                        if (numSurrogatePairs > 0)
                        {
                            // Hard way, we have supplimentary characters
                            int iCount;
                            for (iCount = i, iUseInsertLocation = iOutputAfterLastDot; iCount > 0; iCount--, iUseInsertLocation++)
                            {
                                // If its a surrogate, we have to go one more
                                if (iUseInsertLocation >= output.Length)
                                    throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                                if (char.IsSurrogate(output[iUseInsertLocation]))
                                    iUseInsertLocation++;
                            }
                        }
                        else
                        {
                            // No Supplementary chars yet, just add i
                            iUseInsertLocation = iOutputAfterLastDot + i;
                        }
 
                        // Insert it
                        output.Insert(iUseInsertLocation, strTemp);
 
                        // If it was a surrogate increment our counter
                        if (IsSupplementary(n))
                            numSurrogatePairs++;
 
                        // Index gets updated
                        i++;
                    }
 
                    // Do BIDI testing
                    bool bRightToLeft = false;
 
                    // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
                    StrongBidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output, iOutputAfterLastDot);
                    if (eBidi == StrongBidiCategory.StrongRightToLeft)
                    {
                        // It has to be right to left.
                        bRightToLeft = true;
                    }
 
                    // Check the rest of them to make sure RTL/LTR is consistent
                    for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++)
                    {
                        // This might happen if we run into a pair
                        if (char.IsLowSurrogate(output[iTest]))
                            continue;
 
                        // Check to see if its LTR
                        eBidi = CharUnicodeInfo.GetBidiCategory(output, iTest);
                        if ((bRightToLeft && eBidi == StrongBidiCategory.StrongLeftToRight) ||
                            (!bRightToLeft && eBidi == StrongBidiCategory.StrongRightToLeft))
                            throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                    }
 
                    // Its also a requirement that the last one be RTL if 1st is RTL
                    if (bRightToLeft && eBidi != StrongBidiCategory.StrongRightToLeft)
                    {
                        // Oops, last wasn't RTL, last should be RTL if first is RTL
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                    }
                }
 
                // See if this label was too long
                if (iNextDot - iAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));
 
                // Done with this segment, add dot if necessary
                if (iNextDot != ascii.Length)
                    output.Append('.');
 
                iAfterLastDot = iNextDot + 1;
                iOutputAfterLastDot = output.Length;
            }
 
            // Throw if we're too long
            if (output.Length > c_defaultNameLimit - (IsDot(output[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(output[^1]) ? 0 : 1)), nameof(ascii));
 
            // Return our output string
            return output.ToString();
        }
 
        // DecodeDigit(cp) returns the numeric value of a basic code */
        // point (for use in representing integers) in the range 0 to */
        // c_punycodeBase-1, or <0 if cp is does not represent a value. */
 
        private static int DecodeDigit(char cp)
        {
            if (char.IsAsciiDigit(cp))
                return cp - '0' + 26;
 
            // Two flavors for case differences
            if (char.IsAsciiLetterLower(cp))
                return cp - 'a';
 
            if (char.IsAsciiLetterUpper(cp))
                return cp - 'A';
 
            // Expected 0-9, A-Z or a-z, everything else is illegal
            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(cp));
        }
 
        private static int Adapt(int delta, int numpoints, bool firsttime)
        {
            uint k;
 
            delta = firsttime ? delta / c_damp : delta / 2;
            Debug.Assert(numpoints != 0, "[IdnMapping.adapt]Expected non-zero numpoints.");
            delta += delta / numpoints;
 
            for (k = 0; delta > ((c_punycodeBase - c_tmin) * c_tmax) / 2; k += c_punycodeBase)
            {
                delta /= c_punycodeBase - c_tmin;
            }
 
            Debug.Assert(delta + c_skew != 0, "[IdnMapping.adapt]Expected non-zero delta+skew.");
            return (int)(k + (c_punycodeBase - c_tmin + 1) * delta / (delta + c_skew));
        }
 
        /* EncodeBasic(bcp,flag) forces a basic code point to lowercase */
        /* if flag is false, uppercase if flag is true, and returns    */
        /* the resulting code point.  The code point is unchanged if it  */
        /* is caseless.  The behavior is undefined if bcp is not a basic */
        /* code point.                                                   */
 
        private static char EncodeBasic(char bcp)
        {
            if (char.IsAsciiLetterUpper(bcp))
                bcp += (char)('a' - 'A');
 
            return bcp;
        }
 
        /* EncodeDigit(d,flag) returns the basic code point whose value      */
        /* (when used for representing integers) is d, which needs to be in   */
        /* the range 0 to punycodeBase-1.  The lowercase form is used unless flag is  */
        /* true, in which case the uppercase form is used. */
 
        private static char EncodeDigit(int d)
        {
            Debug.Assert(d >= 0 && d < c_punycodeBase, "[IdnMapping.encode_digit]Expected 0 <= d < punycodeBase");
            // 26-35 map to ASCII 0-9
            if (d > 25) return (char)(d - 26 + '0');
 
            // 0-25 map to a-z or A-Z
            return (char)(d + 'a');
        }
    }
}