File: System\IriHelper.cs
Web Access
Project: src\src\libraries\System.Private.Uri\src\System.Private.Uri.csproj (System.Private.Uri)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
 
namespace System
{
    internal static class IriHelper
    {
        //
        // Checks if provided non surrogate char lies in iri range
        // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
        //
        internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
        {
            return IsInInclusiveRange(unicode, '\u00A0', '\uD7FF')
                || IsInInclusiveRange(unicode, '\uF900', '\uFDCF')
                || IsInInclusiveRange(unicode, '\uFDF0', '\uFFEF')
                || (isQuery && IsInInclusiveRange(unicode, '\uE000', '\uF8FF'));
        }
 
        //
        // Check if highSurr and lowSurr are a surrogate pair then
        // it checks if the combined char is in the range
        // Takes in isQuery because iri restrictions for query are different
        // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
        //
        internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
        {
            Debug.Assert(char.IsHighSurrogate(highSurr));
 
            if (Rune.TryCreate(highSurr, lowSurr, out Rune rune))
            {
                isSurrogatePair = true;
 
                // U+xxFFFE..U+xxFFFF is always private use for all planes, so we exclude it.
                // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
                // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).
 
                return ((rune.Value & 0xFFFF) < 0xFFFE)
                    && ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000))
                    && (isQuery || rune.Value < 0xF0000);
            }
 
            isSurrogatePair = false;
            return false;
        }
 
        internal static bool CheckIriUnicodeRange(uint value, bool isQuery)
        {
            if (value <= 0xFFFF)
            {
                return IsInInclusiveRange(value, '\u00A0', '\uD7FF')
                    || IsInInclusiveRange(value, '\uF900', '\uFDCF')
                    || IsInInclusiveRange(value, '\uFDF0', '\uFFEF')
                    || (isQuery && IsInInclusiveRange(value, '\uE000', '\uF8FF'));
            }
            else
            {
                return ((value & 0xFFFF) < 0xFFFE)
                    && !IsInInclusiveRange(value, 0xE0000, 0xE0FFF)
                    && (isQuery || value < 0xF0000);
            }
        }
 
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool IsInInclusiveRange(uint value, uint min, uint max)
            => (value - min) <= (max - min);
 
        //
        // Check reserved chars according to RFC 3987 in a specific component
        //
        internal static bool CheckIsReserved(char ch, UriComponents component)
        {
            if ((UriComponents.AbsoluteUri & component) == 0)
            {
                return component == 0 && UriHelper.IsGenDelim(ch);
            }
 
            return UriHelper.RFC3986ReservedMarks.Contains(ch);
        }
 
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            var dest = size <= Uri.StackallocThreshold
                ? new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold])
                : new ValueStringBuilder(size);
 
            Span<byte> maxUtf8EncodedSpan = stackalloc byte[4];
 
            for (int i = start; i < end; ++i)
            {
                char ch = pInput[i];
                if (ch == '%')
                {
                    if (end - i > 2)
                    {
                        ch = UriHelper.DecodeHexChars(pInput[i + 1], pInput[i + 2]);
 
                        // Do not unescape a reserved char
                        if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                        {
                            // keep as is
                            dest.Append(pInput[i++]);
                            dest.Append(pInput[i++]);
                            dest.Append(pInput[i]);
                            continue;
                        }
                        else if (ch <= '\x7F')
                        {
                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            //ASCII
                            dest.Append(ch);
                            i += 2;
                            continue;
                        }
                        else
                        {
                            // possibly utf8 encoded sequence of unicode
                            int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
                                pInput + i,
                                end - i,
                                ref dest,
                                component == UriComponents.Query,
                                iriParsing: true);
 
                            Debug.Assert(charactersRead > 0);
                            i += charactersRead - 1; // -1 as i will be incremented in the loop
                        }
                    }
                    else
                    {
                        dest.Append(pInput[i]);
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode
 
                    bool isInIriUnicodeRange;
                    bool surrogatePair = false;
 
                    char ch2 = '\0';
 
                    if ((char.IsHighSurrogate(ch)) && (i + 1 < end))
                    {
                        ch2 = pInput[i + 1];
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
                    }
                    else
                    {
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
                    }
 
                    if (isInIriUnicodeRange)
                    {
                        dest.Append(ch);
                        if (surrogatePair)
                        {
                            dest.Append(ch2);
                        }
                    }
                    else
                    {
                        Rune rune;
                        if (surrogatePair)
                        {
                            rune = new Rune(ch, ch2);
                        }
                        else if (!Rune.TryCreate(ch, out rune))
                        {
                            rune = Rune.ReplacementChar;
                        }
 
                        int bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
                        ReadOnlySpan<byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);
 
                        foreach (byte b in encodedBytes)
                        {
                            UriHelper.PercentEncodeByte(b, ref dest);
                        }
                    }
 
                    if (surrogatePair)
                    {
                        i++;
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[i]);
                }
            }
 
            return dest.ToString();
        }
    }
}