// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
namespace System
internal static class IriHelper
// Decides whether a single BMP (non-surrogate) UTF-16 code unit may appear unescaped in an IRI.
// The accepted ranges are the ones tested below: U+00A0..U+D7FF, U+F900..U+FDCF and
// U+FDF0..U+FFEF for every component, plus U+E000..U+F8FF when isQuery is true.
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
{
    // Ranges accepted regardless of the component being processed.
    if (unicode >= '\u00A0' && unicode <= '\uD7FF')
    {
        return true;
    }

    if (unicode >= '\uF900' && unicode <= '\uFDCF')
    {
        return true;
    }

    if (unicode >= '\uFDF0' && unicode <= '\uFFEF')
    {
        return true;
    }

    // U+E000..U+F8FF is only accepted when checking a query.
    return isQuery && unicode >= '\uE000' && unicode <= '\uF8FF';
}
// Tests whether highSurr + lowSurr form a valid surrogate pair and, if so, whether the
// combined code point is allowed unescaped in an IRI.
// isSurrogatePair reports whether the two code units combined into a valid pair; when they
// do not, the method returns false.
// isQuery relaxes the check because the IRI restrictions for a query are different.
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
{
    isSurrogatePair = Rune.TryCreate(highSurr, lowSurr, out Rune scalar);
    if (!isSurrogatePair)
    {
        return false;
    }

    int codePoint = scalar.Value;

    // U+xxFFFE and U+xxFFFF (the last two code points of every plane) are never allowed.
    if ((codePoint & 0xFFFF) >= 0xFFFE)
    {
        return false;
    }

    // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
    if (codePoint >= 0xE0000 && codePoint <= 0xE0FFF)
    {
        return false;
    }

    // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).
    return isQuery || codePoint < 0xF0000;
}
// Checks whether a code point, given as its numeric value, is allowed unescaped in an IRI.
// Values up to U+FFFF are tested against the same BMP ranges as the char overload; larger
// values are tested against the supplementary-plane rules (see the comments below).
// isQuery additionally admits the private-use ranges ('iprivate' in the ABNF).
// Values above U+10FFFF are not Unicode code points and are always rejected.
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
internal static bool CheckIriUnicodeRange(uint value, bool isQuery)
{
    if (value <= 0xFFFF)
    {
        return (value >= 0x00A0 && value <= 0xD7FF)
            || (value >= 0xF900 && value <= 0xFDCF)
            || (value >= 0xFDF0 && value <= 0xFFEF)
            || (isQuery && value >= 0xE000 && value <= 0xF8FF);
    }

    // Nothing beyond the last Unicode plane is a valid code point. Without this guard a
    // value such as 0x110000 would be accepted whenever isQuery is true.
    if (value > 0x10FFFF)
    {
        return false;
    }

    // U+xxFFFE and U+xxFFFF (the last two code points of every plane) are never allowed.
    // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
    // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).
    return ((value & 0xFFFF) < 0xFFFE)
        && !(value >= 0xE0000 && value <= 0xE0FFF)
        && (isQuery || value < 0xF0000);
}
// Returns true when min <= value <= max, using a single unsigned comparison.
// Subtracting min shifts the range to start at zero; a value below min wraps around to a
// large unsigned number and therefore fails the comparison against the range width.
// Assumes min <= max (the callers visible in this file pass constant bounds in that order).
public static bool IsInInclusiveRange(uint value, uint min, uint max)
{
    uint offsetFromMin = value - min;
    uint rangeWidth = max - min;
    return offsetFromMin <= rangeWidth;
}
// Reports whether ch counts as a reserved character (RFC 3987) for the given component.
// - If component shares at least one flag with UriComponents.AbsoluteUri, the answer is
//   membership of ch in UriHelper.RFC3986ReservedMarks.
// - Otherwise only a component value of 0 can yield true, and then only when
//   UriHelper.IsGenDelim(ch) holds.
internal static bool CheckIsReserved(char ch, UriComponents component)
{
    bool overlapsAbsoluteUri = (component & UriComponents.AbsoluteUri) != 0;
    if (overlapsAbsoluteUri)
    {
        return UriHelper.RFC3986ReservedMarks.Contains(ch);
    }

    return component == 0 && UriHelper.IsGenDelim(ch);
}
// IRI normalization for strings containing characters that are not allowed or
// escaped characters that should be unescaped in the context of the specified Uri component.
//
// Walks pInput[start..end) one UTF-16 code unit at a time, builds the result in a
// ValueStringBuilder and returns it as a string. Each position is classified as:
//   * '%'          - start of a percent-escape, decoded via UriHelper.DecodeHexChars and then
//                    examined (reserved / unsafe / ASCII / possible UTF-8 sequence);
//   * ch > U+007F  - a non-ASCII character, checked with CheckIriUnicodeRange;
//   * otherwise    - remaining ASCII.
// component selects the reserved-character rules (CheckIsReserved) and, when it equals
// UriComponents.Query, is passed on as the isQuery flag of the range checks.
//
// NOTE(review): in this listing the brace-only lines, the 'else' lines and the statements that
// append to 'dest' or advance 'i' in several branches are not visible. The comments below
// describe only what the visible statements do - confirm the branch structure against the
// complete file before relying on them.
internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
// Number of UTF-16 code units in the [start, end) window.
int size = end - start;
// Inputs up to Uri.StackallocThreshold chars start from a stack buffer; longer inputs pass
// their length to the builder's constructor (presumably as the initial capacity - confirm).
var dest = size <= Uri.StackallocThreshold
? new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold])
: new ValueStringBuilder(size);
// Scratch buffer that receives the UTF-8 encoding of one Rune (see EncodeToUtf8 below).
Span<byte> maxUtf8EncodedSpan = stackalloc byte[4];
for (int i = start; i < end; ++i)
char ch = pInput[i];
if (ch == '%')
// A full escape needs two more characters, i.e. indices i + 1 and i + 2 must be < end.
if (end - i > 2)
// From here on ch is the decoded value of the two hex digits, not the '%' itself.
ch = UriHelper.DecodeHexChars(pInput[i + 1], pInput[i + 2]);
// Do not unescape a reserved char
// NOTE(review): Uri.c_DummyChar is presumably the "invalid hex digits" result of
// DecodeHexChars - confirm. An escaped '%' is also left escaped.
if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
// keep as is
else if (ch <= '\x7F')
Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
// Step over the two hex digits of the escape.
i += 2;
// possibly utf8 encoded sequence of unicode
// The helper receives the input from the current '%' to end and a reference to dest;
// its return value is used as the number of input characters to advance by.
int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
pInput + i,
end - i,
ref dest,
component == UriComponents.Query,
iriParsing: true);
Debug.Assert(charactersRead > 0);
i += charactersRead - 1; // -1 as i will be incremented in the loop
else if (ch > '\x7f')
// unicode
bool isInIriUnicodeRange;
bool surrogatePair = false;
char ch2 = '\0';
// A high surrogate with a following code unit is range-checked together with that unit;
// surrogatePair records whether the two actually formed a valid pair.
if ((char.IsHighSurrogate(ch)) && (i + 1 < end))
ch2 = pInput[i + 1];
isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
// NOTE(review): presumably the 'else' branch of the check above (single code unit) - confirm.
isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
if (isInIriUnicodeRange)
if (surrogatePair)
// NOTE(review): the statements from here through PercentEncodeByte presumably form the
// branch for characters outside the allowed ranges - confirm.
// Pick the Rune to encode: the pair when one was detected, otherwise the single char.
// Rune.TryCreate(char) fails for a lone surrogate, which is replaced by Rune.ReplacementChar.
Rune rune;
if (surrogatePair)
rune = new Rune(ch, ch2);
else if (!Rune.TryCreate(ch, out rune))
rune = Rune.ReplacementChar;
// Encode the Rune as UTF-8 and write each resulting byte to dest in percent-encoded form.
int bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
ReadOnlySpan<byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);
foreach (byte b in encodedBytes)
UriHelper.PercentEncodeByte(b, ref dest);
// NOTE(review): body not visible here; presumably skips the low surrogate already handled - confirm.
if (surrogatePair)
// just copy the character
return dest.ToString();