|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
using System.Buffers.Text;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text.Unicode;
namespace System.Text.Json
{
internal static partial class JsonReaderHelper
{
/// <summary>
/// Unescapes <paramref name="utf8Source"/> into a temporary buffer and decodes the unescaped text as Base64.
/// </summary>
/// <param name="utf8Source">Escaped UTF-8 text; it is expected to contain at least one backslash.</param>
/// <param name="bytes">The decoded bytes on success; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if the unescaped text decoded completely; otherwise <see langword="false"/>.</returns>
public static bool TryGetUnescapedBase64Bytes(ReadOnlySpan<byte> utf8Source, [NotNullWhen(true)] out byte[]? bytes)
{
    byte[]? unescapedArray = null;

    // Small inputs are unescaped on the stack; larger ones use a pooled array.
    Span<byte> utf8Unescaped = utf8Source.Length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (unescapedArray = ArrayPool<byte>.Shared.Rent(utf8Source.Length));

    try
    {
        Unescape(utf8Source, utf8Unescaped, out int written);
        Debug.Assert(written > 0);

        utf8Unescaped = utf8Unescaped.Slice(0, written);
        Debug.Assert(!utf8Unescaped.IsEmpty);

        return TryDecodeBase64InPlace(utf8Unescaped, out bytes!);
    }
    finally
    {
        // Unescaping can throw (e.g. on an invalid surrogate pair); the finally block makes sure
        // the pooled buffer is still scrubbed and handed back in that case.
        if (unescapedArray != null)
        {
            utf8Unescaped.Clear();
            ArrayPool<byte>.Shared.Return(unescapedArray);
        }
    }
}
// Strict UTF-8 encoding used by the transcoding helpers in this class: it emits no UTF-8 identifier
// (byte order mark) and throws on invalid data rather than silently replacing it.
public static readonly UTF8Encoding s_utf8Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
// TODO: Similar to escaping, replace the unescaping logic with publicly shipping APIs from https://github.com/dotnet/runtime/issues/27919
/// <summary>
/// Unescapes <paramref name="utf8Source"/> and transcodes the result to a <see cref="string"/>.
/// </summary>
/// <param name="utf8Source">Escaped UTF-8 text; it is expected to contain at least one backslash.</param>
/// <returns>The unescaped text as a string.</returns>
public static string GetUnescapedString(ReadOnlySpan<byte> utf8Source)
{
    // The escaped text is never shorter than its unescaped form, so the escaped length is a safe buffer size.
    int length = utf8Source.Length;
    byte[]? pooledName = null;

    Span<byte> utf8Unescaped = length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (pooledName = ArrayPool<byte>.Shared.Rent(length));

    try
    {
        Unescape(utf8Source, utf8Unescaped, out int written);
        Debug.Assert(written > 0);

        utf8Unescaped = utf8Unescaped.Slice(0, written);
        Debug.Assert(!utf8Unescaped.IsEmpty);

        return TranscodeHelper(utf8Unescaped);
    }
    finally
    {
        // Both Unescape and TranscodeHelper can throw on malformed input; the finally block makes
        // sure the pooled buffer is still scrubbed and handed back in that case.
        if (pooledName != null)
        {
            utf8Unescaped.Clear();
            ArrayPool<byte>.Shared.Return(pooledName);
        }
    }
}
/// <summary>
/// Unescapes <paramref name="utf8Source"/> and returns the result as a span over a newly allocated array.
/// </summary>
/// <param name="utf8Source">Escaped UTF-8 text; it is expected to contain at least one backslash.</param>
/// <returns>The unescaped UTF-8 bytes, copied out of the temporary buffer.</returns>
public static ReadOnlySpan<byte> GetUnescapedSpan(ReadOnlySpan<byte> utf8Source)
{
    // The escaped text is never shorter than its unescaped form, so the escaped length is a safe buffer size.
    int length = utf8Source.Length;
    byte[]? pooledName = null;

    Span<byte> utf8Unescaped = length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (pooledName = ArrayPool<byte>.Shared.Rent(length));

    try
    {
        Unescape(utf8Source, utf8Unescaped, out int written);
        Debug.Assert(written > 0);

        utf8Unescaped = utf8Unescaped.Slice(0, written);

        // Copy to a fresh array: the temporary buffer does not outlive this method.
        ReadOnlySpan<byte> propertyName = utf8Unescaped.ToArray();
        Debug.Assert(!propertyName.IsEmpty);

        return propertyName;
    }
    finally
    {
        // Unescaping can throw (e.g. on an invalid surrogate pair); the finally block makes sure
        // the pooled buffer is still scrubbed and handed back in that case.
        if (pooledName != null)
        {
            utf8Unescaped.Clear();
            ArrayPool<byte>.Shared.Return(pooledName);
        }
    }
}
/// <summary>
/// Unescapes <paramref name="utf8Source"/> and compares the result with <paramref name="other"/> byte for byte.
/// </summary>
/// <param name="utf8Source">Escaped UTF-8 text whose first byte is expected to be a backslash.</param>
/// <param name="other">The unescaped UTF-8 bytes to compare against.</param>
/// <returns><see langword="true"/> if the unescaped source equals <paramref name="other"/>; otherwise <see langword="false"/>.</returns>
public static bool UnescapeAndCompare(ReadOnlySpan<byte> utf8Source, ReadOnlySpan<byte> other)
{
    Debug.Assert(utf8Source.Length >= other.Length && utf8Source.Length / JsonConstants.MaxExpansionFactorWhileEscaping <= other.Length);

    byte[]? unescapedArray = null;

    Span<byte> utf8Unescaped = utf8Source.Length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (unescapedArray = ArrayPool<byte>.Shared.Rent(utf8Source.Length));

    try
    {
        // Index 0: the source is expected to begin with the escape character.
        Unescape(utf8Source, utf8Unescaped, 0, out int written);
        Debug.Assert(written > 0);

        utf8Unescaped = utf8Unescaped.Slice(0, written);
        Debug.Assert(!utf8Unescaped.IsEmpty);

        return other.SequenceEqual(utf8Unescaped);
    }
    finally
    {
        // Unescaping can throw (e.g. on an invalid surrogate pair); the finally block makes sure
        // the pooled buffer is still scrubbed and handed back in that case.
        if (unescapedArray != null)
        {
            utf8Unescaped.Clear();
            ArrayPool<byte>.Shared.Return(unescapedArray);
        }
    }
}
/// <summary>
/// Copies a multi-segment sequence into a contiguous buffer, unescapes it, and compares the result
/// with <paramref name="other"/> byte for byte.
/// </summary>
/// <param name="utf8Source">Escaped UTF-8 text spanning multiple segments; its first byte is expected to be a backslash.</param>
/// <param name="other">The unescaped UTF-8 bytes to compare against.</param>
/// <returns><see langword="true"/> if the unescaped source equals <paramref name="other"/>; otherwise <see langword="false"/>.</returns>
public static bool UnescapeAndCompare(ReadOnlySequence<byte> utf8Source, ReadOnlySpan<byte> other)
{
    Debug.Assert(!utf8Source.IsSingleSegment);
    Debug.Assert(utf8Source.Length >= other.Length && utf8Source.Length / JsonConstants.MaxExpansionFactorWhileEscaping <= other.Length);

    byte[]? escapedArray = null;
    byte[]? unescapedArray = null;

    // The sequence length is a long; the checked cast throws instead of silently truncating.
    int length = checked((int)utf8Source.Length);

    Span<byte> utf8Unescaped = length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (unescapedArray = ArrayPool<byte>.Shared.Rent(length));

    Span<byte> utf8Escaped = length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (escapedArray = ArrayPool<byte>.Shared.Rent(length));

    try
    {
        utf8Source.CopyTo(utf8Escaped);
        utf8Escaped = utf8Escaped.Slice(0, length);

        // Index 0: the source is expected to begin with the escape character.
        Unescape(utf8Escaped, utf8Unescaped, 0, out int written);
        Debug.Assert(written > 0);

        utf8Unescaped = utf8Unescaped.Slice(0, written);
        Debug.Assert(!utf8Unescaped.IsEmpty);

        return other.SequenceEqual(utf8Unescaped);
    }
    finally
    {
        // Unescaping can throw (e.g. on an invalid surrogate pair); the finally block makes sure
        // both pooled buffers are still scrubbed and handed back in that case.
        if (unescapedArray != null)
        {
            utf8Unescaped.Clear();
            ArrayPool<byte>.Shared.Return(unescapedArray);
        }

        if (escapedArray != null)
        {
            utf8Escaped.Clear();
            ArrayPool<byte>.Shared.Return(escapedArray);
        }
    }
}
/// <summary>
/// Unescapes both inputs and compares the unescaped results byte for byte.
/// </summary>
/// <param name="utf8Source1">First escaped UTF-8 text; it is expected to contain a backslash.</param>
/// <param name="utf8Source2">Second escaped UTF-8 text; it is expected to contain a backslash.</param>
/// <returns><see langword="true"/> if both inputs unescape to the same bytes; otherwise <see langword="false"/>.</returns>
public static bool UnescapeAndCompareBothInputs(ReadOnlySpan<byte> utf8Source1, ReadOnlySpan<byte> utf8Source2)
{
    int index1 = utf8Source1.IndexOf(JsonConstants.BackSlash);
    int index2 = utf8Source2.IndexOf(JsonConstants.BackSlash);

    Debug.Assert(index1 >= 0, "the first parameter is not escaped");
    Debug.Assert(index2 >= 0, "the second parameter is not escaped");

    byte[]? unescapedArray1 = null;
    byte[]? unescapedArray2 = null;

    Span<byte> utf8Unescaped1 = utf8Source1.Length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (unescapedArray1 = ArrayPool<byte>.Shared.Rent(utf8Source1.Length));

    Span<byte> utf8Unescaped2 = utf8Source2.Length <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (unescapedArray2 = ArrayPool<byte>.Shared.Rent(utf8Source2.Length));

    try
    {
        Unescape(utf8Source1, utf8Unescaped1, index1, out int written);
        utf8Unescaped1 = utf8Unescaped1.Slice(0, written);
        Debug.Assert(!utf8Unescaped1.IsEmpty);

        Unescape(utf8Source2, utf8Unescaped2, index2, out written);
        utf8Unescaped2 = utf8Unescaped2.Slice(0, written);
        Debug.Assert(!utf8Unescaped2.IsEmpty);

        return utf8Unescaped1.SequenceEqual(utf8Unescaped2);
    }
    finally
    {
        // Either Unescape call can throw (e.g. on an invalid surrogate pair); the finally block makes
        // sure both pooled buffers are still scrubbed and handed back in that case.
        if (unescapedArray1 != null)
        {
            utf8Unescaped1.Clear();
            ArrayPool<byte>.Shared.Return(unescapedArray1);
        }

        if (unescapedArray2 != null)
        {
            utf8Unescaped2.Clear();
            ArrayPool<byte>.Shared.Return(unescapedArray2);
        }
    }
}
/// <summary>
/// Decodes Base64 text in place and copies the decoded prefix of the buffer into a new array.
/// </summary>
/// <param name="utf8Unescaped">Buffer holding UTF-8 Base64 text; it is overwritten by the decoder.</param>
/// <param name="bytes">The decoded bytes on success; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if decoding finished with <see cref="OperationStatus.Done"/>; otherwise <see langword="false"/>.</returns>
public static bool TryDecodeBase64InPlace(Span<byte> utf8Unescaped, [NotNullWhen(true)] out byte[]? bytes)
{
    if (Base64.DecodeFromUtf8InPlace(utf8Unescaped, out int decodedLength) == OperationStatus.Done)
    {
        // The decoded payload occupies the front of the same buffer.
        bytes = utf8Unescaped.Slice(0, decodedLength).ToArray();
        return true;
    }

    bytes = null;
    return false;
}
/// <summary>
/// Decodes Base64 text into a temporary buffer and copies the decoded bytes into a new array.
/// </summary>
/// <param name="utf8Unescaped">UTF-8 Base64 text; it is left unmodified.</param>
/// <param name="bytes">The decoded bytes on success; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if decoding finished with <see cref="OperationStatus.Done"/>; otherwise <see langword="false"/>.</returns>
public static bool TryDecodeBase64(ReadOnlySpan<byte> utf8Unescaped, [NotNullWhen(true)] out byte[]? bytes)
{
    int inputLength = utf8Unescaped.Length;
    byte[]? rented = null;

    // Small inputs decode into stack memory; larger ones into a pooled array.
    Span<byte> scratch = inputLength <= JsonConstants.StackallocByteThreshold ?
        stackalloc byte[JsonConstants.StackallocByteThreshold] :
        (rented = ArrayPool<byte>.Shared.Rent(inputLength));

    OperationStatus status = Base64.DecodeFromUtf8(utf8Unescaped, scratch, out int consumed, out int produced);

    if (status == OperationStatus.Done)
    {
        Debug.Assert(consumed == inputLength);
        bytes = scratch.Slice(0, produced).ToArray();
    }
    else
    {
        bytes = null;
    }

    // Single cleanup path shared by the success and failure outcomes.
    if (rented != null)
    {
        scratch.Clear();
        ArrayPool<byte>.Shared.Return(rented);
    }

    // ToArray never yields null, so a non-null result is exactly the success case.
    return bytes != null;
}
/// <summary>
/// Transcodes UTF-8 bytes to a <see cref="string"/> using the strict <see cref="s_utf8Encoding"/>.
/// </summary>
/// <param name="utf8Unescaped">The UTF-8 bytes to transcode.</param>
/// <returns>The decoded string.</returns>
/// <remarks>
/// A <see cref="DecoderFallbackException"/> raised by the encoding is replaced with the exception
/// produced by <c>ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8</c>.
/// </remarks>
public static string TranscodeHelper(ReadOnlySpan<byte> utf8Unescaped)
{
try
{
#if NET
return s_utf8Encoding.GetString(utf8Unescaped);
#else
// NOTE(review): the empty case is presumably short-circuited because pinning an empty span
// yields a null pointer, which the pointer-based overload would reject — confirm.
if (utf8Unescaped.IsEmpty)
{
return string.Empty;
}
unsafe
{
fixed (byte* bytePtr = utf8Unescaped)
{
return s_utf8Encoding.GetString(bytePtr, utf8Unescaped.Length);
}
}
#endif
}
catch (DecoderFallbackException ex)
{
// We want to be consistent with the exception being thrown
// so the user only has to catch a single exception.
// Since we already throw InvalidOperationException for mismatch token type,
// and while unescaping, using that exception for failure to decode invalid UTF-8 bytes as well.
// Therefore, wrapping the DecoderFallbackException around an InvalidOperationException.
throw ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8(ex);
}
}
/// <summary>
/// Transcodes UTF-8 bytes into <paramref name="destination"/> using the strict <see cref="s_utf8Encoding"/>.
/// </summary>
/// <param name="utf8Unescaped">The UTF-8 bytes to transcode.</param>
/// <param name="destination">The buffer that receives the decoded characters.</param>
/// <returns>The value returned by the encoding's <c>GetChars</c> call, i.e. the number of characters written.</returns>
/// <remarks>
/// A <see cref="DecoderFallbackException"/> is replaced with the exception produced by
/// <c>ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8</c>. Any other
/// <see cref="ArgumentException"/> is rethrown after <paramref name="destination"/> has been cleared.
/// </remarks>
public static int TranscodeHelper(ReadOnlySpan<byte> utf8Unescaped, Span<char> destination)
{
try
{
#if NET
return s_utf8Encoding.GetChars(utf8Unescaped, destination);
#else
// NOTE(review): the empty case is presumably short-circuited because pinning an empty span
// yields a null pointer, which the pointer-based overload would reject — confirm.
if (utf8Unescaped.IsEmpty)
{
return 0;
}
unsafe
{
fixed (byte* srcPtr = utf8Unescaped)
fixed (char* destPtr = destination)
{
return s_utf8Encoding.GetChars(srcPtr, utf8Unescaped.Length, destPtr, destination.Length);
}
}
#endif
}
// DecoderFallbackException derives from ArgumentException, so this handler must come first.
catch (DecoderFallbackException dfe)
{
// We want to be consistent with the exception being thrown
// so the user only has to catch a single exception.
// Since we already throw InvalidOperationException for mismatch token type,
// and while unescaping, using that exception for failure to decode invalid UTF-8 bytes as well.
// Therefore, wrapping the DecoderFallbackException around an InvalidOperationException.
throw ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8(dfe);
}
catch (ArgumentException)
{
// Destination buffer was too small; clear it, since the decoder might not have done so.
destination.Clear();
throw;
}
}
/// <summary>
/// Validates that <paramref name="utf8Buffer"/> contains well-formed UTF-8.
/// </summary>
/// <param name="utf8Buffer">The bytes to validate.</param>
/// <remarks>
/// On failure the exception produced by <c>ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8</c> is thrown.
/// On .NET 8 and later the check uses <see cref="Utf8.IsValid(ReadOnlySpan{byte})"/>; otherwise it relies on
/// the strict <see cref="s_utf8Encoding"/> throwing while counting characters.
/// </remarks>
public static void ValidateUtf8(ReadOnlySpan<byte> utf8Buffer)
{
#if NET8_0_OR_GREATER
if (!Utf8.IsValid(utf8Buffer))
{
throw ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8();
}
#else
try
{
#if NET
// The count itself is discarded; the call is made only for its validation side effect.
s_utf8Encoding.GetCharCount(utf8Buffer);
#else
// An empty buffer is trivially valid.
if (utf8Buffer.IsEmpty)
{
return;
}
unsafe
{
fixed (byte* srcPtr = utf8Buffer)
{
// The count itself is discarded; the call is made only for its validation side effect.
s_utf8Encoding.GetCharCount(srcPtr, utf8Buffer.Length);
}
}
#endif
}
catch (DecoderFallbackException ex)
{
// We want to be consistent with the exception being thrown
// so the user only has to catch a single exception.
// Since we already throw InvalidOperationException for mismatch token type,
// and while unescaping, using that exception for failure to decode invalid UTF-8 bytes as well.
// Therefore, wrapping the DecoderFallbackException around an InvalidOperationException.
throw ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8(ex);
}
#endif
}
/// <summary>
/// Computes the number of UTF-8 bytes needed to encode <paramref name="text"/> using the strict <see cref="s_utf8Encoding"/>.
/// </summary>
/// <param name="text">The UTF-16 text to measure.</param>
/// <returns>The UTF-8 byte count reported by the encoding (0 for empty text).</returns>
/// <remarks>
/// An <see cref="EncoderFallbackException"/> raised by the encoding is replaced with the exception
/// produced by <c>ThrowHelper.GetArgumentException_ReadInvalidUTF16</c>.
/// </remarks>
internal static int GetUtf8ByteCount(ReadOnlySpan<char> text)
{
try
{
#if NET
return s_utf8Encoding.GetByteCount(text);
#else
// NOTE(review): the empty case is presumably short-circuited because pinning an empty span
// yields a null pointer, which the pointer-based overload would reject — confirm.
if (text.IsEmpty)
{
return 0;
}
unsafe
{
fixed (char* charPtr = text)
{
return s_utf8Encoding.GetByteCount(charPtr, text.Length);
}
}
#endif
}
catch (EncoderFallbackException ex)
{
// We want to be consistent with the exception being thrown
// so the user only has to catch a single exception.
// Since we already throw ArgumentException when validating other arguments,
// using that exception for failure to encode invalid UTF-16 chars as well.
// Therefore, wrapping the EncoderFallbackException around an ArgumentException.
throw ThrowHelper.GetArgumentException_ReadInvalidUTF16(ex);
}
}
/// <summary>
/// Encodes <paramref name="text"/> as UTF-8 into <paramref name="dest"/> using the strict <see cref="s_utf8Encoding"/>.
/// </summary>
/// <param name="text">The UTF-16 text to encode.</param>
/// <param name="dest">The buffer that receives the UTF-8 bytes.</param>
/// <returns>The value returned by the encoding's <c>GetBytes</c> call, i.e. the number of bytes written (0 for empty text).</returns>
/// <remarks>
/// An <see cref="EncoderFallbackException"/> raised by the encoding is replaced with the exception
/// produced by <c>ThrowHelper.GetArgumentException_ReadInvalidUTF16</c>.
/// </remarks>
internal static int GetUtf8FromText(ReadOnlySpan<char> text, Span<byte> dest)
{
try
{
#if NET
return s_utf8Encoding.GetBytes(text, dest);
#else
// NOTE(review): the empty case is presumably short-circuited because pinning an empty span
// yields a null pointer, which the pointer-based overload would reject — confirm.
if (text.IsEmpty)
{
return 0;
}
unsafe
{
fixed (char* charPtr = text)
fixed (byte* destPtr = dest)
{
return s_utf8Encoding.GetBytes(charPtr, text.Length, destPtr, dest.Length);
}
}
#endif
}
catch (EncoderFallbackException ex)
{
// We want to be consistent with the exception being thrown
// so the user only has to catch a single exception.
// Since we already throw ArgumentException when validating other arguments,
// using that exception for failure to encode invalid UTF-16 chars as well.
// Therefore, wrapping the EncoderFallbackException around an ArgumentException.
throw ThrowHelper.GetArgumentException_ReadInvalidUTF16(ex);
}
}
/// <summary>
/// Decodes UTF-8 bytes to a <see cref="string"/> using the strict <see cref="s_utf8Encoding"/>.
/// </summary>
/// <param name="utf8Text">The UTF-8 bytes to decode.</param>
/// <returns>The decoded string (<see cref="string.Empty"/> for empty input).</returns>
/// <remarks>
/// This method has no exception handling of its own, so an exception thrown by the encoding
/// for invalid bytes reaches the caller unchanged.
/// </remarks>
internal static string GetTextFromUtf8(ReadOnlySpan<byte> utf8Text)
{
#if NET
return s_utf8Encoding.GetString(utf8Text);
#else
// NOTE(review): the empty case is presumably short-circuited because pinning an empty span
// yields a null pointer, which the pointer-based overload would reject — confirm.
if (utf8Text.IsEmpty)
{
return string.Empty;
}
unsafe
{
fixed (byte* bytePtr = utf8Text)
{
return s_utf8Encoding.GetString(bytePtr, utf8Text.Length);
}
}
#endif
}
/// <summary>
/// Unescapes <paramref name="source"/> into <paramref name="destination"/>, locating the first backslash itself.
/// </summary>
/// <param name="source">Escaped UTF-8 text; it is expected to contain at least one backslash.</param>
/// <param name="destination">Buffer for the unescaped bytes; expected to be at least as long as <paramref name="source"/>.</param>
/// <param name="written">The number of bytes written to <paramref name="destination"/>.</param>
internal static void Unescape(ReadOnlySpan<byte> source, Span<byte> destination, out int written)
{
Debug.Assert(destination.Length >= source.Length);
int idx = source.IndexOf(JsonConstants.BackSlash);
Debug.Assert(idx >= 0);
// The result is only checked in debug builds: the destination is expected to be large enough.
bool result = TryUnescape(source, destination, idx, out written);
Debug.Assert(result);
}
/// <summary>
/// Unescapes <paramref name="source"/> into <paramref name="destination"/>, given the index of the first backslash.
/// </summary>
/// <param name="source">Escaped UTF-8 text.</param>
/// <param name="destination">Buffer for the unescaped bytes; expected to be at least as long as <paramref name="source"/>.</param>
/// <param name="idx">Index of a backslash in <paramref name="source"/>; the bytes before it are copied verbatim.</param>
/// <param name="written">The number of bytes written to <paramref name="destination"/>.</param>
internal static void Unescape(ReadOnlySpan<byte> source, Span<byte> destination, int idx, out int written)
{
Debug.Assert(idx >= 0 && idx < source.Length);
Debug.Assert(source[idx] == JsonConstants.BackSlash);
Debug.Assert(destination.Length >= source.Length);
// The result is only checked in debug builds: the destination is expected to be large enough.
bool result = TryUnescape(source, destination, idx, out written);
Debug.Assert(result);
}
/// <summary>
/// Unescapes <paramref name="source"/> into <paramref name="destination"/>, locating the first backslash itself.
/// Used when writing to buffers not guaranteed to fit the unescaped result.
/// </summary>
/// <param name="source">Escaped UTF-8 text; it is expected to contain at least one backslash.</param>
/// <param name="destination">Buffer for the unescaped bytes; it may be too small.</param>
/// <param name="written">The number of bytes written to <paramref name="destination"/>.</param>
/// <returns><see langword="false"/> if <paramref name="destination"/> was too short; otherwise <see langword="true"/>.</returns>
internal static bool TryUnescape(ReadOnlySpan<byte> source, Span<byte> destination, out int written)
{
int idx = source.IndexOf(JsonConstants.BackSlash);
Debug.Assert(idx >= 0);
return TryUnescape(source, destination, idx, out written);
}
/// <summary>
/// Unescapes <paramref name="source"/> into <paramref name="destination"/>, given the index of the first backslash.
/// Used when writing to buffers not guaranteed to fit the unescaped result.
/// </summary>
/// <param name="source">Escaped UTF-8 text that has already been validated as JSON.</param>
/// <param name="destination">Buffer for the unescaped bytes; it may be too small.</param>
/// <param name="idx">Index of the first backslash in <paramref name="source"/>; the bytes before it are copied verbatim.</param>
/// <param name="written">
/// The number of bytes written to <paramref name="destination"/>. When the method returns
/// <see langword="false"/>, this is the count written before space ran out.
/// </param>
/// <returns><see langword="false"/> if <paramref name="destination"/> was too short; otherwise <see langword="true"/>.</returns>
/// <remarks>
/// Invalid or incomplete UTF-16 surrogate pairs in <c>\u</c> escapes are reported through the
/// <c>ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16</c> and
/// <c>ThrowHelper.ThrowInvalidOperationException_ReadIncompleteUTF16</c> helpers.
/// </remarks>
private static bool TryUnescape(ReadOnlySpan<byte> source, Span<byte> destination, int idx, out int written)
{
    Debug.Assert(idx >= 0 && idx < source.Length);
    Debug.Assert(source[idx] == JsonConstants.BackSlash);

    // Everything before the first backslash needs no unescaping.
    if (!source.Slice(0, idx).TryCopyTo(destination))
    {
        written = 0;
        goto DestinationTooShort;
    }

    written = idx;

    while (true)
    {
        Debug.Assert(source[idx] == JsonConstants.BackSlash);

        // Every escape sequence produces at least one byte.
        if (written == destination.Length)
        {
            goto DestinationTooShort;
        }

        switch (source[++idx])
        {
            case JsonConstants.Quote:
                destination[written++] = JsonConstants.Quote;
                break;
            case (byte)'n':
                destination[written++] = JsonConstants.LineFeed;
                break;
            case (byte)'r':
                destination[written++] = JsonConstants.CarriageReturn;
                break;
            case JsonConstants.BackSlash:
                destination[written++] = JsonConstants.BackSlash;
                break;
            case JsonConstants.Slash:
                destination[written++] = JsonConstants.Slash;
                break;
            case (byte)'t':
                destination[written++] = JsonConstants.Tab;
                break;
            case (byte)'b':
                destination[written++] = JsonConstants.BackSpace;
                break;
            case (byte)'f':
                destination[written++] = JsonConstants.FormFeed;
                break;
            default:
                Debug.Assert(source[idx] == 'u', "invalid escape sequences must have already been caught by Utf8JsonReader.Read()");

                // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                // Otherwise, the Utf8JsonReader would have already thrown an exception.
                Debug.Assert(source.Length >= idx + 5);

                bool result = Utf8Parser.TryParse(source.Slice(idx + 1, 4), out int scalar, out int bytesConsumed, 'x');
                Debug.Assert(result);
                Debug.Assert(bytesConsumed == 4);
                idx += 4;

                if (JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                {
                    // The first hex value cannot be a low surrogate.
                    if (scalar >= JsonConstants.LowSurrogateStartValue)
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(scalar);
                    }

                    Debug.Assert(JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.HighSurrogateEndValue));

                    // We must have a low surrogate following a high surrogate.
                    if (source.Length < idx + 7 || source[idx + 1] != '\\' || source[idx + 2] != 'u')
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadIncompleteUTF16();
                    }

                    // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                    // Otherwise, the Utf8JsonReader would have already thrown an exception.
                    result = Utf8Parser.TryParse(source.Slice(idx + 3, 4), out int lowSurrogate, out bytesConsumed, 'x');
                    Debug.Assert(result);
                    Debug.Assert(bytesConsumed == 4);
                    idx += 6;

                    // If the first hex value is a high surrogate, the next one must be a low surrogate.
                    if (!JsonHelpers.IsInRangeInclusive((uint)lowSurrogate, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(lowSurrogate);
                    }

                    // To find the unicode scalar:
                    // (0x400 * (High surrogate - 0xD800)) + Low surrogate - 0xDC00 + 0x10000
                    scalar = (JsonConstants.BitShiftBy10 * (scalar - JsonConstants.HighSurrogateStartValue))
                        + (lowSurrogate - JsonConstants.LowSurrogateStartValue)
                        + JsonConstants.UnicodePlane01StartValue;
                }

                var rune = new Rune(scalar);
                bool success = rune.TryEncodeToUtf8(destination.Slice(written), out int bytesWritten);
                if (!success)
                {
                    goto DestinationTooShort;
                }

                Debug.Assert(bytesWritten <= 4);
                written += bytesWritten;
                break;
        }

        if (++idx == source.Length)
        {
            goto Success;
        }

        if (source[idx] != JsonConstants.BackSlash)
        {
            // Copy the run of literal bytes up to the next backslash (or the end of the source).
            ReadOnlySpan<byte> remaining = source.Slice(idx);
            int nextUnescapedSegmentLength = remaining.IndexOf(JsonConstants.BackSlash);
            if (nextUnescapedSegmentLength < 0)
            {
                nextUnescapedSegmentLength = remaining.Length;
            }

            // Strictly greater-than: a segment that exactly fills the remaining space still fits.
            // If another escape follows, the check at the top of the loop reports the shortage.
            if ((uint)(written + nextUnescapedSegmentLength) > (uint)destination.Length)
            {
                goto DestinationTooShort;
            }

            Debug.Assert(nextUnescapedSegmentLength > 0);

            // Very short runs are copied byte by byte; longer runs use a bulk copy.
            switch (nextUnescapedSegmentLength)
            {
                case 1:
                    destination[written++] = source[idx++];
                    break;
                case 2:
                    destination[written++] = source[idx++];
                    destination[written++] = source[idx++];
                    break;
                case 3:
                    destination[written++] = source[idx++];
                    destination[written++] = source[idx++];
                    destination[written++] = source[idx++];
                    break;
                default:
                    remaining.Slice(0, nextUnescapedSegmentLength).CopyTo(destination.Slice(written));
                    written += nextUnescapedSegmentLength;
                    idx += nextUnescapedSegmentLength;
                    break;
            }

            Debug.Assert(idx == source.Length || source[idx] == JsonConstants.BackSlash);

            if (idx == source.Length)
            {
                goto Success;
            }
        }
    }

Success:
    return true;

DestinationTooShort:
    return false;
}
}
}
|