File: System\Text\Encodings\Web\OptimizedInboxTextEncoder.Ascii.cs
Web Access
Project: src\src\libraries\System.Text.Encodings.Web\src\System.Text.Encodings.Web.csproj (System.Text.Encodings.Web)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
 
#if NET
using System.Runtime.Intrinsics;
#endif
 
namespace System.Text.Encodings.Web
{
    internal sealed partial class OptimizedInboxTextEncoder
    {
        /// <summary>
        /// A bitmap which represents allowed ASCII code points.
        /// </summary>
        [StructLayout(LayoutKind.Explicit)]
        private unsafe partial struct AllowedAsciiCodePoints
        {
            [FieldOffset(0)] // ensure same offset with AsVector field
            private fixed byte AsBytes[16];
 
#if NET
#if !TARGET_BROWSER
            [FieldOffset(0)] // ensure same offset with AsBytes field
            internal Vector128<byte> AsVector;
#else
            // This member shouldn't be accessed from browser-based code paths.
            // All call sites should be trimmed away, which will also trim this member
            // and the type hierarchy it links to.
#pragma warning disable CA1822
            internal Vector128<byte> AsVector => throw new PlatformNotSupportedException();
#pragma warning restore CA1822
#endif
#endif
 
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            internal readonly bool IsAllowedAsciiCodePoint(uint codePoint)
            {
                if (codePoint > 0x7F)
                {
                    return false; // non-ASCII
                }
 
                uint mask = AsBytes[codePoint & 0xF];
                if ((mask & (0x1u << (int)(codePoint >> 4))) == 0)
                {
                    return false; // ASCII but disallowed
                }
 
                return true;
            }
 
            internal void PopulateAllowedCodePoints(in AllowedBmpCodePointsBitmap allowedBmpCodePoints)
            {
                this = default; // clear all existing data
 
                // we only care about ASCII non-control chars; all control chars and non-ASCII chars are disallowed
                for (int i = 0x20; i < 0x7F; i++)
                {
                    if (allowedBmpCodePoints.IsCharAllowed((char)i))
                    {
                        AsBytes[i & 0xF] |= (byte)(1 << (i >> 4));
                    }
                }
            }
        }
 
        /// <summary>
        /// A bitmap which represents the 64-bit pre-escaped form of the ASCII code points.
        /// A pre-escaped code point has the form [ WW 00 FF EE DD CC BB AA ],
        /// where AA - FF are the six-ASCII-byte escaped representation of the
        /// code point, zero-padded at the end. The high byte of the pre-escaped form
        /// is the number of non-zero bytes which make up the pre-escaped data.
        ///
        /// Example: If the escaped form of "@" is "%40", the pre-escaped form will be:
        /// 0x30_00_00_00_00_30_34_25. Iterate over the least significant bytes one-by-one
        /// to reconstruct the escaped representation, stopping when you hit a null byte.
        /// </summary>
        private unsafe struct AsciiPreescapedData
        {
            private fixed ulong Data[128];
 
            internal void PopulatePreescapedData(in AllowedBmpCodePointsBitmap allowedCodePointsBmp, ScalarEscaperBase innerEncoder)
            {
                this = default; // clear all existing data
 
                Span<char> tempBuffer = stackalloc char[8] { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0' };
                for (int i = 0; i < 128; i++)
                {
                    ulong thisPreescapedData;
                    int encodedCharCount;
 
                    Rune rune = new Rune(i); // guaranteed to succeed
                    if (!Rune.IsControl(rune) && allowedCodePointsBmp.IsCharAllowed((char)i))
                    {
                        thisPreescapedData = (uint)i; // char maps to itself
                        encodedCharCount = 1;
                    }
                    else
                    {
                        encodedCharCount = innerEncoder.EncodeUtf16(rune, tempBuffer.Slice(0, 6));
                        Debug.Assert(encodedCharCount > 0 && encodedCharCount <= 6, "Inner encoder returned bad length.");
 
                        thisPreescapedData = 0;
                        tempBuffer.Slice(encodedCharCount).Clear();
                        for (int j = encodedCharCount - 1; j >= 0; j--)
                        {
                            uint thisChar = tempBuffer[j];
                            Debug.Assert(thisChar <= 0x7F, "Inner encoder returned non-ASCII data.");
                            thisPreescapedData = (thisPreescapedData << 8) | thisChar;
                        }
                    }
 
                    Data[i] = thisPreescapedData | ((ulong)(uint)encodedCharCount << 56);
                }
            }
 
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            internal readonly bool TryGetPreescapedData(uint codePoint, out ulong preescapedData)
            {
                if (codePoint <= 0x7F)
                {
                    preescapedData = Data[codePoint];
                    return true;
                }
                else
                {
                    preescapedData = default;
                    return false;
                }
            }
        }
    }
}