File: Utils\Helpers.netcoreapp.cs
Web Access
Project: src\src\Microsoft.ML.Tokenizers\Microsoft.ML.Tokenizers.csproj (Microsoft.ML.Tokenizers)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Buffers.Text;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using System.Threading;
using System.Net.Http;
 
namespace Microsoft.ML.Tokenizers
{
    /// <summary>
    /// Part of the partial <c>Helpers</c> class that forwards to span-based, cancellation-aware and
    /// synchronous-HTTP framework APIs.
    /// </summary>
    internal static partial class Helpers
    {
        /// <summary>
        /// Asynchronously reads the next line from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">The reader to read the line from.</param>
        /// <param name="cancellationToken">Token passed through to the underlying read.</param>
        /// <returns>The result of <see cref="StreamReader.ReadLineAsync(CancellationToken)"/>.</returns>
        public static ValueTask<string?> ReadLineAsync(StreamReader reader, CancellationToken cancellationToken) =>
            reader.ReadLineAsync(cancellationToken);

        /// <summary>
        /// Asynchronously issues a GET request for <paramref name="url"/> and returns the response body as a stream.
        /// </summary>
        /// <param name="client">The HTTP client used to send the request.</param>
        /// <param name="url">The address to request.</param>
        /// <param name="cancellationToken">Token passed through to the underlying request.</param>
        /// <returns>A task producing the response stream.</returns>
        public static Task<Stream> GetStreamAsync(HttpClient client, string url, CancellationToken cancellationToken = default) =>
            client.GetStreamAsync(url, cancellationToken);

        /// <summary>
        /// Synchronously issues a GET request for <paramref name="url"/> and returns the response body as a stream.
        /// </summary>
        /// <param name="client">The HTTP client used to send the request.</param>
        /// <param name="url">The address to request.</param>
        /// <returns>The response content stream. The call returns once the response headers have been read.</returns>
        /// <exception cref="HttpRequestException">The response status code does not indicate success.</exception>
        public static Stream GetStream(HttpClient client, string url)
        {
            HttpResponseMessage response = client.Send(new HttpRequestMessage(HttpMethod.Get, url), HttpCompletionOption.ResponseHeadersRead);
            try
            {
                response.EnsureSuccessStatusCode();
                return response.Content.ReadAsStream();
            }
            catch
            {
                // No stream is handed to the caller on this path, so release the response here
                // instead of leaving it undisposed.
                response.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Decodes the base64 text found in a section of <paramref name="base64String"/>.
        /// </summary>
        /// <param name="base64String">The string containing the base64 text.</param>
        /// <param name="offset">The index of the first character of the section to decode.</param>
        /// <param name="length">The number of characters in the section to decode.</param>
        /// <returns>A new array holding the decoded bytes.</returns>
        /// <exception cref="FormatException">The selected section is not valid base64.</exception>
        public static byte[] FromBase64String(string base64String, int offset, int length)
        {
            ReadOnlySpan<char> encoded = base64String.AsSpan(offset, length);

            // Validation also reports the exact decoded size, so the result array can be allocated up front.
            if (!Base64.IsValid(encoded, out int decodedLength))
            {
                throw new FormatException($"Invalid base64 string '{encoded.ToString()}'");
            }

            byte[] bytes = new byte[decodedLength];
            bool success = Convert.TryFromBase64Chars(encoded, bytes, out int bytesWritten);
            Debug.Assert(success);
            Debug.Assert(bytes.Length == bytesWritten);
            return bytes;
        }

        /// <summary>
        /// Tries to parse the tail of <paramref name="s"/>, starting at <paramref name="offset"/>, as an integer.
        /// Parsing uses <see cref="NumberStyles.None"/> and the invariant culture.
        /// </summary>
        /// <param name="s">The string containing the number.</param>
        /// <param name="offset">The index at which the number starts.</param>
        /// <param name="result">Receives the parsed value when parsing succeeds.</param>
        /// <returns><see langword="true"/> when parsing succeeded; otherwise <see langword="false"/>.</returns>
        internal static bool TryParseInt32(string s, int offset, out int result)
            => int.TryParse(s.AsSpan(offset), NumberStyles.None, CultureInfo.InvariantCulture, out result);

        /// <summary>
        /// Computes the hash code of <paramref name="span"/> using <see cref="string.GetHashCode(ReadOnlySpan{char})"/>.
        /// </summary>
        /// <param name="span">The characters to hash.</param>
        /// <returns>The hash code of the characters.</returns>
        internal static int GetHashCode(ReadOnlySpan<char> span) => string.GetHashCode(span);

        /// <summary>
        /// Encodes <paramref name="source"/> as UTF-8 into <paramref name="destination"/>.
        /// </summary>
        /// <param name="source">The characters to encode.</param>
        /// <param name="destination">The buffer receiving the UTF-8 bytes.</param>
        /// <returns>The number of bytes written to <paramref name="destination"/>.</returns>
        internal static int GetUtf8Bytes(ReadOnlySpan<char> source, Span<byte> destination)
            => Encoding.UTF8.GetBytes(source, destination);

        /// <summary>
        /// Tries to encode <paramref name="source"/> as UTF-8 into <paramref name="destination"/>.
        /// </summary>
        /// <param name="source">The characters to encode.</param>
        /// <param name="destination">The buffer receiving the UTF-8 bytes.</param>
        /// <param name="bytesWritten">Receives the number of bytes written.</param>
        /// <returns>The result of <c>Encoding.UTF8.TryGetBytes</c> for the same arguments.</returns>
        internal static bool TryGetUtf8Bytes(ReadOnlySpan<char> source, Span<byte> destination, out int bytesWritten)
            => Encoding.UTF8.TryGetBytes(source, destination, out bytesWritten);

        /// <summary>
        /// Decodes UTF-8 bytes into a string.
        /// </summary>
        /// <param name="utf8Bytes">The UTF-8 encoded bytes.</param>
        /// <returns>The decoded string.</returns>
        internal static string GetString(ReadOnlySpan<byte> utf8Bytes)
            => Encoding.UTF8.GetString(utf8Bytes);

        /// <summary>
        /// Decodes UTF-8 bytes into <paramref name="chars"/>.
        /// </summary>
        /// <param name="bytes">The UTF-8 encoded bytes.</param>
        /// <param name="chars">The buffer receiving the decoded characters.</param>
        /// <returns>The number of characters written to <paramref name="chars"/>.</returns>
        internal static int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
            => Encoding.UTF8.GetChars(bytes, chars);

        /// <summary>
        /// Replaces, in place, every occurrence of <paramref name="oldValue"/> in <paramref name="span"/> with <paramref name="newValue"/>.
        /// </summary>
        /// <param name="span">The characters to modify.</param>
        /// <param name="oldValue">The character to replace.</param>
        /// <param name="newValue">The replacement character.</param>
        internal static void Replace(Span<char> span, char oldValue, char newValue) => span.Replace(oldValue, newValue);

        /// <summary>
        /// Encode the next code point in the text to UTF-8.
        /// </summary>
        /// <param name="text">The text to encode the first code point from.</param>
        /// <param name="textIndex">The index of the first code point to encode.</param>
        /// <param name="destination">The buffer to write the UTF-8 bytes to. It is replaced with a larger buffer when it is too small.</param>
        /// <param name="bytesIndex">The index in the buffer to write the UTF-8 encoded bytes to. It is advanced by the number of bytes written.</param>
        /// <returns>The number of characters consumed from the text.</returns>
        /// <remarks>
        /// The status returned by <see cref="Rune.DecodeFromUtf16(ReadOnlySpan{char}, out Rune, out int)"/> is not inspected.
        /// Per that method's documentation, ill-formed input decodes to the replacement character.
        /// </remarks>
        internal static int EncodeCodePointToUtf8(ReadOnlySpan<char> text, int textIndex, ref byte[] destination, ref int bytesIndex)
        {
            Debug.Assert(textIndex < text.Length);

            Rune.DecodeFromUtf16(text.Slice(textIndex), out Rune rune, out int charsConsumed);

            int requiredLength = bytesIndex + rune.Utf8SequenceLength;
            if (requiredLength > destination.Length)
            {
                // Doubling alone does not guarantee enough room when the buffer is empty or tiny,
                // so never request less than what this write needs.
                // Assumes ArrayPoolGrow (declared outside this file) yields an array of at least the requested length.
                Helpers.ArrayPoolGrow(ref destination, Math.Max(destination.Length * 2, requiredLength));
            }

            // Encode straight into the destination; no intermediate scratch buffer is needed.
            int bytesWritten = rune.EncodeToUtf8(destination.AsSpan(bytesIndex));
            bytesIndex += bytesWritten;

            return charsConsumed;
        }
    }
}