File: src\libraries\System.Private.CoreLib\src\System\Text\DecoderFallback.cs
Web Access
Project: src\src\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj (System.Private.CoreLib)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
 
namespace System.Text
{
    // Base type for decoder fallbacks: exposes the two shared default instances, a factory for
    // DecoderFallbackBuffer objects, and a bound on how many chars one fallback may return.
    public abstract class DecoderFallback
    {
        // Default fallback, uses no best fit & "?"
        public static DecoderFallback ReplacementFallback
        {
            get { return DecoderReplacementFallback.s_default; }
        }

        // Shared instance held by DecoderExceptionFallback.
        public static DecoderFallback ExceptionFallback
        {
            get { return DecoderExceptionFallback.s_default; }
        }

        // Fallback
        //
        // Return the appropriate unicode string alternative to the character that needs to fall back.
        // Most implementations will be:
        //      return new MyCustomDecoderFallbackBuffer(this);
        public abstract DecoderFallbackBuffer CreateFallbackBuffer();

        // Maximum number of characters that this instance of this fallback could return
        public abstract int MaxCharCount { get; }
    }
 
 
    public abstract class DecoderFallbackBuffer
    {
        // Most implementations will probably need an implementation-specific constructor
 
        // internal methods that cannot be overridden that let us do our fallback thing
        // These wrap the internal methods so that we can check for people doing stuff that's incorrect
 
        // Called with the bytes that could not be decoded and their index.
        // The InternalFallback helpers in this class treat a true result as "replacement chars can now
        // be read through GetNextChar()" and a false result as "nothing to emit".
        public abstract bool Fallback(byte[] bytesUnknown, int index);

        // Get next character.
        // The loops in this class keep reading until this returns (char)0, so 0 acts as the
        // end-of-data marker.
        public abstract char GetNextChar();

        // Back up a character

        public abstract bool MovePrevious();

        // How many chars left in this fallback?

        public abstract int Remaining { get; }
 
        // Clear the buffer
 
        public virtual void Reset()
        {
            // Drain the buffer: keep pulling chars until GetNextChar() hands back the 0 terminator.
            char next;
            do
            {
                next = GetNextChar();
            }
            while (next != '\0');
        }
 
        // Internal items to help us figure out what we're doing as far as error messages, etc.
        // These help us with our performance and messages internally
        // Start of the input byte buffer. Set by InternalInitialize, cleared by InternalReset.
        // The pointer-based InternalFallback overloads subtract it from the current byte pointer
        // to compute the index passed to Fallback.
        internal unsafe byte* byteStart;
        // End of the output char buffer. Set by InternalInitialize; InternalFallback returns false
        // instead of writing at or past this address.
        internal unsafe char* charEnd;

        // The three fields below are assigned by CreateAndInitialize.
        internal Encoding? _encoding;
        internal DecoderNLS? _decoder;
        // The span-based helpers subtract remainingBytes.Length from this to compute the index
        // passed to Fallback.
        private int _originalByteCount;
 
        // Internal Reset
        internal unsafe void InternalReset()
        {
            // Forget the input buffer first, then let the (virtual) Reset drain any pending chars.
            this.byteStart = null;
            this.Reset();
        }
 
        // Set the above values
        // This can't be part of the constructor because DecoderFallbacks would have to know how to implement these.
        internal unsafe void InternalInitialize(byte* byteStart, char* charEnd)
        {
            // Record the bounds the pointer-based InternalFallback overloads rely on:
            // the end of the output chars and the start of the input bytes.
            this.charEnd = charEnd;
            this.byteStart = byteStart;
        }
 
        internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS? decoder, int originalByteCount)
        {
            // Use the decoder's fallback buffer when a decoder is supplied; otherwise ask the
            // encoding's DecoderFallback to create one.
            DecoderFallbackBuffer buffer;
            if (decoder is null)
            {
                buffer = encoding.DecoderFallback.CreateFallbackBuffer();
            }
            else
            {
                buffer = decoder.FallbackBuffer;
            }

            // The original byte count is only used for keeping track of what 'index' value needs
            // to be passed to the abstract Fallback method. The index value is calculated by subtracting
            // 'bytes.Length' (where bytes is expected to be the entire remaining input buffer)
            // from the 'originalByteCount' value specified here.
            buffer._originalByteCount = originalByteCount;
            buffer._decoder = decoder;
            buffer._encoding = encoding;

            return buffer;
        }
 
        // Fallback the current byte by sticking it into the remaining char buffer.
        // This can only be called by our encodings (others have to use the public fallback methods), so
        // we can use our DecoderNLS here too (except we don't).
        // Returns true if we are successful, false if we can't fallback the character (no buffer space),
        // so the caller needs to throw a buffer-space exception if this returns false.
        // Right now this takes both byte[] bytes and byte* pBytes, since we might have extra bytes, hence the
        // array, and we might need the index, hence the byte*.
        // Don't touch ref chars unless we succeed.
        internal virtual unsafe bool InternalFallback(byte[] bytes, byte* pBytes, ref char* chars)
        {
            Debug.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");

            // Index handed to Fallback: offset of pBytes from the start of the input, minus the fallback bytes.
            int index = (int)(pBytes - byteStart - bytes.Length);
            if (!this.Fallback(bytes, index))
            {
                // Nothing to emit; that still counts as success and 'chars' is left alone.
                return true;
            }

            // Write through a scratch pointer so 'chars' is only updated once everything fit.
            char* dest = chars;
            bool pendingHigh = false;

            for (char current = GetNextChar(); current != 0; current = GetNextChar())
            {
                // Surrogates must pair up: no high after an unmatched high, no low without a high.
                if (char.IsHighSurrogate(current))
                {
                    if (pendingHigh)
                        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
                    pendingHigh = true;
                }
                else if (char.IsLowSurrogate(current))
                {
                    if (!pendingHigh)
                        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
                    pendingHigh = false;
                }

                // Out of output space: report failure without touching 'chars'.
                if (dest >= charEnd)
                {
                    return false;
                }

                *dest = current;
                dest++;
            }

            // A high surrogate left unmatched at the end is also invalid.
            if (pendingHigh)
                throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);

            // Everything was copied, so it is now OK to publish the new position.
            chars = dest;
            return true;
        }
 
        // This version just counts the fallback and doesn't actually copy anything.
        // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
        // array, and we might need the index, hence the byte*
        internal virtual unsafe int InternalFallback(byte[] bytes, byte* pBytes)
        {
            Debug.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");

            // Index handed to Fallback: offset of pBytes from the start of the input, minus the fallback bytes.
            int index = (int)(pBytes - byteStart - bytes.Length);
            if (!this.Fallback(bytes, index))
            {
                // If no fallback return 0
                return 0;
            }

            int produced = 0;
            bool pendingHigh = false;

            for (char current = GetNextChar(); current != 0; current = GetNextChar())
            {
                // Surrogates must pair up: no high after an unmatched high, no low without a high.
                if (char.IsHighSurrogate(current))
                {
                    if (pendingHigh)
                        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
                    pendingHigh = true;
                }
                else if (char.IsLowSurrogate(current))
                {
                    if (!pendingHigh)
                        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
                    pendingHigh = false;
                }

                produced++;
            }

            // A high surrogate left unmatched at the end is also invalid.
            if (pendingHigh)
                throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);

            return produced;
        }
 
        // Runs Fallback over the first 'fallbackLength' bytes of 'remainingBytes' and returns the
        // number of chars the fallback produces, or 0 when Fallback reports nothing to emit.
        internal int InternalFallbackGetCharCount(ReadOnlySpan<byte> remainingBytes, int fallbackLength)
        {
            byte[] unknownBytes = remainingBytes.Slice(0, fallbackLength).ToArray();
            int index = _originalByteCount - remainingBytes.Length;

            if (!Fallback(unknownBytes, index))
            {
                return 0;
            }

            return DrainRemainingDataForGetCharCount();
        }
 
        // Runs Fallback over the first 'fallbackLength' bytes of 'remainingBytes' and copies whatever
        // the fallback produces into 'chars'. The result is whatever TryDrainRemainingDataForGetChars reports.
        internal bool TryInternalFallbackGetChars(ReadOnlySpan<byte> remainingBytes, int fallbackLength, Span<char> chars, out int charsWritten)
        {
            byte[] unknownBytes = remainingBytes.Slice(0, fallbackLength).ToArray();
            int index = _originalByteCount - remainingBytes.Length;

            if (!Fallback(unknownBytes, index))
            {
                // Return true because we weren't asked to write anything, so this is a "success" in the sense that
                // the output buffer was large enough to hold the desired 0 chars of output.
                charsWritten = 0;
                return true;
            }

            return TryDrainRemainingDataForGetChars(chars, out charsWritten);
        }
 
        private Rune GetNextRune()
        {
            // First attempt: a single GetNextChar() result that is a valid non-surrogate scalar.
            char first = GetNextChar();
            if (Rune.TryCreate(first, out Rune result))
            {
                return result;
            }

            // Second attempt: pull one more char and try the two as a surrogate pair.
            char second = GetNextChar();
            if (Rune.TryCreate(first, second, out result))
            {
                return result;
            }

            // Neither worked, so the fallback mechanism is giving us a bad replacement character.
            throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
        }
 
        // Reads runes from the fallback until a zero-valued rune is returned and sums their UTF-16 lengths.
        internal int DrainRemainingDataForGetCharCount()
        {
            int total = 0;

            for (Rune rune = GetNextRune(); rune.Value != 0; rune = GetNextRune())
            {
                total += rune.Utf16SequenceLength;

                // A negative running total means the tally overflowed: reset and report it.
                if (total < 0)
                {
                    InternalReset();
                    Encoding.ThrowConversionOverflow();
                }
            }

            return total;
        }
 
        // Reads runes from the fallback until a zero-valued rune is returned, encoding each into 'chars'.
        // Returns false (after InternalReset, with charsWritten = 0) as soon as a rune does not fit.
        internal bool TryDrainRemainingDataForGetChars(Span<char> chars, out int charsWritten)
        {
            Span<char> remaining = chars;

            for (Rune rune = GetNextRune(); rune.Value != 0; rune = GetNextRune())
            {
                if (!rune.TryEncodeToUtf16(remaining, out int encodedLength))
                {
                    InternalReset();
                    charsWritten = 0;
                    return false;
                }

                remaining = remaining.Slice(encodedLength);
            }

            // Chars written = how much of the destination has been consumed.
            charsWritten = chars.Length - remaining.Length;
            return true;
        }
 
        // Internal helper: always throws an ArgumentException (message resource
        // SR.Argument_RecursiveFallbackBytes) whose argument lists the offending bytes as
        // space-separated "\xNN" tokens. At most the first 20 bytes are listed; " ..." is appended
        // only when further bytes were left out.
        [DoesNotReturn]
        internal static void ThrowLastBytesRecursive(byte[] bytesUnknown)
        {
            const int MaxBytesShown = 20;

            bytesUnknown ??= Array.Empty<byte>();

            int shown = Math.Min(bytesUnknown.Length, MaxBytesShown);

            // Create a string representation of our bytes.
            // Size the builder from the number of bytes actually rendered ("\xNN" plus a separator each,
            // plus the " ..." suffix) rather than from the full array length, so the capacity stays
            // small and cannot overflow for very large inputs.
            StringBuilder strBytes = new StringBuilder(shown * 5 + 4);
            for (int i = 0; i < shown; i++)
            {
                if (i > 0)
                    strBytes.Append(' ');
                strBytes.Append($"\\x{bytesUnknown[i]:X2}");
            }

            // In case the string's really long: mark truncation only when bytes were really omitted
            // (an input of exactly 20 bytes is shown in full and gets no ellipsis).
            if (bytesUnknown.Length > MaxBytesShown)
                strBytes.Append(" ...");

            // Throw it, using our complete bytes
            throw new ArgumentException(
                SR.Format(SR.Argument_RecursiveFallbackBytes,
                    strBytes.ToString()), nameof(bytesUnknown));
        }
    }
}