File: src\libraries\System.Private.CoreLib\src\System\Text\EncoderLatin1BestFitFallback.cs
Web Access
Project: src\src\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj (System.Private.CoreLib)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
//
// This is used internally to reproduce the original Windows best fit behavior.
//
 
using System.Diagnostics;
using System.Globalization;
 
namespace System.Text
{
    // Encoder fallback whose buffers are EncoderLatin1BestFitFallbackBuffer instances.
    // The type carries no instance state, so one shared instance is exposed and the
    // constructor is private.
    internal sealed class EncoderLatin1BestFitFallback : EncoderFallback
    {
        // The single shared instance of this fallback
        internal static readonly EncoderLatin1BestFitFallback SingletonInstance = new EncoderLatin1BestFitFallback();

        // Not constructible from outside; use SingletonInstance
        private EncoderLatin1BestFitFallback()
        {
        }

        // Maximum number of characters that this fallback reports it could return
        public override int MaxCharCount
        {
            get { return 1; }
        }

        // Each call hands back a fresh buffer
        public override EncoderFallbackBuffer CreateFallbackBuffer()
        {
            return new EncoderLatin1BestFitFallbackBuffer();
        }
    }
 
    // Fallback buffer that replaces an unencodable character with a best fit character looked up
    // in ArrayCharBestFit, or with '?' when the table has no entry for it.
    // A surrogate pair is always replaced by two '?' characters.
    internal sealed partial class EncoderLatin1BestFitFallbackBuffer : EncoderFallbackBuffer
    {
        // Replacement character handed out by GetNextChar for the current fallback
        private char _cBestFit;

        // Number of replacement characters still to be returned.
        // 0 means the last one has just been returned, negative means we are not falling back.
        private int _iCount = -1;

        // Total number of replacement characters for the current fallback (1, or 2 for a surrogate pair)
        private int _iSize;

        // Starts a fallback for a single (non surrogate pair) character.
        // The index argument is not used by this implementation. Always returns true.
        public override bool Fallback(char charUnknown, int index)
        {
            // If we had a buffer already we're being recursive, it's probably at the suspect
            // character in our array.
            // Shouldn't be able to get here for all of our code pages, table would have to be messed up.
            Debug.Assert(_iCount < 1, $"[EncoderLatin1BestFitFallbackBuffer.Fallback(non surrogate)] Fallback char {(int)_cBestFit:X4} caused recursive fallback");

            _iCount = _iSize = 1;

            // TryBestFit reports "no entry" as '\0'; in that case fall back to a question mark
            _cBestFit = TryBestFit(charUnknown);
            if (_cBestFit == '\0')
            {
                _cBestFit = '?';
            }

            return true;
        }

        // Starts a fallback for a surrogate pair.
        // Throws ArgumentOutOfRangeException if either argument is not the expected kind of surrogate.
        // The index argument is not used by this implementation. Always returns true.
        public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
        {
            // Double check input surrogate pair
            if (!char.IsHighSurrogate(charUnknownHigh))
            {
                throw new ArgumentOutOfRangeException(nameof(charUnknownHigh),
                    SR.Format(SR.ArgumentOutOfRange_Range,
                    0xD800, 0xDBFF));
            }

            if (!char.IsLowSurrogate(charUnknownLow))
            {
                throw new ArgumentOutOfRangeException(nameof(charUnknownLow),
                    SR.Format(SR.ArgumentOutOfRange_Range,
                    0xDC00, 0xDFFF));
            }

            // If we had a buffer already we're being recursive, it's probably at the suspect
            // character in our array.  0 is processing last character, < 0 is not falling back
            // Shouldn't be able to get here, table would have to be messed up.
            Debug.Assert(_iCount < 1, $"[EncoderLatin1BestFitFallbackBuffer.Fallback(surrogate)] Fallback char {(int)_cBestFit:X4} caused recursive fallback");

            // Go ahead and get our fallback, surrogates don't have best fit.
            // A count of 2 makes GetNextChar return '?' once per char of the pair.
            _cBestFit = '?';
            _iCount = _iSize = 2;

            return true;
        }

        // Returns the next replacement character, or '\0' when there is nothing (left) to return.
        public override char GetNextChar()
        {
            // We want it to get < 0 because == 0 means that the current/last character is a fallback
            // and we need to detect recursion.  We could have a flag but we already have this counter.
            _iCount--;

            // Do we have anything left? 0 is now last fallback char, negative is nothing left
            if (_iCount < 0)
            {
                return '\0';
            }

            // If the decrement above wrapped around from int.MinValue we are not really falling back;
            // put the counter back into its "not falling back" state.
            if (_iCount == int.MaxValue)
            {
                _iCount = -1;
                return '\0';
            }

            // Return the best fit character
            return _cBestFit;
        }

        // Backs up one position so that the next GetNextChar call returns the previous replacement
        // character again. Returns true if the position was moved, false otherwise.
        public override bool MovePrevious()
        {
            // We can only back up while a fallback is in progress (_iCount >= 0) and at least one
            // replacement character has already been handed out (_iCount < _iSize).
            // When the move is not possible the state is left untouched, so a failed call can't
            // push the counter past the start of the fallback.
            if (_iCount >= 0 && _iCount < _iSize)
            {
                _iCount++;
                return true;
            }

            return false;
        }

        // How many characters left to output?
        public override int Remaining => (_iCount > 0) ? _iCount : 0;

        // Clear the buffer: back to "not falling back" and drop the base class's bookkeeping
        public override unsafe void Reset()
        {
            _iCount = -1;
            charStart = null;
            bFallingBack = false;
        }

        // Looks cUnknown up in ArrayCharBestFit, which is laid out as pairs:
        // the char at an even index is the unknown character, the char right after it is its best fit.
        // The binary search below relies on the even-index entries being in ascending order.
        // Returns the best fit character, or '\0' if the table has no entry for cUnknown.
        private static char TryBestFit(char cUnknown)
        {
            // Need to figure out our best fit character, low is beginning of array, high is 1 AFTER end of array
            int lowBound = 0;
            int highBound = ArrayCharBestFit.Length;
            int index;

            // Binary search the array until the window is small enough to scan linearly
            int iDiff;
            while ((iDiff = (highBound - lowBound)) > 6)
            {
                // Look in the middle, which is complicated by the fact that we have 2 #s for each pair,
                // so we don't want index to be odd because we want to be on pair boundaries.
                // Clearing only the low bit (rather than masking to 16 bits) keeps the full index.
                // Also note that index can never == highBound (because diff is rounded down)
                index = ((iDiff / 2) + lowBound) & ~1;

                char cTest = ArrayCharBestFit[index];
                if (cTest == cUnknown)
                {
                    // We found it
                    Debug.Assert(index + 1 < ArrayCharBestFit.Length,
                        "[EncoderLatin1BestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                    return ArrayCharBestFit[index + 1];
                }
                else if (cTest < cUnknown)
                {
                    // We weren't high enough
                    lowBound = index;
                }
                else
                {
                    // We weren't low enough
                    highBound = index;
                }
            }

            // Scan the remaining pairs one by one
            for (index = lowBound; index < highBound; index += 2)
            {
                if (ArrayCharBestFit[index] == cUnknown)
                {
                    // We found it
                    Debug.Assert(index + 1 < ArrayCharBestFit.Length,
                        "[EncoderLatin1BestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                    return ArrayCharBestFit[index + 1];
                }
            }

            // Char wasn't in our table
            return '\0';
        }
    }
}