// File: System\Text\EncodingNLS.cs
// Project: src\src\libraries\System.Text.Encoding.CodePages\src\System.Text.Encoding.CodePages.csproj (System.Text.Encoding.CodePages)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Resources;
using System.Runtime.CompilerServices;
using System.Runtime.Serialization;
using System.Threading;
 
namespace System.Text
{
    // This class overrides Encoding with the things we need for our NLS Encodings
    //
    // All of the GetBytes/Chars GetByte/CharCount methods are just wrappers for the pointer
    // plus decoder/encoder method that is our real workhorse.  Note that this is an internal
    // class, so our public classes cannot derive from this class.  Because of this, all of the
    // GetBytes/Chars GetByte/CharCount wrapper methods are duplicated in all of our public
    // encodings.
    // So if you change the wrappers in this class, you must change the wrappers in the other classes
    // as well because they should have the same behavior.
    internal abstract class EncodingNLS : Encoding
    {
        // Cache for the EncodingName property; stays null until that property has resolved a name.
        private string? _encodingName;
        // Cache for the WebName property; stays null until that property has resolved a name.
        private string? _webName;
 
        // Creates the encoding for the given code page. The value is passed unchanged to the
        // Encoding base constructor; no other state is initialized here.
        protected EncodingNLS(int codePage) : base(codePage)
        {
        }

        // Creates the encoding for the given code page with explicit encoder and decoder
        // fallbacks. All three arguments are passed unchanged to the Encoding base constructor.
        protected EncodingNLS(int codePage, EncoderFallback enc, DecoderFallback dec)
            : base(codePage, enc, dec)
        {
        }
 
        // Pointer-based overloads that each concrete encoding must implement. The array, string
        // and pointer wrappers in this class validate their arguments and then forward to these,
        // always passing null for the trailing encoder/decoder argument.
        public abstract unsafe int GetByteCount(char* chars, int count, EncoderNLS? encoder);
        public abstract unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS? encoder);
        public abstract unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? decoder);
        public abstract unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS? decoder);
 
        // Counts the bytes needed to encode `count` characters of `chars`, starting at `index`.
        //
        // Throws ArgumentNullException when `chars` is null and ArgumentOutOfRangeException when
        // `index` or `count` is negative or the range runs past the end of the array.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetByteCount(char[] chars, int index, int count)
        {
            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            // A negative index is reported in preference to a negative count.
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count > chars.Length - index)
            {
                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            // An empty array has no first element to pin, and there is nothing to count.
            if (chars.Length == 0)
            {
                return 0;
            }

            // Hand the pinned range to the pointer-based implementation, with no encoder.
            fixed (char* first = &chars[0])
            {
                return GetByteCount(first + index, count, null);
            }
        }
 
        // Counts the bytes needed to encode the whole string `s`.
        // Throws ArgumentNullException when `s` is null.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetByteCount(string s)
        {
            if (s is null)
            {
                throw new ArgumentNullException(nameof(s));
            }

            int length = s.Length;

            // Pin the string and let the pointer-based implementation do the work, with no encoder.
            fixed (char* text = s)
            {
                return GetByteCount(text, length, null);
            }
        }
 
        // Counts the bytes needed to encode `count` characters starting at `chars`.
        // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
        // negative count.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        public override unsafe int GetByteCount(char* chars, int count)
        {
            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Forward to the encoder-aware overload without an encoder.
            EncoderNLS? noEncoder = null;
            return GetByteCount(chars, count, noEncoder);
        }
 
        // Encodes `charCount` characters of `s`, starting at `charIndex`, into `bytes` starting
        // at `byteIndex`, and returns the value produced by the pointer-based GetBytes overload.
        //
        // Throws ArgumentNullException when `s` or `bytes` is null, and
        // ArgumentOutOfRangeException when an index or count is negative, the character range
        // runs past the end of `s`, or `byteIndex` lies beyond the end of `bytes`.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                            byte[] bytes, int byteIndex)
        {
            if (s is null)
            {
                throw new ArgumentNullException(nameof(s));
            }

            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // A negative charIndex is reported in preference to a negative charCount.
            if (charIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charCount > s.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
            }

            if (byteIndex > bytes.Length || byteIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
            }

            // Space left in the destination from byteIndex to the end of the array.
            int available = bytes.Length - byteIndex;

            fixed (char* source = s)
            fixed (byte* destination = &CodePagesEncodingProvider.GetNonNullPinnableReference(bytes))
            {
                char* from = source + charIndex;
                byte* to = destination + byteIndex;
                return GetBytes(from, charCount, to, available, null);
            }
        }
 
        // Encodes `charCount` characters of `chars`, starting at `charIndex`, into `bytes`
        // starting at `byteIndex`. An exception is raised when the destination cannot hold the
        // complete encoding. Callers can size the destination exactly with GetByteCount, or
        // conservatively with GetMaxByteCount, which depends only on the number of characters.
        //
        // Throws ArgumentNullException when either array is null, and
        // ArgumentOutOfRangeException when an index or count is negative, the character range
        // runs past the end of `chars`, or `byteIndex` lies beyond the end of `bytes`.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                            byte[] bytes, int byteIndex)
        {
            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // A negative charIndex is reported in preference to a negative charCount.
            if (charIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charCount > chars.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            if (byteIndex > bytes.Length || byteIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
            }

            // An empty source array has no first element to pin and nothing to encode.
            if (chars.Length == 0)
            {
                return 0;
            }

            // What gets passed down is the room left after byteIndex, not the array's full length.
            int available = bytes.Length - byteIndex;

            fixed (char* source = &chars[0])
            fixed (byte* destination = &CodePagesEncodingProvider.GetNonNullPinnableReference(bytes))
            {
                char* from = source + charIndex;
                byte* to = destination + byteIndex;
                return GetBytes(from, charCount, to, available, null);
            }
        }
 
        // Encodes `charCount` characters at `chars` into the `byteCount`-byte buffer at `bytes`.
        // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
        // negative count.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
        {
            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // A negative charCount is reported in preference to a negative byteCount.
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Forward to the encoder-aware overload without an encoder.
            EncoderNLS? noEncoder = null;
            return GetBytes(chars, charCount, bytes, byteCount, noEncoder);
        }
 
        // Counts the characters produced by decoding `count` bytes of `bytes`, starting at `index`.
        //
        // Throws ArgumentNullException when `bytes` is null and ArgumentOutOfRangeException when
        // `index` or `count` is negative or the range runs past the end of the array.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // A negative index is reported in preference to a negative count.
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count > bytes.Length - index)
            {
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            // An empty array has no first element to pin, and there is nothing to count.
            if (bytes.Length == 0)
            {
                return 0;
            }

            // Hand the pinned range to the pointer-based implementation, with no decoder.
            fixed (byte* first = &bytes[0])
            {
                return GetCharCount(first + index, count, null);
            }
        }
 
        // Counts the characters produced by decoding `count` bytes starting at `bytes`.
        // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
        // negative count.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        public override unsafe int GetCharCount(byte* bytes, int count)
        {
            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Forward to the decoder-aware overload without a decoder.
            DecoderNLS? noDecoder = null;
            return GetCharCount(bytes, count, noDecoder);
        }
 
        // Decodes `byteCount` bytes of `bytes`, starting at `byteIndex`, into `chars` starting
        // at `charIndex`, and returns the value produced by the pointer-based GetChars overload.
        //
        // Throws ArgumentNullException when either array is null, and
        // ArgumentOutOfRangeException when an index or count is negative, the byte range runs
        // past the end of `bytes`, or `charIndex` lies beyond the end of `chars`.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                            char[] chars, int charIndex)
        {
            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            // A negative byteIndex is reported in preference to a negative byteCount.
            if (byteIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (byteCount > bytes.Length - byteIndex)
            {
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            if (charIndex > chars.Length || charIndex < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
            }

            // An empty source array has no first element to pin and nothing to decode.
            if (bytes.Length == 0)
            {
                return 0;
            }

            // What gets passed down is the room left after charIndex, not the array's full length.
            int available = chars.Length - charIndex;

            fixed (byte* source = &bytes[0])
            fixed (char* destination = &CodePagesEncodingProvider.GetNonNullPinnableReference(chars))
            {
                byte* from = source + byteIndex;
                char* to = destination + charIndex;
                return GetChars(from, byteCount, to, available, null);
            }
        }
 
        // Decodes `byteCount` bytes at `bytes` into the `charCount`-character buffer at `chars`.
        // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
        // negative count.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
        {
            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            if (chars is null)
            {
                throw new ArgumentNullException(nameof(chars));
            }

            // A negative charCount is reported in preference to a negative byteCount.
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Forward to the decoder-aware overload without a decoder.
            DecoderNLS? noDecoder = null;
            return GetChars(bytes, byteCount, chars, charCount, noDecoder);
        }
 
        // Decodes `count` bytes of `bytes`, starting at `index`, and returns them as a string.
        //
        // Throws ArgumentNullException when `bytes` is null and ArgumentOutOfRangeException when
        // `index` or `count` is negative or the range runs past the end of the array.
        //
        // Maintenance note: every public Encoding that does not derive from EncodingNLS carries
        // its own copy of this wrapper (as does EncodingNLS), so a fix here must be repeated there.
        // The overridden parent method is safe.
        public override unsafe string GetString(byte[] bytes, int index, int count)
        {
            if (bytes is null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            // A negative index is reported in preference to a negative count.
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count > bytes.Length - index)
            {
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            // An empty array has no first element to pin; the result is simply the empty string.
            if (bytes.Length == 0)
            {
                return string.Empty;
            }

            fixed (byte* first = &bytes[0])
            {
                return GetString(first + index, count);
            }
        }
 
        // Returns a new DecoderNLS constructed over this encoding.
        public override Decoder GetDecoder() => new DecoderNLS(this);
 
        // Returns a new EncoderNLS constructed over this encoding.
        public override Encoder GetEncoder() => new EncoderNLS(this);
 
        // Reports that the output byte buffer is too small.
        //
        // With an encoder that does not throw on overflow, and when something was already
        // encoded, nothing is thrown: the encoder's must-flush state is cleared and the method
        // returns. In every other case the encoder's fallback buffer (if it has one) is reset
        // and the parameterless ThrowBytesOverflow raises the exception.
        internal void ThrowBytesOverflow(EncoderNLS? encoder, bool nothingEncoded)
        {
            // We are in convert: remember our flushing instead of throwing.
            if (encoder is not null && !encoder.m_throwOnOverflow && !nothingEncoded)
            {
                encoder.ClearMustFlush();
                return;
            }

            if (encoder is { InternalHasFallbackBuffer: true })
            {
                encoder.FallbackBuffer.Reset();
            }

            // The message names the fallback type in case a user-implemented encoder fallback
            // has a broken GetMaxCharCount.
            ThrowBytesOverflow();
        }
 
        // Reports that the output character buffer is too small.
        //
        // With a decoder that does not throw on overflow, and when something was already
        // decoded, nothing is thrown: the decoder's must-flush state is cleared and the method
        // returns. In every other case the decoder's fallback buffer (if it has one) is reset
        // and the parameterless ThrowCharsOverflow raises the exception.
        internal void ThrowCharsOverflow(DecoderNLS? decoder, bool nothingDecoded)
        {
            // We are in convert: remember our flushing instead of throwing.
            if (decoder is not null && !decoder.m_throwOnOverflow && !nothingDecoded)
            {
                decoder.ClearMustFlush();
                return;
            }

            if (decoder is { InternalHasFallbackBuffer: true })
            {
                decoder.FallbackBuffer.Reset();
            }

            // The message names the fallback type in case a user-implemented decoder fallback
            // has a broken GetMaxCharCount.
            ThrowCharsOverflow();
        }
 
        // Always throws an ArgumentException for the "bytes" parameter. The message is built
        // from this encoding's name and the runtime type of its encoder fallback, so that a
        // user-implemented encoder fallback with a broken GetMaxCharCount can be identified.
        [DoesNotReturn]
        internal void ThrowBytesOverflow()
        {
            string format = SR.Argument_EncodingConversionOverflowBytes;
            string encodingName = EncodingName;
            Type fallbackType = EncoderFallback.GetType();

            throw new ArgumentException(SR.Format(format, encodingName, fallbackType), "bytes");
        }
 
        // Always throws an ArgumentException for the "chars" parameter. The message is built
        // from this encoding's name and the runtime type of its decoder fallback, so that a
        // user-implemented decoder fallback with a broken GetMaxCharCount can be identified.
        [DoesNotReturn]
        internal void ThrowCharsOverflow()
        {
            string format = SR.Argument_EncodingConversionOverflowChars;
            string encodingName = EncodingName;
            Type fallbackType = DecoderFallback.GetType();

            throw new ArgumentException(SR.Format(format, encodingName, fallbackType), "chars");
        }
 
        // Returns the display name of this encoding, resolving it on first use and caching it
        // in _encodingName afterwards.
        //
        // The name comes from GetLocalizedEncodingNameResource. When that value is only the
        // resource identifier ("Globalization_cp_..."), the English name from EncodingTable is
        // used instead. Throws NotSupportedException when neither source yields a name.
        public override string EncodingName
        {
            get
            {
                // Resolve into a local and write the field once with the final value. Storing
                // the raw resource lookup in the field first would let a concurrent reader of
                // the same instance see a non-null field and return the placeholder identifier.
                string? name = _encodingName;
                if (name is null)
                {
                    name = GetLocalizedEncodingNameResource(CodePage);

                    if (name is not null && name.StartsWith("Globalization_cp_", StringComparison.OrdinalIgnoreCase))
                    {
                        // Resource strings may be stripped from retail builds and replaced by
                        // their identifier names. Since this property is meant to be a localized string,
                        // we specifically need to do something reasonable in this case. This currently
                        // returns the English name of the encoding from a static data table.
                        name = EncodingTable.GetEnglishNameFromCodePage(CodePage);
                    }

                    if (name is null)
                    {
                        throw new NotSupportedException(
                            SR.Format(SR.MissingEncodingNameResource, WebName, CodePage));
                    }

                    _encodingName = name;
                }

                return name;
            }
        }
 
        // Maps a code page number to its SR.Globalization_cp_<codePage> resource string.
        // Returns null for any code page that has no entry in the table below.
        // The EncodingName property is the caller visible in this file; it checks the returned
        // value for a "Globalization_cp_" prefix before using it.
        internal static string? GetLocalizedEncodingNameResource(int codePage) =>
            codePage switch
            {
                37 => SR.Globalization_cp_37,
                437 => SR.Globalization_cp_437,
                500 => SR.Globalization_cp_500,
                708 => SR.Globalization_cp_708,
                720 => SR.Globalization_cp_720,
                737 => SR.Globalization_cp_737,
                775 => SR.Globalization_cp_775,
                850 => SR.Globalization_cp_850,
                852 => SR.Globalization_cp_852,
                855 => SR.Globalization_cp_855,
                857 => SR.Globalization_cp_857,
                858 => SR.Globalization_cp_858,
                860 => SR.Globalization_cp_860,
                861 => SR.Globalization_cp_861,
                862 => SR.Globalization_cp_862,
                863 => SR.Globalization_cp_863,
                864 => SR.Globalization_cp_864,
                865 => SR.Globalization_cp_865,
                866 => SR.Globalization_cp_866,
                869 => SR.Globalization_cp_869,
                870 => SR.Globalization_cp_870,
                874 => SR.Globalization_cp_874,
                875 => SR.Globalization_cp_875,
                932 => SR.Globalization_cp_932,
                936 => SR.Globalization_cp_936,
                949 => SR.Globalization_cp_949,
                950 => SR.Globalization_cp_950,
                1026 => SR.Globalization_cp_1026,
                1047 => SR.Globalization_cp_1047,
                1140 => SR.Globalization_cp_1140,
                1141 => SR.Globalization_cp_1141,
                1142 => SR.Globalization_cp_1142,
                1143 => SR.Globalization_cp_1143,
                1144 => SR.Globalization_cp_1144,
                1145 => SR.Globalization_cp_1145,
                1146 => SR.Globalization_cp_1146,
                1147 => SR.Globalization_cp_1147,
                1148 => SR.Globalization_cp_1148,
                1149 => SR.Globalization_cp_1149,
                1250 => SR.Globalization_cp_1250,
                1251 => SR.Globalization_cp_1251,
                1252 => SR.Globalization_cp_1252,
                1253 => SR.Globalization_cp_1253,
                1254 => SR.Globalization_cp_1254,
                1255 => SR.Globalization_cp_1255,
                1256 => SR.Globalization_cp_1256,
                1257 => SR.Globalization_cp_1257,
                1258 => SR.Globalization_cp_1258,
                1361 => SR.Globalization_cp_1361,
                10000 => SR.Globalization_cp_10000,
                10001 => SR.Globalization_cp_10001,
                10002 => SR.Globalization_cp_10002,
                10003 => SR.Globalization_cp_10003,
                10004 => SR.Globalization_cp_10004,
                10005 => SR.Globalization_cp_10005,
                10006 => SR.Globalization_cp_10006,
                10007 => SR.Globalization_cp_10007,
                10008 => SR.Globalization_cp_10008,
                10010 => SR.Globalization_cp_10010,
                10017 => SR.Globalization_cp_10017,
                10021 => SR.Globalization_cp_10021,
                10029 => SR.Globalization_cp_10029,
                10079 => SR.Globalization_cp_10079,
                10081 => SR.Globalization_cp_10081,
                10082 => SR.Globalization_cp_10082,
                20000 => SR.Globalization_cp_20000,
                20001 => SR.Globalization_cp_20001,
                20002 => SR.Globalization_cp_20002,
                20003 => SR.Globalization_cp_20003,
                20004 => SR.Globalization_cp_20004,
                20005 => SR.Globalization_cp_20005,
                20105 => SR.Globalization_cp_20105,
                20106 => SR.Globalization_cp_20106,
                20107 => SR.Globalization_cp_20107,
                20108 => SR.Globalization_cp_20108,
                20261 => SR.Globalization_cp_20261,
                20269 => SR.Globalization_cp_20269,
                20273 => SR.Globalization_cp_20273,
                20277 => SR.Globalization_cp_20277,
                20278 => SR.Globalization_cp_20278,
                20280 => SR.Globalization_cp_20280,
                20284 => SR.Globalization_cp_20284,
                20285 => SR.Globalization_cp_20285,
                20290 => SR.Globalization_cp_20290,
                20297 => SR.Globalization_cp_20297,
                20420 => SR.Globalization_cp_20420,
                20423 => SR.Globalization_cp_20423,
                20424 => SR.Globalization_cp_20424,
                20833 => SR.Globalization_cp_20833,
                20838 => SR.Globalization_cp_20838,
                20866 => SR.Globalization_cp_20866,
                20871 => SR.Globalization_cp_20871,
                20880 => SR.Globalization_cp_20880,
                20905 => SR.Globalization_cp_20905,
                20924 => SR.Globalization_cp_20924,
                20932 => SR.Globalization_cp_20932,
                20936 => SR.Globalization_cp_20936,
                20949 => SR.Globalization_cp_20949,
                21025 => SR.Globalization_cp_21025,
                21027 => SR.Globalization_cp_21027,
                21866 => SR.Globalization_cp_21866,
                28592 => SR.Globalization_cp_28592,
                28593 => SR.Globalization_cp_28593,
                28594 => SR.Globalization_cp_28594,
                28595 => SR.Globalization_cp_28595,
                28596 => SR.Globalization_cp_28596,
                28597 => SR.Globalization_cp_28597,
                28598 => SR.Globalization_cp_28598,
                28599 => SR.Globalization_cp_28599,
                28603 => SR.Globalization_cp_28603,
                28605 => SR.Globalization_cp_28605,
                29001 => SR.Globalization_cp_29001,
                38598 => SR.Globalization_cp_38598,
                50000 => SR.Globalization_cp_50000,
                50220 => SR.Globalization_cp_50220,
                50221 => SR.Globalization_cp_50221,
                50222 => SR.Globalization_cp_50222,
                50225 => SR.Globalization_cp_50225,
                50227 => SR.Globalization_cp_50227,
                50229 => SR.Globalization_cp_50229,
                50930 => SR.Globalization_cp_50930,
                50931 => SR.Globalization_cp_50931,
                50933 => SR.Globalization_cp_50933,
                50935 => SR.Globalization_cp_50935,
                50937 => SR.Globalization_cp_50937,
                50939 => SR.Globalization_cp_50939,
                51932 => SR.Globalization_cp_51932,
                51936 => SR.Globalization_cp_51936,
                51949 => SR.Globalization_cp_51949,
                52936 => SR.Globalization_cp_52936,
                54936 => SR.Globalization_cp_54936,
                57002 => SR.Globalization_cp_57002,
                57003 => SR.Globalization_cp_57003,
                57004 => SR.Globalization_cp_57004,
                57005 => SR.Globalization_cp_57005,
                57006 => SR.Globalization_cp_57006,
                57007 => SR.Globalization_cp_57007,
                57008 => SR.Globalization_cp_57008,
                57009 => SR.Globalization_cp_57009,
                57010 => SR.Globalization_cp_57010,
                57011 => SR.Globalization_cp_57011,
                _ => null,
            };
 
        // Returns the IANA preferred name for this encoding. The name is looked up in
        // EncodingTable by code page on first use and cached in _webName afterwards.
        // Throws NotSupportedException when the table has no name for the code page.
        public override string WebName
        {
            get
            {
                string? cached = _webName;
                if (cached is not null)
                {
                    return cached;
                }

                string? resolved = EncodingTable.GetWebNameFromCodePage(CodePage);
                _webName = resolved;
                if (resolved is null)
                {
                    throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, CodePage));
                }

                return resolved;
            }
        }
 
        // Returns a fixed name for code pages 932, 50221 and 50225; every other code page
        // reports its WebName.
        public override string HeaderName
        {
            get
            {
                switch (CodePage)
                {
                    case 932:
                    case 50221:
                        return "iso-2022-jp";

                    case 50225:
                        return "euc-kr";

                    default:
                        return WebName;
                }
            }
        }
 
        // Returns a fixed name for code pages 932, 1250-1254, 50221 and 50225; every other
        // code page reports its WebName.
        public override string BodyName
        {
            get
            {
                switch (CodePage)
                {
                    case 932:
                    case 50221:
                        return "iso-2022-jp";

                    case 1250:
                        return "iso-8859-2";

                    case 1251:
                        return "koi8-r";

                    case 1252:
                        return "iso-8859-1";

                    case 1253:
                        return "iso-8859-7";

                    case 1254:
                        return "iso-8859-9";

                    case 50225:
                        return "iso-2022-kr";

                    default:
                        return WebName;
                }
            }
        }
    }
}