|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace System.Text
{
// An Encoder is used to encode a sequence of blocks of characters into
// a sequence of blocks of bytes. Following instantiation of an encoder,
// sequential blocks of characters are converted into blocks of bytes through
// calls to the GetBytes method. The encoder maintains state between the
// conversions, allowing it to correctly encode character sequences that span
// adjacent blocks.
//
// Instances of specific implementations of the Encoder abstract base
// class are typically obtained through calls to the GetEncoder method
// of Encoding objects.
//
internal class EncoderNLS : Encoder
{
// Need a place for the last left over character, most of our encodings use this.
// A value of (char)0 means nothing is pending: Reset() restores that value, and the
// drain helpers in this class treat a non-zero value as a leftover high surrogate.
internal char _charLeftOver;
// The encoding supplied to the constructor; conversion calls are delegated to it.
private readonly Encoding _encoding;
// The 'flush' argument of the most recent GetByteCount/GetBytes/Convert call.
// Exposed through MustFlush and cleared by ClearMustFlush().
private bool _mustFlush;
// Set to true by GetByteCount/GetBytes and to false by Convert before delegating.
// NOTE(review): presumably consulted by the encoding when the destination is too small - confirm.
internal bool _throwOnOverflow;
// Zeroed by Convert before delegating and read back afterwards as its 'charsUsed' result.
// NOTE(review): presumably written by the encoding during the conversion - confirm.
internal int _charsUsed;
// Binds this encoder to the given encoding, adopts that encoding's encoder
// fallback, and puts the instance into its initial (reset) state.
internal EncoderNLS(Encoding encoding)
{
    _encoding = encoding;
    _fallback = encoding.EncoderFallback;

    // Reset is an overridable member, so a derived type's override (if any) runs here.
    Reset();
}
// Forgets any leftover character and, if a fallback buffer has already been
// created, resets it as well.
public override void Reset()
{
    _charLeftOver = '\0';

    if (_fallbackBuffer is not null)
    {
        _fallbackBuffer.Reset();
    }
}
// Array-based overload: validates that [index, index + count) lies inside 'chars',
// then defers to the pointer-based overload.
//
// Throws ArgumentNullException when 'chars' is null, and ArgumentOutOfRangeException
// when 'index' or 'count' is negative or the requested range runs past the end of the array.
public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentOutOfRangeException.ThrowIfNegative(index);
    ArgumentOutOfRangeException.ThrowIfNegative(count);

    int charsAvailable = chars.Length - index;
    if (count > charsAvailable)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Pin through the array's data reference so that an empty array still produces a
    // non-null pointer; the pointer overload rejects null.
    fixed (char* pStart = &MemoryMarshal.GetArrayDataReference(chars))
    {
        return GetByteCount(pStart + index, count, flush);
    }
}
// Pointer-based overload: validates the arguments, records the per-call options on
// this instance, and asks the encoding for the byte count.
//
// Throws ArgumentNullException when 'chars' is null and ArgumentOutOfRangeException
// when 'count' is negative.
public override unsafe int GetByteCount(char* chars, int count, bool flush)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentOutOfRangeException.ThrowIfNegative(count);

    // Stash the per-call options on this instance, which is handed to the encoding below.
    _throwOnOverflow = true;
    _mustFlush = flush;

    Debug.Assert(_encoding is not null);
    return _encoding.GetByteCount(chars, count, this);
}
// Array-based overload: validates both array segments and defers to the pointer-based
// overload. The destination is everything from 'byteIndex' to the end of 'bytes'.
//
// Throws ArgumentNullException when either array is null, and ArgumentOutOfRangeException
// when an index/count is negative, the char range runs past the end of 'chars', or
// 'byteIndex' is greater than bytes.Length.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, bool flush)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentNullException.ThrowIfNull(bytes);
    ArgumentOutOfRangeException.ThrowIfNegative(charIndex);
    ArgumentOutOfRangeException.ThrowIfNegative(charCount);

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // A single unsigned comparison rejects both negative values and values past the end.
    if ((uint)byteIndex > (uint)bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_IndexMustBeLessOrEqual);
    }

    int byteCapacity = bytes.Length - byteIndex;

    // Pin through the data references so that empty arrays still produce non-null
    // pointers; the pointer overload rejects null.
    fixed (char* pSource = &MemoryMarshal.GetArrayDataReference(chars))
    fixed (byte* pDestination = &MemoryMarshal.GetArrayDataReference(bytes))
    {
        // charCount is the number of chars to encode, not the size of the array.
        return GetBytes(pSource + charIndex, charCount, pDestination + byteIndex, byteCapacity, flush);
    }
}
// Pointer-based overload: validates the arguments, records the per-call options on
// this instance, and lets the encoding perform the conversion.
//
// Throws ArgumentNullException when either pointer is null and
// ArgumentOutOfRangeException when either count is negative.
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentNullException.ThrowIfNull(bytes);
    ArgumentOutOfRangeException.ThrowIfNegative(byteCount);
    ArgumentOutOfRangeException.ThrowIfNegative(charCount);

    // Stash the per-call options on this instance, which is handed to the encoding below.
    _throwOnOverflow = true;
    _mustFlush = flush;

    Debug.Assert(_encoding is not null);
    return _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
}
// Use this method when the output buffer might not be large enough to hold the entire
// result. This array-based overload only validates its arguments and then forwards to
// the pointer-based overload, which does the real work.
//
// Throws ArgumentNullException when either array is null, and ArgumentOutOfRangeException
// when any index/count is negative or a requested range runs past the end of its array.
public override unsafe void Convert(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentNullException.ThrowIfNull(bytes);
    ArgumentOutOfRangeException.ThrowIfNegative(charIndex);
    ArgumentOutOfRangeException.ThrowIfNegative(charCount);
    ArgumentOutOfRangeException.ThrowIfNegative(byteIndex);
    ArgumentOutOfRangeException.ThrowIfNegative(byteCount);

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Forward to the pointer version (can't do this for non-msft encoders).
    // Pinning through the data references keeps the pointers non-null for empty arrays.
    fixed (char* pSource = &MemoryMarshal.GetArrayDataReference(chars))
    fixed (byte* pDestination = &MemoryMarshal.GetArrayDataReference(bytes))
    {
        Convert(pSource + charIndex, charCount, pDestination + byteIndex, byteCount, flush,
            out charsUsed, out bytesUsed, out completed);
    }
}
// Pointer-based Convert. Sets up this instance's per-call state, hands the work to the
// encoding's GetBytes worker, and then reports how much input was consumed, how many
// bytes were produced, and whether the conversion is complete.
//
// Throws ArgumentNullException when either pointer is null and
// ArgumentOutOfRangeException when either count is negative.
public override unsafe void Convert(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    ArgumentNullException.ThrowIfNull(chars);
    ArgumentNullException.ThrowIfNull(bytes);
    ArgumentOutOfRangeException.ThrowIfNegative(charCount);
    ArgumentOutOfRangeException.ThrowIfNegative(byteCount);

    // Unlike GetBytes, Convert does not want an overflow to throw.
    _throwOnOverflow = false;
    _mustFlush = flush;
    _charsUsed = 0;

    // Run the conversion, then pick up the consumed-char count from this instance.
    Debug.Assert(_encoding is not null);
    bytesUsed = _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
    charsUsed = _charsUsed;

    // Meaning of 'completed':
    //
    // When it comes back false, one of two things happened:
    //   a) this call did not consume the entire source buffer; or
    //   b) this call consumed the entire source buffer, but data is still pending
    //      for the destination buffer.
    // Either way the caller should slice the input, supply a fresh destination buffer,
    // and keep calling Convert until 'completed' is true. The caller *must* pass
    // flush = true on the final iteration(s) and loop until 'completed' is true,
    // otherwise data loss may occur.
    //
    // Precise contract:
    //
    // flush = false: 'completed' MUST be false while source elements remain unconsumed,
    // and MUST be true once the whole source has been consumed and nothing is pending
    // for the destination (i.e. a zero-length source with an infinite-length destination
    // would make no forward progress). Its value is undefined when the source has been
    // fully consumed but destination data is still pending.
    //
    // flush = true: 'completed' is true IF AND ONLY IF
    //   a) every source element has been transcoded into the destination buffer; AND
    //   b) no internal partial-read state remains in this instance; AND
    //   c) no data remains pending for the destination buffer.
    // Put differently, with flush = true a 'completed' value of true means all data has
    // been converted and this instance is indistinguishable from a freshly-reset one.
    bool sourceFullyConsumed = charsUsed == charCount;
    completed = sourceFullyConsumed
        && !(flush && HasState)
        && (_fallbackBuffer is null or { Remaining: 0 });
}
// The encoding this encoder was constructed with.
public Encoding Encoding
{
    get
    {
        Encoding owner = _encoding;
        Debug.Assert(owner is not null);
        return owner;
    }
}
// The 'flush' value recorded by the most recent conversion call (unless cleared since).
public bool MustFlush
{
    get { return _mustFlush; }
}
/// <summary>
/// Indicates whether this <see cref="EncoderNLS"/> instance holds data that a call to
/// <see cref="Encoding.GetBytes(char*, int, byte*, int, EncoderNLS)"/> must drain first:
/// either a leftover character or unread content in the fallback buffer.
/// </summary>
internal bool HasLeftoverData
{
    get
    {
        if (_charLeftOver != '\0')
        {
            return true;
        }

        return _fallbackBuffer is { Remaining: > 0 };
    }
}
// Is anything left in our encoder? The base implementation only looks at the leftover
// character; the member is virtual, so derived encoders can report additional state.
internal virtual bool HasState
{
    get { return _charLeftOver != '\0'; }
}
// Lets the encoding clear our must-flush flag instead of throwing (in ThrowBytesOverflow).
internal void ClearMustFlush() => _mustFlush = false;
// Computes how many bytes the leftover high surrogate from a previous operation (if any)
// would contribute, optionally pairing it with the first char of 'chars'.
//
// Returns the byte count attributable to the leftover data. 'charsConsumed' is set to 1
// when the first char of 'chars' was paired with the leftover surrogate, otherwise 0.
//
// Throws ArgumentException when the fallback buffer still holds data from an earlier call.
internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed)
{
    // Sanity check: there _should not_ be leftover fallback data from a previous
    // invocation. Counting would consume it and corrupt whatever Convert call
    // happens to be in progress.
    if (_fallbackBuffer is { Remaining: > 0 })
    {
        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, Encoding.EncodingName, _fallbackBuffer.GetType()));
    }

    charsConsumed = 0; // provisional; updated below if we pair with chars[0]

    // GetByteCount is supposed to be non-mutating, so we only read the leftover char
    // here. The field keeps its value for the next call to Convert.
    char pendingHighSurrogate = _charLeftOver;
    if (pendingHighSurrogate == '\0')
    {
        return 0; // nothing left over - short-circuit and finish
    }

    char nextChar;
    if (!chars.IsEmpty)
    {
        nextChar = chars[0];
    }
    else if (MustFlush)
    {
        // Empty input while flushing: the leftover high surrogate goes through the
        // fallback mechanism on its own.
        nextChar = '\0';
    }
    else
    {
        return 0; // empty input and no flush requested: no-op = success
    }

    if (!Rune.TryCreate(pendingHighSurrogate, nextChar, out Rune scalar))
    {
        // Not a valid surrogate pair: only the leftover char is invalid. An index of -1
        // tells the fallback mechanism that the invalid sequence started one char
        // before the current buffer.
        FallbackBuffer.Fallback(pendingHighSurrogate, index: -1);
    }
    else
    {
        charsConsumed = 1; // leftover high surrogate + first char of the input buffer

        Debug.Assert(_encoding is not null);
        if (_encoding.TryGetByteCount(scalar, out int scalarByteCount))
        {
            Debug.Assert(scalarByteCount >= 0, "Encoding shouldn't have returned a negative byte count.");
            return scalarByteCount;
        }

        // The encoding can't represent this scalar: fall back the whole pair, again
        // using index -1 because the pair starts one char before the current buffer.
        FallbackBuffer.Fallback(pendingHighSurrogate, nextChar, index: -1);
    }

    // Tally the bytes that fallback would have emitted.
    Debug.Assert(_fallbackBuffer is not null);
    return _fallbackBuffer.DrainRemainingDataForGetByteCount();
}
// Writes out any data left over from a previous invocation before the main conversion
// runs. We may have a leftover high surrogate, or leftover data in the fallback buffer,
// or neither - but never both.
//
// Returns true on success (including "nothing to do"); otherwise returns whatever the
// fallback buffer's drain operation reports. 'charsConsumed' is set to 1 when the first
// char of 'chars' was paired with the leftover surrogate, otherwise 0. 'bytesWritten'
// receives the number of bytes placed into 'bytes'.
internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten)
{
    // Provisional values; updated below where appropriate.
    charsConsumed = 0;
    bytesWritten = 0;

    char pendingHighSurrogate = _charLeftOver;
    if (pendingHighSurrogate != '\0')
    {
        char nextChar;
        if (!chars.IsEmpty)
        {
            nextChar = chars[0];
        }
        else if (MustFlush)
        {
            // Empty input while flushing: the leftover high surrogate goes through the
            // fallback mechanism on its own.
            nextChar = '\0';
        }
        else
        {
            // Empty input and no flush requested: no-op = success. Both out values are still 0.
            return true;
        }

        // We're about to consume the leftover char, so clear the backing field now. We
        // don't bother restoring it if an exception occurs, because exceptional code
        // paths corrupt instance state anyway (e.g., by mutating the fallback buffer).
        _charLeftOver = '\0';

        if (!Rune.TryCreate(pendingHighSurrogate, nextChar, out Rune scalar))
        {
            // Not a valid surrogate pair: fall back the lone leftover char. Index -1 says
            // the invalid sequence started one char before the current buffer.
            FallbackBuffer.Fallback(pendingHighSurrogate, index: -1);
        }
        else
        {
            charsConsumed = 1; // at the very least, we consumed 1 char from the input

            Debug.Assert(_encoding is not null);
            OperationStatus status = _encoding.EncodeRune(scalar, bytes, out bytesWritten);

            if (status == OperationStatus.Done)
            {
                return true; // that's all - we've handled the leftover data
            }
            else if (status == OperationStatus.DestinationTooSmall)
            {
                _encoding.ThrowBytesOverflow(this, nothingEncoded: true); // will throw
            }
            else if (status == OperationStatus.InvalidData)
            {
                // The encoding can't represent this scalar: fall back the whole pair,
                // which began one char before the current buffer (hence index -1).
                FallbackBuffer.Fallback(pendingHighSurrogate, nextChar, index: -1);
            }
            else
            {
                Debug.Fail("Unknown return value.");
            }
        }
    }

    // Finally, emit whatever is sitting in the fallback buffer (either populated just
    // above or left from an earlier call).
    if (_fallbackBuffer is { Remaining: > 0 })
    {
        return _fallbackBuffer.TryDrainRemainingDataForGetBytes(bytes, out bytesWritten);
    }

    return true; // success - nothing further to drain
}
}
}
|