// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System; using System.Diagnostics; using System.IO; using System.Runtime.ExceptionServices; using System.Text; using Microsoft.CodeAnalysis; namespace Roslyn.Utilities; using TypeCode = ObjectWriter.TypeCode; /// <summary> /// An <see cref="ObjectReader"/> that deserializes objects from a byte stream. /// </summary> internal sealed partial class ObjectReader : IDisposable { /// <summary> /// We start the version at something reasonably random. That way an older file, with /// some random start-bytes, has little chance of matching our version. When incrementing /// this version, just change VersionByte2. /// </summary> internal const byte VersionByte1 = 0b10101010; internal const byte VersionByte2 = 0b00001101; private readonly BinaryReader _reader; /// <summary> /// Map of reference id's to deserialized strings. /// </summary> private readonly ReaderReferenceMap _stringReferenceMap; /// <summary> /// Creates a new instance of a <see cref="ObjectReader"/>. /// </summary> /// <param name="stream">The stream to read objects from.</param> /// <param name="leaveOpen">True to leave the <paramref name="stream"/> open after the <see cref="ObjectWriter"/> is disposed.</param> private ObjectReader(Stream stream, bool leaveOpen) { // String serialization assumes both reader and writer to be of the same endianness. // It can be adjusted for BigEndian if needed. Debug.Assert(BitConverter.IsLittleEndian); _reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen); _stringReferenceMap = ReaderReferenceMap.Create(); } /// <summary> /// Attempts to create a <see cref="ObjectReader"/> from the provided <paramref name="stream"/>. /// If the <paramref name="stream"/> does not start with a valid header, then <see langword="null"/> will /// be returned. /// </summary> public static ObjectReader? TryGetReader(Stream? stream, bool leaveOpen = false) { if (stream == null) { return null; } try { if (stream.ReadByte() != VersionByte1 || stream.ReadByte() != VersionByte2) { return null; } } catch (AggregateException ex) when (ex.InnerException is not null) { // PipeReaderStream wraps any exception it throws in an AggregateException, which is not expected by // callers treating it as a normal stream. Unwrap and rethrow the inner exception for clarity. // https://github.com/dotnet/runtime/issues/70206 #if NET ExceptionDispatchInfo.Throw(ex.InnerException); #else ExceptionDispatchInfo.Capture(ex.InnerException).Throw(); #endif } return new ObjectReader(stream, leaveOpen); } /// <summary> /// Creates an <see cref="ObjectReader"/> from the provided <paramref name="stream"/>. Unlike <see /// cref="TryGetReader(Stream, bool)"/>, it requires the version of the data in the stream to /// exactly match the current format version. Should only be used to read data written by the same version of /// Roslyn. /// </summary> public static ObjectReader GetReader(Stream stream, bool leaveOpen) => GetReader(stream, leaveOpen, checkValidationBytes: true); /// <summary> /// <inheritdoc cref="GetReader(Stream, bool)"/> /// <param name="checkValidationBytes">Whether or not the validation bytes (see <see /// cref="ObjectWriter.WriteValidationBytes"/> should be checked immediately at the stream's current /// position.</param> /// </summary> public static ObjectReader GetReader(Stream stream, bool leaveOpen, bool checkValidationBytes) { var reader = new ObjectReader(stream, leaveOpen); if (checkValidationBytes) reader.CheckValidationBytes(); return reader; } public void CheckValidationBytes() { var b = this.ReadByte(); if (b != VersionByte1) throw ExceptionUtilities.UnexpectedValue(b); b = this.ReadByte(); if (b != VersionByte2) throw ExceptionUtilities.UnexpectedValue(b); } public void Dispose() { _stringReferenceMap.Dispose(); } public bool ReadBoolean() => _reader.ReadBoolean(); public byte ReadByte() => _reader.ReadByte(); // read as ushort because BinaryWriter fails on chars that are unicode surrogates public char ReadChar() => (char)_reader.ReadUInt16(); public decimal ReadDecimal() => _reader.ReadDecimal(); public double ReadDouble() => _reader.ReadDouble(); public float ReadSingle() => _reader.ReadSingle(); public int ReadInt32() => _reader.ReadInt32(); public long ReadInt64() => _reader.ReadInt64(); public sbyte ReadSByte() => _reader.ReadSByte(); public short ReadInt16() => _reader.ReadInt16(); public uint ReadUInt32() => _reader.ReadUInt32(); public ulong ReadUInt64() => _reader.ReadUInt64(); public ushort ReadUInt16() => _reader.ReadUInt16(); public string? ReadString() => ReadStringValue(); public string ReadRequiredString() => ReadString() ?? throw ExceptionUtilities.Unreachable(); public Guid ReadGuid() { var accessor = new ObjectWriter.GuidAccessor { Low64 = ReadInt64(), High64 = ReadInt64() }; return accessor.Guid; } public object? ReadScalarValue() { var code = (TypeCode)ReadByte(); switch (code) { case TypeCode.Null: return null; case TypeCode.Boolean_True: return true; case TypeCode.Boolean_False: return false; case TypeCode.Int8: return ReadSByte(); case TypeCode.UInt8: return ReadByte(); case TypeCode.Int16: return ReadInt16(); case TypeCode.UInt16: return ReadUInt16(); case TypeCode.Int32: return ReadInt32(); case TypeCode.Int32_1Byte: return (int)ReadByte(); case TypeCode.Int32_2Bytes: return (int)ReadUInt16(); case TypeCode.Int32_0: case TypeCode.Int32_1: case TypeCode.Int32_2: case TypeCode.Int32_3: case TypeCode.Int32_4: case TypeCode.Int32_5: case TypeCode.Int32_6: case TypeCode.Int32_7: case TypeCode.Int32_8: case TypeCode.Int32_9: case TypeCode.Int32_10: return (int)code - (int)TypeCode.Int32_0; case TypeCode.UInt32: return ReadUInt32(); case TypeCode.UInt32_1Byte: return (uint)ReadByte(); case TypeCode.UInt32_2Bytes: return (uint)ReadUInt16(); case TypeCode.UInt32_0: case TypeCode.UInt32_1: case TypeCode.UInt32_2: case TypeCode.UInt32_3: case TypeCode.UInt32_4: case TypeCode.UInt32_5: case TypeCode.UInt32_6: case TypeCode.UInt32_7: case TypeCode.UInt32_8: case TypeCode.UInt32_9: case TypeCode.UInt32_10: return (uint)((int)code - (int)TypeCode.UInt32_0); case TypeCode.Int64: return ReadInt64(); case TypeCode.UInt64: return ReadUInt64(); case TypeCode.Float4: return ReadSingle(); case TypeCode.Float8: return ReadDouble(); case TypeCode.Decimal: return ReadDecimal(); case TypeCode.Char: // read as ushort because BinaryWriter fails on chars that are unicode surrogates return (char)ReadUInt16(); case TypeCode.StringUtf8: case TypeCode.StringUtf16: case TypeCode.StringRef_4Bytes: case TypeCode.StringRef_1Byte: case TypeCode.StringRef_2Bytes: return ReadStringValue(code); case TypeCode.DateTime: return DateTime.FromBinary(ReadInt64()); default: throw ExceptionUtilities.UnexpectedValue(code); } } public Encoding? ReadEncoding() { var code = (TypeCode)ReadByte(); switch (code) { case TypeCode.Null: return null; case TypeCode.EncodingName: return Encoding.GetEncoding(ReadRequiredString()); case >= TypeCode.FirstWellKnownTextEncoding and <= TypeCode.LastWellKnownTextEncoding: return ToEncodingKind(code).GetEncoding(); case TypeCode.EncodingCodePage: return Encoding.GetEncoding(ReadInt32()); default: throw ExceptionUtilities.UnexpectedValue(code); } static TextEncodingKind ToEncodingKind(TypeCode code) { Debug.Assert(code is >= TypeCode.FirstWellKnownTextEncoding and <= TypeCode.LastWellKnownTextEncoding); return Microsoft.CodeAnalysis.EncodingExtensions.FirstTextEncodingKind + (byte)(code - TypeCode.FirstWellKnownTextEncoding); } } public byte[] ReadByteArray() { var (result, _) = ReadRawArray<byte>(static (reader, array, length) => reader.Read(array, 0, length)); return result; } public char[] ReadCharArray() { var (result, _) = ReadCharArray(getArray: null); return result; } public (char[] array, int length) ReadCharArray(Func<int, char[]>? getArray) => ReadRawArray(static (reader, array, length) => reader.Read(array, 0, length), getArray); public (T[] array, int length) ReadRawArray<T>( Func<BinaryReader, T[], int, int> read, Func<int, T[]>? getArray = null) { // Defer to caller provided getArray if provided. Otherwise, we'll just allocate the array ourselves. getArray ??= static length => length == 0 ? [] : new T[length]; var length = ReadArrayLength(); var array = getArray(length); var charsRead = read(_reader, array, length); return (array, charsRead); } internal uint ReadCompressedUInt() { var info = ReadByte(); var marker = (byte)(info & ObjectWriter.ByteMarkerMask); var byte0 = (byte)(info & ~ObjectWriter.ByteMarkerMask); if (marker == ObjectWriter.Byte1Marker) { return byte0; } if (marker == ObjectWriter.Byte2Marker) { var byte1 = ReadByte(); return (((uint)byte0) << 8) | byte1; } if (marker == ObjectWriter.Byte4Marker) { var byte1 = ReadByte(); var byte2 = ReadByte(); var byte3 = ReadByte(); return (((uint)byte0) << 24) | (((uint)byte1) << 16) | (((uint)byte2) << 8) | byte3; } throw ExceptionUtilities.UnexpectedValue(marker); } private string? ReadStringValue() { var kind = (TypeCode)ReadByte(); return kind == TypeCode.Null ? null : ReadStringValue(kind); } private string ReadStringValue(TypeCode kind) { return kind switch { TypeCode.StringRef_1Byte => _stringReferenceMap.GetValue(ReadByte()), TypeCode.StringRef_2Bytes => _stringReferenceMap.GetValue(ReadUInt16()), TypeCode.StringRef_4Bytes => _stringReferenceMap.GetValue(ReadInt32()), TypeCode.StringUtf16 or TypeCode.StringUtf8 => ReadStringLiteral(kind), _ => throw ExceptionUtilities.UnexpectedValue(kind), }; } private unsafe string ReadStringLiteral(TypeCode kind) { string value; if (kind == TypeCode.StringUtf8) { value = _reader.ReadString(); } else { // This is rare, just allocate UTF-16 bytes for simplicity. var characterCount = (int)ReadCompressedUInt(); var bytes = _reader.ReadBytes(characterCount * sizeof(char)); fixed (byte* bytesPtr = bytes) { value = new string((char*)bytesPtr, 0, characterCount); } } _stringReferenceMap.AddValue(value); return value; } private int ReadArrayLength() => (TypeCode)ReadByte() switch { TypeCode.Array_0 => 0, TypeCode.Array_1 => 1, TypeCode.Array_2 => 2, TypeCode.Array_3 => 3, _ => (int)this.ReadCompressedUInt(), }; } |