File: EmbeddedText.cs
Web Access
Project: src\src\Compilers\Core\Portable\Microsoft.CodeAnalysis.csproj (Microsoft.CodeAnalysis)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Reflection.Metadata;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
 
namespace Microsoft.CodeAnalysis
{
    /// <summary>
    /// Represents text to be embedded in a PDB.
    /// </summary>
    public sealed class EmbeddedText
    {
        /// <summary>
        /// The maximum number of bytes in to write out uncompressed.
        ///
        /// This prevents wasting resources on compressing tiny files with little to negative gain
        /// in PDB file size.
        ///
        /// Chosen as the point at which we start to see > 10% blob size reduction using all
        /// current source files in corefx and roslyn as sample data. 
        /// </summary>
        internal const int CompressionThreshold = 200;
 
        /// <summary>
        /// The path to the file to embed.
        /// </summary>
        /// <remarks>See remarks of <see cref="SyntaxTree.FilePath"/></remarks>
        /// <remarks>Empty file paths are disallowed, as the debugger finds source by looking up files by their name (and then verifying their signature)</remarks>
        public string FilePath { get; }
 
        /// <summary>
        /// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
        /// </summary>
        public SourceHashAlgorithm ChecksumAlgorithm { get; }
 
        /// <summary>
        /// The <see cref="ChecksumAlgorithm"/> hash of the uncompressed bytes
        /// that's saved to the PDB.
        /// </summary>
        public ImmutableArray<byte> Checksum { get; }
 
        private EmbeddedText(string filePath, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray<byte> blob)
        {
            Debug.Assert(filePath.Length > 0);
            Debug.Assert(SourceHashAlgorithms.IsSupportedAlgorithm(checksumAlgorithm));
            Debug.Assert(!blob.IsDefault && blob.Length >= sizeof(int));
 
            FilePath = filePath;
            Checksum = checksum;
            ChecksumAlgorithm = checksumAlgorithm;
            Blob = blob;
        }
 
        /// <summary>
        /// The content that will be written to the PDB.
        /// </summary>
        /// <remarks>
        /// Internal since this is an implementation detail. The only public
        /// contract is that you can pass EmbeddedText instances to Emit.
        /// It just so happened that doing this up-front was most practical
        /// and efficient, but we don't want to be tied to it.
        /// 
        /// For efficiency, the format of this blob is exactly as it is written
        /// to the PDB,which prevents extra copies being made during emit.
        ///
        /// The first 4 bytes (little endian int32) indicate the format:
        ///
        ///            0: data that follows is uncompressed
        ///     Positive: data that follows is deflate compressed and value is original, uncompressed size
        ///     Negative: invalid at this time, but reserved to mark a different format in the future.
        /// </remarks>
        internal ImmutableArray<byte> Blob { get; }
 
        /// <summary>
        /// Constructs a <see cref="EmbeddedText"/> for embedding the given <see cref="SourceText"/>.
        /// </summary>
        /// <param name="filePath">The file path (pre-normalization) to use in the PDB.</param>
        /// <param name="text">The source text to embed.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="filePath"/> is null.
        /// <paramref name="text"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="filePath"/> empty.
        /// <paramref name="text"/> cannot be embedded (see <see cref="SourceText.CanBeEmbedded"/>).
        /// </exception>
        public static EmbeddedText FromSource(string filePath, SourceText text)
        {
            ValidateFilePath(filePath);
 
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }
 
            if (!text.CanBeEmbedded)
            {
                throw new ArgumentException(CodeAnalysisResources.SourceTextCannotBeEmbedded, nameof(text));
            }
 
            if (!text.PrecomputedEmbeddedTextBlob.IsDefault)
            {
                return new EmbeddedText(filePath, text.GetChecksum(), text.ChecksumAlgorithm, text.PrecomputedEmbeddedTextBlob);
            }
 
            return new EmbeddedText(filePath, text.GetChecksum(), text.ChecksumAlgorithm, CreateBlob(text));
        }
 
        /// <summary>
        /// Constructs an <see cref="EmbeddedText"/> from stream content.
        /// </summary>
        /// <param name="filePath">The file path (pre-normalization) to use in the PDB.</param>
        /// <param name="stream">The stream.</param>
        /// <param name="checksumAlgorithm">Hash algorithm to use to calculate checksum of the text that's saved to PDB.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="filePath" /> is null.
        /// <paramref name="stream"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="filePath" /> is empty.
        /// <paramref name="stream"/> doesn't support reading or seeking.
        /// <paramref name="checksumAlgorithm"/> is not supported.
        /// </exception>
        /// <exception cref="IOException">An I/O error occurs.</exception>
        /// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
        public static EmbeddedText FromStream(string filePath, Stream stream, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
        {
            ValidateFilePath(filePath);
 
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
 
            if (!stream.CanRead || !stream.CanSeek)
            {
                throw new ArgumentException(CodeAnalysisResources.StreamMustSupportReadAndSeek, nameof(stream));
            }
 
            SourceText.ValidateChecksumAlgorithm(checksumAlgorithm);
 
            return new EmbeddedText(
                filePath,
                SourceText.CalculateChecksum(stream, checksumAlgorithm),
                checksumAlgorithm,
                CreateBlob(stream));
        }
 
        /// <summary>
        /// Constructs an <see cref="EmbeddedText"/> from bytes.
        /// </summary>
        /// <param name="filePath">The file path (pre-normalization) to use in the PDB.</param>
        /// <param name="bytes">The bytes.</param>
        /// <param name="checksumAlgorithm">Hash algorithm to use to calculate checksum of the text that's saved to PDB.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="bytes"/> is default-initialized.
        /// <paramref name="filePath" /> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="filePath" /> is empty.
        /// <paramref name="checksumAlgorithm"/> is not supported.
        /// </exception>
        /// <exception cref="IOException">An I/O error occurs.</exception>
        /// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
        public static EmbeddedText FromBytes(string filePath, ArraySegment<byte> bytes, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
        {
            ValidateFilePath(filePath);
 
            if (bytes.Array == null)
            {
                throw new ArgumentNullException(nameof(bytes));
            }
 
            SourceText.ValidateChecksumAlgorithm(checksumAlgorithm);
 
            return new EmbeddedText(
                filePath,
                SourceText.CalculateChecksum(bytes.Array, bytes.Offset, bytes.Count, checksumAlgorithm),
                checksumAlgorithm,
                CreateBlob(bytes));
        }
 
        /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="filePath"/> is empty.</exception>
        private static void ValidateFilePath(string filePath)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException(nameof(filePath));
            }
 
            if (filePath.Length == 0)
            {
                throw new ArgumentException(CodeAnalysisResources.ArgumentCannotBeEmpty, nameof(filePath));
            }
        }
 
        /// <summary>
        /// Creates the blob to be saved to the PDB.
        /// </summary>
        internal static ImmutableArray<byte> CreateBlob(Stream stream)
        {
            RoslynDebug.Assert(stream != null);
            RoslynDebug.Assert(stream.CanRead);
            RoslynDebug.Assert(stream.CanSeek);
 
            long longLength = stream.Length;
            Debug.Assert(longLength >= 0);
 
            if (longLength > int.MaxValue)
            {
                throw new IOException(CodeAnalysisResources.StreamIsTooLong);
            }
 
            stream.Seek(0, SeekOrigin.Begin);
            int length = (int)longLength;
 
            if (length < CompressionThreshold)
            {
                using (var builder = Cci.PooledBlobBuilder.GetInstance())
                {
                    builder.WriteInt32(0);
                    int bytesWritten = builder.TryWriteBytes(stream, length);
 
                    if (length != bytesWritten)
                    {
                        throw new EndOfStreamException();
                    }
 
                    return builder.ToImmutableArray();
                }
            }
            else
            {
                using (var builder = BlobBuildingStream.GetInstance())
                {
                    builder.WriteInt32(length);
 
                    using (var deflater = new CountingDeflateStream(builder, CompressionLevel.Optimal, leaveOpen: true))
                    {
                        stream.CopyTo(deflater);
 
                        if (length != deflater.BytesWritten)
                        {
                            throw new EndOfStreamException();
                        }
                    }
 
                    return builder.ToImmutableArray();
                }
            }
        }
 
        internal static ImmutableArray<byte> CreateBlob(ArraySegment<byte> bytes)
        {
            RoslynDebug.Assert(bytes.Array != null);
 
            if (bytes.Count < CompressionThreshold)
            {
                using (var builder = Cci.PooledBlobBuilder.GetInstance())
                {
                    builder.WriteInt32(0);
                    builder.WriteBytes(bytes.Array, bytes.Offset, bytes.Count);
                    return builder.ToImmutableArray();
                }
            }
            else
            {
                using (var builder = BlobBuildingStream.GetInstance())
                {
                    builder.WriteInt32(bytes.Count);
 
                    using (var deflater = new CountingDeflateStream(builder, CompressionLevel.Optimal, leaveOpen: true))
                    {
                        deflater.Write(bytes.Array, bytes.Offset, bytes.Count);
                    }
 
                    return builder.ToImmutableArray();
                }
            }
        }
 
        private static ImmutableArray<byte> CreateBlob(SourceText text)
        {
            RoslynDebug.Assert(text != null);
            RoslynDebug.Assert(text.CanBeEmbedded);
            RoslynDebug.Assert(text.Encoding != null);
            RoslynDebug.Assert(text.PrecomputedEmbeddedTextBlob.IsDefault);
 
            int maxByteCount;
            try
            {
                maxByteCount = text.Encoding.GetMaxByteCount(text.Length);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Encoding does not provide a way to predict that max byte count would not
                // fit in Int32 and we must therefore catch ArgumentOutOfRange to handle that
                // case.
                maxByteCount = int.MaxValue;
            }
 
            using (var builder = BlobBuildingStream.GetInstance())
            {
                if (maxByteCount < CompressionThreshold)
                {
                    builder.WriteInt32(0);
 
                    using (var writer = new StreamWriter(builder, text.Encoding, bufferSize: Math.Max(1, text.Length), leaveOpen: true))
                    {
                        text.Write(writer);
                    }
                }
                else
                {
                    Blob reserved = builder.ReserveBytes(4);
 
                    using (var deflater = new CountingDeflateStream(builder, CompressionLevel.Optimal, leaveOpen: true))
                    {
                        using (var writer = new StreamWriter(deflater, text.Encoding, bufferSize: 1024, leaveOpen: true))
                        {
                            text.Write(writer);
                        }
 
                        new BlobWriter(reserved).WriteInt32(deflater.BytesWritten);
                    }
                }
 
                return builder.ToImmutableArray();
            }
        }
 
        internal Cci.DebugSourceInfo GetDebugSourceInfo()
        {
            return new Cci.DebugSourceInfo(Checksum, ChecksumAlgorithm, Blob);
        }
 
        private sealed class CountingDeflateStream : DeflateStream
        {
            public CountingDeflateStream(Stream stream, CompressionLevel compressionLevel, bool leaveOpen)
                : base(stream, compressionLevel, leaveOpen)
            {
            }
 
            public int BytesWritten { get; private set; }
 
            public override void Write(byte[] array, int offset, int count)
            {
                base.Write(array, offset, count);
 
                // checked arithmetic is release-enabled quasi-assert. We start with at most 
                // int.MaxValue chars so compression or encoding would have to be abysmal for
                // this to overflow. We'd probably be lucky to even get this far but if we do
                // we should fail fast.
                checked { BytesWritten += count; }
            }
 
            public override void WriteByte(byte value)
            {
                base.WriteByte(value);
 
                // same rationale for checked arithmetic as above.
                checked { BytesWritten++; };
            }
 
            public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
            {
                throw ExceptionUtilities.Unreachable();
            }
        }
    }
}