File: StreamMappedString.cs
Web Access
Project: ..\..\..\src\Tasks\Microsoft.Build.Tasks.csproj (Microsoft.Build.Tasks.Core)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.IO;
using System.Text;
 
#nullable disable
 
namespace Microsoft.Build.Shared.LanguageParser
{
    /// <summary>
    /// A class with string-like semantics mapped over a Stream.
    /// </summary>
    internal sealed class StreamMappedString
    {
        /// <summary>
        /// The raw binary stream that's being read.
        /// </summary>
        private Stream _binaryStream;
 
        /// <summary>
        /// The reader on top of binaryStream. This is what interprets the encoding.
        /// </summary>
        private StreamReader _reader;
 
        /// <summary>
        /// When false, try to guess the encoding of binaryStream. When true, force the
        /// encoding to ANSI.
        /// </summary>
        private bool _forceANSI;
 
        /// <summary>
        /// The page number that 'currentPage' is pointing to.
        /// </summary>
        private int _currentPageNumber = -1;
 
        /// <summary>
        /// The final page number of the whole stream.
        /// </summary>
        private int _finalPageNumber = Int32.MaxValue;
 
        /// <summary>
        /// The number of characters read into currentPage.
        /// </summary>
        private int _charactersRead = 0;
 
        /// <summary>
        /// The page before currentPage.
        /// </summary>
        private char[] _priorPage = null;
 
        /// <summary>
        /// The most recently read page.
        /// </summary>
        private char[] _currentPage = null;
 
        /// <summary>
        /// Count of the total number of pages allocated.
        /// </summary>
        private int _pagesAllocated = 0;
 
        /// <summary>
        /// Size of pages to use for reading from source file.
        /// </summary>
        private int _pageSize = 0;
 
        /// <summary>
        /// Construct.
        /// </summary>
        /// <param name="binaryStream">The raw binary stream that's being read.</param>
        /// <param name="forceANSI">When false, try to guess the encoding of binaryStream. When true, force the encoding to ANSI.</param>
        public StreamMappedString(Stream binaryStream, bool forceANSI)
            : this(binaryStream, forceANSI, /* pageSize */ DefaultPageSize)
        {
        }
 
        /// <summary>
        /// Construct.
        /// </summary>
        /// <param name="binaryStream">The raw binary stream that's being read.</param>
        /// <param name="forceANSI">When false, try to guess the encoding of binaryStream. When true, force the encoding to ANSI.</param>
        /// <param name="pageSize">Size of pages to use for reading from source file.</param>
        internal StreamMappedString(Stream binaryStream, bool forceANSI, int pageSize)
        {
            _binaryStream = binaryStream;
            _forceANSI = forceANSI;
            _pageSize = pageSize;
            RestartReader();
        }
 
        /// <summary>
        /// Restart the stream reader at the beginning.
        /// </summary>
        private void RestartReader()
        {
            _currentPageNumber = -1;
            _charactersRead = 0;
            _priorPage = null;
            _currentPage = null;
 
            // Reset the stream if we're not at the beginning
            if (_binaryStream.Position != 0)
            {
                _binaryStream.Seek(0, SeekOrigin.Begin);
            }
 
            if (_forceANSI)
            {
                _reader = new StreamReader( // HIGHCHAR: Falling back to ANSI for VB source files.
                    _binaryStream,
#if FEATURE_ENCODING_DEFAULT
                    Encoding.Default,    // Default means ANSI.
#else
                    Encoding.ASCII,
#endif
                    false);                // If the reader had been able to guess the encoding it would have done so already.
            }
            else
            {
                Encoding utf8Encoding = new UTF8Encoding(false, true /* throw on illegal bytes */);
 
                _reader = new StreamReader( // HIGHCHAR: VB and C# source files are assumed to be UTF if there are no byte-order marks.
                        _binaryStream,
                        utf8Encoding,
                        true);            // Ask the reader to try to guess the file's encoding.
            }
        }
 
        /// <summary>
        /// Get the total number of pages allocated.
        /// </summary>
        public int PagesAllocated
        {
            get { return _pagesAllocated; }
        }
 
        /// <summary>
        /// The pagesize in characters that will be used if not specified.
        /// </summary>
        public static int DefaultPageSize
        {
            get { return 256; }
        }
 
        /// <summary>
        /// Return a particular character within the file.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        public char GetAt(int offset)
        {
            // Find the page number that contains offset.
            char[] page = GetPage(offset);
 
            // If null now, then the requested character is out of range.
            if (page == null)
            {
                throw new ArgumentOutOfRangeException(nameof(offset));
            }
 
            // Get the relative offset within the buffer.
            int relativeOffset = AbsoluteOffsetToPageOffset(offset);
 
            // Return the character.
            return page[relativeOffset];
        }
 
        /// <summary>
        /// Get the page that contains offset. Otherwise, null.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        private char[] GetPage(int offset)
        {
            int page = PageFromAbsoluteOffset(offset);
 
            // Is it earlier than the last page?
            if (page < _currentPageNumber - 1)
            {
                // Restart the stream. Perf hit.
                RestartReader();
            }
 
            // Read pages until the page is available.
            while (page > _currentPageNumber)
            {
                int originalPageNumber = _currentPageNumber;
 
                if (!ReadNextPage())
                {
                    break;
                }
 
                ErrorUtilities.VerifyThrow(originalPageNumber != _currentPageNumber, "Expected a new page.");
            }
 
            // Is it the current page?
            if (page == _currentPageNumber)
            {
                // If enough bytes were read to satisfy offset then return the buffer.
                if (_charactersRead > AbsoluteOffsetToPageOffset(offset))
                {
                    return _currentPage;
                }
 
                // Otherwise, null.
                return null;
            }
 
            // Is it the prior page now?
            if (page == _currentPageNumber - 1)
            {
                return _priorPage;
            }
 
            // Its out of range.
            return null;
        }
 
        /// <summary>
        /// Read the next page.
        /// </summary>
        /// <returns></returns>
        private bool ReadNextPage()
        {
            // Is this already known to be the last page?
            if (_currentPageNumber == _finalPageNumber)
            {
                // 0x0d should already be appended.
                return false;
            }
 
            // If so, read it in to the lastBuffer...
            ReadBlockStripEOF();
 
            // ...and then swap lastBuffer for buffer.
            SwapPages();
            ++_currentPageNumber;
 
            // If the number of bytesRead is less than the number requested
            // then this is the last page.
            if (_charactersRead < _pageSize)
            {
                // Mark this as the last page.
                _finalPageNumber = _currentPageNumber;
 
                // Add a 0xd if the last character is not a newline.
                if (!IsZeroLengthStream() && !TokenChar.IsNewLine(LastCharacterInStream()))
                {
                    AppendCharacterToStream('\xd');
                }
            }
 
            return _charactersRead > 0;
        }
 
        /// <summary>
        /// Read characters from the file, and strip out any 1A characters.
        /// </summary>
        private void ReadBlockStripEOF()
        {
            if (_priorPage == null)
            {
                ++_pagesAllocated;
                _priorPage = new char[_pageSize];
            }
 
            _charactersRead = _reader.ReadBlock(_priorPage, 0, _pageSize);
 
            for (int i = 0; i < _charactersRead; ++i)
            {
                if (_priorPage[i] == '\x1a')
                {
                    Array.Copy(_priorPage, i + 1, _priorPage, i, _charactersRead - i - 1);
                    _charactersRead += _reader.ReadBlock(_priorPage, _charactersRead - 1, 1);
 
                    --i;
                    --_charactersRead;
                }
            }
        }
 
        /// <summary>
        /// Add one character to the end of the stream.
        /// </summary>
        /// <param name="c"></param>
        private void AppendCharacterToStream(char c)
        {
            ErrorUtilities.VerifyThrow(_charactersRead != _pageSize, "Attempt to append to non-last page.");
 
            _currentPage[_charactersRead] = c;
            ++_charactersRead;
        }
 
        /// <summary>
        /// Retrieve the last character in the stream.
        /// </summary>
        /// <returns></returns>
        private char LastCharacterInStream()
        {
            char c;
            if (_charactersRead == 0)
            {
                ErrorUtilities.VerifyThrow(_priorPage != null, "There is no last character in the stream.");
                c = _priorPage[_pageSize - 1];
            }
            else
            {
                c = _currentPage[_charactersRead - 1];
            }
            return c;
        }
 
        /// <summary>
        /// Swap the current page for the last page.
        /// </summary>
        private void SwapPages()
        {
            char[] swap = _currentPage;
            _currentPage = _priorPage;
            _priorPage = swap;
        }
 
        /// <summary>
        /// True if this stream is zero length.
        /// </summary>
        /// <returns></returns>
        private bool IsZeroLengthStream()
        {
            return _charactersRead == 0 && _currentPageNumber == 0;
        }
 
        /// <summary>
        /// COnvert from absolute offset to relative offset within a particular page.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        private int AbsoluteOffsetToPageOffset(int offset)
        {
            return offset - (PageFromAbsoluteOffset(offset) * _pageSize);
        }
 
        /// <summary>
        /// Convert from offset to page number.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        private int PageFromAbsoluteOffset(int offset)
        {
            return offset / _pageSize;
        }
 
        /// <summary>
        /// Returns true of the given position is passed the end of the file.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        public bool IsPastEnd(int offset)
        {
            return GetPage(offset) == null;
        }
 
        /// <summary>
        /// Extract a substring.
        /// </summary>
        /// <param name="startPosition"></param>
        /// <param name="length"></param>
        /// <returns></returns>
        public string Substring(int startPosition, int length)
        {
            StringBuilder result = new StringBuilder(length);
 
            int charactersExtracted;
            for (int i = 0; i < length; i += charactersExtracted)
            {
                char[] page = GetPage(startPosition + i);
 
                // If we weren't able to read enough characters then throw an exception.
                if (page == null)
                {
                    throw new ArgumentOutOfRangeException(nameof(length));
                }
 
                int relativeStartPosition = AbsoluteOffsetToPageOffset(startPosition + i);
                int charactersOnPage = GetCharactersOnPage(startPosition + i);
 
                charactersExtracted = Math.Min(length - i, charactersOnPage - relativeStartPosition);
                ErrorUtilities.VerifyThrow(charactersExtracted > 0, "Expected non-zero extraction count.");
 
                result.Append(page, relativeStartPosition, charactersExtracted);
            }
            return result.ToString();
        }
 
        /// <summary>
        /// Returns the number of characters on the page given by offset.
        /// </summary>
        /// <param name="offset"></param>
        /// <returns></returns>
        private int GetCharactersOnPage(int offset)
        {
            int page = PageFromAbsoluteOffset(offset);
            ErrorUtilities.VerifyThrow(page >= _currentPageNumber - 1 && page <= _currentPageNumber, "Could not get character count for this page.");
 
            if (page == _currentPageNumber)
            {
                return _charactersRead;
            }
 
            return _pageSize;
        }
    }
}