// File: System\IO\Compression\ZipHelper.cs
// Web Access
// Project: src\src\libraries\System.IO.Compression\src\System.IO.Compression.csproj (System.IO.Compression)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Diagnostics;
using System.Text;
 
namespace System.IO.Compression
{
    internal static class ZipHelper
    {
        // All-ones 32-bit value (0xFFFFFFFF).
        // NOTE(review): presumably the sentinel stored in 4-byte zip header fields whose real value
        // lives elsewhere (e.g. a Zip64 record) - no use is visible in this file; confirm against callers.
        internal const uint Mask32Bit = 0xFFFFFFFF;
        // All-ones 16-bit value (0xFFFF).
        // NOTE(review): presumably the 2-byte counterpart of Mask32Bit - confirm against callers.
        internal const ushort Mask16Bit = 0xFFFF;

        // Size, in bytes, of each block SeekBackwardsToSignature reads while scanning towards the
        // start of the stream.
        private const int BackwardsSeekingBufferSize = 4096;

        // Year range representable by a DOS timestamp: the year is a 7-bit offset from 1980,
        // so the largest value is 1980 + 127 = 2107.
        internal const int ValidZipDate_YearMin = 1980;
        internal const int ValidZipDate_YearMax = 2107;

        // Returned by DosTimeToDateTime when the packed value is zero or does not describe a real
        // date and time (midnight, 1 January of ValidZipDate_YearMin).
        private static readonly DateTime s_invalidDateIndicator = new DateTime(ValidZipDate_YearMin, 1, 1, 0, 0, 0);
 
        /// <summary>
        /// Chooses the encoding used to store <paramref name="text"/>: ASCII when every character
        /// lies in the printable ASCII range 32-126, UTF-8 as soon as any character falls outside it.
        /// </summary>
        internal static Encoding GetEncoding(string text)
        {
            // When the Unicode bit is not set the Zip format uses code page 437, which is identical
            // to ASCII only for characters 32-126. Text confined to that range is therefore safe to
            // emit as ASCII; anything else has to be written as UTF-8.
            bool hasCharOutsidePrintableAscii = text.AsSpan().ContainsAnyExceptInRange(' ', '~');

            return hasCharOutsidePrintableAscii ? Encoding.UTF8 : Encoding.ASCII;
        }
 
        /// <summary>
        /// Reads from <paramref name="stream"/> into <paramref name="buffer"/> until at least
        /// <paramref name="bytesToRead"/> bytes have been read.
        /// </summary>
        /// <returns>
        /// The byte count reported by Stream.ReadAtLeast; never less than <paramref name="bytesToRead"/>.
        /// </returns>
        /// <exception cref="IOException">
        /// The stream ended before <paramref name="bytesToRead"/> bytes could be read.
        /// </exception>
        internal static int ReadBytes(Stream stream, Span<byte> buffer, int bytesToRead)
        {
            // End-of-stream is reported through the return value here so that it can be surfaced
            // as an IOException with the zip-specific message below.
            int count = stream.ReadAtLeast(buffer, bytesToRead, throwOnEndOfStream: false);

            return count >= bytesToRead
                ? count
                : throw new IOException(SR.UnexpectedEndOfStream);
        }
 
        // Decodes a packed 32-bit DOS date/time. A value of zero, or one whose fields do not form a
        // real calendar date and time, silently yields s_invalidDateIndicator instead of throwing.
        internal static DateTime DosTimeToDateTime(uint dateTime)
        {
            if (dateTime == 0)
            {
                return s_invalidDateIndicator;
            }

            // Bit layout, least significant field first:
            //   bits  0-4   second / 2 (5 bits, so the granularity is two seconds)
            //   bits  5-10  minute     (6 bits)
            //   bits 11-15  hour       (5 bits)
            //   bits 16-20  day        (5 bits)
            //   bits 21-24  month      (4 bits)
            //   bits 25-31  year       (7 bits, unsigned offset from ValidZipDate_YearMin = 1980)
            // Shifting and masking happen on the unsigned value; every field is small enough
            // to be converted to int safely.
            int second = (int)(dateTime & 0x1F) * 2;
            int minute = (int)((dateTime >> 5) & 0x3F);
            int hour = (int)((dateTime >> 11) & 0x1F);
            int day = (int)((dateTime >> 16) & 0x1F);
            int month = (int)((dateTime >> 21) & 0xF);
            int year = ValidZipDate_YearMin + (int)(dateTime >> 25);

            try
            {
                return new DateTime(year, month, day, hour, minute, second, 0);
            }
            catch (ArgumentException)
            {
                // Also covers ArgumentOutOfRangeException, which derives from ArgumentException.
                return s_invalidDateIndicator;
            }
        }
 
        // Packs dateTime into the 32-bit DOS date/time layout. The caller is expected to have
        // verified that the year lies within [ValidZipDate_YearMin, ValidZipDate_YearMax];
        // this is only asserted here, not enforced.
        internal static uint DateTimeToDosTime(DateTime dateTime)
        {
            Debug.Assert(dateTime.Year >= ValidZipDate_YearMin && dateTime.Year <= ValidZipDate_YearMax);

            // The year is stored as a 7-bit offset from ValidZipDate_YearMin.
            int yearOffset = (dateTime.Year - ValidZipDate_YearMin) & 0x7F;

            // Each component fits inside its own bit field, so the fields never overlap.
            int packed =
                (yearOffset << 25) |        // 7 bits
                (dateTime.Month << 21) |    // 4 bits
                (dateTime.Day << 16) |      // 5 bits
                (dateTime.Hour << 11) |     // 5 bits
                (dateTime.Minute << 5) |    // 6 bits
                (dateTime.Second / 2);      // 5 bits: seconds are stored with a granularity of 2 sec.

            return (uint)packed;
        }
 
        // Scans backwards from the current position of stream, one block at a time, for the last
        // occurrence of signatureToFind.
        //   - signatureToFind must be non-empty and maxBytesToRead must be positive (asserted only).
        //   - maxBytesToRead is checked before each block is read: scanning stops once more than
        //     that many bytes have been examined, or once a block comes back shorter than requested.
        //   - Returns true with the stream positioned at the first byte of the signature when it is
        //     found; returns false otherwise.
        internal static bool SeekBackwardsToSignature(Stream stream, ReadOnlySpan<byte> signatureToFind, int maxBytesToRead)
        {
            Debug.Assert(signatureToFind.Length != 0);
            Debug.Assert(maxBytesToRead > 0);

            byte[] rented = ArrayPool<byte>.Shared.Rent(BackwardsSeekingBufferSize);

            try
            {
                // Searching each block with LastIndexOf alone would miss a signature that starts in
                // one block and ends in the next. To avoid that, every block still holds
                // BackwardsSeekingBufferSize bytes but each step only moves back by
                // [BackwardsSeekingBufferSize - signatureToFind.Length] bytes, so consecutive blocks
                // share signatureToFind.Length bytes and the signature is never split between them.
                Span<byte> block = rented.AsSpan(0, BackwardsSeekingBufferSize);

                int newBytesExamined = 0;
                int overlapWithPreviousBlock = 0; // the first block has no predecessor to overlap with
                bool shortBlockSeen = false;

                while (!shortBlockSeen && newBytesExamined <= maxBytesToRead)
                {
                    int bytesRead = SeekBackwardsAndRead(stream, block, signatureToFind.Length);

                    if (bytesRead < block.Length)
                    {
                        // A short block ends the scan after this pass; only search what was read.
                        shortBlockSeen = true;
                        block = block.Slice(0, bytesRead);
                    }

                    int signatureIndex = block.LastIndexOf(signatureToFind);
                    Debug.Assert(signatureIndex < block.Length);

                    if (signatureIndex >= 0)
                    {
                        // SeekBackwardsAndRead leaves the stream at the start of the block just read,
                        // so a relative seek by the match index lands on the signature.
                        stream.Seek(signatureIndex, SeekOrigin.Current);
                        return true;
                    }

                    // Bytes shared with the previous block were already counted once.
                    newBytesExamined += block.Length - overlapWithPreviousBlock;
                    overlapWithPreviousBlock = signatureToFind.Length;
                }

                return false;
            }
            finally
            {
                ArrayPool<byte>.Shared.Return(rented);
            }
        }
 
        // Reads the block that precedes the current stream position into buffer and returns the
        // number of bytes read, leaving the stream positioned at the start of that block.
        // Successive blocks overlap by `overlap` bytes so that a value straddling two blocks (the
        // earlier block ends with its first bytes, the later block starts with the rest) still
        // appears whole in one of them.
        private static int SeekBackwardsAndRead(Stream stream, Span<byte> buffer, int overlap)
        {
            if (stream.Position < buffer.Length)
            {
                // Less than one full block precedes the current position: read from the very
                // start of the stream and leave the stream there.
                int bytesBeforePosition = (int)stream.Position;
                stream.Seek(0, SeekOrigin.Begin);
                int readFromStart = ReadBytes(stream, buffer, bytesBeforePosition);
                stream.Seek(0, SeekOrigin.Begin);
                return readFromStart;
            }

            Debug.Assert(overlap <= buffer.Length);

            // Step back by a block minus the overlap, read a full block, then rewind to its start.
            stream.Seek(-(buffer.Length - overlap), SeekOrigin.Current);
            int readFromBlock = ReadBytes(stream, buffer, buffer.Length);
            stream.Seek(-buffer.Length, SeekOrigin.Current);
            return readFromBlock;
        }
 
        // Converts the specified string into bytes using the optional specified encoding.
        // If the encoding is null, it is chosen from the string itself via GetEncoding.
        // isUTF8 reports whether the encoding that was used has the UTF-8 code page; it is false
        // for null or empty text, for which an empty array is returned.
        // If maxBytes is greater than zero, the returned bytes are truncated so that their count
        // does not exceed that number; zero or a negative value means "no truncation".
        // For UTF-8 the cut is made on a code point boundary, so the result never ends in the
        // middle of a multi-byte sequence and may therefore be shorter than maxBytes. For any
        // other encoding the byte array is simply cut at maxBytes.
        internal static byte[] GetEncodedTruncatedBytesFromString(string? text, Encoding? encoding, int maxBytes, out bool isUTF8)
        {
            if (string.IsNullOrEmpty(text))
            {
                isUTF8 = false;
                return Array.Empty<byte>();
            }

            encoding ??= GetEncoding(text);
            isUTF8 = encoding.CodePage == Encoding.UTF8.CodePage;

            byte[] bytes = encoding.GetBytes(text);

            // Nothing to cut: truncation is disabled or the encoded text already fits.
            // Returning the array as-is avoids copying it a second time.
            if (maxBytes <= 0 || bytes.Length <= maxBytes)
            {
                return bytes;
            }

            if (!isUTF8)
            {
                return bytes[0..maxBytes];
            }

            // Add up the UTF-8 length of each code point and stop before the one that would
            // push the total past maxBytes.
            int bytesToKeep = 0;
            foreach (Rune rune in text.EnumerateRunes())
            {
                int lengthWithRune = bytesToKeep + rune.Utf8SequenceLength;
                if (lengthWithRune > maxBytes)
                {
                    break;
                }

                bytesToKeep = lengthWithRune;
            }

            Debug.Assert(bytesToKeep > 0);
            // bytesToKeep <= maxBytes < bytes.Length at this point, so the slice is always in range.
            Debug.Assert(bytesToKeep <= bytes.Length);

            return bytes[0..bytesToKeep];
        }
    }
}