// File: System\IO\Compression\ZipHelper.cs
// Web Access
// Project: src\src\libraries\System.IO.Compression\src\System.IO.Compression.csproj (System.IO.Compression)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Diagnostics;
using System.Text;
 
namespace System.IO.Compression;
 
internal static partial class ZipHelper
{
    // A 32-bit value with every bit set.
    internal const uint Mask32Bit = 0xFFFFFFFF;
    // A 16-bit value with every bit set.
    internal const ushort Mask16Bit = 0xFFFF;

    // Size, in bytes, of the blocks that SeekBackwardsToSignature reads while scanning backwards.
    private const int BackwardsSeekingBufferSize = 4096;

    // Inclusive range of years a DOS date/time can represent: the year is stored as a 7-bit
    // offset from ValidZipDate_YearMin, so the last representable year is 1980 + 127 = 2107.
    internal const int ValidZipDate_YearMin = 1980;
    internal const int ValidZipDate_YearMax = 2107;

    // Value DosTimeToDateTime returns when the stored DOS date/time is zero or cannot be
    // converted to a DateTime (midnight on January 1st of ValidZipDate_YearMin).
    private static readonly DateTime s_invalidDateIndicator = new DateTime(ValidZipDate_YearMin, 1, 1, 0, 0, 0);
 
    /// <summary>
    /// Chooses the encoding used to turn <paramref name="text"/> into bytes.
    /// </summary>
    /// <param name="text">The string to inspect; must not be null.</param>
    /// <returns>
    /// <see cref="Encoding.ASCII"/> when every character lies in the range 32-126 (this includes the
    /// empty string); otherwise <see cref="Encoding.UTF8"/>.
    /// </returns>
    internal static Encoding GetEncoding(string text)
    {
        // The Zip format uses code page 437 when the Unicode bit is not set. CP437 agrees with
        // ASCII only for characters 32-126, so a string made up solely of those characters is
        // treated as ASCII; anything outside that range is encoded as UTF-8 instead.
        bool hasCharOutsidePrintableAscii = text.AsSpan().ContainsAnyExceptInRange((char)32, (char)126);

        return hasCharOutsidePrintableAscii ? Encoding.UTF8 : Encoding.ASCII;
    }
 
    /// <summary>
    /// Reads at least <paramref name="bytesToRead"/> bytes from <paramref name="stream"/> into
    /// <paramref name="buffer"/>, failing if the stream ends first.
    /// </summary>
    /// <param name="stream">The stream to read from.</param>
    /// <param name="buffer">The destination for the bytes that are read.</param>
    /// <param name="bytesToRead">The minimum number of bytes that must be read.</param>
    /// <returns>The byte count reported by the read; never less than <paramref name="bytesToRead"/>.</returns>
    /// <exception cref="IOException">The stream ended before <paramref name="bytesToRead"/> bytes were read.</exception>
    internal static int ReadBytes(Stream stream, Span<byte> buffer, int bytesToRead)
    {
        // End of stream is detected here rather than by ReadAtLeast so that it surfaces as an
        // IOException carrying this library's own message.
        int count = stream.ReadAtLeast(buffer, bytesToRead, throwOnEndOfStream: false);

        return count >= bytesToRead
            ? count
            : throw new IOException(SR.UnexpectedEndOfStream);
    }
 
    // Decodes a packed 32-bit DOS date/time. A value of zero, or one whose fields are rejected by the
    // DateTime constructor, silently yields s_invalidDateIndicator instead of an exception.
    internal static DateTime DosTimeToDateTime(uint dateTime)
    {
        if (dateTime == 0)
        {
            return s_invalidDateIndicator;
        }

        // DosTime layout (32 bits, most significant field first):
        //   Year:   7 bits, unsigned offset from ValidZipDate_YearMin (1980)
        //   Month:  4 bits
        //   Day:    5 bits
        //   Hour:   5 bits
        //   Minute: 6 bits
        //   Second: 5 bits, stored in units of two seconds

        // The fields are unsigned, so they are extracted with unsigned arithmetic; each one is
        // small enough that the conversion to int below cannot overflow.
        uint yearField = ValidZipDate_YearMin + (dateTime >> 25);
        uint monthField = (dateTime >> 21) & 0xF;
        uint dayField = (dateTime >> 16) & 0x1F;
        uint hourField = (dateTime >> 11) & 0x1F;
        uint minuteField = (dateTime >> 5) & 0x3F;
        uint secondField = (dateTime & 0x001F) * 2; // 5 bits only, hence the 2-second granularity

        try
        {
            return new DateTime((int)yearField, (int)monthField, (int)dayField, (int)hourField, (int)minuteField, (int)secondField, 0);
        }
        catch (ArgumentException)
        {
            // ArgumentOutOfRangeException derives from ArgumentException, so this single handler
            // covers every way the constructor rejects an out-of-range field.
            return s_invalidDateIndicator;
        }
    }
 
    // Packs a DateTime into the 32-bit DOS date/time layout:
    //   year offset (7 bits) | month (4) | day (5) | hour (5) | minute (6) | second / 2 (5).
    // Assumes the value has already passed IsConvertibleToDosTime.
    internal static uint DateTimeToDosTime(DateTime dateTime)
    {
        // DateTime must be convertible to DosTime:
        Debug.Assert(ValidZipDate_YearMin <= dateTime.Year && dateTime.Year <= ValidZipDate_YearMax);

        // Only the low 7 bits of the year offset are kept, matching the width of the year field.
        uint yearOffset = (uint)((dateTime.Year - ValidZipDate_YearMin) & 0x7F);
        uint month = (uint)dateTime.Month;
        uint day = (uint)dateTime.Day;
        uint hour = (uint)dateTime.Hour;
        uint minute = (uint)dateTime.Minute;
        uint twoSecondUnits = (uint)(dateTime.Second / 2); // only 5 bits for seconds, so granularity is 2 sec.

        // Every component fits inside its own field, so combining with OR equals summing the shifted parts.
        return (yearOffset << 25)
             | (month << 21)
             | (day << 16)
             | (hour << 11)
             | (minute << 5)
             | twoSecondUnits;
    }
 
    // Searches backwards from the stream's current position for signatureToFind.
    // Assumes all bytes of signatureToFind are non zero and that maxBytesToRead is positive.
    // Blocks keep being read while the running count of newly examined bytes is at most maxBytesToRead
    // and the beginning of the stream has not been reached.
    // If the signature is found, returns true and positions the stream at the first byte of the signature;
    // otherwise returns false.
    internal static bool SeekBackwardsToSignature(Stream stream, ReadOnlySpan<byte> signatureToFind, int maxBytesToRead)
    {
        Debug.Assert(signatureToFind.Length != 0);
        Debug.Assert(maxBytesToRead > 0);

        // The stream is examined in blocks of BackwardsSeekingBufferSize bytes. Searching each block on its
        // own would miss a signature that starts in one block and ends in the next, so every step moves back
        // by only [BackwardsSeekingBufferSize - signatureToFind.Length] bytes. Consecutive blocks therefore
        // share [signatureToFind.Length] bytes, which are read twice but guarantee the signature is never
        // split across two blocks.
        byte[] rented = ArrayPool<byte>.Shared.Rent(BackwardsSeekingBufferSize);

        try
        {
            Span<byte> window = rented.AsSpan(0, BackwardsSeekingBufferSize);

            bool reachedStart = false;
            int examinedBytes = 0;
            int overlapToDiscount = 0; // the first block has no overlap with an earlier one

            while (!reachedStart && examinedBytes <= maxBytesToRead)
            {
                int bytesRead = SeekBackwardsAndRead(stream, window, signatureToFind.Length);

                // A short read means this is the last block; shrink the window to what was actually read.
                if (bytesRead < window.Length)
                {
                    reachedStart = true;
                    window = window.Slice(0, bytesRead);
                }

                int signatureIndex = window.LastIndexOf(signatureToFind);
                Debug.Assert(signatureIndex < window.Length);

                // Count only the bytes that were not already part of the previous block.
                examinedBytes += window.Length - overlapToDiscount;

                if (signatureIndex != -1)
                {
                    // signatureIndex is applied as an offset from the position SeekBackwardsAndRead left the stream at.
                    stream.Seek(signatureIndex, SeekOrigin.Current);
                    return true;
                }

                overlapToDiscount = signatureToFind.Length;
            }

            return false;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }
 
    // Moves the stream backwards, fills buffer from the new position and returns the number of bytes
    // actually read, leaving the stream at the position the read started from.
    // Successive buffers are allowed to overlap by 'overlap' bytes. This handles cases where the value
    // being searched for straddles buffers (i.e. where one buffer ends with the first X bytes being
    // searched for, and the neighbouring buffer begins with the remaining bytes.)
    private static int SeekBackwardsAndRead(Stream stream, Span<byte> buffer, int overlap)
    {
        long position = stream.Position;

        if (position < buffer.Length)
        {
            // Less than one full buffer lies before the current position: read from the very beginning,
            // asking for at least the bytes that precede the current position, then rewind to the start.
            stream.Seek(0, SeekOrigin.Begin);
            int shortRead = ReadBytes(stream, buffer, (int)position);
            stream.Seek(0, SeekOrigin.Begin);
            return shortRead;
        }

        Debug.Assert(overlap <= buffer.Length);

        // Step back by a full buffer minus the overlap, read a full buffer, then return to where the
        // read began so the stream sits at the first byte of the buffer's contents.
        stream.Seek(overlap - buffer.Length, SeekOrigin.Current);
        int fullRead = ReadBytes(stream, buffer, buffer.Length);
        stream.Seek(-buffer.Length, SeekOrigin.Current);
        return fullRead;
    }
    /// <summary>
    /// Converts <paramref name="text"/> into bytes, optionally limiting the size of the result.
    /// </summary>
    /// <param name="text">The string to encode. Null or empty yields an empty array.</param>
    /// <param name="encoding">
    /// The encoding to use. If null, the encoding is calculated from the string itself via GetEncoding.
    /// </param>
    /// <param name="maxBytes">
    /// If greater than zero, the maximum length of the returned array. Zero or a negative value means
    /// the result is not truncated.
    /// </param>
    /// <param name="isUTF8">
    /// Set to true when the encoding that was used has the UTF-8 code page (65001); false otherwise,
    /// including when <paramref name="text"/> is null or empty.
    /// </param>
    /// <returns>
    /// The encoded bytes. When truncation applies and the encoding is UTF-8, the cut is made at a code point
    /// boundary, so the result may be shorter than <paramref name="maxBytes"/>. For any other encoding
    /// the bytes are cut at exactly <paramref name="maxBytes"/>.
    /// </returns>
    internal static byte[] GetEncodedTruncatedBytesFromString(string? text, Encoding? encoding, int maxBytes, out bool isUTF8)
    {
        if (string.IsNullOrEmpty(text))
        {
            isUTF8 = false;
            return Array.Empty<byte>();
        }

        encoding ??= GetEncoding(text);
        isUTF8 = encoding.CodePage == 65001; // 65001 is the UTF-8 code page

        byte[] bytes = encoding.GetBytes(text);

        // Only a positive limit truncates. Treating negative values like zero keeps them from being
        // used as a slice bound below. If everything already fits there is nothing to cut.
        if (maxBytes <= 0 || bytes.Length <= maxBytes)
        {
            return bytes;
        }

        if (!isUTF8)
        {
            return bytes[0..maxBytes];
        }

        // UTF-8: keep the longest run of whole code points whose encoded size does not exceed maxBytes,
        // so that a multi-byte sequence is never split in the middle.
        int prefixByteCount = 0;
        foreach (Rune rune in text.EnumerateRunes())
        {
            int withNextRune = prefixByteCount + rune.Utf8SequenceLength;
            if (withNextRune > maxBytes)
            {
                break;
            }

            prefixByteCount = withNextRune;
        }

        // prefixByteCount may legitimately be zero when the first code point alone is larger than maxBytes.
        // It never exceeds maxBytes, which is smaller than bytes.Length here, so the slice is always in range.
        Debug.Assert(prefixByteCount <= maxBytes);

        return bytes[0..prefixByteCount];
    }
 
 
}