File: System\Formats\Tar\TarHelpers.cs
Web Access
Project: src\src\libraries\System.Formats.Tar\src\System.Formats.Tar.csproj (System.Formats.Tar)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
 
namespace System.Formats.Tar
{
    // Static class containing a variety of helper methods.
    internal static partial class TarHelpers
    {
        internal const short RecordSize = 512;
        internal const int MaxBufferLength = 4096;
 
        internal const UnixFileMode ValidUnixFileModes =
            UnixFileMode.UserRead |
            UnixFileMode.UserWrite |
            UnixFileMode.UserExecute |
            UnixFileMode.GroupRead |
            UnixFileMode.GroupWrite |
            UnixFileMode.GroupExecute |
            UnixFileMode.OtherRead |
            UnixFileMode.OtherWrite |
            UnixFileMode.OtherExecute |
            UnixFileMode.StickyBit |
            UnixFileMode.SetGroup |
            UnixFileMode.SetUser;
 
        // Default mode for TarEntry created for a file-type.
        private const UnixFileMode DefaultFileMode =
            UnixFileMode.UserRead | UnixFileMode.UserWrite |
            UnixFileMode.GroupRead |
            UnixFileMode.OtherRead;
 
        // Default mode for TarEntry created for a directory-type.
        private const UnixFileMode DefaultDirectoryMode =
            DefaultFileMode |
            UnixFileMode.UserExecute | UnixFileMode.GroupExecute | UnixFileMode.OtherExecute;
 
        internal static int GetDefaultMode(TarEntryType type)
            => type is TarEntryType.Directory or TarEntryType.DirectoryList ? (int)DefaultDirectoryMode : (int)DefaultFileMode;
 
        // Helps advance the stream a total number of bytes larger than int.MaxValue.
        internal static void AdvanceStream(Stream archiveStream, long bytesToDiscard)
        {
            if (archiveStream.CanSeek)
            {
                archiveStream.Position += bytesToDiscard;
            }
            else if (bytesToDiscard > 0)
            {
                byte[] buffer = ArrayPool<byte>.Shared.Rent(minimumLength: (int)Math.Min(MaxBufferLength, bytesToDiscard));
                while (bytesToDiscard > 0)
                {
                    int currentLengthToRead = (int)Math.Min(MaxBufferLength, bytesToDiscard);
                    archiveStream.ReadExactly(buffer.AsSpan(0, currentLengthToRead));
                    bytesToDiscard -= currentLengthToRead;
                }
                ArrayPool<byte>.Shared.Return(buffer);
            }
        }
 
        // Asynchronously helps advance the stream a total number of bytes larger than int.MaxValue.
        internal static async ValueTask AdvanceStreamAsync(Stream archiveStream, long bytesToDiscard, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();
 
            if (archiveStream.CanSeek)
            {
                archiveStream.Position += bytesToDiscard;
            }
            else if (bytesToDiscard > 0)
            {
                byte[] buffer = ArrayPool<byte>.Shared.Rent(minimumLength: (int)Math.Min(MaxBufferLength, bytesToDiscard));
                while (bytesToDiscard > 0)
                {
                    int currentLengthToRead = (int)Math.Min(MaxBufferLength, bytesToDiscard);
                    await archiveStream.ReadExactlyAsync(buffer, 0, currentLengthToRead, cancellationToken).ConfigureAwait(false);
                    bytesToDiscard -= currentLengthToRead;
                }
                ArrayPool<byte>.Shared.Return(buffer);
            }
        }
 
        // Helps copy a specific number of bytes from one stream into another.
        internal static void CopyBytes(Stream origin, Stream destination, long bytesToCopy)
        {
            byte[] buffer = ArrayPool<byte>.Shared.Rent(minimumLength: (int)Math.Min(MaxBufferLength, bytesToCopy));
            while (bytesToCopy > 0)
            {
                int currentLengthToRead = (int)Math.Min(MaxBufferLength, bytesToCopy);
                origin.ReadExactly(buffer.AsSpan(0, currentLengthToRead));
                destination.Write(buffer.AsSpan(0, currentLengthToRead));
                bytesToCopy -= currentLengthToRead;
            }
            ArrayPool<byte>.Shared.Return(buffer);
        }
 
        // Asynchronously helps copy a specific number of bytes from one stream into another.
        internal static async ValueTask CopyBytesAsync(Stream origin, Stream destination, long bytesToCopy, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();
 
            byte[] buffer = ArrayPool<byte>.Shared.Rent(minimumLength: (int)Math.Min(MaxBufferLength, bytesToCopy));
            while (bytesToCopy > 0)
            {
                int currentLengthToRead = (int)Math.Min(MaxBufferLength, bytesToCopy);
                Memory<byte> memory = buffer.AsMemory(0, currentLengthToRead);
                await origin.ReadExactlyAsync(buffer, 0, currentLengthToRead, cancellationToken).ConfigureAwait(false);
                await destination.WriteAsync(memory, cancellationToken).ConfigureAwait(false);
                bytesToCopy -= currentLengthToRead;
            }
            ArrayPool<byte>.Shared.Return(buffer);
        }
 
        // Returns the number of bytes until the next multiple of the record size.
        internal static int CalculatePadding(long size)
        {
            long ceilingMultipleOfRecordSize = ((RecordSize - 1) | (size - 1)) + 1;
            int padding = (int)(ceilingMultipleOfRecordSize - size);
            return padding;
        }
 
        // Returns true if all the bytes in the specified array are nulls, false otherwise.
        internal static bool IsAllNullBytes(ReadOnlySpan<byte> buffer) =>
            !buffer.ContainsAnyExcept((byte)0);
 
        // Converts the specified number of seconds that have passed since the Unix Epoch to a DateTimeOffset.
        internal static DateTimeOffset GetDateTimeOffsetFromSecondsSinceEpoch(long secondsSinceUnixEpoch) =>
            new DateTimeOffset((secondsSinceUnixEpoch * TimeSpan.TicksPerSecond) + DateTime.UnixEpoch.Ticks, TimeSpan.Zero);
 
        // Converts the specified number of seconds that have passed since the Unix Epoch to a DateTimeOffset.
        private static DateTimeOffset GetDateTimeOffsetFromSecondsSinceEpoch(decimal secondsSinceUnixEpoch) =>
            new DateTimeOffset((long)(secondsSinceUnixEpoch * TimeSpan.TicksPerSecond) + DateTime.UnixEpoch.Ticks, TimeSpan.Zero);
 
        // Converts the specified DateTimeOffset to the number of seconds that have passed since the Unix Epoch.
        private static decimal GetSecondsSinceEpochFromDateTimeOffset(DateTimeOffset dateTimeOffset) =>
            ((decimal)(dateTimeOffset.UtcDateTime - DateTime.UnixEpoch).Ticks) / TimeSpan.TicksPerSecond;
 
        // If the specified fieldName is found in the provided dictionary and it is a valid decimal number, returns true and sets the value in 'dateTimeOffset'.
        internal static bool TryGetDateTimeOffsetFromTimestampString(Dictionary<string, string>? dict, string fieldName, out DateTimeOffset dateTimeOffset)
        {
            dateTimeOffset = default;
            if (dict != null &&
                dict.TryGetValue(fieldName, out string? value) &&
                decimal.TryParse(value, NumberStyles.Any, CultureInfo.InvariantCulture, out decimal secondsSinceEpoch))
            {
                dateTimeOffset = GetDateTimeOffsetFromSecondsSinceEpoch(secondsSinceEpoch);
                return true;
            }
            return false;
        }
 
        // Converts the specified DateTimeOffset to the string representation of seconds since the Unix Epoch.
        internal static string GetTimestampStringFromDateTimeOffset(DateTimeOffset timestamp)
        {
            decimal secondsSinceEpoch = GetSecondsSinceEpochFromDateTimeOffset(timestamp);
 
            // Use 'G' to ensure the decimals get preserved (avoid losing precision).
            return secondsSinceEpoch.ToString("G", CultureInfo.InvariantCulture);
        }
 
        // If the specified fieldName is found in the provided dictionary and is a valid string representation of a number, returns true and sets the value in 'baseTenInteger'.
        internal static bool TryGetStringAsBaseTenInteger(IReadOnlyDictionary<string, string> dict, string fieldName, out int baseTenInteger)
        {
            if (dict.TryGetValue(fieldName, out string? strNumber) && !string.IsNullOrEmpty(strNumber))
            {
                baseTenInteger = int.Parse(strNumber, CultureInfo.InvariantCulture);
                return true;
            }
 
            baseTenInteger = 0;
            return false;
        }
 
        // If the specified fieldName is found in the provided dictionary and is a valid string representation of a number, returns true and sets the value in 'baseTenLong'.
        internal static bool TryGetStringAsBaseTenLong(IReadOnlyDictionary<string, string> dict, string fieldName, out long baseTenLong)
        {
            if (dict.TryGetValue(fieldName, out string? strNumber) && !string.IsNullOrEmpty(strNumber))
            {
                baseTenLong = long.Parse(strNumber, CultureInfo.InvariantCulture);
                return true;
            }
 
            baseTenLong = 0;
            return false;
        }
 
        // When writing an entry that came from an archive of a different format, if its entry type happens to
        // be an incompatible regular file entry type, convert it to the compatible one.
        // No change for all other entry types.
        internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format, TarEntryType entryType)
        {
            if (format is TarEntryFormat.V7)
            {
                if (entryType is TarEntryType.RegularFile)
                {
                    return TarEntryType.V7RegularFile;
                }
            }
            else if (entryType is TarEntryType.V7RegularFile)
            {
                return TarEntryType.RegularFile;
            }
 
            return entryType;
        }
 
        /// <summary>Parses a numeric field.</summary>
        internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>, IBinaryInteger<T>
        {
            // The tar standard specifies that numeric fields are stored using an octal representation.
            // This limits the range of values that can be stored in the fields.
            // To increase the supported range, a GNU extension defines that when the leading byte is
            // '0xff'/'0x80' the remaining bytes are a negative/positive big formatted endian value.
            // Like the 'tar' tool we are permissive when encountering this representation in non GNU formats.
            byte leadingByte = buffer[0];
            if (leadingByte == 0xff)
            {
                return T.ReadBigEndian(buffer, isUnsigned: false);
            }
            else if (leadingByte == 0x80)
            {
                return T.ReadBigEndian(buffer.Slice(1), isUnsigned: true);
            }
            else
            {
                return ParseOctal<T>(buffer);
            }
        }
 
        /// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
        internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
        {
            buffer = TrimEndingNullsAndSpaces(buffer);
            buffer = TrimLeadingNullsAndSpaces(buffer);
 
            if (buffer.Length == 0)
            {
                return T.Zero;
            }
 
            T octalFactor = T.CreateTruncating(8u);
            T value = T.Zero;
            foreach (byte b in buffer)
            {
                uint digit = (uint)(b - '0');
                if (digit >= 8)
                {
                    ThrowInvalidNumber();
                }
 
                value = checked((value * octalFactor) + T.CreateTruncating(digit));
            }
 
            return value;
        }
 
        [DoesNotReturn]
        private static void ThrowInvalidNumber() =>
            throw new InvalidDataException(SR.Format(SR.TarInvalidNumber));
 
        // Returns the string contained in the specified buffer of bytes,
        // in the specified encoding, removing the trailing null or space chars.
        private static string GetTrimmedString(ReadOnlySpan<byte> buffer, Encoding encoding)
        {
            buffer = TrimEndingNullsAndSpaces(buffer);
            return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer);
        }
 
        internal static ReadOnlySpan<byte> TrimEndingNullsAndSpaces(ReadOnlySpan<byte> buffer)
        {
            int trimmedLength = buffer.Length;
            while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32)
            {
                trimmedLength--;
            }
 
            return buffer.Slice(0, trimmedLength);
        }
 
        private static ReadOnlySpan<byte> TrimLeadingNullsAndSpaces(ReadOnlySpan<byte> buffer)
        {
            int newStart = 0;
            while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
            {
                newStart++;
            }
 
            return buffer.Slice(newStart);
        }
 
        // Returns the ASCII string contained in the specified buffer of bytes,
        // removing the trailing null or space chars.
        internal static string GetTrimmedAsciiString(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.ASCII);
 
        // Returns the UTF8 string contained in the specified buffer of bytes,
        // removing the trailing null or space chars.
        internal static string GetTrimmedUtf8String(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.UTF8);
 
        // After the file contents, there may be zero or more null characters,
        // which exist to ensure the data is aligned to the record size. Skip them and
        // set the stream position to the first byte of the next entry.
        internal static int SkipBlockAlignmentPadding(Stream archiveStream, long size)
        {
            int bytesToSkip = CalculatePadding(size);
            AdvanceStream(archiveStream, bytesToSkip);
            return bytesToSkip;
        }
 
        // After the file contents, there may be zero or more null characters,
        // which exist to ensure the data is aligned to the record size.
        // Asynchronously skip them and set the stream position to the first byte of the next entry.
        internal static async ValueTask<int> SkipBlockAlignmentPaddingAsync(Stream archiveStream, long size, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();
 
            int bytesToSkip = CalculatePadding(size);
            await AdvanceStreamAsync(archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false);
            return bytesToSkip;
        }
 
        // Throws if the specified entry type is not supported for the specified format.
        internal static void ThrowIfEntryTypeNotSupported(TarEntryType entryType, TarEntryFormat archiveFormat, [CallerArgumentExpression(nameof(entryType))] string? paramName = null)
        {
            switch (archiveFormat)
            {
                case TarEntryFormat.V7:
                    if (entryType is
                        TarEntryType.Directory or
                        TarEntryType.HardLink or
                        TarEntryType.V7RegularFile or
                        TarEntryType.SymbolicLink)
                    {
                        return;
                    }
                    break;
 
                case TarEntryFormat.Ustar:
                    if (entryType is
                        TarEntryType.BlockDevice or
                        TarEntryType.CharacterDevice or
                        TarEntryType.Directory or
                        TarEntryType.Fifo or
                        TarEntryType.HardLink or
                        TarEntryType.RegularFile or
                        TarEntryType.SymbolicLink)
                    {
                        return;
                    }
                    break;
 
                case TarEntryFormat.Pax:
                    if (entryType is
                        TarEntryType.BlockDevice or
                        TarEntryType.CharacterDevice or
                        TarEntryType.Directory or
                        TarEntryType.Fifo or
                        TarEntryType.HardLink or
                        TarEntryType.RegularFile or
                        TarEntryType.SymbolicLink)
                    {
                        // GlobalExtendedAttributes is handled via PaxGlobalExtendedAttributesEntry
 
                        // Not supported for writing - internally autogenerated:
                        // - ExtendedAttributes
                        return;
                    }
                    break;
 
                case TarEntryFormat.Gnu:
                    if (entryType is
                        TarEntryType.BlockDevice or
                        TarEntryType.CharacterDevice or
                        TarEntryType.Directory or
                        TarEntryType.Fifo or
                        TarEntryType.HardLink or
                        TarEntryType.RegularFile or
                        TarEntryType.SymbolicLink)
                    {
                        // Not supported for writing:
                        // - ContiguousFile
                        // - DirectoryList
                        // - MultiVolume
                        // - RenamedOrSymlinked
                        // - SparseFile
                        // - TapeVolume
 
                        // Also not supported for writing - internally autogenerated:
                        // - LongLink
                        // - LongPath
                        return;
                    }
                    break;
 
                case TarEntryFormat.Unknown:
                default:
                    throw new InvalidDataException(SR.Format(SR.TarInvalidFormat, archiveFormat));
            }
 
            throw new ArgumentException(SR.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat), paramName);
        }
 
        public static void SetPendingModificationTimes(Stack<(string, DateTimeOffset)> directoryModificationTimes)
        {
            // note: these are ordered child to parent.
            while (directoryModificationTimes.TryPop(out (string Path, DateTimeOffset Modified) item))
            {
                AttemptDirectorySetLastWriteTime(item.Path, item.Modified);
            }
        }
 
        public static void UpdatePendingModificationTimes(Stack<(string, DateTimeOffset)> directoryModificationTimes, string fullPath, DateTimeOffset modified)
        {
            // We can't set the modification time when we create the directory because extracting entries into it
            // will cause that time to change. Instead, we track the times to set them later.
 
            // We take into account that regular tar files are ordered:
            // when we see a new directory which is not a child of the previous directory
            // we can set the parent directory timestamps, and stop tracking them.
            // This avoids having to track all directory entries until we've finished extracting the entire archive.
            while (directoryModificationTimes.TryPeek(out (string Path, DateTimeOffset Modified) previous) &&
                   !IsChildPath(previous.Path, fullPath))
            {
                directoryModificationTimes.TryPop(out previous);
                AttemptDirectorySetLastWriteTime(previous.Path, previous.Modified);
            }
 
            directoryModificationTimes.Push((fullPath, modified));
        }
 
        private static bool IsChildPath(string parentFullPath, string childFullPath)
        {
            // Both paths may end with an additional separator.
 
            // Verify that either the parent path ends with a separator
            // or the child path has a separator where the parent path ends.
            if (IsDirectorySeparatorChar(parentFullPath[^1]))
            {
                // The child needs to be at least a char longer than the parent for the name.
                if (childFullPath.Length <= parentFullPath.Length)
                {
                    return false;
                }
            }
            else
            {
                // The child needs to be at least 2 chars longer than the parent:
                // one for the separator, and one for the name.
                if ((childFullPath.Length < parentFullPath.Length + 2) ||
                    !IsDirectorySeparatorChar(childFullPath[parentFullPath.Length]))
                {
                    return false;
                }
            }
 
            return childFullPath.StartsWith(parentFullPath, PathInternal.StringComparison);
 
            // We don't need to check for AltDirectorySeparatorChar, full paths are normalized to DirectorySeparatorChar.
            static bool IsDirectorySeparatorChar(char c)
                => c == Path.DirectorySeparatorChar;
        }
 
        private static void AttemptDirectorySetLastWriteTime(string fullPath, DateTimeOffset lastWriteTime)
        {
            try
            {
                Directory.SetLastWriteTime(fullPath, lastWriteTime.UtcDateTime);
            }
            catch
            {
                // Some OSes like Android might not support setting the last write time, the extraction should not fail because of that
            }
        }
    }
}