File: MinimizeUtil.cs
Web Access
Project: src\src\Tools\PrepareTests\PrepareTests.csproj (PrepareTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.IO;
using System.Linq;
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;
using System.Runtime.InteropServices;
using System.Text;
 
internal static class MinimizeUtil
{
    internal record FilePathInfo(string RelativeDirectory, string Directory, string RelativePath, string FullPath);
 
    internal static void Run(string sourceDirectory, string destinationDirectory, bool isUnix)
    {
        const string duplicateDirectoryName = ".duplicate";
        var duplicateDirectory = Path.Combine(destinationDirectory, duplicateDirectoryName);
        Directory.CreateDirectory(duplicateDirectory);
 
        // https://github.com/dotnet/roslyn/issues/49486
        // we should avoid copying the files under Resources.
        Directory.CreateDirectory(Path.Combine(destinationDirectory, "src/Workspaces/MSBuildTest/Resources"));
        var individualFiles = new[]
        {
            "global.json",
            "NuGet.config",
            "src/Workspaces/MSBuildTest/Resources/global.json",
            "src/Workspaces/MSBuildTest/Resources/Directory.Build.props",
            "src/Workspaces/MSBuildTest/Resources/Directory.Build.targets",
            "src/Workspaces/MSBuildTest/Resources/Directory.Build.rsp",
            "src/Workspaces/MSBuildTest/Resources/NuGet.Config",
        };
 
        foreach (var individualFile in individualFiles)
        {
            var outputPath = Path.Combine(destinationDirectory, individualFile);
            var outputDirectory = Path.GetDirectoryName(outputPath)!;
            CreateHardLink(outputPath, Path.Combine(sourceDirectory, individualFile));
        }
 
        // Map of all PE files MVID to the path information
        var idToFilePathMap = initialWalk();
        resolveDuplicates();
        writeHydrateFile();
 
        // The goal of initial walk is to
        //  1. Record any PE files as they are eligable for de-dup
        //  2. Hard link all other files into destination directory
        Dictionary<Guid, List<FilePathInfo>> initialWalk()
        {
            IEnumerable<string> directories = new[] {
                Path.Combine(sourceDirectory, "eng"),
            };
 
            if (!isUnix)
            {
                directories = directories.Concat([Path.Combine(sourceDirectory, "artifacts", "VSSetup")]);
            }
 
            var artifactsDir = Path.Combine(sourceDirectory, "artifacts/bin");
            directories = directories.Concat(Directory.EnumerateDirectories(artifactsDir, "*.UnitTests"));
            directories = directories.Concat(Directory.EnumerateDirectories(artifactsDir, "*.IntegrationTests"));
            directories = directories.Concat(Directory.EnumerateDirectories(artifactsDir, "RunTests"));
 
            var idToFilePathMap = directories.AsParallel()
                .SelectMany(unitDirPath => walkDirectory(unitDirPath, sourceDirectory, destinationDirectory))
                .GroupBy(pair => pair.mvid)
                .ToDictionary(
                    group => group.Key,
                    group => group.Select(pair => pair.pathInfo).ToList());
 
            return idToFilePathMap;
        }
 
        static IEnumerable<(Guid mvid, FilePathInfo pathInfo)> walkDirectory(string unitDirPath, string sourceDirectory, string destinationDirectory)
        {
            Console.WriteLine($"[{DateTime.UtcNow}] Walking {unitDirPath}");
            string? lastOutputDirectory = null;
            foreach (var sourceFilePath in Directory.EnumerateFiles(unitDirPath, "*", SearchOption.AllDirectories))
            {
                var currentDirName = Path.GetDirectoryName(sourceFilePath)!;
                var currentRelativeDirectory = Path.GetRelativePath(sourceDirectory, currentDirName);
                var currentOutputDirectory = Path.Combine(destinationDirectory, currentRelativeDirectory);
                if (currentOutputDirectory != lastOutputDirectory)
                {
                    Directory.CreateDirectory(currentOutputDirectory);
                    lastOutputDirectory = currentOutputDirectory;
                }
                var fileName = Path.GetFileName(sourceFilePath);
 
                if (fileName.EndsWith(".dll", StringComparison.Ordinal) && TryGetMvid(sourceFilePath, out var mvid))
                {
                    var filePathInfo = new FilePathInfo(
                        RelativeDirectory: currentRelativeDirectory,
                        Directory: currentDirName,
                        RelativePath: Path.Combine(currentRelativeDirectory, fileName),
                        FullPath: sourceFilePath);
                    yield return (mvid, filePathInfo);
                }
                else
                {
                    var destFilePath = Path.Combine(currentOutputDirectory, fileName);
                    CreateHardLink(destFilePath, sourceFilePath);
                }
            }
        }
 
        // Now that we have a complete list of PE files, determine which are duplicates
        void resolveDuplicates()
        {
            foreach (var pair in idToFilePathMap)
            {
                if (pair.Value.Count > 1)
                {
                    CreateHardLink(getPeFilePath(pair.Key), pair.Value[0].FullPath);
                }
                else
                {
                    var item = pair.Value[0];
                    var destFilePath = Path.Combine(destinationDirectory, item.RelativePath);
                    CreateHardLink(destFilePath, item.FullPath);
                }
            }
        }
 
        static string getPeFileName(Guid mvid) => mvid.ToString();
 
        string getPeFilePath(Guid mvid) => Path.Combine(duplicateDirectory, getPeFileName(mvid));
 
        void writeHydrateFile()
        {
            var fileList = new List<string>();
            var grouping = idToFilePathMap
                .Where(x => x.Value.Count > 1)
                .SelectMany(pair => pair.Value.Select(fp => (Id: pair.Key, FilePath: fp)))
                .GroupBy(fp => getGroupDirectory(fp.FilePath.RelativeDirectory));
 
            // The "rehydrate-all" script assumes we are running all tests on a single machine instead of on Helix.
            var rehydrateAllBuilder = new StringBuilder();
            if (isUnix)
            {
                writeUnixHeaderContent(rehydrateAllBuilder);
                rehydrateAllBuilder.AppendLine("export HELIX_CORRELATION_PAYLOAD=$scriptroot/.duplicate");
            }
            else
            {
                rehydrateAllBuilder.AppendLine(@"set HELIX_CORRELATION_PAYLOAD=%~dp0\.duplicate");
            }
 
            var builder = new StringBuilder();
            var fileName = isUnix ? "rehydrate.sh" : "rehydrate.cmd";
            var rehydratedDirectories = new List<string>();
            foreach (var group in grouping)
            {
                builder.Clear();
                if (isUnix)
                {
                    writeUnixRehydrateContent(builder, group);
                    rehydrateAllBuilder.AppendLine(@"bash """ + Path.Combine("$scriptroot", group.Key, "rehydrate.sh") + @"""");
                }
                else
                {
                    writeWindowsRehydrateContent(builder, group);
                    rehydrateAllBuilder.AppendLine("call " + Path.Combine("%~dp0", group.Key, "rehydrate.cmd"));
                }
 
                File.WriteAllText(Path.Combine(destinationDirectory, group.Key, fileName), builder.ToString());
                rehydratedDirectories.Add(group.Key);
            }
 
            // Even if we didn't have any duplicates, write out a file since later scripts rely on its existence.
            var noDuplicatesGrouping = idToFilePathMap.Values
                .SelectMany(v => v)
                .GroupBy(v => getGroupDirectory(v.RelativeDirectory));
            foreach (var noDuplicate in noDuplicatesGrouping)
            {
                if (!rehydratedDirectories.Contains(noDuplicate.Key))
                {
                    var file = Path.Combine(destinationDirectory, noDuplicate.Key, fileName);
                    Contract.Assert(!File.Exists(file));
                    File.WriteAllText(file, "echo \"Nothing to rehydrate\"");
                }
            }
 
            string rehydrateAllFilename = isUnix ? "rehydrate-all.sh" : "rehydrate-all.cmd";
            File.WriteAllText(Path.Combine(destinationDirectory, rehydrateAllFilename), rehydrateAllBuilder.ToString());
 
            static void writeWindowsRehydrateContent(StringBuilder builder, IGrouping<string, (Guid Id, FilePathInfo FilePath)> group)
            {
                builder.AppendLine("@echo off");
                var count = 0;
                foreach (var tuple in group)
                {
                    var source = getPeFileName(tuple.Id);
                    var destFileName = Path.GetRelativePath(group.Key, tuple.FilePath.RelativePath);
                    if (Path.GetDirectoryName(destFileName) is { Length: not 0 } directory)
                    {
                        builder.AppendLine($@"mkdir %~dp0\{directory} 2> nul");
                    }
                    builder.AppendLine($@"
mklink /h %~dp0\{destFileName} %HELIX_CORRELATION_PAYLOAD%\{source} > nul
if %errorlevel% neq 0 (
    echo Cmd failed: mklink /h %~dp0\{destFileName} %HELIX_CORRELATION_PAYLOAD%\{source}
    exit /b 1
)");
                    count++;
                    if (count % 1_000 == 0)
                    {
                        builder.AppendLine($"echo {count:n0} hydrated");
                    }
                }
                builder.AppendLine("@echo on"); // so the rest of the commands show up in helix logs
            }
 
            static void writeUnixHeaderContent(StringBuilder builder)
            {
                builder.AppendLine(@"#!/bin/bash
 
source=""${BASH_SOURCE[0]}""
 
# resolve $source until the file is no longer a symlink
while [[ -h ""$source"" ]]; do
scriptroot=""$( cd -P ""$( dirname ""$source"" )"" && pwd )""
source=""$(readlink ""$source"")""
# if $source was a relative symlink, we need to resolve it relative to the path where the
# symlink file was located
[[ $source != /* ]] && source=""$scriptroot/$source""
done
scriptroot=""$( cd -P ""$( dirname ""$source"" )"" && pwd )""
");
            }
 
            static void writeUnixRehydrateContent(StringBuilder builder, IGrouping<string, (Guid Id, FilePathInfo FilePath)> group)
            {
                writeUnixHeaderContent(builder);
 
                var count = 0;
                foreach (var tuple in group)
                {
                    var source = getPeFileName(tuple.Id);
                    var destFilePath = Path.GetRelativePath(group.Key, tuple.FilePath.RelativePath);
                    if (Path.GetDirectoryName(destFilePath) is { Length: not 0 } directory)
                    {
                        builder.AppendLine($@"mkdir -p ""$scriptroot/{directory}""");
                    }
                    builder.AppendLine($@"ln ""$HELIX_CORRELATION_PAYLOAD/{source}"" ""$scriptroot/{destFilePath}"" || exit $?");
 
                    count++;
                    if (count % 1_000 == 0)
                    {
                        builder.AppendLine($"echo '{count:n0} hydrated'");
                    }
                }
 
                // Working around an AzDo file permissions bug.
                // We want this to happen at the end so we can be agnostic about whether ilasm was already in the directory, or was linked in from the .duplicate directory.
                builder.AppendLine();
                builder.AppendLine(@"find $scriptroot -name ilasm -exec chmod 755 {} +");
            }
 
            static string getGroupDirectory(string relativePath)
            {
                // artifacts/TestProject/Debug/net472/whatever/etc should become:
                // artifacts/TestProject/Debug/net472
 
                var groupDirectory = relativePath;
                while (Path.GetFileName(Path.GetDirectoryName(groupDirectory)) is not (null or "Debug" or "Release"))
                    groupDirectory = Path.GetDirectoryName(groupDirectory);
 
                if (groupDirectory is null)
                {
                    // So far, this scenario doesn't seem to happen.
                    // If it *did* happen, we'd want to know, but it isn't necessarily a problem.
                    Console.WriteLine("Directory not grouped under configuration/TFM: " + relativePath);
                    return relativePath;
                }
 
                return groupDirectory;
            }
        }
    }
 
    private static void CreateHardLink(string fileName, string existingFileName)
    {
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            var success = CreateHardLink(fileName, existingFileName, IntPtr.Zero);
            if (!success)
            {
                // for debugging: https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes
                throw new IOException($"Failed to create hard link from {existingFileName} to {fileName} with exception 0x{Marshal.GetLastWin32Error():X}");
            }
        }
        else
        {
            var result = link(existingFileName, fileName);
            if (result != 0)
            {
                throw new IOException($"Failed to create hard link from {existingFileName} to {fileName} with error code {Marshal.GetLastWin32Error()}");
            }
        }
 
        // https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-createhardlinkw
        [DllImport("Kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        static extern bool CreateHardLink(string lpFileName, string lpExistingFileName, IntPtr lpSecurityAttributes);
 
        // https://man7.org/linux/man-pages/man2/link.2.html
        [DllImport("libc", SetLastError = true)]
        static extern int link(string oldpath, string newpath);
    }
 
    private static bool TryGetMvid(string filePath, out Guid mvid)
    {
        try
        {
            using var stream = File.OpenRead(filePath);
            var reader = new PEReader(stream);
            if (!reader.HasMetadata)
            {
                mvid = default;
                return false;
            }
            var metadataReader = reader.GetMetadataReader();
            var mvidHandle = metadataReader.GetModuleDefinition().Mvid;
            mvid = metadataReader.GetGuid(mvidHandle);
            return true;
        }
        catch
        {
            mvid = default;
            return false;
        }
    }
}