// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.IO;
using System.Linq;
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;
using System.Runtime.InteropServices;
using System.Text;
/// <summary>
/// Hard links test assets from a source tree into a destination tree, storing PE files that share
/// an MVID once under a duplicate directory and emitting scripts that link the copies back.
/// </summary>
internal static class MinimizeUtil
/// <summary>Path details recorded for a PE file found during the directory walk.</summary>
/// <param name="RelativeDirectory">Containing directory, relative to the source root.</param>
/// <param name="Directory">Containing directory as enumerated from the source tree.</param>
/// <param name="RelativePath">File path relative to the source root.</param>
/// <param name="FullPath">File path as enumerated from the source tree.</param>
internal record FilePathInfo(string RelativeDirectory, string Directory, string RelativePath, string FullPath);
/// <summary>
/// Mirrors the test assets under <paramref name="sourceDirectory"/> into
/// <paramref name="destinationDirectory"/> using hard links.
/// </summary>
/// <param name="sourceDirectory">Root of the tree to read from.</param>
/// <param name="destinationDirectory">Root of the tree to populate.</param>
/// <param name="isUnix">Selects shell (<c>.sh</c>) rather than batch (<c>.cmd</c>) rehydrate scripts.</param>
internal static void Run(string sourceDirectory, string destinationDirectory, bool isUnix)
// Shared copies of duplicated PE files live in this directory under the destination root.
const string duplicateDirectoryName = ".duplicate";
var duplicateDirectory = Path.Combine(destinationDirectory, duplicateDirectoryName);
// Files that are linked one by one, at the same relative path under the destination root.
var individualFiles = new[]
foreach (var individualFile in individualFiles)
var outputPath = Path.Combine(destinationDirectory, individualFile);
var outputDirectory = Path.GetDirectoryName(outputPath)!;
// CreateHardLink takes the new link path first, then the existing file.
CreateHardLink(outputPath, Path.Combine(sourceDirectory, individualFile));
// Map of all PE files MVID to the path information
var idToFilePathMap = initialWalk();
// The goal of initial walk is to
// 1. Record any PE files as they are eligible for de-dup
// 2. Hard link all other files into destination directory
// The recorded PE files are returned grouped by MVID.
Dictionary<Guid, List<FilePathInfo>> initialWalk()
var artifactsDir = Path.Combine(sourceDirectory, "artifacts/bin");
// Roots to walk: eng, artifacts/VSSetup, and the *.UnitTests, *.IntegrationTests and RunTests
// directories directly under artifacts/bin.
List<string> directories =
Path.Combine(sourceDirectory, "eng"),
Path.Combine(sourceDirectory, "artifacts", "VSSetup"),
.. Directory.EnumerateDirectories(artifactsDir, "*.UnitTests"),
.. Directory.EnumerateDirectories(artifactsDir, "*.IntegrationTests"),
.. Directory.EnumerateDirectories(artifactsDir, "RunTests")
// Walk the roots in parallel, skipping roots that do not exist, and group the results by MVID.
var idToFilePathMap = directories.AsParallel()
.Where(x => Directory.Exists(x))
.SelectMany(unitDirPath => walkDirectory(unitDirPath, sourceDirectory, destinationDirectory))
.GroupBy(pair => pair.mvid)
group => group.Key,
group => group.Select(pair => pair.pathInfo).ToList());
return idToFilePathMap;
// Lazily enumerates every file under unitDirPath, recursing into subdirectories. A file whose name
// ends in ".dll" and whose MVID can be read is yielded as an (mvid, pathInfo) pair; the hard link at
// the end mirrors a file at the same relative location under destinationDirectory.
// NOTE(review): per the initial-walk notes only non-PE files should reach that hard link — confirm.
static IEnumerable<(Guid mvid, FilePathInfo pathInfo)> walkDirectory(string unitDirPath, string sourceDirectory, string destinationDirectory)
Console.WriteLine($"Walking {unitDirPath}");
// Remembers the output directory of the previous file so a change of directory can be detected.
string? lastOutputDirectory = null;
foreach (var sourceFilePath in Directory.EnumerateFiles(unitDirPath, "*", SearchOption.AllDirectories))
var currentDirName = Path.GetDirectoryName(sourceFilePath)!;
// Relative paths are computed against the source root, not against unitDirPath.
var currentRelativeDirectory = Path.GetRelativePath(sourceDirectory, currentDirName);
var currentOutputDirectory = Path.Combine(destinationDirectory, currentRelativeDirectory);
if (currentOutputDirectory != lastOutputDirectory)
lastOutputDirectory = currentOutputDirectory;
var fileName = Path.GetFileName(sourceFilePath);
// The extension check is ordinal and therefore case-sensitive.
if (fileName.EndsWith(".dll", StringComparison.Ordinal) && TryGetMvid(sourceFilePath, out var mvid))
var filePathInfo = new FilePathInfo(
RelativeDirectory: currentRelativeDirectory,
Directory: currentDirName,
RelativePath: Path.Combine(currentRelativeDirectory, fileName),
FullPath: sourceFilePath);
yield return (mvid, filePathInfo);
var destFilePath = Path.Combine(currentOutputDirectory, fileName);
CreateHardLink(destFilePath, sourceFilePath);
// With every PE file recorded, place each MVID exactly once: an MVID seen at several paths gets a
// single shared copy in the duplicate directory, while an MVID seen once is linked at its own
// relative path under the destination root.
void resolveDuplicates()
{
    foreach (var (mvid, locations) in idToFilePathMap)
    {
        // The first recorded location supplies the file content in both cases.
        var first = locations[0];
        var linkPath = locations.Count > 1
            ? getPeFilePath(mvid)
            : Path.Combine(destinationDirectory, first.RelativePath);

        CreateHardLink(linkPath, first.FullPath);
    }
}
// Name of the shared copy of a PE file: the MVID in its default string form.
static string getPeFileName(Guid mvid)
{
    return mvid.ToString();
}
// Full path of the shared copy of a PE file inside the duplicate directory.
string getPeFilePath(Guid mvid)
{
    var sharedName = getPeFileName(mvid);
    return Path.Combine(duplicateDirectory, sharedName);
}
// Writes the rehydrate scripts: one per group directory (rehydrate.sh or rehydrate.cmd, chosen by
// isUnix) and a top-level rehydrate-all script under the destination root.
void writeHydrateFile()
var fileList = new List<string>();
// Flatten every MVID recorded at more than one path into (Id, FilePath) entries and bucket the
// entries by the group directory of the file's relative directory.
var grouping = idToFilePathMap
.Where(x => x.Value.Count > 1)
.SelectMany(pair => pair.Value.Select(fp => (Id: pair.Key, FilePath: fp)))
.GroupBy(fp => getGroupDirectory(fp.FilePath.RelativeDirectory));
// The "rehydrate-all" script assumes we are running all tests on a single machine instead of on Helix.
var rehydrateAllBuilder = new StringBuilder();
// rehydrate-all points HELIX_CORRELATION_PAYLOAD at the .duplicate directory beside the script.
if (isUnix)
rehydrateAllBuilder.AppendLine("export HELIX_CORRELATION_PAYLOAD=$scriptroot/.duplicate");
rehydrateAllBuilder.AppendLine(@"set HELIX_CORRELATION_PAYLOAD=%~dp0\.duplicate");
var builder = new StringBuilder();
var fileName = isUnix ? "rehydrate.sh" : "rehydrate.cmd";
var rehydratedDirectories = new List<string>();
foreach (var group in grouping)
// Generate the group's script content and add a line to rehydrate-all that invokes it.
if (isUnix)
writeUnixRehydrateContent(builder, group);
rehydrateAllBuilder.AppendLine(@"bash """ + Path.Combine("$scriptroot", group.Key, "rehydrate.sh") + @"""");
writeWindowsRehydrateContent(builder, group);
rehydrateAllBuilder.AppendLine("call " + Path.Combine("%~dp0", group.Key, "rehydrate.cmd"));
File.WriteAllText(Path.Combine(destinationDirectory, group.Key, fileName), builder.ToString());
// Even if we didn't have any duplicates, write out a file since later scripts rely on its existence.
var noDuplicatesGrouping = idToFilePathMap.Values
.SelectMany(v => v)
.GroupBy(v => getGroupDirectory(v.RelativeDirectory));
foreach (var noDuplicate in noDuplicatesGrouping)
// Only group directories not listed in rehydratedDirectories get the placeholder script.
if (!rehydratedDirectories.Contains(noDuplicate.Key))
var file = Path.Combine(destinationDirectory, noDuplicate.Key, fileName);
File.WriteAllText(file, "echo \"Nothing to rehydrate\"");
string rehydrateAllFilename = isUnix ? "rehydrate-all.sh" : "rehydrate-all.cmd";
File.WriteAllText(Path.Combine(destinationDirectory, rehydrateAllFilename), rehydrateAllBuilder.ToString());
// Appends the batch commands that rehydrate one group directory: for each entry, an optional mkdir
// for the entry's subdirectory and an "mklink /h" from %HELIX_CORRELATION_PAYLOAD% to the entry's
// path relative to the group directory. The mklink / errorlevel lines below are batch text with
// interpolation holes, not C# statements.
static void writeWindowsRehydrateContent(StringBuilder builder, IGrouping<string, (Guid Id, FilePathInfo FilePath)> group)
builder.AppendLine("@echo off");
// Drives the progress message emitted when the count is a multiple of 1,000.
var count = 0;
foreach (var tuple in group)
// The link source is the MVID-named file in the payload directory.
var source = getPeFileName(tuple.Id);
// The destination is expressed relative to the group directory, where the script is written.
var destFileName = Path.GetRelativePath(group.Key, tuple.FilePath.RelativePath);
if (Path.GetDirectoryName(destFileName) is { Length: not 0 } directory)
builder.AppendLine($@"mkdir %~dp0\{directory} 2> nul");
mklink /h %~dp0\{destFileName} %HELIX_CORRELATION_PAYLOAD%\{source} > nul
if %errorlevel% neq 0 (
    echo Cmd failed: mklink /h %~dp0\{destFileName} %HELIX_CORRELATION_PAYLOAD%\{source}
    exit /b 1
if (count % 1_000 == 0)
builder.AppendLine($"echo {count:n0} hydrated");
builder.AppendLine("@echo on"); // so the rest of the commands show up in helix logs
// Appends the bash preamble that follows symlinks to locate the script file and stores the script's
// directory in $scriptroot. The lines below are shell text from a verbatim string (hence the doubled
// quotes), not C# statements.
static void writeUnixHeaderContent(StringBuilder builder)
# resolve $source until the file is no longer a symlink
while [[ -h ""$source"" ]]; do
scriptroot=""$( cd -P ""$( dirname ""$source"" )"" && pwd )""
source=""$(readlink ""$source"")""
# if $source was a relative symlink, we need to resolve it relative to the path where the
# symlink file was located
[[ $source != /* ]] && source=""$scriptroot/$source""
scriptroot=""$( cd -P ""$( dirname ""$source"" )"" && pwd )""
// Appends the shell commands that rehydrate one group directory: for each entry, an optional
// "mkdir -p" for the entry's subdirectory and an "ln" from $HELIX_CORRELATION_PAYLOAD to the entry's
// path relative to the group directory, followed by a final chmod for any file named ilasm.
static void writeUnixRehydrateContent(StringBuilder builder, IGrouping<string, (Guid Id, FilePathInfo FilePath)> group)
// Drives the progress message emitted when the count is a multiple of 1,000.
var count = 0;
foreach (var tuple in group)
// The link source is the MVID-named file in the payload directory.
var source = getPeFileName(tuple.Id);
// The destination is expressed relative to the group directory, where the script is written.
var destFilePath = Path.GetRelativePath(group.Key, tuple.FilePath.RelativePath);
if (Path.GetDirectoryName(destFilePath) is { Length: not 0 } directory)
builder.AppendLine($@"mkdir -p ""$scriptroot/{directory}""");
// "|| exit $?" stops the generated script with ln's exit status when a link fails.
builder.AppendLine($@"ln ""$HELIX_CORRELATION_PAYLOAD/{source}"" ""$scriptroot/{destFilePath}"" || exit $?");
if (count % 1_000 == 0)
builder.AppendLine($"echo '{count:n0} hydrated'");
// Working around an AzDo file permissions bug.
// We want this to happen at the end so we can be agnostic about whether ilasm was already in the directory, or was linked in from the .duplicate directory.
builder.AppendLine(@"find $scriptroot -name ilasm -exec chmod 755 {} +");
// Maps a directory to the group directory that receives its rehydrate script: the directory itself
// or its nearest ancestor whose parent is named "Debug" or "Release". For example
//   artifacts/TestProject/Debug/net472/whatever/etc should become:
//   artifacts/TestProject/Debug/net472
// When no such directory exists, the input is reported on the console and returned unchanged.
static string getGroupDirectory(string relativePath)
{
    string? candidate = relativePath;

    // Path.GetDirectoryName returns "" for a single-segment relative path (and null for a root or
    // an empty path), so the walk must stop on empty as well as null; otherwise an ungrouped path
    // would collapse to "" instead of reaching the fallback below.
    while (!string.IsNullOrEmpty(candidate))
    {
        var parent = Path.GetDirectoryName(candidate);
        if (Path.GetFileName(parent) is "Debug" or "Release")
        {
            return candidate;
        }

        candidate = parent;
    }

    // Not expected for the current layout. If it does happen we want to know, but it isn't
    // necessarily a problem: the directory simply becomes its own group.
    Console.WriteLine("Directory not grouped under configuration/TFM: " + relativePath);
    return relativePath;
}
/// <summary>
/// Creates a hard link at <paramref name="fileName"/> that refers to <paramref name="existingFileName"/>.
/// </summary>
/// <param name="fileName">Path of the link to create.</param>
/// <param name="existingFileName">Path of the file the link refers to.</param>
/// <exception cref="IOException">The native call reported a failure.</exception>
private static void CreateHardLink(string fileName, string existingFileName)
{
    var onWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
    if (!onWindows)
    {
        // link(2) takes the existing path first and reports failure with a non-zero result.
        var status = link(existingFileName, fileName);
        if (status == 0)
        {
            return;
        }

        throw new IOException($"Failed to create hard link from {existingFileName} to {fileName} with error code {Marshal.GetLastWin32Error()}");
    }

    // The Win32 API takes the new link path first and reports failure with a false result.
    var linked = CreateHardLink(fileName, existingFileName, IntPtr.Zero);
    if (linked)
    {
        return;
    }

    // for debugging: https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes
    throw new IOException($"Failed to create hard link from {existingFileName} to {fileName} with exception 0x{Marshal.GetLastWin32Error():X}");

    // https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-createhardlinkw
    [DllImport("Kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    static extern bool CreateHardLink(string lpFileName, string lpExistingFileName, IntPtr lpSecurityAttributes);

    // https://man7.org/linux/man-pages/man2/link.2.html
    [DllImport("libc", SetLastError = true)]
    static extern int link(string oldpath, string newpath);
}
/// <summary>
/// Attempts to read the module version ID (MVID) of the PE file at <paramref name="filePath"/>.
/// </summary>
/// <param name="filePath">Path of the file to inspect.</param>
/// <param name="mvid">Receives the MVID on success; otherwise <c>default</c>.</param>
/// <returns>
/// <c>true</c> when the file has metadata and its MVID was read; <c>false</c> when the file has no
/// metadata or any error occurs while opening or reading it.
/// </returns>
private static bool TryGetMvid(string filePath, out Guid mvid)
{
    try
    {
        using var stream = File.OpenRead(filePath);

        // PEReader is IDisposable; dispose it deterministically instead of leaving the buffers it
        // allocates while reading the image for the garbage collector.
        using var reader = new PEReader(stream);
        if (reader.HasMetadata)
        {
            var metadataReader = reader.GetMetadataReader();
            var mvidHandle = metadataReader.GetModuleDefinition().Mvid;

            // The Guid is copied out before the reader (and the memory backing it) is disposed.
            mvid = metadataReader.GetGuid(mvidHandle);
            return true;
        }
    }
    catch
    {
        // Best effort by design: an unreadable or malformed file is simply not a de-dup candidate.
    }

    mvid = default;
    return false;
}