File: DiffUtil.cs
Web Access
Project: src\src\Common\Microsoft.Arcade.Test.Common\Microsoft.Arcade.Test.Common.csproj (Microsoft.Arcade.Test.Common)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
 
namespace Microsoft.Arcade.Test.Common
{
    public class DiffUtil
    {
        private enum EditKind
        {
            /// <summary>
            /// No change.
            /// </summary>
            None = 0,
 
            /// <summary>
            /// Node value was updated.
            /// </summary>
            Update = 1,
 
            /// <summary>
            /// Node was inserted.
            /// </summary>
            Insert = 2,
 
            /// <summary>
            /// Node was deleted.
            /// </summary>
            Delete = 3,
        }
 
        private class LCS<T> : LongestCommonSubsequence<IList<T>>
        {
            public static readonly LCS<T> Default = new LCS<T>((left, right) => EqualityComparer<T>.Default.Equals(left, right));
 
            private readonly Func<T, T, bool> _comparer;
 
            public LCS(Func<T, T, bool> comparer)
            {
                _comparer = comparer;
            }
 
            protected override bool ItemsEqual(IList<T> sequenceA, int indexA, IList<T> sequenceB, int indexB)
            {
                return _comparer(sequenceA[indexA], sequenceB[indexB]);
            }
 
            public IEnumerable<string> CalculateDiff(IList<T> sequenceA, IList<T> sequenceB, Func<T, string> toString)
            {
                foreach (var edit in GetEdits(sequenceA, sequenceA.Count, sequenceB, sequenceB.Count).Reverse())
                {
                    switch (edit.Kind)
                    {
                        case EditKind.Delete:
                            yield return "--> " + toString(sequenceA[edit.IndexA]);
                            break;
 
                        case EditKind.Insert:
                            yield return "++> " + toString(sequenceB[edit.IndexB]);
                            break;
 
                        case EditKind.Update:
                            yield return "    " + toString(sequenceB[edit.IndexB]);
                            break;
                    }
                }
            }
        }
 
        public static string DiffReport<T>(IEnumerable<T> expected, IEnumerable<T> actual, Func<T, T, bool> comparer = null, Func<T, string> toString = null, string separator = ",\r\n")
        {
            var lcs = (comparer != null) ? new LCS<T>(comparer) : LCS<T>.Default;
            toString = toString ?? new Func<T, string>(obj => obj.ToString());
 
            IList<T> expectedList = expected as IList<T> ?? new List<T>(expected);
            IList<T> actualList = actual as IList<T> ?? new List<T>(actual);
 
            return string.Join(separator, lcs.CalculateDiff(expectedList, actualList, toString));
        }
 
        private static readonly char[] s_lineSplitChars = new[] { '\r', '\n' };
 
        public static string[] Lines(string s)
        {
            return s.Split(s_lineSplitChars, StringSplitOptions.RemoveEmptyEntries);
        }
 
        public static string DiffReport(string expected, string actual)
        {
            var exlines = Lines(expected);
            var aclines = Lines(actual);
            return DiffReport(exlines, aclines, separator: "\r\n");
        }
 
        /// <summary>
        /// Calculates Longest Common Subsequence.
        /// </summary>
        private abstract class LongestCommonSubsequence<TSequence>
        {
            protected struct Edit
            {
                public readonly EditKind Kind;
                public readonly int IndexA;
                public readonly int IndexB;
 
                internal Edit(EditKind kind, int indexA, int indexB)
                {
                    this.Kind = kind;
                    this.IndexA = indexA;
                    this.IndexB = indexB;
                }
            }
 
            private const int DeleteCost = 1;
            private const int InsertCost = 1;
            private const int UpdateCost = 2;
 
            protected abstract bool ItemsEqual(TSequence sequenceA, int indexA, TSequence sequenceB, int indexB);
 
            protected IEnumerable<KeyValuePair<int, int>> GetMatchingPairs(TSequence sequenceA, int lengthA, TSequence sequenceB, int lengthB)
            {
                int[,] d = ComputeCostMatrix(sequenceA, lengthA, sequenceB, lengthB);
                int i = lengthA;
                int j = lengthB;
 
                while (i != 0 && j != 0)
                {
                    if (d[i, j] == d[i - 1, j] + DeleteCost)
                    {
                        i--;
                    }
                    else if (d[i, j] == d[i, j - 1] + InsertCost)
                    {
                        j--;
                    }
                    else
                    {
                        i--;
                        j--;
                        yield return new KeyValuePair<int, int>(i, j);
                    }
                }
            }
 
            protected IEnumerable<Edit> GetEdits(TSequence sequenceA, int lengthA, TSequence sequenceB, int lengthB)
            {
                int[,] d = ComputeCostMatrix(sequenceA, lengthA, sequenceB, lengthB);
                int i = lengthA;
                int j = lengthB;
 
                while (i != 0 && j != 0)
                {
                    if (d[i, j] == d[i - 1, j] + DeleteCost)
                    {
                        i--;
                        yield return new Edit(EditKind.Delete, i, -1);
                    }
                    else if (d[i, j] == d[i, j - 1] + InsertCost)
                    {
                        j--;
                        yield return new Edit(EditKind.Insert, -1, j);
                    }
                    else
                    {
                        i--;
                        j--;
                        yield return new Edit(EditKind.Update, i, j);
                    }
                }
 
                while (i > 0)
                {
                    i--;
                    yield return new Edit(EditKind.Delete, i, -1);
                }
 
                while (j > 0)
                {
                    j--;
                    yield return new Edit(EditKind.Insert, -1, j);
                }
            }
 
            /// <summary>
            /// Returns a distance [0..1] of the specified sequences.
            /// The smaller distance the more of their elements match.
            /// </summary>
            /// <summary>
            /// Returns a distance [0..1] of the specified sequences.
            /// The smaller distance the more of their elements match.
            /// </summary>
            protected double ComputeDistance(TSequence sequenceA, int lengthA, TSequence sequenceB, int lengthB)
            {
                Debug.Assert(lengthA >= 0 && lengthB >= 0);
 
                if (lengthA == 0 || lengthB == 0)
                {
                    return (lengthA == lengthB) ? 0.0 : 1.0;
                }
 
                int lcsLength = 0;
                foreach (var pair in GetMatchingPairs(sequenceA, lengthA, sequenceB, lengthB))
                {
                    lcsLength++;
                }
 
                int max = Math.Max(lengthA, lengthB);
                Debug.Assert(lcsLength <= max);
                return 1.0 - (double)lcsLength / (double)max;
            }
 
            /// <summary>
            /// Calculates costs of all paths in an edit graph starting from vertex (0,0) and ending in vertex (lengthA, lengthB). 
            /// </summary>
            /// <remarks>
            /// The edit graph for A and B has a vertex at each point in the grid (i,j), i in [0, lengthA] and j in [0, lengthB].
            /// 
            /// The vertices of the edit graph are connected by horizontal, vertical, and diagonal directed edges to form a directed acyclic graph.
            /// Horizontal edges connect each vertex to its right neighbor. 
            /// Vertical edges connect each vertex to the neighbor below it.
            /// Diagonal edges connect vertex (i,j) to vertex (i-1,j-1) if <see cref="ItemsEqual"/>(sequenceA[i-1],sequenceB[j-1]) is true.
            /// 
            /// Editing starts with S = []. 
            /// Move along horizontal edge (i-1,j)-(i,j) represents the fact that sequenceA[i-1] is not added to S.
            /// Move along vertical edge (i,j-1)-(i,j) represents an insert of sequenceB[j-1] to S.
            /// Move along diagonal edge (i-1,j-1)-(i,j) represents an addition of sequenceB[j-1] to S via an acceptable 
            /// change of sequenceA[i-1] to sequenceB[j-1].
            /// 
            /// In every vertex the cheapest outgoing edge is selected. 
            /// The number of diagonal edges on the path from (0,0) to (lengthA, lengthB) is the length of the longest common subsequence.
            /// </remarks>
            private int[,] ComputeCostMatrix(TSequence sequenceA, int lengthA, TSequence sequenceB, int lengthB)
            {
                var la = lengthA + 1;
                var lb = lengthB + 1;
 
                // TODO: Optimization possible: O(ND) time, O(N) space
                // EUGENE W. MYERS: An O(ND) Difference Algorithm and Its Variations
                var d = new int[la, lb];
 
                d[0, 0] = 0;
                for (int i = 1; i <= lengthA; i++)
                {
                    d[i, 0] = d[i - 1, 0] + DeleteCost;
                }
 
                for (int j = 1; j <= lengthB; j++)
                {
                    d[0, j] = d[0, j - 1] + InsertCost;
                }
 
                for (int i = 1; i <= lengthA; i++)
                {
                    for (int j = 1; j <= lengthB; j++)
                    {
                        int m1 = d[i - 1, j - 1] + (ItemsEqual(sequenceA, i - 1, sequenceB, j - 1) ? 0 : UpdateCost);
                        int m2 = d[i - 1, j] + DeleteCost;
                        int m3 = d[i, j - 1] + InsertCost;
                        d[i, j] = Math.Min(Math.Min(m1, m2), m3);
                    }
                }
 
                return d;
            }
        }
    }
}