File: TextDifferencing\TextDiffer.cs
Web Access
Project: src\src\Razor\src\Razor\src\Microsoft.CodeAnalysis.Razor.Workspaces\Microsoft.CodeAnalysis.Razor.Workspaces.csproj (Microsoft.CodeAnalysis.Razor.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
 
namespace Microsoft.CodeAnalysis.Razor.TextDifferencing;
 
//
// This class implements the linear space variation of the difference algorithm described in
// "An O(ND) Difference Algorithm and its Variations" by Eugene W. Myers
//
// Note: Some variable names in this class are not be fully compliant with the C# naming guidelines
// in the interest of using the same terminology discussed in the paper for ease of understanding.
//
internal abstract partial class TextDiffer
{
    protected abstract int OldSourceLength { get; }
    protected abstract int NewSourceLength { get; }
 
    private int _oldSourceOffset;
    private int _newSourceOffset;
 
    protected abstract bool SourceEqual(int oldSourceIndex, int newSourceIndex);
 
    protected List<DiffEdit> ComputeDiff()
    {
        var edits = new List<DiffEdit>(capacity: 4);
        var builder = new DiffEditBuilder(edits);
 
        var lowA = 0;
        var highA = OldSourceLength;
        var lowB = 0;
        var highB = NewSourceLength;
 
        // Determine the extent of the changes in both texts, as this will allow us
        // to limit the amount of memory needed in the vf/vr arrays. By doing this though,
        // we will need to adjust SourceEqual requests to use an appropriate offset.
        FindChangeExtent(ref lowA, ref highA, ref lowB, ref highB);
 
        _oldSourceOffset = lowA;
        _newSourceOffset = lowB;
 
        var oldSourceLength = highA - lowA;
        var newSourceLength = highB - lowB;
 
        // Initialize the vectors to use for forward and reverse searches.
        var max = newSourceLength + oldSourceLength;
        using var vf = new IntArray((2 * max) + 1);
        using var vr = new IntArray((2 * max) + 1);
 
        ComputeDiffRecursive(builder, 0, oldSourceLength, 0, newSourceLength, vf, vr);
 
        // Update the resultant edits with the appropriate offsets
        for (var i = 0; i < edits.Count; i++)
        {
            edits[i] = edits[i].Offset(_oldSourceOffset, _newSourceOffset);
        }
 
        return edits;
    }
 
    private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, int lowB, int highB, IntArray vf, IntArray vr)
    {
        FindChangeExtent(ref lowA, ref highA, ref lowB, ref highB);
 
        if (lowA == highA)
        {
            // Base case 1: We've reached the end of original text. Insert whatever is remaining in the new text.
            while (lowB < highB)
            {
                edits.AddInsert(lowA, lowB);
                lowB++;
            }
        }
        else if (lowB == highB)
        {
            // Base case 2: We've reached the end of new text. Delete whatever is remaining in the original text.
            while (lowA < highA)
            {
                edits.AddDelete(lowA);
                lowA++;
            }
        }
        else
        {
            // Find the midpoint of the optimal path.
            var (middleX, middleY) = FindMiddleSnake(lowA, highA, lowB, highB, vf, vr);
 
            // Recursively find the midpoint of the left half.
            ComputeDiffRecursive(edits, lowA, middleX, lowB, middleY, vf, vr);
 
            // Recursively find the midpoint of the right half.
            ComputeDiffRecursive(edits, middleX, highA, middleY, highB, vf, vr);
        }
    }
 
    private void FindChangeExtent(ref int lowA, ref int highA, ref int lowB, ref int highB)
    {
        while (lowA < highA && lowB < highB && SourceEqualUsingOffset(lowA, lowB))
        {
            // Skip equal text at the start.
            lowA++;
            lowB++;
        }
 
        while (lowA < highA && lowB < highB && SourceEqualUsingOffset(highA - 1, highB - 1))
        {
            // Skip equal text at the end.
            highA--;
            highB--;
        }
    }
 
    private (int, int) FindMiddleSnake(int lowA, int highA, int lowB, int highB, IntArray vf, IntArray vr)
    {
        var n = highA - lowA;
        var m = highB - lowB;
        var delta = n - m;
        var deltaIsEven = delta % 2 == 0;
 
        var max = n + m;
 
        // Compute the k-line to start the forward and reverse searches.
        var forwardK = lowA - lowB;
        var reverseK = highA - highB;
 
        // The paper uses negative indexes but we can't do that here. So we'll add an offset.
        var forwardOffset = max - forwardK;
        var reverseOffset = max - reverseK;
 
        // Initialize the vector
        vf[forwardOffset + forwardK + 1] = lowA;
        vr[reverseOffset + reverseK - 1] = highA;
 
        var maxD = Math.Ceiling((double)(m + n) / 2);
        for (var d = 0; d <= maxD; d++) // For D ← 0 to ceil((M + N)/2) Do
        {
            // Run the algorithm in forward direction.
            for (var k = forwardK - d; k <= forwardK + d; k += 2) // For k ← −D to D in steps of 2 Do
            {
                // Find the end of the furthest reaching forward D-path in diagonal k.
                int x;
                if (k == forwardK - d ||
                    (k != forwardK + d && vf[forwardOffset + k - 1] < vf[forwardOffset + k + 1]))
                {
                    // Down
                    x = vf[forwardOffset + k + 1];
                }
                else
                {
                    // Right
                    x = vf[forwardOffset + k - 1] + 1;
                }
 
                var y = x - k;
 
                // Traverse diagonal if possible.
                while (x < highA && y < highB && SourceEqualUsingOffset(x, y))
                {
                    x++;
                    y++;
                }
 
                vf[forwardOffset + k] = x;
                if (deltaIsEven)
                {
                    // Can't have overlap here.
                }
                else if (k > reverseK - d && k < reverseK + d) // If ∆ is odd and k ∈ [∆ − (D − 1) , ∆ + (D − 1)] Then
                {
                    if (vr[reverseOffset + k] <= vf[forwardOffset + k]) // If the path overlaps the furthest reaching reverse (D − 1)-path in diagonal k Then
                    {
                        // The last snake of the forward path is the middle snake.
                        x = vf[forwardOffset + k];
                        y = x - k;
                        return (x, y);
                    }
                }
            }
 
            // Run the algorithm in reverse direction.
            for (var k = reverseK - d; k <= reverseK + d; k += 2) // For k ← −D to D in steps of 2 Do
            {
                // Find the end of the furthest reaching reverse D-path in diagonal k+∆.
                int x;
                if (k == reverseK + d ||
                    (k != reverseK - d && vr[reverseOffset + k - 1] < vr[reverseOffset + k + 1] - 1))
                {
                    // Up
                    x = vr[reverseOffset + k - 1];
                }
                else
                {
                    // Left
                    x = vr[reverseOffset + k + 1] - 1;
                }
 
                var y = x - k;
 
                // Traverse diagonal if possible.
                while (x > lowA && y > lowB && SourceEqualUsingOffset(x - 1, y - 1))
                {
                    x--;
                    y--;
                }
 
                vr[reverseOffset + k] = x;
                if (!deltaIsEven)
                {
                    // Can't have overlap here.
                }
                else if (k >= forwardK - d && k <= forwardK + d) // If ∆ is even and k + ∆ ∈ [−D, D] Then
                {
                    if (vr[reverseOffset + k] <= vf[forwardOffset + k]) // If the path overlaps the furthest reaching forward D-path in diagonal k+∆ Then
                    {
                        // The last snake of the reverse path is the middle snake.
                        x = vf[forwardOffset + k];
                        y = x - k;
                        return (x, y);
                    }
                }
            }
        }
 
        throw Assumes.NotReachable();
    }
 
    private bool SourceEqualUsingOffset(int oldSourceIndex, int newSourceIndex)
        => SourceEqual(oldSourceIndex + _oldSourceOffset, newSourceIndex + _newSourceOffset);
}