File: UtilityTest\EditDistanceTests.cs
Web Access
Project: src\src\Workspaces\CoreTest\Microsoft.CodeAnalysis.Workspaces.UnitTests.csproj (Microsoft.CodeAnalysis.Workspaces.UnitTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
#nullable disable
 
using System.Linq;
using Roslyn.Utilities;
using Xunit;
 
namespace Microsoft.CodeAnalysis.UnitTests
{
    public class EditDistanceTests
    {
        private static void VerifyEditDistance(string s, string t, int expectedEditDistance)
        {
            // We want the full edit distance, without bailing out early because we crossed the
            // threshold.
            var editDistance1 = EditDistance.GetEditDistance(s, t);
            Assert.Equal(expectedEditDistance, editDistance1);
 
            // Edit distances are symmetric.
            var editDistance2 = EditDistance.GetEditDistance(t, s);
            Assert.Equal(editDistance1, editDistance2);
 
            // If we set hte edit distance as our threshold, we should still find the value.
            var editDistance3 = EditDistance.GetEditDistance(s, t, editDistance1);
            Assert.Equal(editDistance1, editDistance3);
 
            if (editDistance1 > 0)
            {
                var editDistance4 = EditDistance.GetEditDistance(s, t, editDistance1 - 1);
                Assert.Equal(EditDistance.BeyondThreshold, editDistance4);
            }
        }
 
        [Fact]
        public void EditDistance0()
        {
            VerifyEditDistance("", "", 0);
            VerifyEditDistance("a", "a", 0);
        }
 
        [Fact]
        public void EditDistance1()
        {
            VerifyEditDistance("", "a", 1);
            VerifyEditDistance("a", "", 1);
            VerifyEditDistance("a", "b", 1);
            VerifyEditDistance("ab", "a", 1);
            VerifyEditDistance("a", "ab", 1);
            VerifyEditDistance("aabb", "abab", 1);
        }
 
        [Fact]
        public void EditDistance2()
        {
            VerifyEditDistance("", "aa", 2);
            VerifyEditDistance("aa", "", 2);
            VerifyEditDistance("aa", "bb", 2);
            VerifyEditDistance("aab", "a", 2);
            VerifyEditDistance("a", "aab", 2);
            VerifyEditDistance("aababb", "ababab", 2);
        }
 
        [Fact]
        public void EditDistance3()
        {
            VerifyEditDistance("", "aaa", 3);
            VerifyEditDistance("aaa", "", 3);
            VerifyEditDistance("aaa", "bbb", 3);
            VerifyEditDistance("aaab", "a", 3);
            VerifyEditDistance("a", "aaab", 3);
            VerifyEditDistance("aababbab", "abababaa", 3);
        }
 
        [Fact]
        public void EditDistance4()
            => VerifyEditDistance("XlmReade", "XmlReader", 2);
 
        [Fact]
        public void EditDistance5()
            => VerifyEditDistance("Zeil", "trials", 4);
 
        [Fact]
        public void EditDistance6()
            => VerifyEditDistance("barking", "corkliness", 6);
 
        [Fact]
        public void EditDistance7()
            => VerifyEditDistance("kitten", "sitting", 3);
 
        [Fact]
        public void EditDistance8()
            => VerifyEditDistance("sunday", "saturday", 3);
 
        [Fact]
        public void EditDistance9()
            => VerifyEditDistance("meilenstein", "levenshtein", 4);
 
        [Fact]
        public void EditDistance10()
            => VerifyEditDistance("rosettacode", "raisethysword", 8);
 
        [Fact]
        public void EditDistance11()
        {
            var editDistance = EditDistance.GetEditDistance("book", "moons", 1);
            Assert.Equal(EditDistance.BeyondThreshold, editDistance);
            VerifyEditDistance("book", "moons", 3);
        }
 
        [Fact]
        public void EditDistance12()
        {
            VerifyEditDistance("aaaab", "aaabc", 2);
            VerifyEditDistance("aaaab", "aabcc", 3);
            VerifyEditDistance("aaaab", "abccc", 4);
            VerifyEditDistance("aaaab", "bcccc", 5);
 
            VerifyEditDistance("aaaabb", "aaabbc", 2);
            VerifyEditDistance("aaaabb", "aabbcc", 4);
            VerifyEditDistance("aaaabb", "abbccc", 5);
            VerifyEditDistance("aaaabb", "bbcccc", 6);
 
            VerifyEditDistance("aaaabbb", "aaabbbc", 2);
            VerifyEditDistance("aaaabbb", "aabbbcc", 4);
            VerifyEditDistance("aaaabbb", "abbbccc", 6);
            VerifyEditDistance("aaaabbb", "bbbcccc", 7);
 
            VerifyEditDistance("aaaabbbb", "aaabbbbc", 2);
            VerifyEditDistance("aaaabbbb", "aabbbbcc", 4);
            VerifyEditDistance("aaaabbbb", "abbbbccc", 6);
            VerifyEditDistance("aaaabbbb", "bbbbcccc", 8);
        }
 
        public static readonly string[] Top1000 =
#pragma warning disable format // https://github.com/dotnet/roslyn/issues/70711 tracks removing this suppression.
            [
                "a","able","about","above","act","add","afraid","after","again","against","age","ago","agree","air","all",
                "allow","also","always","am","among","an","and","anger","animal","answer","any","appear","apple","are",
                "area","arm","arrange","arrive","art","as","ask","at","atom","baby","back","bad","ball","band","bank",
                "bar","base","basic","bat","be","bear","beat","beauty","bed","been","before","began","begin","behind",
                "believe","bell","best","better","between","big","bird","bit","black","block","blood","blow","blue","board",
                "boat","body","bone","book","born","both","bottom","bought","box","boy","branch","bread","break","bright",
                "bring","broad","broke","brother","brought","brown","build","burn","busy","but","buy","by","call","came",
                "camp","can","capital","captain","car","card","care","carry","case","cat","catch","caught","cause","cell",
                "cent","center","century","certain","chair","chance","change","character","charge","chart","check","chick",
                "chief","child","children","choose","chord","circle","city","claim","class","clean","clear","climb","clock",
                "close","clothe","cloud","coast","coat","cold","collect","colony","color","column","come","common","company",
                "compare","complete","condition","connect","consider","consonant","contain","continent","continue","control",
                "cook","cool","copy","corn","corner","correct","cost","cotton","could","count","country","course","cover",
                "cow","crease","create","crop","cross","crowd","cry","current","cut","dad","dance","danger","dark","day",
                "dead","deal","dear","death","decide","decimal","deep","degree","depend","describe","desert","design",
                "determine","develop","dictionary","did","die","differ","difficult","direct","discuss","distant","divide",
                "division","do","doctor","does","dog","dollar","done","dont","door","double","down","draw","dream","dress",
                "drink","drive","drop","dry","duck","during","each","ear","early","earth","ease","east","eat","edge",
                "effect","egg","eight","either","electric","element","else","end","enemy","energy","engine","enough",
                "enter","equal","equate","especially","even","evening","event","ever","every","exact","example","except",
                "excite","exercise","expect","experience","experiment","eye","face","fact","fair","fall","family","famous",
                "far","farm","fast","fat","father","favor","fear","feed","feel","feet","fell","felt","few","field","fig",
                "fight","figure","fill","final","find","fine","finger","finish","fire","first","fish","fit","five","flat",
                "floor","flow","flower","fly","follow","food","foot","for","force","forest","form","forward","found",
                "four","fraction","free","fresh","friend","from","front","fruit","full","fun","game","garden","gas","gather",
                "gave","general","gentle","get","girl","give","glad","glass","go","gold","gone","good","got","govern",
                "grand","grass","gray","great","green","grew","ground","group","grow","guess","guide","gun","had","hair",
                "half","hand","happen","happy","hard","has","hat","have","he","head","hear","heard","heart","heat","heavy",
                "held","help","her","here","high","hill","him","his","history","hit","hold","hole","home","hope","horse",
                "hot","hour","house","how","huge","human","hundred","hunt","hurry","i","ice","idea","if","imagine","in",
                "inch","include","indicate","industry","insect","instant","instrument","interest","invent","iron","is",
                "island","it","job","join","joy","jump","just","keep","kept","key","kill","kind","king","knew","know",
                "lady","lake","land","language","large","last","late","laugh","law","lay","lead","learn","least","leave",
                "led","left","leg","length","less","let","letter","level","lie","life","lift","light","like","line","liquid",
                "list","listen","little","live","locate","log","lone","long","look","lost","lot","loud","love","low",
                "machine","made","magnet","main","major","make","man","many","map","mark","market","mass","master","match",
                "material","matter","may","me","mean","meant","measure","meat","meet","melody","men","metal","method",
                "middle","might","mile","milk","million","mind","mine","minute","miss","mix","modern","molecule","moment",
                "money","month","moon","more","morning","most","mother","motion","mount","mountain","mouth","move","much",
                "multiply","music","must","my","name","nation","natural","nature","near","necessary","neck","need","neighbor",
                "never","new","next","night","nine","no","noise","noon","nor","north","nose","note","nothing","notice",
                "noun","now","number","numeral","object","observe","occur","ocean","of","off","offer","office","often",
                "oh","oil","old","on","once","one","only","open","operate","opposite","or","order","organ","original",
                "other","our","out","over","own","oxygen","page","paint","pair","paper","paragraph","parent","part","particular",
                "party","pass","past","path","pattern","pay","people","perhaps","period","person","phrase","pick","picture",
                "piece","pitch","place","plain","plan","plane","planet","plant","play","please","plural","poem","point",
                "poor","populate","port","pose","position","possible","post","pound","power","practice","prepare","present",
                "press","pretty","print","probable","problem","process","produce","product","proper","property","protect",
                "prove","provide","pull","push","put","quart","question","quick","quiet","quite","quotient","race","radio",
                "rail","rain","raise","ran","range","rather","reach","read","ready","real","reason","receive","record",
                "red","region","remember","repeat","reply","represent","require","rest","result","rich","ride","right",
                "ring","rise","river","road","rock","roll","room","root","rope","rose","round","row","rub","rule","run",
                "safe","said","sail","salt","same","sand","sat","save","saw","say","scale","school","science","score",
                "sea","search","season","seat","second","section","see","seed","seem","segment","select","self","sell",
                "send","sense","sent","sentence","separate","serve","set","settle","seven","several","shall","shape",
                "share","sharp","she","sheet","shell","shine","ship","shoe","shop","shore","short","should","shoulder",
                "shout","show","side","sight","sign","silent","silver","similar","simple","since","sing","single","sister",
                "sit","six","size","skill","skin","sky","slave","sleep","slip","slow","small","smell","smile","snow",
                "so","soft","soil","soldier","solution","solve","some","son","song","soon","sound","south","space","speak",
                "special","speech","speed","spell","spend","spoke","spot","spread","spring","square","stand","star","start",
                "state","station","stay","stead","steam","steel","step","stick","still","stone","stood","stop","store",
                "story","straight","strange","stream","street","stretch","string","strong","student","study","subject",
                "substance","subtract","success","such","sudden","suffix","sugar","suggest","suit","summer","sun","supply",
                "support","sure","surface","surprise","swim","syllable","symbol","system","table","tail","take","talk",
                "tall","teach","team","teeth","tell","temperature","ten","term","test","than","thank","that","the","their",
                "them","then","there","these","they","thick","thin","thing","think","third","this","those","though","thought",
                "thousand","three","through","throw","thus","tie","time","tiny","tire","to","together","told","tone",
                "too","took","tool","top","total","touch","toward","town","track","trade","train","travel","tree","triangle",
                "trip","trouble","truck","true","try","tube","turn","twenty","two","type","under","unit","until","up",
                "us","use","usual","valley","value","vary","verb","very","view","village","visit","voice","vowel","wait",
                "walk","wall","want","war","warm","was","wash","watch","water","wave","way","we","wear","weather","week",
                "weight","well","went","were","west","what","wheel","when","where","whether","which","while","white",
                "who","whole","whose","why","wide","wife","wild","will","win","wind","window","wing","winter","wire",
                "wish","with","woman","women","wonder","wont","wood","word","work","world","would","write","written",
                "wrong","wrote","yard","year","yellow","yes","yet","you","young","your",
            ];
#pragma warning restore format
 
        [Fact]
        public void Top1000Test()
        {
            for (var i = 0; i < Top1000.Length; i++)
            {
                var source = Top1000[i];
                for (var j = 0; j < Top1000.Length; j++)
                {
                    var target = Top1000[j];
                    var editDistance1 = EditDistance.GetEditDistance(source, target);
 
                    if (i == j)
                    {
                        Assert.Equal(0, editDistance1);
                    }
 
                    if (editDistance1 == 0)
                    {
                        Assert.Equal(i, j);
                    }
 
                    Assert.True(editDistance1 >= 0);
 
                    var editDistance2 = EditDistance.GetEditDistance(source, target, editDistance1);
                    Assert.Equal(editDistance1, editDistance2);
                }
            }
        }
 
        [Fact]
        public void TestSpecificMetric()
        {
            // If our edit distance is a metric then ED(CA,ABC) = 2 because CA -> AC -> ABC
            // In this case.  This then satisfies the triangle inequality because 
            // ED(CA, AC) + ED(AC, ABC) >= ED(CA, ABC)   ...   1 + 1 >= 2
            //
            // If it's not implemented with a metric (like if we used the Optimal String Alignment
            // algorithm), then the we could get an edit distance of 3 "CA -> A -> AB -> ABC".  
            // This violates the triangle inequality rule because: 
            // 
            // OSA(CA,AC) + OSA(AC,ABC) >= OSA(CA,ABC)  ...   1 + 1 >= 3    is not true.
            //
            // Being a metric is important so that we can properly use this with BKTrees.
            VerifyEditDistance("CA", "ABC", 2);
        }
 
        [Fact]
        public void TestTriangleInequality()
        {
            var top = Top1000.Take(50).ToArray();
 
            for (var i = 0; i < top.Length; i++)
            {
                for (var j = 0; j < top.Length; j++)
                {
                    if (j == i)
                    {
                        continue;
                    }
 
                    for (var k = 0; k < top.Length; k++)
                    {
                        if (k == i || k == j)
                        {
                            continue;
                        }
 
                        var string1 = top[i];
                        var string2 = top[j];
                        var string3 = top[k];
 
                        var editDistance12 = EditDistance.GetEditDistance(string1, string2);
                        var editDistance13 = EditDistance.GetEditDistance(string1, string3);
                        var editDistance23 = EditDistance.GetEditDistance(string2, string3);
 
                        Assert.True(editDistance13 <= editDistance12 + editDistance23);
                    }
                }
            }
        }
    }
}