File: Common\GLEUAlgorithm.cs
Web Access
Project: src\src\Libraries\Microsoft.Extensions.AI.Evaluation.NLP\Microsoft.Extensions.AI.Evaluation.NLP.csproj (Microsoft.Extensions.AI.Evaluation.NLP)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
using Microsoft.Shared.Diagnostics;
 
namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
 
/// <summary>
/// Google-BLEU (GLEU) algorithm implementation for evaluating the quality of a response.
/// Python implementation reference: https://www.nltk.org/api/nltk.translate.gleu_score.html.
/// </summary>
internal static class GLEUAlgorithm
{
    /// <summary>
    /// Computes the sentence-level GLEU score of <paramref name="hypothesis"/> against one or more
    /// <paramref name="references"/>.
    /// </summary>
    /// <remarks>
    /// Each reference is scored independently as
    /// <c>matches / max(hypothesisNGramCount, referenceNGramCount)</c>, which equals
    /// <c>min(precision, recall)</c> because both share the same numerator. The highest
    /// per-reference score is returned, mirroring the NLTK reference implementation
    /// ("use the reference yielding the highest score").
    /// </remarks>
    /// <param name="references">Tokenized reference sentences; must be non-null and non-empty.</param>
    /// <param name="hypothesis">Tokenized hypothesis sentence; must be non-null and non-empty.</param>
    /// <param name="minN">Lower n-gram bound, forwarded to <c>CreateAllNGrams</c>.</param>
    /// <param name="maxN">Upper n-gram bound, forwarded to <c>CreateAllNGrams</c>.</param>
    /// <returns>
    /// The best per-reference score, or <c>0.0</c> when no reference yields a non-zero
    /// n-gram count to compare against.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="references"/> or <paramref name="hypothesis"/> is null or empty.
    /// </exception>
    internal static double SentenceGLEU(string[][] references, string[] hypothesis, int minN = 1, int maxN = 4)
    {
        if (references == null || references.Length == 0)
        {
            Throw.ArgumentNullException(nameof(references), $"'{nameof(references)}' cannot be null or empty.");
        }

        if (hypothesis == null || hypothesis.Length == 0)
        {
            Throw.ArgumentNullException(nameof(hypothesis), $"'{nameof(hypothesis)}' cannot be null or empty.");
        }

        // The hypothesis n-grams are loop-invariant, so count them once up front.
        MatchCounter<NGram<string>> hypNGrams = new(hypothesis.CreateAllNGrams(minN, maxN));
        int truePosFalsePos = hypNGrams.Sum();

        // Best score seen so far; stays 0.0 if every reference is skipped below.
        double bestScore = 0.0;

        foreach (var reference in references)
        {
            MatchCounter<NGram<string>> refNGrams = new(reference.CreateAllNGrams(minN, maxN));
            int truePosFalseNeg = refNGrams.Sum();

            MatchCounter<NGram<string>> overlapNGrams = hypNGrams.Intersect(refNGrams);
            int truePos = overlapNGrams.Sum();

            // precision = tp / tpfp and recall = tp / tpfn share a numerator, so
            // min(precision, recall) == tp / max(tpfp, tpfn).
            int nAll = Math.Max(truePosFalsePos, truePosFalseNeg);

            // Skip references with nothing to compare, avoiding a division by zero.
            if (nAll > 0)
            {
                // Keep the best-scoring reference rather than pooling counts across
                // references, which would penalize every additional non-matching reference.
                bestScore = Math.Max(bestScore, (double)truePos / nAll);
            }
        }

        return bestScore;
    }
}