// F1EvaluatorTests.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.Extensions.AI.Evaluation.NLP;
using Xunit;
 
namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
 
#pragma warning disable AIEVAL001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
 
/// <summary>
/// Tests for <see cref="F1Evaluator"/>: verifies the numeric score reported under
/// <see cref="F1Evaluator.F1MetricName"/>, its interpretation, and the error diagnostics
/// produced when the reference context is missing or the response is empty.
/// </summary>
public class F1EvaluatorTests
{
    /// <summary>
    /// A response identical to the reference scores 1.0 and is rated
    /// <see cref="EvaluationRating.Exceptional"/> without being marked as failed.
    /// </summary>
    [Fact]
    public async Task ReturnsPerfectScoreForIdenticalText()
    {
        var metric = await EvaluateAgainstReferenceAsync(
            reference: "The quick brown fox jumps over the lazy dog.",
            responseText: "The quick brown fox jumps over the lazy dog.");

        Assert.Equal(1.0, (double)metric.Value!, 4);
        Assert.NotNull(metric.Interpretation);
        Assert.Equal(EvaluationRating.Exceptional, metric.Interpretation.Rating);
        Assert.False(metric.Interpretation.Failed);
    }

    /// <summary>
    /// A response unrelated to the reference gets a low score, is rated
    /// <see cref="EvaluationRating.Unacceptable"/>, and is marked as failed.
    /// </summary>
    [Fact]
    public async Task ReturnsLowScoreForCompletelyDifferentText()
    {
        var metric = await EvaluateAgainstReferenceAsync(
            reference: "The quick brown fox jumps over the lazy dog.",
            responseText: "Completely unrelated sentence.");

        Assert.Equal(0.1429, (double)metric.Value!, 4);
        Assert.NotNull(metric.Interpretation);
        Assert.Equal(EvaluationRating.Unacceptable, metric.Interpretation.Rating);
        Assert.True(metric.Interpretation.Failed);
    }

    /// <summary>
    /// Evaluating without any additional context still yields the F1 metric, carrying an
    /// error-severity diagnostic.
    /// </summary>
    [Fact]
    public async Task ReturnsErrorDiagnosticIfNoContext()
    {
        // Arranged inline rather than through the shared helper: this case passes no
        // F1EvaluatorContext at all.
        var evaluator = new F1Evaluator();
        var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, "Some text."));

        var result = await evaluator.EvaluateAsync(response, null, null);

        var metric = Assert.IsAssignableFrom<NumericMetric>(Assert.Single(result.Metrics.Values));
        Assert.Equal(F1Evaluator.F1MetricName, metric.Name);
        Assert.NotNull(metric.Diagnostics);
        Assert.Contains(metric.Diagnostics, d => d.Severity == EvaluationDiagnosticSeverity.Error);
    }

    /// <summary>
    /// Checks the score for several reference/hypothesis pairs, compared to four decimal places.
    /// </summary>
    /// <param name="reference">Reference text supplied through the evaluator context.</param>
    /// <param name="hypothesis">Response text being scored.</param>
    /// <param name="score">Expected metric value.</param>
    [Theory]
    [InlineData("the cat is on the mat",
        "the the the the the the the", 0.30769)]
    [InlineData("It is a guide to action that ensures that the military will forever heed Party commands",
        "It is a guide to action which ensures that the military always obeys the commands of the party", 0.70589)]
    [InlineData("It is the practical guide for the army always to heed the directions of the party",
        "It is to insure the troops forever hearing the activity guidebook that party direct", 0.4000)]
    public async Task SampleCases(string reference, string hypothesis, double score)
    {
        var metric = await EvaluateAgainstReferenceAsync(reference, hypothesis);

        Assert.Equal(score, (double)metric.Value!, 4);
    }

    /// <summary>
    /// An empty response evaluated against a reference yields the F1 metric with an
    /// error-severity diagnostic.
    /// </summary>
    [Fact]
    public async Task ReturnsErrorDiagnosticIfEmptyResponse()
    {
        var metric = await EvaluateAgainstReferenceAsync(reference: "Reference text.", responseText: "");

        Assert.NotNull(metric.Diagnostics);
        Assert.Contains(metric.Diagnostics, d => d.Severity == EvaluationDiagnosticSeverity.Error);
    }

    /// <summary>
    /// Runs a fresh <see cref="F1Evaluator"/> over an assistant message containing
    /// <paramref name="responseText"/>, with <paramref name="reference"/> supplied as the
    /// only additional context, and returns the single metric produced.
    /// </summary>
    /// <param name="reference">Reference text wrapped in an <see cref="F1EvaluatorContext"/>.</param>
    /// <param name="responseText">Text of the assistant message to evaluate.</param>
    /// <returns>
    /// The sole metric in the result, already asserted to be a <see cref="NumericMetric"/>
    /// named <see cref="F1Evaluator.F1MetricName"/>.
    /// </returns>
    private static async Task<NumericMetric> EvaluateAgainstReferenceAsync(string reference, string responseText)
    {
        var evaluator = new F1Evaluator();
        var context = new F1EvaluatorContext(reference);
        var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, responseText));

        var result = await evaluator.EvaluateAsync(response, null, [context]);

        // IsAssignableFrom reports the actual type on a mismatch and returns a non-null,
        // strongly typed metric, so callers need no null-forgiving operators on it.
        var metric = Assert.IsAssignableFrom<NumericMetric>(Assert.Single(result.Metrics.Values));
        Assert.Equal(F1Evaluator.F1MetricName, metric.Name);
        return metric;
    }
}
// File: F1EvaluatorTests.cs (Web Access)
// Project: src\test\Libraries\Microsoft.Extensions.AI.Evaluation.NLP.Tests\Microsoft.Extensions.AI.Evaluation.NLP.Tests.csproj (Microsoft.Extensions.AI.Evaluation.NLP.Tests)