// RelevanceTruthAndCompletenessEvaluatorTests.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.AI.Evaluation.Quality;
using Microsoft.Extensions.AI.Evaluation.Reporting;
using Microsoft.Extensions.AI.Evaluation.Reporting.Storage;
using Microsoft.TestUtilities;
using Xunit;
 
namespace Microsoft.Extensions.AI.Evaluation.Integration.Tests;
 
/// <summary>
/// Integration tests that send a prompt to the configured chat client and score the response with
/// <see cref="RelevanceTruthAndCompletenessEvaluator"/>, once using the default evaluator and once using an
/// evaluator constructed with <c>includeReasoning: true</c>.
/// </summary>
/// <remarks>
/// The reporting configurations are only created when <c>Settings.Current.Configured</c> is true; otherwise
/// each test skips itself through <see cref="SkipIfNotConfigured"/>.
/// </remarks>
public class RelevanceTruthAndCompletenessEvaluatorTests
{
    /// <summary>Lowest metric value that the tests accept for relevance, truth and completeness.</summary>
    private const int MinimumScore = 4;

    private static readonly ChatOptions _chatOptions;
    private static readonly ReportingConfiguration? _reportingConfigurationWithoutReasoning;
    private static readonly ReportingConfiguration? _reportingConfigurationWithReasoning;

    static RelevanceTruthAndCompletenessEvaluatorTests()
    {
        _chatOptions =
            new ChatOptions
            {
                Temperature = 0.0f,
                ResponseFormat = ChatResponseFormat.Text
            };

        // Leave both configurations null when the test run is not configured; SkipIfNotConfigured() guards
        // every test against dereferencing them in that case.
        if (Settings.Current.Configured)
        {
            IEvaluator rtcEvaluatorWithoutReasoning = new RelevanceTruthAndCompletenessEvaluator();

            _reportingConfigurationWithoutReasoning =
                DiskBasedReportingConfiguration.Create(
                    storageRootPath: Settings.Current.StorageRootPath,
                    evaluators: [rtcEvaluatorWithoutReasoning],
                    chatConfiguration: Setup.CreateChatConfiguration(),
                    executionName: Constants.Version);

            var options = new RelevanceTruthAndCompletenessEvaluatorOptions(includeReasoning: true);
            IEvaluator rtcEvaluatorWithReasoning = new RelevanceTruthAndCompletenessEvaluator(options);

            _reportingConfigurationWithReasoning =
                DiskBasedReportingConfiguration.Create(
                    storageRootPath: Settings.Current.StorageRootPath,
                    evaluators: [rtcEvaluatorWithReasoning],
                    chatConfiguration: Setup.CreateChatConfiguration(),
                    executionName: Constants.Version);
        }
    }

    /// <summary>
    /// Evaluates a response with the default evaluator and requires each of the three metrics to reach
    /// <see cref="MinimumScore"/>.
    /// </summary>
    [ConditionalFact]
    public async Task WithoutReasoning()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _reportingConfigurationWithoutReasoning.CreateScenarioRunAsync(
                scenarioName: $"Microsoft.Extensions.AI.Evaluation.Integration.Tests.{nameof(RelevanceTruthAndCompletenessEvaluatorTests)}.{nameof(WithoutReasoning)}");

        EvaluationResult result =
            await EvaluateResponseToPromptAsync(scenarioRun, "What is the molecular formula of ammonia?");

        NumericMetric relevance = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
        NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
        NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);

        // The failure message is built eagerly, even when the assertion passes, so it must not call
        // Single() on the diagnostics: that throws unless the metric carries exactly one diagnostic.
        // NOTE(review): presumably no reasoning diagnostic is attached when the evaluator is created
        // without includeReasoning - confirm against the evaluator implementation.
        Assert.True(relevance.Value >= MinimumScore, DescribeMetric("Relevance", relevance));
        Assert.True(truth.Value >= MinimumScore, DescribeMetric("Truth", truth));
        Assert.True(completeness.Value >= MinimumScore, DescribeMetric("Completeness", completeness));
    }

    /// <summary>
    /// Evaluates a response with the reasoning-enabled evaluator and requires each of the three metrics to
    /// reach <see cref="MinimumScore"/>, reporting the metric's single diagnostic message on failure.
    /// </summary>
    [ConditionalFact]
    public async Task WithReasoning()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _reportingConfigurationWithReasoning.CreateScenarioRunAsync(
                scenarioName: $"Microsoft.Extensions.AI.Evaluation.Integration.Tests.{nameof(RelevanceTruthAndCompletenessEvaluatorTests)}.{nameof(WithReasoning)}");

        EvaluationResult result =
            await EvaluateResponseToPromptAsync(scenarioRun, "What is the molecular formula of glucose?");

        NumericMetric relevance = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
        NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
        NumericMetric completeness = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);

        // Single() is kept deliberately: it also fails the test unless each metric carries exactly one
        // diagnostic.
        Assert.True(relevance.Value >= MinimumScore, $"Relevance - Reasoning: {relevance.Diagnostics.Single().Message}");
        Assert.True(truth.Value >= MinimumScore, $"Truth - Reasoning: {truth.Diagnostics.Single().Message}");
        Assert.True(completeness.Value >= MinimumScore, $"Completeness - Reasoning: {completeness.Diagnostics.Single().Message}");
    }

    /// <summary>
    /// Sends <paramref name="prompt"/> as a user message to the scenario run's chat client, asserts that the
    /// response has text, evaluates the response, and asserts that the evaluation result contains no
    /// diagnostics of severity warning or higher.
    /// </summary>
    /// <param name="scenarioRun">The scenario run that supplies the chat client and performs the evaluation.</param>
    /// <param name="prompt">The text of the user message to send.</param>
    /// <returns>The evaluation result for the prompt and the chat client's response.</returns>
    private static async Task<EvaluationResult> EvaluateResponseToPromptAsync(ScenarioRun scenarioRun, string prompt)
    {
        IChatClient chatClient = scenarioRun.ChatConfiguration!.ChatClient;

        ChatMessage promptMessage = prompt.ToUserMessage();
        List<ChatMessage> messages = [promptMessage];

        ChatResponse response = await chatClient.GetResponseAsync(messages, _chatOptions);
        ChatMessage responseMessage = response.Message;
        Assert.NotNull(responseMessage.Text);

        EvaluationResult result = await scenarioRun.EvaluateAsync(promptMessage, responseMessage);

        Assert.False(result.ContainsDiagnostics(d => d.Severity >= EvaluationDiagnosticSeverity.Warning));

        return result;
    }

    /// <summary>
    /// Builds an assertion failure message from a metric's value and the messages of however many
    /// diagnostics it carries (none, one or several).
    /// </summary>
    /// <param name="label">The name of the metric to show at the start of the message.</param>
    /// <param name="metric">The metric whose value and diagnostics are reported.</param>
    /// <returns>The formatted failure message.</returns>
    private static string DescribeMetric(string label, NumericMetric metric)
    {
        string diagnostics = string.Join("; ", metric.Diagnostics.Select(d => d.Message));
        return $"{label} - Value: {metric.Value}; Diagnostics: [{diagnostics}]";
    }

    /// <summary>
    /// Skips the calling test when the test run is not configured; otherwise asserts that both reporting
    /// configurations were created by the static constructor.
    /// </summary>
    [MemberNotNull(nameof(_reportingConfigurationWithReasoning))]
    [MemberNotNull(nameof(_reportingConfigurationWithoutReasoning))]
    private static void SkipIfNotConfigured()
    {
        if (!Settings.Current.Configured)
        {
            throw new SkipTestException("Test is not configured");
        }

        Assert.NotNull(_reportingConfigurationWithReasoning);
        Assert.NotNull(_reportingConfigurationWithoutReasoning);
    }
}
// File: RelevanceTruthAndCompletenessEvaluatorTests.cs
// Project: src\test\Libraries\Microsoft.Extensions.AI.Evaluation.Integration.Tests\Microsoft.Extensions.AI.Evaluation.Integration.Tests.csproj (Microsoft.Extensions.AI.Evaluation.Integration.Tests)