File: AgentQualityEvaluatorTests.cs
Web Access
Project: src\test\Libraries\Microsoft.Extensions.AI.Evaluation.Integration.Tests\Microsoft.Extensions.AI.Evaluation.Integration.Tests.csproj (Microsoft.Extensions.AI.Evaluation.Integration.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.AI.Evaluation.Quality;
using Microsoft.Extensions.AI.Evaluation.Reporting;
using Microsoft.Extensions.AI.Evaluation.Reporting.Storage;
using Microsoft.Extensions.AI.Evaluation.Tests;
using Microsoft.TestUtilities;
using Xunit;
 
namespace Microsoft.Extensions.AI.Evaluation.Integration.Tests;
 
/// <summary>
/// Integration tests that run <see cref="ToolCallAccuracyEvaluator"/>, <see cref="TaskAdherenceEvaluator"/> and
/// <see cref="IntentResolutionEvaluator"/> against responses obtained from the chat client created by
/// <c>Setup.CreateChatConfiguration()</c>, with and without tool definitions supplied as additional context.
/// </summary>
/// <remarks>
/// All shared state is created once in the static constructor, and only when <c>Settings.Current.Configured</c>
/// is <see langword="true"/>. Every test starts with <see cref="SkipIfNotConfigured"/>, which skips the test
/// otherwise.
/// </remarks>
[Experimental("AIEVAL001")]
public class AgentQualityEvaluatorTests
{
    /// <summary>Chat options used for the conversation that has no tools attached.</summary>
    private static readonly ChatOptions? _chatOptions;

    /// <summary>
    /// Chat options that expose <see cref="GetOrders"/> and <see cref="GetOrderStatus"/> as tools. The same tool
    /// list is handed to the evaluator contexts as tool definitions.
    /// </summary>
    private static readonly ChatOptions? _chatOptionsWithTools;

    /// <summary>Reporting configuration that runs the task adherence and intent resolution evaluators.</summary>
    private static readonly ReportingConfiguration? _agentQualityReportingConfiguration;

    /// <summary>
    /// Reporting configuration that runs the tool call accuracy, task adherence and intent resolution evaluators.
    /// </summary>
    private static readonly ReportingConfiguration? _needsContextReportingConfiguration;

    static AgentQualityEvaluatorTests()
    {
        // When the environment is not configured every field stays null; SkipIfNotConfigured() then skips the
        // tests before any of the fields is dereferenced.
        if (!Settings.Current.Configured)
        {
            return;
        }

        _chatOptions =
            new ChatOptions
            {
                Temperature = 0.0f,
                ResponseFormat = ChatResponseFormat.Text
            };

        _chatOptionsWithTools =
            new ChatOptions
            {
                Temperature = 0.0f,
                ResponseFormat = ChatResponseFormat.Text,
                Tools = [AIFunctionFactory.Create(GetOrders), AIFunctionFactory.Create(GetOrderStatus)]
            };

        ChatConfiguration chatConfiguration = Setup.CreateChatConfiguration();
        IChatClient chatClient = chatConfiguration.ChatClient;
        ChatClientMetadata? clientMetadata = chatClient.GetService<ChatClientMetadata>();

        // Both reporting configurations share a client that is wrapped with function invocation.
        IChatClient chatClientWithToolCalling = chatClient.AsBuilder().UseFunctionInvocation().Build();
        ChatConfiguration chatConfigurationWithToolCalling = new ChatConfiguration(chatClientWithToolCalling);

        // Format the date with the invariant culture so that the tag text does not depend on the culture of
        // the machine that runs the tests.
        string today = DateTime.UtcNow.ToString("dddd, dd MMMM yyyy", CultureInfo.InvariantCulture);

        string version = $"Product Version: {Constants.Version}";
        string date = $"Date: {today}";
        string projectName = "Project: Integration Tests";
        string testClass = $"Test Class: {nameof(AgentQualityEvaluatorTests)}";
        string provider = $"Model Provider: {clientMetadata?.ProviderName ?? "Unknown"}";
        string model = $"Model: {clientMetadata?.DefaultModelId ?? "Unknown"}";
        string temperature = $"Temperature: {_chatOptionsWithTools.Temperature}";
        string usesContext = "Feature: Context";

        IEvaluator toolCallAccuracyEvaluator = new ToolCallAccuracyEvaluator();
        IEvaluator taskAdherenceEvaluator = new TaskAdherenceEvaluator();
        IEvaluator intentResolutionEvaluator = new IntentResolutionEvaluator();

        _agentQualityReportingConfiguration =
            DiskBasedReportingConfiguration.Create(
                storageRootPath: Settings.Current.StorageRootPath,
                evaluators: [taskAdherenceEvaluator, intentResolutionEvaluator],
                chatConfiguration: chatConfigurationWithToolCalling,
                executionName: Constants.Version,
                tags: [version, date, projectName, testClass, provider, model, temperature]);

        _needsContextReportingConfiguration =
            DiskBasedReportingConfiguration.Create(
                storageRootPath: Settings.Current.StorageRootPath,
                evaluators: [toolCallAccuracyEvaluator, taskAdherenceEvaluator, intentResolutionEvaluator],
                chatConfiguration: chatConfigurationWithToolCalling,
                executionName: Constants.Version,
                tags: [version, date, projectName, testClass, provider, model, temperature, usesContext]);
    }

    /// <summary>
    /// Evaluates a conversation that has no tools, without passing any additional context, and expects two
    /// numeric metrics and no diagnostics of severity Warning or higher.
    /// </summary>
    [ConditionalFact]
    public async Task ToolDefinitionsAreNotNeededAndNotPassed()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _agentQualityReportingConfiguration.CreateScenarioRunAsync(
                scenarioName: GetScenarioName(nameof(ToolDefinitionsAreNotNeededAndNotPassed)));

        (IEnumerable<ChatMessage> messages, ChatResponse response) =
            await GetConversationWithoutToolsAsync(scenarioRun.ChatConfiguration!.ChatClient);

        EvaluationResult result = await scenarioRun.EvaluateAsync(messages, response);

        AssertNoWarningsOrErrors(result);

        Assert.Equal(2, result.Metrics.Count);
        Assert.True(result.TryGet(TaskAdherenceEvaluator.TaskAdherenceMetricName, out NumericMetric? _));
        Assert.True(result.TryGet(IntentResolutionEvaluator.IntentResolutionMetricName, out NumericMetric? _));
    }

    /// <summary>
    /// Evaluates a conversation that has no tools while still passing tool definitions as additional context,
    /// and expects two numeric metrics and no diagnostics of severity Warning or higher.
    /// </summary>
    [ConditionalFact]
    public async Task ToolDefinitionsAreNotNeededButPassed()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _agentQualityReportingConfiguration.CreateScenarioRunAsync(
                scenarioName: GetScenarioName(nameof(ToolDefinitionsAreNotNeededButPassed)));

        (IEnumerable<ChatMessage> messages, ChatResponse response) =
            await GetConversationWithoutToolsAsync(scenarioRun.ChatConfiguration!.ChatClient);

        var toolDefinitionsForTaskAdherenceEvaluator =
            new TaskAdherenceEvaluatorContext(toolDefinitions: _chatOptionsWithTools.Tools!);

        var toolDefinitionsForIntentResolution =
            new IntentResolutionEvaluatorContext(toolDefinitions: _chatOptionsWithTools.Tools!);

        EvaluationResult result =
            await scenarioRun.EvaluateAsync(
                messages,
                response,
                additionalContext: [toolDefinitionsForTaskAdherenceEvaluator, toolDefinitionsForIntentResolution]);

        AssertNoWarningsOrErrors(result);

        Assert.Equal(2, result.Metrics.Count);
        Assert.True(result.TryGet(TaskAdherenceEvaluator.TaskAdherenceMetricName, out NumericMetric? _));
        Assert.True(result.TryGet(IntentResolutionEvaluator.IntentResolutionMetricName, out NumericMetric? _));
    }

    /// <summary>
    /// Evaluates a conversation that uses tools without passing tool definitions as additional context, and
    /// expects three metrics that each carry at least one diagnostic of severity Error.
    /// </summary>
    [ConditionalFact]
    public async Task ToolDefinitionsAreNeededButNotPassed()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _needsContextReportingConfiguration.CreateScenarioRunAsync(
                scenarioName: GetScenarioName(nameof(ToolDefinitionsAreNeededButNotPassed)));

        (IEnumerable<ChatMessage> messages, ChatResponse response) =
            await GetConversationWithToolsAsync(scenarioRun.ChatConfiguration!.ChatClient);

        EvaluationResult result = await scenarioRun.EvaluateAsync(messages, response);

        Assert.True(
            result.Metrics.Values.All(m => m.ContainsDiagnostics(d => d.Severity is EvaluationDiagnosticSeverity.Error)),
            FormatDiagnostics(result));

        Assert.Equal(3, result.Metrics.Count);
        Assert.True(result.TryGet(ToolCallAccuracyEvaluator.ToolCallAccuracyMetricName, out BooleanMetric? _));
        Assert.True(result.TryGet(TaskAdherenceEvaluator.TaskAdherenceMetricName, out NumericMetric? _));
        Assert.True(result.TryGet(IntentResolutionEvaluator.IntentResolutionMetricName, out NumericMetric? _));
    }

    /// <summary>
    /// Evaluates a conversation that uses tools while passing tool definitions to all three evaluators, and
    /// expects three metrics and no diagnostics of severity Warning or higher.
    /// </summary>
    [ConditionalFact]
    public async Task ToolDefinitionsAreNeededAndPassed()
    {
        SkipIfNotConfigured();

        await using ScenarioRun scenarioRun =
            await _needsContextReportingConfiguration.CreateScenarioRunAsync(
                scenarioName: GetScenarioName(nameof(ToolDefinitionsAreNeededAndPassed)));

        (IEnumerable<ChatMessage> messages, ChatResponse response) =
            await GetConversationWithToolsAsync(scenarioRun.ChatConfiguration!.ChatClient);

        var toolDefinitionsForToolCallAccuracyEvaluator =
            new ToolCallAccuracyEvaluatorContext(toolDefinitions: _chatOptionsWithTools.Tools!);

        var toolDefinitionsForTaskAdherenceEvaluator =
            new TaskAdherenceEvaluatorContext(toolDefinitions: _chatOptionsWithTools.Tools!);

        var toolDefinitionsForIntentResolutionEvaluator =
            new IntentResolutionEvaluatorContext(toolDefinitions: _chatOptionsWithTools.Tools!);

        EvaluationResult result =
            await scenarioRun.EvaluateAsync(
                messages,
                response,
                additionalContext: [
                    toolDefinitionsForToolCallAccuracyEvaluator,
                    toolDefinitionsForTaskAdherenceEvaluator,
                    toolDefinitionsForIntentResolutionEvaluator]);

        AssertNoWarningsOrErrors(result);

        Assert.Equal(3, result.Metrics.Count);
        Assert.True(result.TryGet(ToolCallAccuracyEvaluator.ToolCallAccuracyMetricName, out BooleanMetric? _));
        Assert.True(result.TryGet(TaskAdherenceEvaluator.TaskAdherenceMetricName, out NumericMetric? _));
        Assert.True(result.TryGet(IntentResolutionEvaluator.IntentResolutionMetricName, out NumericMetric? _));
    }

    /// <summary>Builds the fully qualified scenario name under which a test's results are reported.</summary>
    /// <param name="testName">Name of the test method.</param>
    /// <returns>The namespace, class name and <paramref name="testName"/> joined with dots.</returns>
    private static string GetScenarioName(string testName)
        => $"Microsoft.Extensions.AI.Evaluation.Integration.Tests.{nameof(AgentQualityEvaluatorTests)}.{testName}";

    /// <summary>
    /// Concatenates the string form of every diagnostic attached to any metric in <paramref name="result"/>,
    /// separated by blank lines, for use as an assertion failure message.
    /// </summary>
    /// <param name="result">The evaluation result whose diagnostics are rendered.</param>
    /// <returns>The joined diagnostics, or an empty string when there are none.</returns>
    private static string FormatDiagnostics(EvaluationResult result)
        => string.Join(
            "\r\n\r\n",
            result.Metrics.Values.SelectMany(m => m.Diagnostics ?? []).Select(d => d.ToString()));

    /// <summary>
    /// Fails the test, reporting all diagnostics, when <paramref name="result"/> contains a diagnostic of
    /// severity Warning or higher.
    /// </summary>
    /// <param name="result">The evaluation result to inspect.</param>
    private static void AssertNoWarningsOrErrors(EvaluationResult result)
    {
        Assert.False(
            result.ContainsDiagnostics(d => d.Severity >= EvaluationDiagnosticSeverity.Warning),
            FormatDiagnostics(result));
    }

    /// <summary>
    /// Sends a short conversation that does not involve tools to <paramref name="chatClient"/>, using
    /// <see cref="_chatOptions"/>.
    /// </summary>
    /// <param name="chatClient">The chat client that produces the response.</param>
    /// <returns>The messages that were sent and the response that was received.</returns>
    private static async Task<(IEnumerable<ChatMessage> messages, ChatResponse response)>
        GetConversationWithoutToolsAsync(IChatClient chatClient)
    {
        List<ChatMessage> messages =
            [
                "You are a friendly and helpful assistant that can answer questions.".ToSystemMessage(),
                "Hi, could you help me figure out the correct pronunciation for the word rendezvous?".ToUserMessage()
            ];

        ChatResponse response = await chatClient.GetResponseAsync(messages, _chatOptions);
        return (messages, response);
    }

    /// <summary>
    /// Sends a customer service conversation to <paramref name="chatClient"/>, using
    /// <see cref="_chatOptionsWithTools"/> so that the order lookup tools are available.
    /// </summary>
    /// <param name="chatClient">The chat client that produces the response.</param>
    /// <returns>The messages that were sent and the response that was received.</returns>
    private static async Task<(IEnumerable<ChatMessage> messages, ChatResponse response)>
        GetConversationWithToolsAsync(IChatClient chatClient)
    {
        List<ChatMessage> messages =
            [
                "You are a friendly and helpful customer service agent.".ToSystemMessage(),
                "Hi, I need help with the last 2 orders on my account #888. Could you please update me on their status?".ToUserMessage()
            ];

        ChatResponse response = await chatClient.GetResponseAsync(messages, _chatOptionsWithTools);
        return (messages, response);
    }

    /// <summary>Tool function that returns the two canned orders of account 888.</summary>
    /// <param name="accountNumber">The account number to look up; only 888 is known.</param>
    /// <returns>Orders 123 and 124.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="accountNumber"/> is not 888.
    /// </exception>
    [Description("Gets the orders for a customer")]
    private static IReadOnlyList<Order> GetOrders(int accountNumber)
    {
        if (accountNumber != 888)
        {
            throw new InvalidOperationException($"Account number {accountNumber} is not valid.");
        }

        return [new Order(123), new Order(124)];
    }

    /// <summary>Tool function that returns a canned delivery status for order 123 or 124.</summary>
    /// <param name="orderId">The order to look up.</param>
    /// <returns>
    /// "shipped" with delivery expected in one day for order 123, or "delayed" with delivery expected in ten
    /// days for order 124.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="orderId"/> is neither 123 nor 124.
    /// </exception>
    [Description("Gets the delivery status of an order")]
    private static OrderStatus GetOrderStatus(int orderId)
        => orderId switch
        {
            123 => new OrderStatus(orderId, "shipped", DateTime.Now.AddDays(1)),
            124 => new OrderStatus(orderId, "delayed", DateTime.Now.AddDays(10)),
            _ => throw new InvalidOperationException($"Order with ID {orderId} not found."),
        };

    /// <summary>An order as returned by <see cref="GetOrders"/>.</summary>
    private record Order(int OrderId);

    /// <summary>The delivery status of an order as returned by <see cref="GetOrderStatus"/>.</summary>
    private record OrderStatus(int OrderId, string Status, DateTime ExpectedDelivery);

    /// <summary>
    /// Skips the calling test when the test settings are not configured, and otherwise asserts that the shared
    /// state created by the static constructor is present.
    /// </summary>
    /// <remarks>
    /// The <see cref="MemberNotNullAttribute"/> annotations let callers dereference the listed fields without
    /// null checks after this method returns.
    /// </remarks>
    /// <exception cref="SkipTestException"><c>Settings.Current.Configured</c> is <see langword="false"/>.</exception>
    [MemberNotNull(nameof(_chatOptionsWithTools))]
    [MemberNotNull(nameof(_agentQualityReportingConfiguration))]
    [MemberNotNull(nameof(_needsContextReportingConfiguration))]
    private static void SkipIfNotConfigured()
    {
        if (!Settings.Current.Configured)
        {
            throw new SkipTestException("Test is not configured");
        }

        Assert.NotNull(_chatOptionsWithTools);
        Assert.NotNull(_agentQualityReportingConfiguration);
        Assert.NotNull(_needsContextReportingConfiguration);
    }
}