File: CompositeEvaluator.cs
Web Access
Project: src\src\Libraries\Microsoft.Extensions.AI.Evaluation\Microsoft.Extensions.AI.Evaluation.csproj (Microsoft.Extensions.AI.Evaluation)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.AI.Evaluation.Utilities;
using Microsoft.Shared.Diagnostics;
 
namespace Microsoft.Extensions.AI.Evaluation;
 
/// <summary>
/// An <see cref="IEvaluator"/> that composes other <see cref="IEvaluator"/>s to execute multiple (concurrent)
/// evaluations on a supplied response.
/// </summary>
public sealed class CompositeEvaluator : IEvaluator
{
    /// <summary>
    /// Gets the <see cref="EvaluationMetric.Name"/>s of all the <see cref="EvaluationMetric"/>s produced by the
    /// composed <see cref="IEvaluator"/>s.
    /// </summary>
    public IReadOnlyCollection<string> EvaluationMetricNames { get; }

    private readonly IReadOnlyList<IEvaluator> _evaluators;

    /// <summary>
    /// Initializes a new instance of the <see cref="CompositeEvaluator"/> class that composes the supplied
    /// <see cref="IEvaluator"/>s.
    /// </summary>
    /// <param name="evaluators">An array of <see cref="IEvaluator"/>s that are to be composed.</param>
    public CompositeEvaluator(params IEvaluator[] evaluators)
        : this(evaluators as IEnumerable<IEvaluator>)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="CompositeEvaluator"/> class that composes the supplied
    /// <see cref="IEvaluator"/>s.
    /// </summary>
    /// <param name="evaluators">An enumeration of <see cref="IEvaluator"/>s that are to be composed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluators"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException">
    /// Two or more of the supplied <see cref="IEvaluator"/>s advertise the same metric name.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// One of the supplied <see cref="IEvaluator"/>s advertises no metric names.
    /// </exception>
    public CompositeEvaluator(IEnumerable<IEvaluator> evaluators)
    {
        _ = Throw.IfNull(evaluators, nameof(evaluators));

        // Enumerate the caller's sequence exactly once. Validating one enumeration and storing another would
        // allow a deferred or non-repeatable sequence to hand us evaluators that were never validated, leaving
        // EvaluationMetricNames out of sync with the evaluators that actually run.
        IEvaluator[] snapshot = [.. evaluators];

        var metricNames = new HashSet<string>();

        foreach (IEvaluator evaluator in snapshot)
        {
            if (evaluator.EvaluationMetricNames.Count == 0)
            {
                throw CreateNoMetricNamesException(evaluator, innerException: null);
            }

            foreach (string metricName in evaluator.EvaluationMetricNames)
            {
                // HashSet.Add returns false when the name is already present, i.e. when another composed
                // evaluator (or this same one) has already claimed the metric.
                if (!metricNames.Add(metricName))
                {
                    Throw.ArgumentException(nameof(evaluators), $"Cannot add multiple evaluators for '{metricName}'.");
                }
            }
        }

        EvaluationMetricNames = metricNames;
        _evaluators = snapshot;
    }

    /// <summary>
    /// Evaluates the supplied <paramref name="modelResponse"/> and returns an <see cref="EvaluationResult"/>
    /// containing one or more <see cref="EvaluationMetric"/>s.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The <see cref="EvaluationMetric.Name"/>s of the <see cref="EvaluationMetric"/>s contained in the returned
    /// <see cref="EvaluationResult"/> should match <see cref="EvaluationMetricNames"/>.
    /// </para>
    /// <para>
    /// Also note that <paramref name="chatConfiguration"/> must not be omitted if one or more composed
    /// <see cref="IEvaluator"/>s use an AI model to perform evaluation.
    /// </para>
    /// <para>
    /// An exception thrown by a composed <see cref="IEvaluator"/> does not fail the overall evaluation. Instead,
    /// every metric advertised by that <see cref="IEvaluator"/> is included in the result with an error
    /// diagnostic that contains the exception details.
    /// </para>
    /// </remarks>
    /// <param name="messages">
    /// The conversation history including the request that produced the supplied <paramref name="modelResponse"/>.
    /// </param>
    /// <param name="modelResponse">The response that is to be evaluated.</param>
    /// <param name="chatConfiguration">
    /// A <see cref="ChatConfiguration"/> that specifies the <see cref="IChatClient"/> and the
    /// <see cref="IEvaluationTokenCounter"/> that should be used if one or more composed <see cref="IEvaluator"/>s use
    /// an AI model to perform evaluation.
    /// </param>
    /// <param name="additionalContext">
    /// Additional contextual information (beyond that which is available in <paramref name="messages"/>) that composed
    /// <see cref="IEvaluator"/>s may need to accurately evaluate the supplied <paramref name="modelResponse"/>.
    /// </param>
    /// <param name="cancellationToken">
    /// A <see cref="CancellationToken"/> that can cancel the evaluation operation.
    /// </param>
    /// <returns>An <see cref="EvaluationResult"/> containing one or more <see cref="EvaluationMetric"/>s.</returns>
    public async ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatMessage modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        var metrics = new List<EvaluationMetric>();

        IAsyncEnumerable<EvaluationResult> resultsStream =
            EvaluateAndStreamResultsAsync(
                messages,
                modelResponse,
                chatConfiguration,
                additionalContext,
                cancellationToken);

        // Flatten the per-evaluator results into a single list of metrics as they arrive.
        await foreach (EvaluationResult result in resultsStream.ConfigureAwait(false))
        {
            metrics.AddRange(result.Metrics.Values);
        }

        return new EvaluationResult(metrics);
    }

    /// <summary>
    /// Starts an evaluation on every composed <see cref="IEvaluator"/> and streams back one
    /// <see cref="EvaluationResult"/> per evaluator (requested with <c>preserveOrder: false</c>).
    /// </summary>
    private IAsyncEnumerable<EvaluationResult> EvaluateAndStreamResultsAsync(
        IEnumerable<ChatMessage> messages,
        ChatMessage modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Runs a single evaluator and converts any failure into error diagnostics on its advertised metrics.
        async ValueTask<EvaluationResult> EvaluateSafelyAsync(IEvaluator e)
        {
            try
            {
                return await e.EvaluateAsync(
                    messages,
                    modelResponse,
                    chatConfiguration,
                    additionalContext,
                    cancellationToken).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // Without advertised metric names there is nothing to attach the error to. Surface the
                // contract violation, but keep the original failure reachable through InnerException.
                if (e.EvaluationMetricNames.Count == 0)
                {
                    throw CreateNoMetricNamesException(e, innerException: ex);
                }

                string message = ex.ToString();
                var result = new EvaluationResult();

                foreach (string metricName in e.EvaluationMetricNames)
                {
                    var metric = new EvaluationMetric(metricName);
                    metric.AddDiagnostic(EvaluationDiagnostic.Error(message));
                    result.Metrics.Add(metric.Name, metric);
                }

                return result;
            }
        }

        IEnumerable<ValueTask<EvaluationResult>> concurrentTasks = _evaluators.Select(EvaluateSafelyAsync);
        return concurrentTasks.StreamResultsAsync(preserveOrder: false, cancellationToken);
    }

    /// <summary>
    /// Creates the exception that reports an <see cref="IEvaluator"/> whose
    /// <see cref="IEvaluator.EvaluationMetricNames"/> collection is empty.
    /// </summary>
    private static InvalidOperationException CreateNoMetricNamesException(
        IEvaluator evaluator,
        Exception? innerException)
    {
#pragma warning disable S103 // Lines should not be too long
        return new InvalidOperationException(
            $"The '{nameof(evaluator.EvaluationMetricNames)}' property on '{evaluator.GetType().FullName}' returned an empty collection. An evaluator must advertise the names of the metrics that it supports.",
            innerException);
#pragma warning restore S103
    }
}