File: Evaluators\Metrics\CalibratedBinaryClassificationMetrics.cs
Project: src\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using Microsoft.ML.Runtime;
 
namespace Microsoft.ML.Data
{
    /// <summary>
    /// Evaluation results for binary classifiers, including probabilistic metrics.
    /// </summary>
    public sealed class CalibratedBinaryClassificationMetrics : BinaryClassificationMetrics
    {
        /// <summary>
        /// Gets the log-loss of the classifier. Log-loss measures the performance of a classifier
        /// with respect to how much the predicted probabilities diverge from the true class label. Lower
        /// log-loss indicates a better model. A perfect model, which predicts a probability of 1 for the
        /// true class, will have a log-loss of 0.
        /// </summary>
        /// <remarks>
        /// <format type="text/markdown"><![CDATA[
        /// The log-loss metric is computed as follows:
        /// $LogLoss = - \frac{1}{m} \sum_{i = 1}^{m} \ln(p_i)$
        /// where $m$ is the number of instances in the test set and
        /// $p_i$ is the probability assigned to the true class: the probability returned by the classifier
        /// if the instance belongs to class 1, and one minus that probability if the instance belongs to class 0.
        /// ]]>
        /// </format>
        /// </remarks>
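        /// <example>
        /// <format type="text/markdown"><![CDATA[
        /// A minimal sketch of the formula above, assuming hypothetical `labels` and `probabilities`
        /// arrays; an illustration only, not the evaluator's internal implementation:
        /// ```csharp
        /// // labels[i]: true class of instance i; probabilities[i]: predicted probability of class 1.
        /// static double ComputeLogLoss(bool[] labels, double[] probabilities)
        /// {
        ///     double sum = 0;
        ///     for (int i = 0; i < labels.Length; i++)
        ///     {
        ///         // p_i: the probability the classifier assigned to the true class.
        ///         double p = labels[i] ? probabilities[i] : 1 - probabilities[i];
        ///         sum += Math.Log(p);
        ///     }
        ///     return -sum / labels.Length;
        /// }
        /// ```
        /// ]]>
        /// </format>
        /// </example>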
        public double LogLoss { get; }
 
        /// <summary>
        /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain, RIG)
        /// of the classifier. It measures how much the classifier improves on a baseline that predicts the prior
        /// probability for every example. Log-loss reduction closer to 1 indicates a better model.
        /// </summary>
        /// <remarks>
        /// <format type="text/markdown"><![CDATA[
        /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example:
        /// $LogLossReduction = \frac{LogLoss(prior) - LogLoss(classifier)}{LogLoss(prior)}$
        /// This metric can be interpreted as the advantage of the classifier over this prior-based baseline.
        /// For example, if the RIG equals 0.2, it can be interpreted as "the probability of a correct prediction is
        /// 20% better than that of the baseline".
        /// ]]>
        /// </format>
        /// </remarks>
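        /// <example>
        /// <format type="text/markdown"><![CDATA[
        /// A minimal sketch of the formula above, assuming hypothetical log-loss values; an illustration
        /// only, not the evaluator's internal implementation:
        /// ```csharp
        /// // priorLogLoss: log-loss of a baseline that predicts the prior for every example.
        /// // classifierLogLoss: log-loss of the classifier under evaluation.
        /// static double ComputeLogLossReduction(double priorLogLoss, double classifierLogLoss)
        ///     => (priorLogLoss - classifierLogLoss) / priorLogLoss;
        /// ```
        /// ]]>
        /// </format>
        /// </example>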
        public double LogLossReduction { get; }
 
        /// <summary>
        /// Gets the test-set entropy, which is the prior log-loss based on the proportion of positive
        /// and negative instances in the test set. A <see cref="LogLoss"/> lower than the entropy
        /// indicates that the classifier does better than simply predicting the proportion of positive
        /// instances as the probability for each instance.
        /// </summary>
        /// <remarks>
        /// <format type="text/markdown"><![CDATA[
        /// $Entropy = -p \log_2(p) - (1 - p) \log_2(1 - p)$, where $p$ is the proportion of the positive class
        /// in the test set.
        /// ]]>
        /// </format>
        /// </remarks>
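        /// <example>
        /// <format type="text/markdown"><![CDATA[
        /// A minimal sketch of the formula above, assuming a hypothetical proportion `p` of positive
        /// instances; an illustration only, not the evaluator's internal implementation:
        /// ```csharp
        /// // p: proportion of positive instances in the test set, with 0 < p < 1.
        /// static double ComputeEntropy(double p)
        ///     => -p * Math.Log(p, 2) - (1 - p) * Math.Log(1 - p, 2);
        /// ```
        /// ]]>
        /// </format>
        /// </example>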
        public double Entropy { get; }
 
        internal CalibratedBinaryClassificationMetrics(IHost host, DataViewRow overallResult, IDataView confusionMatrix)
            : base(host, overallResult, confusionMatrix)
        {
            double Fetch(string name) => Fetch<double>(host, overallResult, name);
            LogLoss = Fetch(BinaryClassifierEvaluator.LogLoss);
            LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction);
            Entropy = Fetch(BinaryClassifierEvaluator.Entropy);
        }
    }
}