File: Evaluators\Metrics\BinaryClassificationMetrics.cs
Project: src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using Microsoft.ML.Runtime;
 
namespace Microsoft.ML.Data
{
    /// <summary>
    /// Evaluation results for binary classifiers, excluding probabilistic metrics.
    /// </summary>
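    /// <remarks>
    /// A minimal usage sketch, assuming an already trained model and the
    /// <c>BinaryClassification</c> catalog's <c>EvaluateNonCalibrated</c> method;
    /// <c>mlContext</c>, <c>model</c> and <c>testData</c> are illustrative names, not defined in this file:
    /// <code>
    /// // Score the test set with the trained model, then evaluate the non-probabilistic metrics.
    /// IDataView predictions = model.Transform(testData);
    /// BinaryClassificationMetrics metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(predictions);
    /// Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F4}, F1: {metrics.F1Score:F4}");
    /// </code>
    /// </remarks>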
    public class BinaryClassificationMetrics
    {
        /// <summary>
        /// Gets the area under the ROC curve.
        /// </summary>
        /// <remarks>
        /// The area under the ROC curve is equal to the probability that the classifier ranks
        /// a randomly chosen positive instance higher than a randomly chosen negative one
        /// (assuming 'positive' ranks higher than 'negative'). Area under the ROC curve ranges between
        /// 0 and 1, with a value closer to 1 indicating a better model.
        ///
        /// <a href="https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve">Area Under ROC Curve</a>
        /// </remarks>
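        /// <example>
        /// A minimal sketch of that pairwise-ranking interpretation (illustrative only, not how the
        /// evaluator actually computes the metric; ties are counted as one half):
        /// <code>
        /// static double PairwiseAuc(double[] positiveScores, double[] negativeScores)
        /// {
        ///     double wins = 0;
        ///     foreach (double p in positiveScores)
        ///         foreach (double n in negativeScores)
        ///             wins += p > n ? 1 : p == n ? 0.5 : 0;
        ///     return wins / ((double)positiveScores.Length * negativeScores.Length);
        /// }
        /// </code>
        /// </example>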
        public double AreaUnderRocCurve { get; }
 
        /// <summary>
        /// Gets the accuracy of a classifier, which is the proportion of correct predictions in the test set.
        /// </summary>
        public double Accuracy { get; }
 
        /// <summary>
        /// Gets the positive precision of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive predictions (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of instances predicted as positive).
        /// </summary>
        public double PositivePrecision { get; }
 
        /// <summary>
        /// Gets the positive recall of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive instances (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of positive instances).
        /// </summary>
        public double PositiveRecall { get; }
 
        /// <summary>
        /// Gets the negative precision of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative predictions (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of instances predicted as negative).
        /// </summary>
        public double NegativePrecision { get; }
 
        /// <summary>
        /// Gets the negative recall of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative instances (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of negative instances).
        /// </summary>
        public double NegativeRecall { get; }
 
        /// <summary>
        /// Gets the F1 score of the classifier, which is a measure of the classifier's quality considering
        /// both precision and recall.
        /// </summary>
        /// <remarks>
        /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall).
        /// F1 ranges between 0 and 1, with a value of 1 indicating perfect precision and recall.
        /// </remarks>
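        /// <example>
        /// For example, with hypothetical values of precision 0.8 and recall 0.5,
        /// F1 = 2 * 0.8 * 0.5 / (0.8 + 0.5), which is approximately 0.615.
        /// </example>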
        public double F1Score { get; }
 
        /// <summary>
        /// Gets the area under the precision/recall curve of the classifier.
        /// </summary>
        /// <remarks>
        /// The area under the precision/recall curve is a single number summary of the information in the
        /// precision/recall curve. It is increasingly used in the machine learning community, particularly
        /// for imbalanced datasets where one class is observed more frequently than the other. On these
        /// datasets, <see cref="AreaUnderPrecisionRecallCurve"/> can highlight performance differences that
        /// are lost with <see cref="AreaUnderRocCurve"/>.
        /// </remarks>
        public double AreaUnderPrecisionRecallCurve { get; }
 
        /// <summary>
        /// The <a href="https://en.wikipedia.org/wiki/Confusion_matrix">confusion matrix</a> giving the counts of the
        /// true positives, true negatives, false positives and false negatives for the two classes of data.
        /// </summary>
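        /// <remarks>
        /// As a purely illustrative example of how these counts relate to the metrics above: with
        /// 40 true positives, 10 false positives, 45 true negatives and 5 false negatives,
        /// accuracy = (40 + 45) / 100 = 0.85, positive precision = 40 / (40 + 10) = 0.8, and
        /// positive recall = 40 / (40 + 5), approximately 0.89.
        /// </remarks>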
        public ConfusionMatrix ConfusionMatrix { get; }
 
        /// <summary>
        /// Reads a value of type <typeparamref name="T"/> from the column named
        /// <paramref name="name"/> in the given row, throwing via <paramref name="ectx"/>
        /// if the row's schema does not contain that column.
        /// </summary>
        private protected static T Fetch<T>(IExceptionContext ectx, DataViewRow row, string name)
        {
            var column = row.Schema.GetColumnOrNull(name);
            if (!column.HasValue)
                throw ectx.Except($"Could not find column '{name}'");
            T val = default;
            row.GetGetter<T>(column.Value)(ref val);
            return val;
        }
 
        internal BinaryClassificationMetrics(IHost host, DataViewRow overallResult, IDataView confusionMatrix)
        {
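            // Local shorthand over the static Fetch<T> helper: every overall metric is read as a
            // double-valued column of the overall-results row.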
            double Fetch(string name) => Fetch<double>(host, overallResult, name);
            AreaUnderRocCurve = Fetch(BinaryClassifierEvaluator.Auc);
            Accuracy = Fetch(BinaryClassifierEvaluator.Accuracy);
            PositivePrecision = Fetch(BinaryClassifierEvaluator.PosPrecName);
            PositiveRecall = Fetch(BinaryClassifierEvaluator.PosRecallName);
            NegativePrecision = Fetch(BinaryClassifierEvaluator.NegPrecName);
            NegativeRecall = Fetch(BinaryClassifierEvaluator.NegRecallName);
            F1Score = Fetch(BinaryClassifierEvaluator.F1);
            AreaUnderPrecisionRecallCurve = Fetch(BinaryClassifierEvaluator.AuPrc);
            ConfusionMatrix = MetricWriter.GetConfusionMatrix(host, confusionMatrix);
        }
 
        [BestFriend]
        internal BinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall,
            double negativePrecision, double negativeRecall, double f1Score, double auprc)
        {
            AreaUnderRocCurve = auc;
            Accuracy = accuracy;
            PositivePrecision = positivePrecision;
            PositiveRecall = positiveRecall;
            NegativePrecision = negativePrecision;
            NegativeRecall = negativeRecall;
            F1Score = f1Score;
            AreaUnderPrecisionRecallCurve = auprc;
        }
 
        internal BinaryClassificationMetrics(double auc, double accuracy, double positivePrecision, double positiveRecall,
            double negativePrecision, double negativeRecall, double f1Score, double auprc, ConfusionMatrix confusionMatrix)
            : this(auc, accuracy, positivePrecision, positiveRecall, negativePrecision, negativeRecall, f1Score, auprc)
        {
            ConfusionMatrix = confusionMatrix;
        }
    }
}