// File: Utils\LossFunctions.cs
// Project: src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using Microsoft.ML;
using Microsoft.ML.CommandLine;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers;
 
[assembly: LoadableClass(LogLoss.Summary, typeof(LogLoss), null, typeof(SignatureClassificationLoss),
    "Log Loss", "LogLoss", "Logistic", "CrossEntropy")]
 
[assembly: LoadableClass(HingeLoss.Summary, typeof(HingeLoss), typeof(HingeLoss.Options), typeof(SignatureClassificationLoss),
    "Hinge Loss", "HingeLoss", "Hinge")]
 
[assembly: LoadableClass(SmoothedHingeLoss.Summary, typeof(SmoothedHingeLoss), typeof(SmoothedHingeLoss.Options), typeof(SignatureClassificationLoss),
    "Smoothed Hinge Loss", "SmoothedHingeLoss", "SmoothedHinge")]
 
[assembly: LoadableClass(ExpLoss.Summary, typeof(ExpLoss), typeof(ExpLoss.Options), typeof(SignatureClassificationLoss),
    "Exponential Loss", "ExpLoss", "Exp")]
 
[assembly: LoadableClass(SquaredLoss.Summary, typeof(SquaredLoss), null, typeof(SignatureRegressionLoss),
    "Squared Loss", "SquaredLoss", "L2")]
 
[assembly: LoadableClass(PoissonLoss.Summary, typeof(PoissonLoss), null, typeof(SignatureRegressionLoss),
    "Poisson Loss", "PoissonLoss", "Poisson")]
 
[assembly: LoadableClass(TweedieLoss.Summary, typeof(TweedieLoss), typeof(TweedieLoss.Options), typeof(SignatureRegressionLoss),
    "Tweedie Loss", "TweedieLoss", "Tweedie", "Tw")]
 
[assembly: EntryPointModule(typeof(ExpLoss.Options))]
[assembly: EntryPointModule(typeof(LogLossFactory))]
[assembly: EntryPointModule(typeof(HingeLoss.Options))]
[assembly: EntryPointModule(typeof(PoissonLossFactory))]
[assembly: EntryPointModule(typeof(SmoothedHingeLoss.Options))]
[assembly: EntryPointModule(typeof(SquaredLossFactory))]
[assembly: EntryPointModule(typeof(TweedieLoss.Options))]
 
namespace Microsoft.ML.Trainers
{
    /// <summary>
    /// A scalar loss that may know the closed-form solution to the optimal dual update for SDCA.
    /// Ref: Sec(6.2) of http://jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf
    /// </summary>
    public interface ISupportSdcaLoss : IScalarLoss
    {
        /// <summary>
        /// Helps the optimizer pre-compute the invariant that will be used later in <see cref="DualUpdate"/>.
        /// scaledFeaturesNormSquared = instanceWeight * (|x|^2 + 1) / (lambda * n), where
        ///  - x is the features vector
        ///  - lambda is the L2 const
        ///  - n is the number of instances
        ///    Note that if we are going to implement Online-DCA then n = t and varies.
        /// </summary>
        float ComputeDualUpdateInvariant(float scaledFeaturesNormSquared);

        /// <summary>
        /// Compute the dual update (\Delta\alpha_i) in SDCA.
        /// </summary>
        /// <param name="output">The predicted score for the instance.</param>
        /// <param name="label">The label of the instance.</param>
        /// <param name="dual">The dual variable for the instance.</param>
        /// <param name="invariant">The cached invariant computed by <see cref="ComputeDualUpdateInvariant"/>.</param>
        /// <param name="maxNumThreads">The maximum number of concurrent threads; implementations in this
        /// file damp large updates by this factor when running multi-threaded.</param>
        float DualUpdate(float output, float label, float dual, float invariant, int maxNumThreads);

        /// <summary>
        /// The dual loss function for a training example.
        /// If f(x) denotes the loss function on an individual training example,
        /// then this function returns -f*(-x*), where f*(x*) is the Fenchel conjugate
        /// of f(x).
        /// </summary>
        /// <param name="label">The label of the example.</param>
        /// <param name="dual">The dual variable of the example.</param>
        Double DualLoss(float label, float dual);
    }
 
    /// <summary>
    /// A classification loss usable by the SDCA trainer (combines SDCA dual support with classification semantics).
    /// </summary>
    public interface ISupportSdcaClassificationLoss : ISupportSdcaLoss, IClassificationLoss
    {
    }
 
    /// <summary>
    /// A regression loss usable by the SDCA trainer (combines SDCA dual support with regression semantics).
    /// </summary>
    public interface ISupportSdcaRegressionLoss : ISupportSdcaLoss, IRegressionLoss
    {
    }
 
    /// <summary>
    /// Component factory producing <see cref="ISupportSdcaClassificationLoss"/> instances.
    /// </summary>
    [TlcModule.ComponentKind("SDCAClassificationLossFunction")]
    [BestFriend]
    internal interface ISupportSdcaClassificationLossFactory : IComponentFactory<ISupportSdcaClassificationLoss>
    {
    }
 
    /// <summary>
    /// Component factory producing <see cref="ISupportSdcaRegressionLoss"/> instances.
    /// </summary>
    [TlcModule.ComponentKind("SDCARegressionLossFunction")]
    [BestFriend]
    internal interface ISupportSdcaRegressionLossFactory : IComponentFactory<ISupportSdcaRegressionLoss>
    {
        // Shadows the inherited member so callers get the more specific SDCA regression loss type.
        new ISupportSdcaRegressionLoss CreateComponent(IHostEnvironment env);
    }
 
    /// <summary>
    /// Factory producing <see cref="LogLoss"/> for both the SDCA-specific and the general
    /// classification loss factory interfaces.
    /// </summary>
    [TlcModule.Component(Name = "LogLoss", FriendlyName = "Log loss", Aliases = new[] { "Logistic", "CrossEntropy" },
        Desc = "Log loss.")]
    [BestFriend]
    internal sealed class LogLossFactory : ISupportSdcaClassificationLossFactory, ISupportClassificationLossFactory
    {
        public ISupportSdcaClassificationLoss CreateComponent(IHostEnvironment env) => new LogLoss();

        IClassificationLoss IComponentFactory<IClassificationLoss>.CreateComponent(IHostEnvironment env) => new LogLoss();
    }
 
    /// <summary>
    /// The Log Loss, also known as the Cross Entropy Loss. It is commonly used in classification tasks.
    /// </summary>
    /// <remarks>
    /// <format type="text/markdown"><![CDATA[
    ///
    /// The Log Loss function is defined as:
    ///
    /// $L(p(\hat{y}), y) = -y ln(\hat{y}) - (1 - y) ln(1 - \hat{y})$
    ///
    /// where $\hat{y}$ is the predicted score, $p(\hat{y})$ is the probability of belonging to the positive class by applying a [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) to the score, and $y \in \\{0, 1\\}$ is the true label.
    ///
    /// Note that the labels used in this calculation are 0 and 1, unlike [Hinge Loss](xref:Microsoft.ML.Trainers.HingeLoss) and [Exponential Loss](xref:Microsoft.ML.Trainers.ExpLoss), where the labels used are -1 and 1.
    ///
    /// The Log Loss function provides a measure of how *certain* a classifier's predictions are, instead of just measuring how *correct* they are.
    /// For example, a predicted probability of 0.80 for a true label of 1 gets penalized more than a predicted probability of 0.99.
    ///
    /// ]]>
    /// </format>
    /// </remarks>
    public sealed class LogLoss : ISupportSdcaClassificationLoss
    {
        internal const string Summary = "The log loss function for classification. Supported by SDCA.";

        // SDCA dual updates whose magnitude exceeds this value are damped when running multi-threaded.
        private const float Threshold = 0.5f;

        /// <summary>
        /// Computes the log loss for one example; labels greater than zero are treated
        /// as the positive class, everything else as the negative class.
        /// </summary>
        public Double Loss(float output, float label)
        {
            float probability = MathUtils.Sigmoid(output);
            if (label > 0)
                return -Log(probability);
            return -Log(1 - probability);
        }

        /// <summary>
        /// Computes the derivative of the log loss with respect to the raw score.
        /// </summary>
        public float Derivative(float output, float label)
        {
            float probability = MathUtils.Sigmoid(output);
            if (label > 0)
                return probability - 1;
            return probability;
        }

        public float ComputeDualUpdateInvariant(float scaledFeaturesNormSquared)
        {
            return 1 / Math.Max(1, 0.25f + scaledFeaturesNormSquared);
        }

        // REVIEW: this dual update uses a different log loss formulation,
        // although the two are equivalent if the labels are restricted to 0 and 1.
        // Need to update so that it can handle probability labels and stay true to the
        // definition, which is a smooth loss function.
        public float DualUpdate(float output, float label, float dual, float invariant, int maxNumThreads)
        {
            float truth = label > 0 ? 1 : -1;

            // This is an approximate closed-form solution.
            // REVIEW: is it necessary to do a few extra Newton steps?
            var fullUpdate = (MathUtils.Sigmoid(-truth * output) * truth - dual) * invariant;
            if (maxNumThreads >= 2 && Math.Abs(fullUpdate) > Threshold)
                return fullUpdate / maxNumThreads;
            return fullUpdate;
        }

        public Double DualLoss(float label, float dual)
        {
            // Orient the dual variable by the label sign.
            var signedDual = label > 0 ? dual : -dual;

            // The dual variable is out of the feasible region [0, 1].
            if (signedDual < 0 || signedDual > 1)
                return Double.NegativeInfinity;

            return MathUtils.Entropy(signedDual, useLnNotLog2: true);
        }

        // Floors the argument at 1e-8 to guard against log(0).
        private static Double Log(Double x) => Math.Log(Math.Max(x, 1e-8));
    }
 
    /// <summary>
    /// Hinge Loss, commonly used in classification tasks.
    /// </summary>
    /// <remarks>
    /// <format type="text/markdown"><![CDATA[
    ///
    /// The Hinge Loss function is defined as:
    ///
    /// $L(\hat{y}, y) = max(0, m - y\hat{y})$
    ///
    /// where $\hat{y}$ is the predicted score, $y \in \\{-1, 1\\}$ is the true label, and $m$ is the margin parameter set to 1 by default.
    ///
    /// Note that the labels used in this calculation are -1 and 1, unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), where the labels used are 0 and 1.
    /// Also unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), $\hat{y}$ is the raw predicted score, not the predicted probability (which is calculated by applying a [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) to the predicted score).
    ///
    /// While the hinge loss function is both convex and continuous, it is not smooth (that is not differentiable) at $y\hat{y} = m$.
    /// Consequently, it cannot be used with gradient descent methods or stochastic gradient descent methods, which rely on differentiability over the entire domain.
    ///
    /// For more, see [Hinge Loss for classification](https://en.wikipedia.org/wiki/Loss_functions_for_classification#Hinge_loss).
    ///
    /// ]]>
    /// </format>
    /// </remarks>
    public sealed class HingeLoss : ISupportSdcaClassificationLoss
    {
        [TlcModule.Component(Name = "HingeLoss", FriendlyName = "Hinge loss", Alias = "Hinge", Desc = "Hinge loss.")]
        [BestFriend]
        internal sealed class Options : ISupportSdcaClassificationLossFactory, ISupportClassificationLossFactory
        {
            [Argument(ArgumentType.AtMostOnce, HelpText = "Margin value", ShortName = "marg")]
            public float Margin = Defaults.Margin;

            public ISupportSdcaClassificationLoss CreateComponent(IHostEnvironment env)
            {
                return new HingeLoss(this);
            }

            IClassificationLoss IComponentFactory<IClassificationLoss>.CreateComponent(IHostEnvironment env)
            {
                return new HingeLoss(this);
            }
        }

        private static class Defaults
        {
            public const float Margin = 1;
        }

        internal const string Summary = "The Hinge loss function for classification. Supported by SDCA.";

        // SDCA dual updates whose magnitude exceeds this value are damped when running multi-threaded.
        private const float Threshold = 0.5f;

        // The margin parameter m in max(0, m - y * score).
        private readonly float _margin;

        private HingeLoss(Options options)
        {
            _margin = options.Margin;
        }

        /// <summary>
        /// Creates a hinge loss with the given margin parameter.
        /// </summary>
        /// <param name="margin">The margin; defaults to 1.</param>
        public HingeLoss(float margin = Defaults.Margin)
            : this(new Options() { Margin = margin })
        {
        }

        /// <summary>
        /// Computes max(0, margin - y * output), where y is +1 for labels greater than zero, else -1.
        /// </summary>
        public Double Loss(float output, float label)
        {
            float sign = label > 0 ? 1 : -1;
            float hinge = _margin - sign * output;
            return hinge > 0 ? hinge : 0;
        }

        /// <summary>
        /// Computes the (sub)derivative of the hinge loss with respect to the raw score.
        /// </summary>
        public float Derivative(float output, float label)
        {
            // With sign folded into the comparison: -y when y * output < margin, else 0.
            if (label > 0)
                return _margin > output ? -1 : 0;
            return _margin > -output ? 1 : 0;
        }

        public float ComputeDualUpdateInvariant(float scaledFeaturesNormSquared)
        {
            return 1 / scaledFeaturesNormSquared;
        }

        public float DualUpdate(float output, float label, float alpha, float invariant, int maxNumThreads)
        {
            float sign = label > 0 ? 1 : -1;
            var candidate = (_margin - output * sign) * invariant + alpha * sign;
            var clipped = Math.Max(0, Math.Min(1, candidate));
            var fullUpdate = sign * clipped - alpha;
            if (maxNumThreads >= 2 && Math.Abs(fullUpdate) > Threshold)
                return fullUpdate / maxNumThreads;
            return fullUpdate;
        }

        public Double DualLoss(float label, float dual)
        {
            // Orient the dual variable by the label sign.
            var signedDual = label > 0 ? dual : -dual;

            // The dual variable is out of the feasible region [0, 1].
            if (signedDual < 0 || signedDual > 1)
                return Double.NegativeInfinity;

            return _margin * signedDual;
        }
    }
 
    /// <summary>
    /// A smooth version of the <see cref="HingeLoss"/> function, commonly used in classification tasks.
    /// </summary>
    /// <remarks>
    /// <format type="text/markdown"><![CDATA[
    ///
    /// Let $f(\hat{y}, y) = 1 - y\hat{y}$, where $\hat{y}$ is the predicted score and $y \in \\{-1, 1\\}$ is the true label. $f(\hat{y}, y)$ here is the non-zero portion of the [Hinge Loss](xref:Microsoft.ML.Trainers.HingeLoss).
    ///
    /// Note that the labels used in this calculation are -1 and 1, unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), where the labels used are 0 and 1.
    /// Also unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), $\hat{y}$ is the raw predicted score, not the predicted probability (which is calculated by applying a [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) to the predicted score).
    ///
    /// The Smoothed Hinge Loss function is then defined as:
    ///
    /// $
    /// L(f(\hat{y}, y)) =
    /// \begin{cases}
    /// 0                                  & \text{if } f(\hat{y}, y) < 0 \\\\
    /// \frac{(f(\hat{y}, y))^2}{2\alpha}  & \text{if } f(\hat{y}, y) < \alpha \\\\
    /// f(\hat{y}, y) - \frac{\alpha}{2}   & \text{otherwise}
    /// \end{cases}
    /// $
    ///
    /// where $\alpha$ is a smoothing parameter set to 1 by default.
    ///
    /// ]]>
    /// </format>
    /// </remarks>
    public sealed class SmoothedHingeLoss : ISupportSdcaClassificationLoss
    {
        [TlcModule.Component(Name = "SmoothedHingeLoss", FriendlyName = "Smoothed Hinge Loss", Alias = "SmoothedHinge",
                             Desc = "Smoothed Hinge loss.")]
        internal sealed class Options : ISupportSdcaClassificationLossFactory, ISupportClassificationLossFactory
        {
            [Argument(ArgumentType.AtMostOnce, HelpText = "Smoothing constant", ShortName = "smooth")]
            public float SmoothingConst = Defaults.SmoothingConst;

            public ISupportSdcaClassificationLoss CreateComponent(IHostEnvironment env) => new SmoothedHingeLoss(env, this);

            IClassificationLoss IComponentFactory<IClassificationLoss>.CreateComponent(IHostEnvironment env) => new SmoothedHingeLoss(env, this);
        }

        internal const string Summary = "The smooth Hinge loss function for classification. Supported by SDCA.";
        // SDCA dual updates whose magnitude exceeds this value are damped when running multi-threaded.
        private const float Threshold = 0.5f;
        // The smoothed Hinge loss is 1/(_smoothConst) smooth (its definition can be found in http://jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf (page 568 Definition 1)
        private readonly float _smoothConst;
        private readonly Double _halfSmoothConst;   // Cached _smoothConst / 2.
        private readonly Double _doubleSmoothConst; // Cached _smoothConst * 2.

        private static class Defaults
        {
            public const float SmoothingConst = 1;
        }

        /// <summary>
        /// Constructor for smoothed hinge loss.
        /// </summary>
        /// <param name="smoothingConstant">The smoothing constant.</param>
        public SmoothedHingeLoss(float smoothingConstant = Defaults.SmoothingConst)
        {
            Contracts.CheckParam(smoothingConstant >= 0, nameof(smoothingConstant), "Must be non-negative.");
            _smoothConst = smoothingConstant;
            _halfSmoothConst = _smoothConst / 2;
            _doubleSmoothConst = _smoothConst * 2;
        }

        // The environment is unused; this overload only exists to satisfy the component factory signature.
        private SmoothedHingeLoss(IHostEnvironment env, Options options)
            : this(options.SmoothingConst)
        {
        }

        /// <summary>
        /// Computes the smoothed hinge loss for one example; labels greater than zero are
        /// treated as +1, everything else as -1.
        /// </summary>
        public Double Loss(float output, float label)
        {
            float truth = label > 0 ? 1 : -1;
            float u = 1 - truth * output;

            // Correctly classified with margin: no loss.
            if (u < 0)
                return 0;

            if (u < _smoothConst) // Quadratic (smoothed) region: 0 <= u < _smoothConst.
                return u * u / _doubleSmoothConst;

            // Linear region, shifted down so the pieces join continuously.
            return u - _halfSmoothConst;
        }

        /// <summary>
        /// Computes the derivative of the smoothed hinge loss with respect to the raw score.
        /// </summary>
        public float Derivative(float output, float label)
        {
            float truth = label > 0 ? 1 : -1;
            float u = 1 - truth * output;

            if (u < 0)
                return 0;

            if (u < _smoothConst)
                return -truth * u / _smoothConst;

            return -truth;
        }

        public float ComputeDualUpdateInvariant(float scaledFeaturesNormSquared)
        {
            return 1 / (scaledFeaturesNormSquared + _smoothConst);
        }

        public float DualUpdate(float output, float label, float alpha, float invariant, int maxNumThreads)
        {
            float truth = label > 0 ? 1 : -1;
            // Closed-form maximizer of the dual objective, clipped to the feasible region [0, 1],
            // damped when large and running multi-threaded.
            var tmp = (1 - output * truth - _smoothConst * alpha * truth) * invariant + alpha * truth;
            var fullUpdate = truth * Math.Max(0, Math.Min(1, tmp)) - alpha;
            return maxNumThreads >= 2 && Math.Abs(fullUpdate) > Threshold ? fullUpdate / maxNumThreads : fullUpdate;
        }

        public Double DualLoss(float label, float dual)
        {
            // Orient the dual variable by the label sign.
            if (label <= 0)
                dual = -dual;

            // The dual variable is out of the feasible region [0, 1].
            if (dual < 0 || dual > 1)
                return Double.NegativeInfinity;

            return dual * (1 - dual * _halfSmoothConst);
        }
    }
 
    /// <summary>
    /// Exponential Loss, commonly used in classification tasks.
    /// </summary>
    /// <remarks>
    /// <format type="text/markdown"><![CDATA[
    ///
    /// The Exponential Loss function is defined as:
    ///
    /// $L(\hat{y}, y) = e^{-\beta y \hat{y}}$
    ///
    /// where $\hat{y}$ is the predicted score, $y \in \\{-1, 1\\}$ is the true label, and $\beta$ is a scale factor set to 1 by default.
    ///
    /// Note that the labels used in this calculation are -1 and 1, unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), where the labels used are 0 and 1.
    /// Also unlike [Log Loss](xref:Microsoft.ML.Trainers.LogLoss), $\hat{y}$ is the raw predicted score, not the predicted probability (which is calculated by applying a [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) to the predicted score).
    ///
    /// The Exponential Loss function penalizes incorrect predictions more than the [Hinge Loss](xref:Microsoft.ML.Trainers.HingeLoss) and has a larger gradient.
    ///
    /// ]]>
    /// </format>
    /// </remarks>
    public sealed class ExpLoss : IClassificationLoss
    {
        [TlcModule.Component(Name = "ExpLoss", FriendlyName = "Exponential Loss", Desc = "Exponential loss.")]
        internal sealed class Options : ISupportClassificationLossFactory
        {
            [Argument(ArgumentType.AtMostOnce, HelpText = "Beta (dilation)", ShortName = "beta")]
            public float Beta = 1;

            public IClassificationLoss CreateComponent(IHostEnvironment env)
            {
                return new ExpLoss(this);
            }
        }

        internal const string Summary = "The exponential loss function for classification.";

        // Scale factor applied inside the exponent: loss = exp(-beta * y * score).
        private readonly float _beta;

        internal ExpLoss(Options options)
            : this(options.Beta)
        {
        }

        /// <summary>
        /// Creates an exponential loss with the given scale factor.
        /// </summary>
        /// <param name="beta">The scale factor; defaults to 1.</param>
        public ExpLoss(float beta = 1)
        {
            _beta = beta;
        }

        /// <summary>
        /// Computes exp(-beta * y * output), where y is +1 for labels greater than zero, else -1.
        /// </summary>
        public Double Loss(float output, float label)
        {
            // Fold the label sign into the exponent's coefficient.
            float coefficient = label > 0 ? -_beta : _beta;
            return MathUtils.ExpSlow(coefficient * output);
        }

        /// <summary>
        /// Computes the derivative of the exponential loss with respect to the raw score.
        /// </summary>
        public float Derivative(float output, float label)
        {
            float coefficient = label > 0 ? -_beta : _beta;
            return coefficient * MathUtils.ExpSlow(coefficient * output);
        }
    }
 
    /// <summary>
    /// Factory producing <see cref="SquaredLoss"/> for both the SDCA-specific and the general
    /// regression loss factory interfaces.
    /// </summary>
    [TlcModule.Component(Name = "SquaredLoss", FriendlyName = "Squared Loss", Alias = "L2", Desc = "Squared loss.")]
    [BestFriend]
    internal sealed class SquaredLossFactory : ISupportSdcaRegressionLossFactory, ISupportRegressionLossFactory
    {
        public ISupportSdcaRegressionLoss CreateComponent(IHostEnvironment env) => new SquaredLoss();

        IRegressionLoss IComponentFactory<IRegressionLoss>.CreateComponent(IHostEnvironment env) => new SquaredLoss();
    }
 
    /// <summary>
    /// The Squared Loss, commonly used in regression tasks.
    /// </summary>
    /// <remarks>
    /// <format type="text/markdown"><![CDATA[
    ///
    /// The Squared Loss function is defined as:
    ///
    /// $L(\hat{y}, y) = (\hat{y} - y)^2$
    ///
    /// where $\hat{y}$ is the predicted value and $y$ is the true value.
    ///
    /// ]]>
    /// </format>
    /// </remarks>
    public sealed class SquaredLoss : ISupportSdcaRegressionLoss
    {
        internal const string Summary = "The squared loss function for regression.";

        /// <summary>
        /// Computes the squared error between the prediction and the label.
        /// </summary>
        public Double Loss(float output, float label)
        {
            var error = output - label;
            return error * error;
        }

        /// <summary>
        /// Computes the derivative of the squared loss with respect to the prediction.
        /// </summary>
        public float Derivative(float output, float label)
        {
            return 2 * (output - label);
        }

        public float ComputeDualUpdateInvariant(float scaledFeaturesNormSquared)
            => 1 / (0.5f + scaledFeaturesNormSquared);

        public float DualUpdate(float output, float label, float dual, float invariant, int maxNumThreads)
        {
            var fullUpdate = (label - output - 0.5f * dual) * invariant;
            if (maxNumThreads >= 2)
                return fullUpdate / maxNumThreads;
            return fullUpdate;
        }

        public Double DualLoss(float label, float dual)
            => -dual * (dual / 4 - label);
    }
 
    /// <summary>
    /// Factory producing <see cref="PoissonLoss"/> for the general regression loss factory interface.
    /// </summary>
    [TlcModule.Component(Name = "PoissonLoss", FriendlyName = "Poisson Loss", Desc = "Poisson loss.")]
    [BestFriend]
    internal sealed class PoissonLossFactory : ISupportRegressionLossFactory
    {
        public IRegressionLoss CreateComponent(IHostEnvironment env) => new PoissonLoss();
    }
 
    /// <summary>
    /// Poisson Loss function for Poisson Regression.
    /// </summary>
    /// <remarks type="text/markdown"><![CDATA[
    ///
    /// The Poisson Loss function is defined as:
    ///
    /// $L(\hat{y}, y) = e^{\hat{y}} - y\hat{y}$
    ///
    /// where $\hat{y}$ is the predicted value, $y$ is the true label.
    ///
    /// ]]>
    /// </remarks>
    public sealed class PoissonLoss : IRegressionLoss
    {
        internal const string Summary = "The Poisson loss function for regression.";

        /// <summary>
        /// Computes the Poisson loss exp(output) - label * output for one example.
        /// </summary>
        public Double Loss(float output, float label)
        {
            // REVIEW: The output is expected in log-space while the label is in the original
            // space, but an evaluator has no way of knowing about this requirement, which leads
            // to errors whenever this loss is used in one.
            var rate = Math.Exp(output);
            return rate - label * output;
        }

        /// <summary>
        /// Computes the derivative of the Poisson loss with respect to the raw (log-space) output.
        /// </summary>
        public float Derivative(float output, float label)
        {
            var rate = (float)Math.Exp(output);
            return rate - label;
        }
    }
 
    /// <summary>
    /// Tweedie loss, based on the log-likelihood of the Tweedie distribution. This loss function is used in Tweedie regression.
    /// </summary>
    /// <remarks type="text/markdown"><![CDATA[
    ///
    /// The Tweedie Loss function is defined as:
    ///
    /// $
    /// L(\hat{y}, y, i) =
    /// \begin{cases}
    /// \hat{y} - y ln(\hat{y}) + ln(\Gamma(y))                                                                                     & \text{if } i = 1 \\\\
    /// \hat{y} + \frac{y}{\hat{y}} - \sqrt{y}                                                                                      & \text{if } i = 2 \\\\
    /// \frac{(\hat{y})^{2 - i}}{2 - i} - y \frac{(\hat{y})^{1 - i}}{1 - i} - (\frac{y^{2 - i}}{2 - i} - y\frac{y^{1 - i}}{1 - i})  & \text{otherwise}
    /// \end{cases}
    /// $
    ///
    /// where $\hat{y}$ is the predicted value, $y$ is the true label, $\Gamma$ is the [Gamma function](https://en.wikipedia.org/wiki/Gamma_function), and $i$ is the index parameter for the [Tweedie distribution](https://en.wikipedia.org/wiki/Tweedie_distribution), in the range [1, 2].
    /// $i$ is set to 1.5 by default. $i = 1$ is Poisson loss, $i = 2$ is gamma loss, and intermediate values are compound Poisson-Gamma loss.
    ///
    /// ]]>
    /// </remarks>
    public sealed class TweedieLoss : IRegressionLoss
    {
        [TlcModule.Component(Name = "TweedieLoss", FriendlyName = "Tweedie Loss", Alias = "tweedie", Desc = "Tweedie loss.")]
        internal sealed class Options : ISupportRegressionLossFactory
        {
            [Argument(ArgumentType.LastOccurrenceWins, HelpText =
                "Index parameter for the Tweedie distribution, in the range [1, 2]. 1 is Poisson loss, 2 is gamma loss, " +
                "and intermediate values are compound Poisson loss.")]
            public Double Index = 1.5;

            public IRegressionLoss CreateComponent(IHostEnvironment env) => new TweedieLoss(this);
        }

        internal const string Summary = "The Tweedie loss function for regression.";

        private readonly Double _index;  // The index parameter specified by the user.
        private readonly Double _index1; // 1 minus the index parameter. Zero when _index == 1 (Poisson case).
        private readonly Double _index2; // 2 minus the index parameter. Zero when _index == 2 (Gamma case).

        private TweedieLoss(Options options)
        {
            Contracts.CheckUserArg(1 <= options.Index && options.Index <= 2, nameof(options.Index), "Must be in the range [1, 2]");
            _index = options.Index;
            _index1 = 1 - _index;
            _index2 = 2 - _index;
        }

        /// <summary>
        /// Constructor for Tweedie loss.
        /// </summary>
        /// <param name="index">Index parameter for the Tweedie distribution, in the range [1, 2].
        /// 1 is Poisson loss, 2 is gamma loss, and intermediate values are compound Poisson loss.</param>
        public TweedieLoss(double index = 1.5)
        {
            Contracts.CheckParam(1 <= index && index <= 2, nameof(index), "Must be in the range [1, 2]");
            _index = index;
            _index1 = 1 - _index;
            _index2 = 2 - _index;
        }

        // Clamps the value to a small positive epsilon, so the logarithms, divisions, and
        // negative powers below stay finite even for zero or negative inputs.
        private static void Clamp(ref float val)
        {
            const float eps = (float)1e-7;
            if (val < eps)
                val = eps;
        }

        /// <summary>
        /// Computes the Tweedie loss (negative log-likelihood, shifted so its minimum is 0) for one example.
        /// Both the output and the label are clamped to be strictly positive before use.
        /// </summary>
        public Double Loss(float output, float label)
        {
            Clamp(ref output);
            Clamp(ref label);

            // This is the log likelihood.
            // If output were in the original log-space, then this would be exp((1-rho) * output).
            // However output is not in the log space.
            // _index1 == 0 means _index == 1: the Poisson special case.
            if (_index1 == 0)
                return output - label * Math.Log(output) + MathUtils.LogGamma(label);
            // The last log-gamma is not necessary from the point of view of the mathematical models,
            // but to the extent that this loss is used as a human comprehensible value, it's nice to
            // have its minimum at 0.
            // _index2 == 0 means _index == 2: the Gamma special case.
            if (_index2 == 0)
                return output + label / output - Math.Sqrt(label);

            // General compound Poisson-Gamma case. The second parenthesized term is the same
            // expression evaluated at output == label, which shifts the minimum of the loss to 0.
            return Math.Pow(output, _index2) / _index2 - label * Math.Pow(output, _index1) / _index1
                - (Math.Pow(label, _index2) / _index2 - label * Math.Pow(label, _index1) / _index1);
        }

        /// <summary>
        /// Computes the derivative of the Tweedie loss. The inputs are clamped the same way as in
        /// <see cref="Loss"/>; the Poisson case (_index1 == 0) reduces to output - label.
        /// </summary>
        public float Derivative(float output, float label)
        {
            Clamp(ref output);
            Clamp(ref label);

            if (_index1 == 0)
                return output - label;
            return (float)(Math.Pow(output, _index2) - label * Math.Pow(output, _index1));
        }
    }
}