File: Optimizer\DifferentiableFunction.cs
Web Access
Project: src\src\Microsoft.ML.StandardTrainers\Microsoft.ML.StandardTrainers.csproj (Microsoft.ML.StandardTrainers)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Threading;
using Microsoft.ML.Data;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
 
namespace Microsoft.ML.Numeric
{
    /// <summary>
    /// A delegate for functions with gradients.
    /// </summary>
    /// <param name="input">The point at which to evaluate the function</param>
    /// <param name="gradient">The gradient vector, which must be filled in (its initial contents are undefined)</param>
    /// <param name="progress">The progress channel provider that can be used to report calculation progress. Can be null.</param>
    /// <returns>The value of the function</returns>
    internal delegate float DifferentiableFunction(in VBuffer<float> input, ref VBuffer<float> gradient, IProgressChannelProvider progress);
 
    /// <summary>
    /// A delegate for indexed sets of functions with gradients.
    ///
    /// REVIEW: I didn't add an <see cref="IProgressChannelProvider"/> here, since it looks like this code is not actually
    /// accessed from anywhere. Maybe it should go away?
    /// </summary>
    /// <param name="index">The index of the function</param>
    /// <param name="input">The point at which to evaluate the function</param>
    /// <param name="gradient">The gradient vector, which must be filled in (its initial contents are undefined)</param>
    /// <returns>The value of the function</returns>
    internal delegate float IndexedDifferentiableFunction(int index, in VBuffer<float> input, ref VBuffer<float> gradient);
 
    /// <summary>
    /// Class to aggregate an indexed differentiable function into a single function, in parallel
    /// </summary>
    internal class DifferentiableFunctionAggregator
    {
        private readonly IndexedDifferentiableFunction _func;
        private readonly int _maxIndex;
        private readonly int _threads;
        private readonly int _dim;
 
        private readonly VBuffer<float>[] _tempGrads;
        private VBuffer<float> _input;
        private readonly float[] _tempVals;
        private readonly AutoResetEvent[] _threadFinished;
 
        /// <summary>
        /// Creates a DifferentiableFunctionAggregator
        /// </summary>
        /// <param name="func">Indexed function to use</param>
        /// <param name="dim">Dimensionality of the function</param>
        /// <param name="maxIndex">Max index of the function</param>
        /// <param name="threads">Number of threads to use</param>
        public DifferentiableFunctionAggregator(IndexedDifferentiableFunction func, int dim, int maxIndex, int threads = 64)
        {
            _func = func;
            _dim = dim;
 
            _maxIndex = maxIndex;
 
            if (threads > maxIndex)
                threads = maxIndex;
            if (threads > 64)
                threads = 64;
            _threads = threads;
 
            _tempGrads = new VBuffer<float>[threads];
            _threadFinished = new AutoResetEvent[threads];
            for (int i = 0; i < threads; ++i)
                _threadFinished[i] = new AutoResetEvent(false);
 
            _tempVals = new float[threads];
        }
 
        private void Eval(object chunkIndexObj)
        {
            int chunkIndex = (int)chunkIndexObj;
            int chunkSize = _maxIndex / _threads;
            int bigChunkSize = chunkSize + 1;
            int numBigChunks = _maxIndex % _threads;
            int from;
            int to;
            if (chunkIndex < numBigChunks)
            {
                from = bigChunkSize * chunkIndex;
                to = from + bigChunkSize;
            }
            else
            {
                from = bigChunkSize * numBigChunks + chunkSize * (chunkIndex - numBigChunks);
                to = from + chunkSize;
            }
 
            _tempVals[chunkIndex] = 0;
            VectorUtils.ScaleBy(ref _tempGrads[chunkIndex], 0);
 
            VBuffer<float> tempGrad = default(VBuffer<float>);
            for (int i = from; i < to; ++i)
            {
                VBufferUtils.Resize(ref tempGrad, 0, 0);
                _tempVals[chunkIndex] += _func(i, in _input, ref tempGrad);
                if (_tempGrads[chunkIndex].Length == 0)
                    tempGrad.CopyTo(ref _tempGrads[chunkIndex]);
                else
                    VectorUtils.Add(in tempGrad, ref _tempGrads[chunkIndex]);
            }
 
            _threadFinished[chunkIndex].Set();
        }
 
        /// <summary>
        /// Evaluate and sum the function over all indices, in parallel
        /// </summary>
        /// <param name="input">The point at which to evaluate the function</param>
        /// <param name="gradient">The gradient vector, which must be filled in (its initial contents are undefined)</param>
        /// <returns>Function value</returns>
        public float Eval(in VBuffer<float> input, ref VBuffer<float> gradient)
        {
            _input = input;
 
            for (int c = 0; c < _threads; ++c)
            {
                ThreadPool.QueueUserWorkItem(Eval, c);
            }
 
            AutoResetEvent.WaitAll(_threadFinished);
 
            VectorUtils.ScaleBy(ref gradient, 0);
            float value = 0;
            for (int c = 0; c < _threads; ++c)
            {
                if (gradient.Length == 0)
                    _tempGrads[c].CopyTo(ref gradient);
                else
                    VectorUtils.Add(in _tempGrads[c], ref gradient);
                value += _tempVals[c];
            }
 
            return value;
        }
    }
 
    // REVIEW: The following GradientTester is very inefficient, and is not called
    // from anywhere. Consider deletion.
    /// <summary>
    /// A class for testing the gradient of DifferentiableFunctions, useful for debugging
    /// </summary>
    /// <remarks>
    /// Works by comparing the reported gradient to the numerically computed gradient.
    /// If the gradient is correct, the return value should be small (order of 1e-6).
    /// May have false negatives if extreme values cause the numeric gradient to be off,
    /// for example, if the norm of x is very large, or if the gradient is changing rapidly at x.
    /// </remarks>
    internal static class GradientTester
    {
        // approximately u^(1/3), where u is the unit roundoff ~ 1.1e-16.
        // the optimal value of eps for the central difference approximation, Nocedal & Wright
        private const float Eps = (float)4.79e-6;
 
        private static readonly Random _r = new Random(5);
 
        /// <summary>
        /// Tests the gradient reported by f.
        /// </summary>
        /// <param name="f">function to test</param>
        /// <param name="x">point at which to test</param>
        /// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests</returns>
        public static float Test(DifferentiableFunction f, in VBuffer<float> x)
        {
            // REVIEW: Delete this method?
            return Test(f, in x, false);
        }
 
        /// <summary>
        /// Tests the gradient reported by f.
        /// </summary>
        /// <param name="f">function to test</param>
        /// <param name="x">point at which to test</param>
        /// <param name="quiet">If false, outputs detailed info.</param>
        /// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests</returns>
        public static float Test(DifferentiableFunction f, in VBuffer<float> x, bool quiet)
        {
            // REVIEW: Delete this method?
            VBuffer<float> grad = default(VBuffer<float>);
            VBuffer<float> newGrad = default(VBuffer<float>);
            VBuffer<float> newX = default(VBuffer<float>);
            float normX = VectorUtils.Norm(x);
            f(in x, ref grad, null);
 
            if (!quiet)
                Console.WriteLine(Header);
 
            float maxNormDiff = float.NegativeInfinity;
 
            int numIters = Math.Min((int)x.Length, 10);
            int maxDirCount = Math.Min((int)x.Length / 2, 100);
 
            for (int n = 1; n <= numIters; n++)
            {
                int dirCount = Math.Min(n * 10, maxDirCount);
                List<int> indices = new List<int>(dirCount);
                List<float> values = new List<float>(dirCount);
                for (int i = 0; i < dirCount; i++)
                {
                    int index = _r.Next((int)x.Length);
                    while (indices.IndexOf(index) >= 0)
                        index = _r.Next((int)x.Length);
                    indices.Add(index);
                    values.Add(SampleFromGaussian(_r));
                }
                VBuffer<float> dir = new VBuffer<float>(x.Length, values.Count, values.ToArray(), indices.ToArray());
 
                float norm = VectorUtils.Norm(dir);
                VectorUtils.ScaleBy(ref dir, 1 / norm);
 
                VectorUtils.AddMultInto(in x, Eps, in dir, ref newX);
                float rVal = f(in newX, ref newGrad, null);
 
                VectorUtils.AddMultInto(in x, -Eps, in dir, ref newX);
                float lVal = f(in newX, ref newGrad, null);
 
                float dirDeriv = VectorUtils.DotProduct(in grad, in dir);
                float numDeriv = (rVal - lVal) / (2 * Eps);
 
                float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                float diff = numDeriv - dirDeriv;
                if (!quiet)
                    Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
 
                maxNormDiff = Math.Max(maxNormDiff, normDiff);
            }
 
            return maxNormDiff;
        }
 
        /// <summary>
        /// The head of the test output
        /// </summary>
        public static readonly string Header = "Trial    Numeric deriv     Analytic deriv    Difference     Normalized";
 
        /// <summary>
        /// Tests the gradient using finite differences on each axis (appropriate for small functions)
        /// </summary>
        /// <param name="f"></param>
        /// <param name="x"></param>
        public static void TestAllCoords(DifferentiableFunction f, in VBuffer<float> x)
        {
            // REVIEW: Delete this method?
            VBuffer<float> grad = default(VBuffer<float>);
            VBuffer<float> newGrad = default(VBuffer<float>);
            VBuffer<float> newX = default(VBuffer<float>);
            float val = f(in x, ref grad, null);
            float normX = VectorUtils.Norm(x);
 
            Console.WriteLine(Header);
 
            Random r = new Random(5);
 
            VBuffer<float> dir = new VBuffer<float>(x.Length, 1, new float[] { 1 }, new int[] { 0 });
            for (int n = 0; n < x.Length; n++)
            {
                VBufferEditor.CreateFromBuffer(ref dir).Values[0] = n;
                VectorUtils.AddMultInto(in x, Eps, in dir, ref newX);
                float rVal = f(in newX, ref newGrad, null);
 
                VectorUtils.AddMultInto(in x, -Eps, in dir, ref newX);
                float lVal = f(in newX, ref newGrad, null);
 
                float dirDeriv = VectorUtils.DotProduct(in grad, in dir);
                float numDeriv = (rVal - lVal) / (2 * Eps);
 
                float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                float diff = numDeriv - dirDeriv;
                if (diff != 0)
                    Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
            }
        }
 
        /// <summary>
        /// Tests the gradient using finite differences on each axis in the list
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="coords">List of coordinates to test</param>
        public static void TestCoords(DifferentiableFunction f, in VBuffer<float> x, IList<int> coords)
        {
            // REVIEW: Delete this method?
            VBuffer<float> grad = default(VBuffer<float>);
            VBuffer<float> newGrad = default(VBuffer<float>);
            VBuffer<float> newX = default(VBuffer<float>);
            float val = f(in x, ref grad, null);
            float normX = VectorUtils.Norm(x);
 
            Console.WriteLine(Header);
 
            Random r = new Random(5);
 
            VBuffer<float> dir = new VBuffer<float>(x.Length, 1, new float[] { 1 }, new int[] { 0 });
            foreach (int n in coords)
            {
                VBufferEditor.CreateFromBuffer(ref dir).Values[0] = n;
                VectorUtils.AddMultInto(in x, Eps, in dir, ref newX);
                float rVal = f(in newX, ref newGrad, null);
 
                VectorUtils.AddMultInto(in x, -Eps, in dir, ref newX);
                float lVal = f(in newX, ref newGrad, null);
 
                float dirDeriv = VectorUtils.DotProduct(in grad, in dir);
                float numDeriv = (rVal - lVal) / (2 * Eps);
 
                float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                float diff = numDeriv - dirDeriv;
                Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
            }
        }
 
        /// <summary>
        /// Tests the gradient reported by <paramref name="f"/>.
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="dir">Direction to test derivative</param>
        /// <param name="quiet">Whether to disable output</param>
        /// <param name="newGrad">This is a reusable working buffer for intermediate calculations</param>
        /// <param name="newX">This is a reusable working buffer for intermediate calculations</param>
        /// <returns>Normalized difference between analytic and numeric directional derivative</returns>
        public static float Test(DifferentiableFunction f, in VBuffer<float> x, ref VBuffer<float> dir, bool quiet,
            ref VBuffer<float> newGrad, ref VBuffer<float> newX)
        {
            float normDir = VectorUtils.Norm(dir);
 
            float val = f(in x, ref newGrad, null);
            float dirDeriv = VectorUtils.DotProduct(in newGrad, in dir);
 
            float scaledEps = Eps / normDir;
 
            VectorUtils.AddMultInto(in x, scaledEps, in dir, ref newX);
            float rVal = f(in newX, ref newGrad, null);
 
            VectorUtils.AddMultInto(in x, -scaledEps, in dir, ref newX);
            float lVal = f(in newX, ref newGrad, null);
 
            float numDeriv = (rVal - lVal) / (2 * scaledEps);
 
            float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
            float diff = numDeriv - dirDeriv;
            if (!quiet)
                Console.WriteLine("{0,-18:0.0000e0}{1,-18:0.0000e0}{2,-15:0.0000e0}{3,0:0.0000e0}", numDeriv, dirDeriv, diff, normDiff);
 
            return normDiff;
        }
 
        private static float SampleFromGaussian(Random r)
        {
            double a = r.NextDouble();
            double b = r.NextDouble();
            return (float)(Math.Sqrt(-2 * Math.Log(a)) * MathUtils.Cos(2 * Math.PI * b));
        }
    }
}