VBufferMathUtils.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using Microsoft.ML.Data;
using Microsoft.ML.Internal.CpuMath;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
 
namespace Microsoft.ML.Numeric
{
 
    internal static partial class VectorUtils
    {
        /// <summary>
        /// Returns the L2 norm squared of the vector (sum of squares of the components).
        /// </summary>
        public static float NormSquared(in VBuffer<float> a)
        {
            var aValues = a.GetValues();
            if (aValues.Length == 0)
                return 0;
            return CpuMathUtils.SumSq(aValues);
        }
 
        /// <summary>
        /// Returns the L2 norm squared of the vector (sum of squares of the components).
        /// </summary>
        public static float NormSquared(ReadOnlySpan<float> a)
        {
            return CpuMathUtils.SumSq(a);
        }
 
        /// <summary>
        /// Returns the L2 norm of the vector.
        /// </summary>
        /// <returns>L2 norm of the vector</returns>
        public static float Norm(in VBuffer<float> a)
        {
            return MathUtils.Sqrt(NormSquared(in a));
        }
 
        /// <summary>
        /// Returns the L1 norm of the vector.
        /// </summary>
        /// <returns>L1 norm of the vector</returns>
        public static float L1Norm(in VBuffer<float> a)
        {
            var aValues = a.GetValues();
            if (aValues.Length == 0)
                return 0;
            return CpuMathUtils.SumAbs(aValues);
        }
 
        /// <summary>
        /// Returns the L-infinity norm of the vector (i.e., the maximum absolute value).
        /// </summary>
        /// <returns>L-infinity norm of the vector</returns>
        public static float MaxNorm(in VBuffer<float> a)
        {
            var aValues = a.GetValues();
            if (aValues.Length == 0)
                return 0;
            return CpuMathUtils.MaxAbs(aValues);
        }
 
        /// <summary>
        /// Returns the sum of elements in the vector.
        /// </summary>
        public static float Sum(in VBuffer<float> a)
        {
            var aValues = a.GetValues();
            if (aValues.Length == 0)
                return 0;
            return CpuMathUtils.Sum(aValues);
        }
 
        /// <summary>
        /// Scales the vector by a real value.
        /// </summary>
        /// <param name="dst">Incoming vector</param>
        /// <param name="c">Value to multiply vector with</param>
        public static void ScaleBy(ref VBuffer<float> dst, float c)
        {
            if (c == 1 || dst.GetValues().Length == 0)
                return;
            var editor = VBufferEditor.CreateFromBuffer(ref dst);
            if (c != 0)
                CpuMathUtils.Scale(c, editor.Values);
            else // Maintain density of dst.
                editor.Values.Clear();
            // REVIEW: Any benefit in sparsifying?
        }
 
        /// <summary>
        /// Scales the vector by a real value.
        /// <c><paramref name="dst"/> = <paramref name="c"/> * <paramref name="src"/></c>
        /// </summary>
        public static void ScaleBy(in VBuffer<float> src, ref VBuffer<float> dst, float c)
        {
            int length = src.Length;
            var srcValues = src.GetValues();
            int count = srcValues.Length;
 
            if (count == 0)
            {
                // dst is a zero vector.
                VBufferUtils.Resize(ref dst, length, 0);
                return;
            }
 
            if (src.IsDense)
            {
                // Maintain the density of src to dst in order to avoid slow down of L-BFGS.
                var editor = VBufferEditor.Create(ref dst, length);
                Contracts.Assert(length == count);
                if (c == 0)
                    editor.Values.Clear();
                else
                    CpuMathUtils.Scale(c, srcValues, editor.Values, length);
                dst = editor.Commit();
            }
            else
            {
                var editor = VBufferEditor.Create(ref dst, length, count);
                src.GetIndices().CopyTo(editor.Indices);
                if (c == 0)
                    editor.Values.Clear();
                else
                    CpuMathUtils.Scale(c, srcValues, editor.Values, count);
                dst = editor.Commit();
            }
        }
 
        /// <summary>
        /// Perform in-place vector addition <c><paramref name="dst"/> += <paramref name="src"/></c>.
        /// </summary>
        public static void Add(in VBuffer<float> src, ref VBuffer<float> dst)
        {
            Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality.");
 
            var srcValues = src.GetValues();
            if (srcValues.Length == 0)
                return;
 
            if (dst.IsDense)
            {
                var editor = VBufferEditor.Create(ref dst, dst.Length);
                if (src.IsDense)
                    CpuMathUtils.Add(srcValues, editor.Values, src.Length);
                else
                    CpuMathUtils.Add(srcValues, src.GetIndices(), editor.Values, srcValues.Length);
                return;
            }
            // REVIEW: Should we use SSE for any of these possibilities?
            VBufferUtils.ApplyWith(in src, ref dst, (int i, float v1, ref float v2) => v2 += v1);
        }
 
        // REVIEW: Rename all instances of AddMult to AddScale, as soon as conversion concerns are no more.
        /// <summary>
        /// Perform in-place scaled vector addition
        /// <c><paramref name="dst"/> += <paramref name="c"/> * <paramref name="src"/></c>.
        /// If either vector is dense, <paramref name="dst"/> will be dense, unless
        /// <paramref name="c"/> is 0 in which case this method does nothing.
        /// </summary>
        public static void AddMult(in VBuffer<float> src, float c, ref VBuffer<float> dst)
        {
            Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality.");
 
            var srcValues = src.GetValues();
            if (srcValues.Length == 0 || c == 0)
                return;
 
            if (dst.IsDense)
            {
                var editor = VBufferEditor.Create(ref dst, dst.Length);
                if (src.IsDense)
                    CpuMathUtils.AddScale(c, srcValues, editor.Values, src.Length);
                else
                    CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), editor.Values, srcValues.Length);
                return;
            }
            // REVIEW: Should we use SSE for any of these possibilities?
            VBufferUtils.ApplyWith(in src, ref dst, (int i, float v1, ref float v2) => v2 += c * v1);
        }
 
        /// <summary>
        /// Perform scalar vector addition
        /// <c><paramref name="res"/> = <paramref name="c"/> * <paramref name="src"/> + <paramref name="dst"/></c>
        /// </summary>
        public static void AddMult(in VBuffer<float> src, float c, ref VBuffer<float> dst, ref VBuffer<float> res)
        {
            Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality.");
            int length = src.Length;
 
            var srcValues = src.GetValues();
            if (srcValues.Length == 0 || c == 0)
            {
                // src is zero vector, res = dst
                dst.CopyTo(ref res);
                return;
            }
 
            Contracts.Assert(length > 0);
            if (dst.IsDense && src.IsDense)
            {
                var editor = VBufferEditor.Create(ref res, length);
                CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), editor.Values, length);
                res = editor.Commit();
                return;
            }
 
            VBufferUtils.ApplyWithCopy(in src, ref dst, ref res, (int i, float v1, float v2, ref float v3) => v3 = v2 + c * v1);
        }
 
        /// <summary>
        /// Calculate
        /// <c><paramref name="a"/> + <paramref name="c"/> * <paramref name="b"/></c>
        /// and store the result in <paramref name="dst"/>.
        /// </summary>
        public static void AddMultInto(in VBuffer<float> a, float c, in VBuffer<float> b, ref VBuffer<float> dst)
        {
            Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality.");
 
            if (c == 0 || b.GetValues().Length == 0)
                a.CopyTo(ref dst);
            else if (a.GetValues().Length == 0)
                ScaleInto(in b, c, ref dst);
            else
                VBufferUtils.ApplyInto(in a, in b, ref dst, (ind, v1, v2) => v1 + c * v2);
        }
 
        /// <summary>
        /// Perform in-place scaled vector addition
        /// <c><paramref name="dst"/> += <paramref name="c"/> * <paramref name="src"/></c>,
        /// except that this takes place in the section of <paramref name="dst"/> starting
        /// at slot <paramref name="offset"/>.
        /// </summary>
        public static void AddMultWithOffset(in VBuffer<float> src, float c, ref VBuffer<float> dst, int offset)
        {
            Contracts.CheckParam(0 <= offset && offset <= dst.Length, nameof(offset));
            Contracts.CheckParam(src.Length <= dst.Length - offset, nameof(offset));
 
            var srcValues = src.GetValues();
            if (srcValues.Length == 0 || c == 0)
                return;
            VBufferEditor<float> editor;
            Span<float> values;
            if (dst.IsDense)
            {
                // This is by far the most common case.
                editor = VBufferEditor.Create(ref dst, dst.Length);
                values = editor.Values.Slice(offset);
                if (src.IsDense)
                    CpuMathUtils.AddScale(c, srcValues, values, srcValues.Length);
                else
                    CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), values, srcValues.Length);
                return;
            }
            // REVIEW: Perhaps implementing an ApplyInto with an offset would be more
            // appropriate, as well as more general, considering that this case is less important.
 
            // dst is sparse. I expect this will see limited practical use, since accumulates
            // are often better off going into a dense vector in all applications of interest to us.
            // Correspondingly, this implementation will be functional, but not optimized.
            var dstIndices = dst.GetIndices();
            int dMin = dstIndices.Length == 0 ? 0 : dstIndices.FindIndexSorted(0, dstIndices.Length, offset);
            int dLim = dstIndices.Length == 0 ? 0 : dstIndices.FindIndexSorted(dMin, dstIndices.Length, offset + src.Length);
            Contracts.Assert(dMin - dLim <= src.Length);
            // First get the number of extra values that we will need to accommodate.
            int gapCount;
            if (src.IsDense)
                gapCount = src.Length - (dLim - dMin);
            else
            {
                gapCount = srcValues.Length;
                var srcIndices = src.GetIndices();
                for (int iS = 0, iD = dMin; iS < srcIndices.Length && iD < dLim;)
                {
                    var comp = srcIndices[iS] - dstIndices[iD] + offset;
                    if (comp < 0) // dst index is larger.
                        iS++;
                    else if (comp > 0) // src index is larger.
                        iD++;
                    else
                    {
                        iS++;
                        iD++;
                        gapCount--;
                    }
                }
            }
            // Extend dst so that it has room for this additional stuff. Shift things over as well.
            var dstValues = dst.GetValues();
            editor = VBufferEditor.Create(ref dst,
                dst.Length,
                dstValues.Length + gapCount,
                keepOldOnResize: true,
                requireIndicesOnDense: true);
            var indices = editor.Indices;
            values = editor.Values;
            if (gapCount > 0)
            {
                // Shift things over, unless there's nothing to shift over, or no new elements are being introduced anyway.
                if (dstValues.Length != dLim)
                {
                    Contracts.Assert(dLim < dstValues.Length);
                    indices.Slice(dLim, dstValues.Length - dLim)
                        .CopyTo(indices.Slice(dLim + gapCount));
                    values.Slice(dLim, dstValues.Length - dLim)
                        .CopyTo(values.Slice(dLim + gapCount));
                }
            }
            // Now, fill in the stuff in this "gap." Both of these implementations work
            // backwards from the end, since they can potentially be working in place if
            // the EnsureSize calls did not actually result in a new array.
            if (src.IsDense)
            {
                // dst is sparse, src is dense.
                int iD = dLim - 1;
                int iS = src.Length - 1;
                for (int iDD = dLim + gapCount; --iDD >= dMin; --iS)
                {
                    Contracts.Assert(iDD == iS + dMin);
                    // iDD and iD are the points in where we are writing and reading from.
                    Contracts.Assert(iDD >= iD);
                    if (iD >= 0 && offset + iS == dstIndices[iD]) // Collision.
                        values[iDD] = dstValues[iD--] + c * srcValues[iS];
                    else // Miss.
                        values[iDD] = c * srcValues[iS];
                    indices[iDD] = offset + iS;
                }
            }
            else
            {
                // Both dst and src are sparse.
                int iD = dLim - 1;
                var srcIndices = src.GetIndices();
                int iS = srcIndices.Length - 1;
                int sIndex = iS < 0 ? -1 : srcIndices[iS];
                int dIndex = iD < 0 ? -1 : dstIndices[iD] - offset;
 
                for (int iDD = dLim + gapCount; --iDD >= dMin;)
                {
                    Contracts.Assert(iDD >= iD);
                    int comp = sIndex - dIndex;
                    if (comp == 0) // Collision on both.
                    {
                        indices[iDD] = dstIndices[iD];
                        values[iDD] = dstValues[iD--] + c * srcValues[iS--];
                        sIndex = iS < 0 ? -1 : srcIndices[iS];
                        dIndex = iD < 0 ? -1 : dstIndices[iD] - offset;
                    }
                    else if (comp < 0) // Collision on dst.
                    {
                        indices[iDD] = dstIndices[iD];
                        values[iDD] = dstValues[iD--];
                        dIndex = iD < 0 ? -1 : dstIndices[iD] - offset;
                    }
                    else // Collision on src.
                    {
                        indices[iDD] = sIndex + offset;
                        values[iDD] = c * srcValues[iS--];
                        sIndex = iS < 0 ? -1 : srcIndices[iS];
                    }
                }
            }
            dst = editor.Commit();
        }
 
        /// <summary>
        /// Perform in-place scaling of a vector into another vector as
        /// <c><paramref name="dst"/> = <paramref name="src"/> * <paramref name="c"/></c>.
        /// This is more or less equivalent to performing the same operation with
        /// <see cref="VBufferUtils.ApplyInto{TSrc1,TSrc2,TDst}"/> except perhaps more efficiently,
        /// with one exception: if <paramref name="c"/> is 0 and <paramref name="src"/>
        /// is sparse, <paramref name="dst"/> will have a count of zero, instead of the
        /// same count as <paramref name="src"/>.
        /// </summary>
        public static void ScaleInto(in VBuffer<float> src, float c, ref VBuffer<float> dst)
        {
            // REVIEW: The analogous WritableVector method insisted on
            // equal lengths, but I assume I don't care here.
            if (c == 1)
                src.CopyTo(ref dst);
            else if (src.GetValues().Length == 0 || c == 0)
            {
                if (src.Length > 0 && src.IsDense)
                {
                    // Due to sparsity preservation from src, dst must be dense, in the same way.
                    var editor = VBufferEditor.Create(ref dst, src.Length);
                    if (!editor.CreatedNewValues) // We need to clear it.
                        editor.Values.Clear();
                    dst = editor.Commit();
                }
                else
                {
                    VBufferUtils.Resize(ref dst, src.Length, 0);
                }
            }
            else if (c == -1)
                VBufferUtils.ApplyIntoEitherDefined(in src, ref dst, (i, v) => -v);
            else
                VBufferUtils.ApplyIntoEitherDefined(in src, ref dst, (i, v) => c * v);
        }
 
        public static int ArgMax(in VBuffer<float> src)
        {
            if (src.Length == 0)
                return -1;
            var srcValues = src.GetValues();
            if (srcValues.Length == 0)
                return 0;
 
            int ind = MathUtils.ArgMax(srcValues);
            // ind < 0 iff all explicit values are NaN.
            Contracts.Assert(-1 <= ind && ind < srcValues.Length);
 
            if (src.IsDense)
                return ind;
 
            var srcIndices = src.GetIndices();
            if (ind >= 0)
            {
                Contracts.Assert(srcIndices[ind] >= ind);
                if (srcValues[ind] > 0)
                    return srcIndices[ind];
                // This covers the case where there is an explicit zero, and zero is the max,
                // and the first explicit zero is before any implicit entries.
                if (srcValues[ind] == 0 && srcIndices[ind] == ind)
                    return ind;
            }
 
            // All explicit values are non-positive or NaN, so return the first index not in src.Indices.
            ind = 0;
            while (ind < srcIndices.Length && srcIndices[ind] == ind)
                ind++;
            Contracts.Assert(ind <= srcIndices.Length);
            Contracts.Assert(ind == srcIndices.Length || ind < srcIndices[ind]);
            return ind;
        }
 
        public static int ArgMin(in VBuffer<float> src)
        {
            if (src.Length == 0)
                return -1;
            var srcValues = src.GetValues();
            if (srcValues.Length == 0)
                return 0;
 
            int ind = MathUtils.ArgMin(srcValues);
            // ind < 0 iff all explicit values are NaN.
            Contracts.Assert(-1 <= ind && ind < srcValues.Length);
 
            if (src.IsDense)
                return ind;
 
            var srcIndices = src.GetIndices();
            if (ind >= 0)
            {
                Contracts.Assert(srcIndices[ind] >= ind);
                if (srcValues[ind] < 0)
                    return srcIndices[ind];
                // This covers the case where there is an explicit zero, and zero is the min,
                // and the first explicit zero is before any implicit entries.
                if (srcValues[ind] == 0 && srcIndices[ind] == ind)
                    return ind;
            }
 
            // All explicit values are non-negative or NaN, so return the first index not in srcIndices.
            ind = 0;
            while (ind < srcIndices.Length && srcIndices[ind] == ind)
                ind++;
            Contracts.Assert(ind <= srcIndices.Length);
            Contracts.Assert(ind == srcIndices.Length || ind < srcIndices[ind]);
            return ind;
        }
    }
}
File: Deprecated\Vector\VBufferMathUtils.cs	Web Access
Project: src\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)