// FloatUtils.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Globalization;
using System.Runtime.InteropServices;
using Microsoft.ML.Runtime;
 
namespace Microsoft.ML.Internal.Utilities
{
    [BestFriend]
    internal static class FloatUtils
    {
        // This is used to read and write the bits of a Double: the Float and Bits fields share
        // the same storage, so writing one and reading the other reinterprets the value.
        // Thanks to Vance Morrison for educating me about this excellent aliasing mechanism.
        [StructLayout(LayoutKind.Explicit)]
        private struct DoubleBits
        {
            // Masks for the portions of a Double: 1 sign bit, 11 exponent bits, 52 mantissa bits.
            public const ulong MaskSign = 0x8000000000000000UL;
            public const ulong MaskExp = 0x7FF0000000000000UL;
            public const ulong MaskMan = 0x000FFFFFFFFFFFFFUL;

            public const int RawExpInf = 0x7FF; // The raw exponent value for infinities and nan.
            public const int RawExpZero = 0x3FF; // The raw exponent value for "1", when the exponent is logically zero.
            public const int CbitExp = 11; // Number of exponent bits.
            public const int CbitMan = 52; // Number of mantissa bits.
            public const int ExpDenorm = -1074; // Exponent reported by GetParts for non-zero denormals (1 - RawExpZero - CbitMan).
            public const int ExpOrigin = 1075; // RawExpZero + CbitMan: the bias when the mantissa is treated as an integer.

            [FieldOffset(0)]
            public double Float; // used only for construction from Double
            [FieldOffset(0)]
            public ulong Bits; //overlay

            // Returns the raw exponent field minus RawExpZero. No special cases are applied:
            // zeros and denormals yield -RawExpZero, infinities and NaN yield RawExpInf - RawExpZero.
            public int GetExp()
            {
                return ((int)(Bits >> CbitMan) & RawExpInf) - RawExpZero;
            }

            // Returns the 11-bit exponent field exactly as stored (0 .. RawExpInf).
            public int GetRawExp()
            {
                return (int)(Bits >> CbitMan) & RawExpInf;
            }

            // True unless the exponent field is all ones (infinity or NaN).
            public bool IsFinite()
            {
                return (Bits & MaskExp) < MaskExp;
            }

            // True for finite values other than +0 and -0. Denormals count as non-zero.
            public bool IsFiniteNonZero()
            {
                var bits = Bits & ~MaskSign;
                return 0 < bits && bits < MaskExp;
            }

            // True when the exponent field is neither all zeros (zero/denormal) nor all ones (infinity/NaN).
            public bool IsFiniteNormal()
            {
                var exp = Bits & MaskExp;
                return 0 < exp && exp < MaskExp;
            }

            // True when the exponent field is all zeros. Note that this includes +0 and -0,
            // not only true denormals.
            public bool IsDenormal()
            {
                return (Bits & MaskExp) == 0;
            }

            // Replaces the exponent field with exp + RawExpZero, keeping sign and mantissa.
            // exp is asserted to lie strictly inside the normal range.
            public void SetExponent(int exp)
            {
                Contracts.Assert(-RawExpZero < exp && exp < RawExpInf - RawExpZero);
                Bits = (Bits & ~MaskExp) | (((ulong)(exp + RawExpZero) << CbitMan) & MaskExp);
            }

            // Decomposes the value so that, for finite values, the magnitude equals man * 2^exp.
            //   sign:    +1 or -1 (taken from the sign bit, so -0 reports -1).
            //   exp/man: for normal values man includes the implicit high bit and exp is the raw
            //            exponent minus ExpOrigin; for non-zero denormals exp is ExpDenorm; for
            //            zero both are 0.
            //   fFinite: false for infinity/NaN, in which case exp is int.MaxValue and man is the
            //            raw mantissa field (0 for infinity).
            public void GetParts(out int sign, out int exp, out ulong man, out bool fFinite)
            {
                // (Bits >> 62) & 2 is 2 when the sign bit is set and 0 otherwise.
                sign = 1 - ((int)(Bits >> 62) & 2);
                man = Bits & MaskMan;
                exp = GetRawExp();
                if (exp == 0)
                {
                    // Denormalized number.
                    fFinite = true;
                    if (man != 0)
                        exp = ExpDenorm;
                }
                else if (exp == RawExpInf)
                {
                    // NaN or infinite.
                    fFinite = false;
                    exp = int.MaxValue;
                }
                else
                {
                    fFinite = true;
                    // Add the implicit high bit.
                    man += MaskMan + 1;
                    exp -= ExpOrigin;
                }
            }

            // Builds the value sign * man * 2^exp (only the sign of 'sign' matters; negative
            // sets the sign bit). Excess low mantissa bits are truncated, results too large
            // become infinity, results too small become denormals or zero.
            // The exponent arithmetic is done in 64 bits so that extreme exp values (such as the
            // int.MaxValue that GetParts reports for non-finite inputs) saturate to infinity or
            // zero instead of wrapping around.
            public void SetFromParts(int sign, int exp, ulong man)
            {
                if (man == 0)
                    Bits = 0;
                else
                {
                    // Normalize so that 0x0010 0000 0000 0000 is the highest bit set.
                    // Assumes Utils.CbitHighZero returns the number of leading zero bits.
                    int cbitShift = Utils.CbitHighZero(man) - CbitExp;
                    if (cbitShift < 0)
                    {
                        // REVIEW: Should this round?
                        man >>= -cbitShift;
                    }
                    else
                        man <<= cbitShift;
                    Contracts.Assert((man & ~MaskMan) == MaskMan + 1);

                    // Compensate for the normalization shift, move the point to just behind the
                    // leading 1: 0x001.0 0000 0000 0000 (52 bits) and skew the exponent
                    // (by 0x3FF == 1023). Computed as long: |exp| + 64 + ExpOrigin cannot overflow.
                    long rawExp = (long)exp - cbitShift + ExpOrigin;

                    if (rawExp >= RawExpInf)
                    {
                        // Infinity.
                        Bits = MaskExp;
                    }
                    else if (rawExp <= 0)
                    {
                        // Denormalized.
                        rawExp--;
                        if (rawExp < -CbitMan)
                        {
                            // Underflow to zero.
                            Bits = 0;
                        }
                        else
                        {
                            // Here 1 <= -rawExp <= CbitMan, so the leading bit survives the shift.
                            Bits = man >> (int)(-rawExp);
                            Contracts.Assert(Bits != 0);
                        }
                    }
                    else
                    {
                        // Mask off the implicit high bit.
                        Bits = (man & MaskMan) | ((ulong)rawExp << CbitMan);
                    }
                }

                if (sign < 0)
                    Bits |= MaskSign;
            }

            // Sets the value to 2^exp: infinity when too large, a denormal when the result is
            // below the normal range but still representable, zero otherwise.
            public void SetPowerOfTwo(int exp)
            {
                // The first test keeps the biasing addition from overflowing for huge exp.
                if (exp >= RawExpInf || (exp += RawExpZero) >= RawExpInf)
                    Bits = MaskExp; // Overflow to infinity.
                else if (exp > 0)
                    Bits = (ulong)exp << CbitMan;
                else if ((exp += CbitMan - 1) < 0)
                    Bits = 0; // Underflow to zero.
                else
                    Bits = 1UL << exp; // Denormal.
            }

            // Forces the exponent field to RawExpZero (logical exponent zero), keeping sign and mantissa.
            public void NormalizeExponent()
            {
                // This doesn't work on denormalized numbers or non-finite values.
                Contracts.Assert(IsFiniteNormal());
                Bits = (Bits & ~MaskExp) | ((ulong)RawExpZero << CbitMan);
            }

            // Keeps only the most significant bit of the significand. Normal values lose their
            // whole mantissa field (the implicit bit remains); denormals keep only their highest
            // set mantissa bit. +0, infinities and NaN are left untouched.
            public void TruncateMantissaToSingleBit()
            {
                if (Bits == 0)
                    return;
                int exp = GetRawExp();
                if (exp == RawExpInf)
                    return;
                if (exp != 0)
                {
                    // Mantissa has an implicit high bit.
                    Bits &= ~MaskMan;
                }
                else
                {
                    // Denormalized number.
                    // Clear all but the high bit in the mantissa: man & (man - 1) drops the
                    // lowest set bit, so repeat until only one bit is left.
                    ulong tmp;
                    ulong man = Bits & MaskMan;
                    while ((tmp = man & (man - 1)) != 0)
                        man = tmp;
                    Bits = (Bits & MaskSign) | man;
                }
            }

            // Discards the fractional part (rounds toward zero), preserving the sign.
            // Infinities and NaN are left untouched.
            public void Truncate()
            {
                int exp = GetRawExp();
                if (exp == RawExpInf) // NaN or infinite.
                    return;

                // After this, exp is the power of two carried by the lowest mantissa bit.
                exp -= ExpOrigin;
                if (exp >= 0)
                    return; // No mantissa bit is fractional.

                if (exp <= -CbitMan - 1)
                {
                    // Magnitude is below 1. Preserve only the sign. The rest is zero.
                    Bits &= MaskSign;
                }
                else
                {
                    // Clear the low (-exp) bits.
                    // Assumes Utils.UuMaskBelow(n) is a mask of the n lowest bits.
                    Bits &= ~Utils.UuMaskBelow(-exp);
                }
            }
        }
 
        // This is used to read and write the bits of a Single: the Float and Bits fields share
        // the same storage, so writing one and reading the other reinterprets the value.
        // Thanks to Vance Morrison for educating me about this excellent aliasing mechanism.
        [StructLayout(LayoutKind.Explicit)]
        private struct SingleBits
        {
            // Masks for the portions of a Single: 1 sign bit, 8 exponent bits, 23 mantissa bits.
            public const uint MaskSign = 0x80000000U;
            public const uint MaskExp = 0x7F800000U;
            public const uint MaskMan = 0x007FFFFFU;

            public const int RawExpInf = 0xFF; // The raw exponent value for infinities and nan.
            public const int RawExpZero = 0x7F; // The raw exponent value for "1", when the exponent is logically zero.
            public const int CbitExp = 8; // Number of exponent bits.
            public const int CbitMan = 23; // Number of mantissa bits.
            // NOTE(review): unlike DoubleBits, these two do not include CbitMan, so GetParts and
            // SetFromParts treat the mantissa as having CbitMan fractional bits. They are kept
            // as-is because the values are observable through GetParts/SetFromParts.
            public const int ExpDenorm = -126; // Exponent reported by GetParts for non-zero denormals (1 - RawExpZero).
            public const int ExpOrigin = 127; // Bias removed by GetParts and added back by SetFromParts.

            [FieldOffset(0)]
            public float Float; // used only for construction from Single
            [FieldOffset(0)]
            public uint Bits; //overlay

            // Returns the raw exponent field minus RawExpZero. No special cases are applied:
            // zeros and denormals yield -RawExpZero, infinities and NaN yield RawExpInf - RawExpZero.
            public int GetExp()
            {
                return ((int)(Bits >> CbitMan) & RawExpInf) - RawExpZero;
            }

            // Returns the 8-bit exponent field exactly as stored (0 .. RawExpInf).
            public int GetRawExp()
            {
                return (int)(Bits >> CbitMan) & RawExpInf;
            }

            // True unless the exponent field is all ones (infinity or NaN).
            public bool IsFinite()
            {
                return (Bits & MaskExp) < MaskExp;
            }

            // True for finite values other than +0 and -0. Denormals count as non-zero.
            public bool IsFiniteNonZero()
            {
                var bits = Bits & ~MaskSign;
                return 0 < bits && bits < MaskExp;
            }

            // True when the exponent field is neither all zeros (zero/denormal) nor all ones (infinity/NaN).
            public bool IsFiniteNormal()
            {
                var exp = Bits & MaskExp;
                return 0 < exp && exp < MaskExp;
            }

            // True when the exponent field is all zeros. Note that this includes +0 and -0,
            // not only true denormals.
            public bool IsDenormal()
            {
                return (Bits & MaskExp) == 0;
            }

            // Replaces the exponent field with exp + RawExpZero, keeping sign and mantissa.
            // exp is asserted to lie strictly inside the normal range.
            public void SetExponent(int exp)
            {
                Contracts.Assert(-RawExpZero < exp && exp < RawExpInf - RawExpZero);
                Bits = (Bits & ~MaskExp) | (((uint)(exp + RawExpZero) << CbitMan) & MaskExp);
            }

            // Decomposes the value so that, for finite values, the magnitude equals
            // man * 2^(exp - CbitMan).
            //   sign:    +1 or -1 (taken from the sign bit, so -0 reports -1).
            //   exp/man: for normal values man includes the implicit high bit and exp is the raw
            //            exponent minus ExpOrigin; for non-zero denormals exp is ExpDenorm; for
            //            zero both are 0.
            //   fFinite: false for infinity/NaN, in which case exp is int.MaxValue and man is the
            //            raw mantissa field (0 for infinity).
            public void GetParts(out int sign, out int exp, out uint man, out bool fFinite)
            {
                // (Bits >> 30) & 2 is 2 when the sign bit is set and 0 otherwise.
                sign = 1 - ((int)(Bits >> 30) & 2);
                man = Bits & MaskMan;
                exp = GetRawExp();
                if (exp == 0)
                {
                    // Denormalized number.
                    fFinite = true;
                    if (man != 0)
                        exp = ExpDenorm;
                }
                else if (exp == RawExpInf)
                {
                    // NaN or infinite.
                    fFinite = false;
                    exp = int.MaxValue;
                }
                else
                {
                    fFinite = true;
                    // Add the implicit high bit.
                    man += MaskMan + 1;
                    // Same value as the former "exp += ExpDenorm - 1", written as in DoubleBits.
                    exp -= ExpOrigin;
                }
            }

            // Builds the value sign * man * 2^(exp - CbitMan), i.e. the inverse of GetParts
            // (only the sign of 'sign' matters; negative sets the sign bit). Excess low mantissa
            // bits are truncated, results too large become infinity, results too small become
            // denormals or zero.
            // The exponent arithmetic is done in 64 bits so that extreme exp values (such as the
            // int.MaxValue that GetParts reports for non-finite inputs) saturate to infinity or
            // zero instead of wrapping around.
            public void SetFromParts(int sign, int exp, uint man)
            {
                if (man == 0)
                    Bits = 0;
                else
                {
                    // Normalize so that 0x0080 0000 is the highest bit set.
                    // Assumes Utils.CbitHighZero returns the number of leading zero bits.
                    int cbitShift = Utils.CbitHighZero(man) - CbitExp;
                    if (cbitShift < 0)
                    {
                        // REVIEW: Should this round?
                        man >>= -cbitShift;
                    }
                    else
                        man <<= cbitShift;
                    Contracts.Assert((man & ~MaskMan) == MaskMan + 1);

                    // Compensate for the normalization shift and skew the exponent
                    // (by 0x7F == 127). Computed as long: |exp| + 32 + ExpOrigin cannot overflow.
                    long rawExp = (long)exp - cbitShift + ExpOrigin;

                    if (rawExp >= RawExpInf)
                    {
                        // Infinity.
                        Bits = MaskExp;
                    }
                    else if (rawExp <= 0)
                    {
                        // Denormalized.
                        rawExp--;
                        if (rawExp < -CbitMan)
                        {
                            // Underflow to zero.
                            Bits = 0;
                        }
                        else
                        {
                            // Here 1 <= -rawExp <= CbitMan, so the leading bit survives the shift.
                            Bits = man >> (int)(-rawExp);
                            Contracts.Assert(Bits != 0);
                        }
                    }
                    else
                    {
                        // Mask off the implicit high bit.
                        Bits = (man & MaskMan) | ((uint)rawExp << CbitMan);
                    }
                }

                if (sign < 0)
                    Bits |= MaskSign;
            }

            // Sets the value to 2^exp: infinity when too large, a denormal when the result is
            // below the normal range but still representable, zero otherwise.
            public void SetPowerOfTwo(int exp)
            {
                // The first test keeps the biasing addition from overflowing for huge exp.
                if (exp >= RawExpInf || (exp += RawExpZero) >= RawExpInf)
                    Bits = MaskExp; // Overflow to infinity.
                else if (exp > 0)
                    Bits = (uint)exp << CbitMan;
                else if ((exp += CbitMan - 1) < 0)
                    Bits = 0; // Underflow to zero.
                else
                    Bits = 1U << exp; // Denormal.
            }

            // Forces the exponent field to RawExpZero (logical exponent zero), keeping sign and mantissa.
            public void NormalizeExponent()
            {
                // This doesn't work on denormalized numbers or non-finite values.
                Contracts.Assert(IsFiniteNormal());
                Bits = (Bits & ~MaskExp) | ((uint)RawExpZero << CbitMan);
            }

            // Keeps only the most significant bit of the significand. Normal values lose their
            // whole mantissa field (the implicit bit remains); denormals keep only their highest
            // set mantissa bit. +0, infinities and NaN are left untouched.
            public void TruncateMantissaToSingleBit()
            {
                if (Bits == 0)
                    return;
                int exp = GetRawExp();
                if (exp == RawExpInf)
                    return;
                if (exp != 0)
                {
                    // Mantissa has an implicit high bit.
                    Bits &= ~MaskMan;
                }
                else
                {
                    // Denormalized number.
                    // Clear all but the high bit in the mantissa: man & (man - 1) drops the
                    // lowest set bit, so repeat until only one bit is left.
                    uint tmp;
                    uint man = Bits & MaskMan;
                    while ((tmp = man & (man - 1)) != 0)
                        man = tmp;
                    Bits = (Bits & MaskSign) | man;
                }
            }

            // Discards the fractional part (rounds toward zero), preserving the sign.
            // Infinities and NaN are left untouched.
            public void Truncate()
            {
                int exp = GetRawExp();
                if (exp == RawExpInf) // NaN or infinite.
                    return;

                // After this, exp is the power of two carried by the lowest mantissa bit.
                // The rebase must include CbitMan: with ExpOrigin (127) alone, every value of
                // magnitude >= 1 (e.g. 1.5f) was returned unchanged and smaller values kept
                // fractional bits.
                exp -= RawExpZero + CbitMan;
                if (exp >= 0)
                    return; // No mantissa bit is fractional.

                if (exp <= -CbitMan - 1)
                {
                    // Magnitude is below 1. Preserve only the sign. The rest is zero.
                    Bits &= MaskSign;
                }
                else
                {
                    // Clear the low (-exp) bits.
                    // Assumes Utils.UMaskBelow(n) is a mask of the n lowest bits.
                    Bits &= ~Utils.UMaskBelow(-exp);
                }
            }
        }
 
        // Reinterprets x as its raw 64-bit pattern (no numeric conversion takes place).
        public static ulong GetBits(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.Bits;
        }
 
        // Reinterprets x as its raw 32-bit pattern (no numeric conversion takes place).
        public static uint GetBits(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.Bits;
        }
 
        // Reinterprets a raw 64-bit pattern as a Double; the inverse of GetBits(double).
        public static double FromBits(ulong bits)
        {
            var overlay = new DoubleBits { Bits = bits };
            return overlay.Float;
        }
 
        // Reinterprets a raw 32-bit pattern as a Single; the inverse of GetBits(float).
        public static float FromBits(uint bits)
        {
            var overlay = new SingleBits { Bits = bits };
            return overlay.Float;
        }
 
        // Returns true when x is neither an infinity nor NaN.
        public static bool IsFinite(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.IsFinite();
        }
 
        // Returns true when x is neither an infinity nor NaN.
        public static bool IsFinite(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.IsFinite();
        }
 
        // Returns true when every element of values is finite (true for an empty span).
        public static bool IsFinite(ReadOnlySpan<double> values)
        {
            // Assuming that non-finites are rare, this is faster than testing on each item:
            // v - v is zero for a finite v and NaN for an infinity or NaN, and once the
            // accumulator turns into NaN it stays NaN.
            double acc = 0;
            foreach (double v in values)
                acc += v - v;
            return acc == 0;
        }
 
        // Returns true when every element of values is finite (true for an empty span).
        // REVIEW: Consider implementing using SSE.
        public static bool IsFinite(ReadOnlySpan<float> values)
        {
            // Assuming that non-finites are rare, this is faster than testing on each item:
            // v - v is zero for a finite v and NaN for an infinity or NaN, and once the
            // accumulator turns into NaN it stays NaN.
            float acc = 0;
            foreach (float v in values)
                acc += v - v;
            return acc == 0;
        }
 
        // Returns true when x is finite and is neither +0 nor -0 (denormals qualify).
        public static bool IsFiniteNonZero(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.IsFiniteNonZero();
        }
 
        // Returns true when x is finite and is neither +0 nor -0 (denormals qualify).
        public static bool IsFiniteNonZero(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.IsFiniteNonZero();
        }
 
        // Returns true when x is a normal number: not zero, not denormal, not infinite, not NaN.
        public static bool IsFiniteNormal(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.IsFiniteNormal();
        }
 
        // Returns true when x is a normal number: not zero, not denormal, not infinite, not NaN.
        public static bool IsFiniteNormal(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.IsFiniteNormal();
        }
 
        // Returns true when the exponent field of x is all zeros. This holds for denormals
        // and also for +0 and -0.
        public static bool IsDenormal(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.IsDenormal();
        }
 
        // Returns true when the exponent field of x is all zeros. This holds for denormals
        // and also for +0 and -0.
        public static bool IsDenormal(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.IsDenormal();
        }
 
        // Returns the exponent field of x with the bias (1023) removed. No special cases are
        // applied: zeros and denormals give -1023, infinities and NaN give 1024.
        public static int GetExponent(double x)
        {
            var overlay = new DoubleBits { Float = x };
            return overlay.GetExp();
        }
 
        // Returns the exponent field of x with the bias (127) removed. No special cases are
        // applied: zeros and denormals give -127, infinities and NaN give 128.
        public static int GetExponent(float x)
        {
            var overlay = new SingleBits { Float = x };
            return overlay.GetExp();
        }
 
        // Returns x with its exponent field replaced by the (unbiased) exponent exp; sign and
        // mantissa bits are kept. exp is asserted to be within the normal exponent range.
        public static double SetExponent(double x, int exp)
        {
            var overlay = new DoubleBits { Float = x };
            overlay.SetExponent(exp);
            return overlay.Float;
        }
 
        // Returns x with its exponent field replaced by the (unbiased) exponent exp; sign and
        // mantissa bits are kept. exp is asserted to be within the normal exponent range.
        public static float SetExponent(float x, int exp)
        {
            var overlay = new SingleBits { Float = x };
            overlay.SetExponent(exp);
            return overlay.Float;
        }
 
        // Splits x into sign (+1 or -1), exponent and mantissa, and reports through fFinite
        // whether x is finite. See DoubleBits.GetParts for the exact scaling of exp and man.
        public static void GetParts(double x, out int sign, out int exp, out ulong man, out bool fFinite)
        {
            var overlay = new DoubleBits { Float = x };
            overlay.GetParts(out sign, out exp, out man, out fFinite);
        }
 
        // Splits x into sign (+1 or -1), exponent and mantissa, and reports through fFinite
        // whether x is finite. See SingleBits.GetParts for the exact scaling of exp and man.
        public static void GetParts(float x, out int sign, out int exp, out uint man, out bool fFinite)
        {
            var overlay = new SingleBits { Float = x };
            overlay.GetParts(out sign, out exp, out man, out fFinite);
        }
 
        // Assembles a Double from a sign (negative means negative), an exponent and a mantissa
        // by delegating to DoubleBits.SetFromParts.
        public static double GetFromPartsDouble(int sign, int exp, ulong man)
        {
            DoubleBits overlay = default;
            overlay.SetFromParts(sign, exp, man);
            return overlay.Float;
        }
 
        // Assembles a Single from a sign (negative means negative), an exponent and a mantissa
        // by delegating to SingleBits.SetFromParts.
        public static float GetFromPartsSingle(int sign, int exp, uint man)
        {
            SingleBits overlay = default;
            overlay.SetFromParts(sign, exp, man);
            return overlay.Float;
        }
 
        // Returns 2^exp as a Double: +infinity when exp is too large, a denormal or zero when
        // it is too small.
        public static double GetPowerOfTwoDouble(int exp)
        {
            DoubleBits overlay = default;
            overlay.SetPowerOfTwo(exp);
            return overlay.Float;
        }
 
        // Returns 2^exp as a Single: +infinity when exp is too large, a denormal or zero when
        // it is too small.
        public static float GetPowerOfTwoSingle(int exp)
        {
            SingleBits overlay = default;
            overlay.SetPowerOfTwo(exp);
            return overlay.Float;
        }
 
        // Rewrites x so that its exponent is zero (sign and mantissa are kept) and returns the
        // exponent it had before. Asserts that the original value is finite and not a denormal.
        public static int NormalizeExponent(ref double x)
        {
            var overlay = new DoubleBits { Float = x };
            int previousExp = overlay.GetExp();
            overlay.NormalizeExponent();
            x = overlay.Float;
            return previousExp;
        }
 
        // Rewrites x so that its exponent is zero (sign and mantissa are kept) and returns the
        // exponent it had before. Asserts that the original value is finite and not a denormal.
        public static int NormalizeExponent(ref float x)
        {
            var overlay = new SingleBits { Float = x };
            int previousExp = overlay.GetExp();
            overlay.NormalizeExponent();
            x = overlay.Float;
            return previousExp;
        }
 
        // Returns x with only the most significant bit of its significand kept. +0, infinities
        // and NaN are returned unchanged.
        public static double TruncateMantissaToSingleBit(double x)
        {
            var overlay = new DoubleBits { Float = x };
            overlay.TruncateMantissaToSingleBit();
            return overlay.Float;
        }
 
        // Returns x with only the most significant bit of its significand kept. +0, infinities
        // and NaN are returned unchanged.
        public static float TruncateMantissaToSingleBit(float x)
        {
            var overlay = new SingleBits { Float = x };
            overlay.TruncateMantissaToSingleBit();
            return overlay.Float;
        }
 
        // Returns x with its fractional part removed (rounding toward zero, sign preserved).
        // Infinities and NaN are returned unchanged.
        public static double Truncate(double x)
        {
            var overlay = new DoubleBits { Float = x };
            overlay.Truncate();
            return overlay.Float;
        }
 
        // Formats x with the "R" (round-trip) specifier under the invariant culture.
        public static string ToRoundTripString(float x)
            => x.ToString("R", CultureInfo.InvariantCulture);
 
        // Formats x with the "G17" specifier (17 significant digits) under the invariant culture.
        public static string ToRoundTripString(double x)
            => x.ToString("G17", CultureInfo.InvariantCulture);
    }
}
// File: Utilities\FloatUtils.cs (Web Access)
// Project: src\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj (Microsoft.ML.Core)