|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
namespace System.Runtime.Intrinsics
{
internal static class VectorMath
{
public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `cos` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ---------------------
// checks for special cases
// if ( ux = infinity) raise overflow exception and return x
// if x is NaN then raise invalid FP operation exception and return x.
//
// 1. Argument reduction
// if |x| > 5e5 then
// __amd_remainder_piby2(x, &r, &rr, ®ion)
// else
// Argument reduction
// Let z = |x| * 2/pi
// z = dn + r, where dn = round(z)
// rhead = dn * pi/2_head
// rtail = dn * pi/2_tail
// r = z – dn = |x| - rhead – rtail
// expdiff = exp(dn) – exp(r)
// if(expdiff) > 15)
// rtail = |x| - dn*pi/2_tail2
// r = |x| - dn*pi/2_head - dn*pi/2_tail1 - dn*pi/2_tail2 - (((rhead + rtail) – rhead )-rtail)
// rr = (|x| – rhead) – r + rtail
//
// 2. Polynomial approximation
// if(dn is even)
// rr = rr * r;
// x4 = x2 * x2;
// s = 0.5 * x2;
// t = s - 1.0;
// poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4 + x2 * (C5 + x2 * x6)))))
// r = (((1.0 + t) - s) - rr) + poly – t
// else
// x3 = x2 * r
// poly = S2 + (r2 * (S3 + (r2 * (S4 + (r2 * (S5 + S6 * r2))))))
// r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
// if((sign + 1) & 2)
// return r
// else
// return -r;
//
// if |x| < pi/4 && |x| > 2.0^(-13)
// cos(x) = 1.0 + x*x * (-0.5 + (C1*x*x + (C2*x*x + (C3*x*x
// + (C4*x*x + (C5*x*x + C6*x*x))))))
//
// if |x| < 2.0^(-13) && |x| > 2.0^(-27)
// cos(x) = 1.0 - x*x*0.5;;
//
// else return 1.0
const long ARG_HUGE = 0x415312D000000000; // 5e6
const long ARG_LARGE = 0x3FE921FB54442D18; // PI / 4
const long ARG_SMALL = 0x3F20000000000000; // 2^-13
const long ARG_SMALLER = 0x3E40000000000000; // 2^-27
TVectorDouble ax = TVectorDouble.Abs(x);
TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
TVectorDouble result;
if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
TVectorDouble x2 = x * x;
if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
result = TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(
CosDoublePoly(x),
x2,
TVectorDouble.Create(-0.5)),
x2,
TVectorDouble.One
);
}
else
{
result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
}
}
else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
(TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinDoubleLarge(r, rr);
TVectorDouble cos = CosDoubleLarge(r, rr);
result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
cos, // region 0 or 2
sin // region 1 or 3
);
result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+result, // region 0 or 3
-result // region 1 or 2
);
}
else
{
return ScalarFallback(x);
}
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
result, // for elements: |x| >= 2^-27, infinity, or NaN
TVectorDouble.One // for elements: 2^-27 > |x|
);
static TVectorDouble ScalarFallback(TVectorDouble x)
{
TVectorDouble result = TVectorDouble.Zero;
for (int i = 0; i < TVectorDouble.ElementCount; i++)
{
double scalar = double.Cos(x[i]);
result = result.WithElement(i, scalar);
}
return result;
}
}
public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `cosf` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ---------------------
// Checks for special cases
// if ( ux = infinity) raise overflow exception and return x
// if x is NaN then raise invalid FP operation exception and return x.
//
// 1. Argument reduction
// if |x| > 5e5 then
// __amd_remainder_piby2d2f((uint64_t)x, &r, ®ion)
// else
// Argument reduction
// Let z = |x| * 2/pi
// z = dn + r, where dn = round(z)
// rhead = dn * pi/2_head
// rtail = dn * pi/2_tail
// r = z – dn = |x| - rhead – rtail
// expdiff = exp(dn) – exp(r)
// if(expdiff) > 15)
// rtail = |x| - dn*pi/2_tail2
// r = |x| - dn*pi/2_head - dn*pi/2_tail1
// - dn*pi/2_tail2 - (((rhead + rtail) – rhead )-rtail)
//
// 2. Polynomial approximation
// if(dn is even)
// x4 = x2 * x2;
// s = 0.5 * x2;
// t = 1.0 - s;
// poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * C4 )))
// r = t + poly
// else
// x3 = x2 * r
// poly = x3 * (S1 + x2 * (S2 + x2 * (S3 + x2 * S4)))
// r = r + poly
// if((sign + 1) & 2)
// return r
// else
// return -r;
//
// if |x| < pi/4 && |x| > 2.0^(-13)
// r = 0.5 * x2;
// t = 1 - r;
// cos(x) = t + ((1.0 - t) - r) + (x*x * (x*x * C1 + C2*x*x + C3*x*x
// + C4*x*x +x*x*C5 + x*x*C6)))
//
// if |x| < 2.0^(-13) && |x| > 2.0^(-27)
// cos(x) = 1.0 - x*x*0.5;;
//
// else return 1.0
const int ARG_HUGE = 0x4A989680; // 5e6
const int ARG_LARGE = 0x3F490FDB; // PI / 4
const int ARG_SMALL = 0x3C000000; // 2^-13
const int ARG_SMALLER = 0x39000000; // 2^-27
TVectorSingle ax = TVectorSingle.Abs(x);
TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
TVectorSingle result;
if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
CosSingleSmall(Widen<TVectorSingle, TVectorDouble>(x))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
CosSingleSmall(WidenLower<TVectorSingle, TVectorDouble>(x)),
CosSingleSmall(WidenUpper<TVectorSingle, TVectorDouble>(x))
);
}
}
else
{
// at least one element is: 2^-13 > |x|
TVectorSingle x2 = x * x;
result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
}
}
else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(Widen<TVectorSingle, TVectorDouble>(ax))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(ax)),
CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(ax))
);
}
}
else
{
return ScalarFallback(x);
}
return TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
result, // for elements: |x| >= 2^-27, infinity, or NaN
TVectorSingle.One // for elements: 2^-27 > |x|
);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static TVectorDouble CoreImpl(TVectorDouble ax)
{
(TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinSinglePoly(r);
TVectorDouble cos = CosSingleLarge(r);
TVectorDouble result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
cos, // region 0 or 2
sin // region 1 or 3
);
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+result, // region 0 or 3
-result // region 1 or 2
);
}
static TVectorSingle ScalarFallback(TVectorSingle x)
{
TVectorSingle result = TVectorSingle.Zero;
for (int i = 0; i < TVectorSingle.ElementCount; i++)
{
float scalar = float.Cos(x[i]);
result = result.WithElement(i, scalar);
}
return result;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector CopySign<TVector, T>(TVector value, TVector sign)
where TVector : unmanaged, ISimdVector<TVector, T>
{
Debug.Assert((typeof(T) != typeof(byte))
&& (typeof(T) != typeof(ushort))
&& (typeof(T) != typeof(uint))
&& (typeof(T) != typeof(ulong))
&& (typeof(T) != typeof(nuint)));
if (typeof(T) == typeof(float))
{
return TVector.ConditionalSelect(Create<TVector, T>(-0.0f), sign, value);
}
else if (typeof(T) == typeof(double))
{
return TVector.ConditionalSelect(Create<TVector, T>(-0.0), sign, value);
}
else
{
// All values are two's complement and so `value ^ sign` will produce a positive
// number if the signs match and a negative number if the signs differ. When the
// signs differ we want to negate the value and otherwise take the value as is.
return TVector.ConditionalSelect(TVector.IsNegative(value ^ sign), -value, value);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector DegreesToRadians<TVector, T>(TVector degrees)
where TVector : unmanaged, ISimdVector<TVector, T>
where T : IFloatingPointIeee754<T>
{
// NOTE: Don't change the algorithm without consulting the DIM
// which elaborates on why this implementation was chosen
return (degrees * TVector.Create(T.Pi)) / TVector.Create(T.CreateTruncating(180));
}
public static TVectorDouble ExpDouble<TVectorDouble, TVectorUInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
// This code is based on `vrd2_exp` from amd/aocl-libm-ose
// Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ----------------------
// 1. Argument Reduction:
// e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64) --- (1)
//
// Choose 'n' and 'f', such that
// x * 64/ln2 = n + f --- (2) | n is integer
// | |f| <= 0.5
// Choose 'm' and 'j' such that,
// n = (64 * m) + j --- (3)
//
// From (1), (2) and (3),
// e^x = 2^((64*m + j + f)/64)
// = (2^m) * (2^(j/64)) * 2^(f/64)
// = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
//
// 2. Table Lookup
// Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63
//
// 3. Polynomial Evaluation
// From (2),
// f = x*(64/ln(2)) - n
// Let,
// r = f*(ln(2)/64) = x - n*(ln(2)/64)
//
// 4. Reconstruction
// Thus,
// e^x = (2^m) * (2^(j/64)) * e^r
const ulong V_ARG_MAX = 0x40862000_00000000;
const ulong V_DP64_BIAS = 1023;
const double V_EXPF_HUGE = 6755399441055744;
const double V_TBL_LN2 = 1.4426950408889634;
const double V_LN2_HEAD = +0.693359375;
const double V_LN2_TAIL = -0.00021219444005469057;
const double C03 = 0.5000000000000018;
const double C04 = 0.1666666666666617;
const double C05 = 0.04166666666649277;
const double C06 = 0.008333333333559272;
const double C07 = 0.001388888895122404;
const double C08 = 0.00019841269432677495;
const double C09 = 2.4801486521374483E-05;
const double C10 = 2.7557622532543023E-06;
const double C11 = 2.7632293298250954E-07;
const double C12 = 2.499430431958571E-08;
// Check if -709 < vx < 709
if (TVectorUInt64.LessThanOrEqualAll(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(TVectorDouble.Abs(x)), TVectorUInt64.Create(V_ARG_MAX)))
{
// x * (64.0 / ln(2))
TVectorDouble dn = TVectorDouble.MultiplyAddEstimate(x, TVectorDouble.Create(V_TBL_LN2), TVectorDouble.Create(V_EXPF_HUGE));
// n = (int)z
TVectorUInt64 n = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(dn);
// dn = (double)n
dn -= TVectorDouble.Create(V_EXPF_HUGE);
// r = x - (dn * (ln(2) / 64))
// where ln(2) / 64 is split into Head and Tail values
TVectorDouble r = TVectorDouble.MultiplyAddEstimate(dn, TVectorDouble.Create(-V_LN2_HEAD), x);
r = TVectorDouble.MultiplyAddEstimate(dn, TVectorDouble.Create(-V_LN2_TAIL), r);
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble r8 = r4 * r4;
// Compute polynomial
TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C12), r, TVectorDouble.Create(C11)),
r2,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C10), r, TVectorDouble.Create(C09))),
r8,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C08), r, TVectorDouble.Create(C07)),
r2,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C06), r, TVectorDouble.Create(C05))),
r4,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C04), r, TVectorDouble.Create(C03)),
r2,
r + TVectorDouble.One
)
)
);
// m = (n - j) / 64
// result = polynomial * 2^m
return poly * Unsafe.BitCast<TVectorUInt64, TVectorDouble>((n + TVectorUInt64.Create(V_DP64_BIAS)) << 52);
}
else
{
return ScalarFallback(x);
static TVectorDouble ScalarFallback(TVectorDouble x)
{
TVectorDouble expResult = TVectorDouble.Zero;
for (int i = 0; i < TVectorDouble.ElementCount; i++)
{
double expScalar = double.Exp(x[i]);
expResult = expResult.WithElement(i, expScalar);
}
return expResult;
}
}
}
public static TVectorSingle ExpSingle<TVectorSingle, TVectorUInt32, TVectorDouble, TVectorUInt64>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
// This code is based on `vrs4_expf` from amd/aocl-libm-ose
// Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes:
// 1. Argument Reduction:
// e^x = 2^(x/ln2) --- (1)
//
// Let x/ln(2) = z --- (2)
//
// Let z = n + r , where n is an integer --- (3)
// |r| <= 1/2
//
// From (1), (2) and (3),
// e^x = 2^z
// = 2^(N+r)
// = (2^N)*(2^r) --- (4)
//
// 2. Polynomial Evaluation
// From (4),
// r = z - N
// 2^r = C1 + C2*r + C3*r^2 + C4*r^3 + C5 *r^4 + C6*r^5
//
// 4. Reconstruction
// Thus,
// e^x = (2^N) * (2^r)
const uint V_ARG_MAX = 0x42AE0000;
const float V_EXPF_MIN = -103.97208f;
const float V_EXPF_MAX = +88.72284f;
const double V_EXPF_HUGE = 6755399441055744;
const double V_TBL_LN2 = 1.4426950408889634;
const double C1 = 1.0000000754895704;
const double C2 = 0.6931472254087585;
const double C3 = 0.2402210737432219;
const double C4 = 0.05550297297702539;
const double C5 = 0.009676036358193323;
const double C6 = 0.001341000536524434;
TVectorSingle result;
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(Widen<TVectorSingle, TVectorDouble>(x))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x)),
CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x))
);
}
// Check if -103 < |x| < 88
if (TVectorUInt32.GreaterThanAny(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(TVectorSingle.Abs(x)), TVectorUInt32.Create(V_ARG_MAX)))
{
// (x > V_EXPF_MAX) ? float.PositiveInfinity : x
TVectorSingle infinityMask = TVectorSingle.GreaterThan(x, TVectorSingle.Create(V_EXPF_MAX));
result = TVectorSingle.ConditionalSelect(
infinityMask,
TVectorSingle.Create(float.PositiveInfinity),
result
);
// (x < V_EXPF_MIN) ? 0 : x
result = TVectorSingle.AndNot(result, TVectorSingle.LessThan(x, TVectorSingle.Create(V_EXPF_MIN)));
}
return result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static TVectorDouble CoreImpl(TVectorDouble x)
{
// x * (64.0 / ln(2))
TVectorDouble z = x * TVectorDouble.Create(V_TBL_LN2);
TVectorDouble v_expf_huge = TVectorDouble.Create(V_EXPF_HUGE);
TVectorDouble dn = z + v_expf_huge;
// n = (int)z
TVectorUInt64 n = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(dn);
// r = z - n
TVectorDouble r = z - (dn - v_expf_huge);
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C6), r, TVectorDouble.Create(C5)),
TVectorDouble.MultiplyAddEstimate(
r2,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r, TVectorDouble.Create(C3)),
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r, TVectorDouble.Create(C1))
)
);
// result = poly + (n << 52)
return Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(poly) + (n << 52));
}
}
public static TVectorDouble HypotDouble<TVectorDouble, TVectorUInt64>(TVectorDouble x, TVectorDouble y)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
// This code is based on `hypot` from amd/aocl-libm-ose
// Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
TVectorDouble ax = TVectorDouble.Abs(x);
TVectorDouble ay = TVectorDouble.Abs(y);
TVectorDouble infinityMask = TVectorDouble.IsPositiveInfinity(ax) | TVectorDouble.IsPositiveInfinity(ay);
TVectorDouble nanMask = TVectorDouble.IsNaN(ax) | TVectorDouble.IsNaN(ay);
TVectorUInt64 xBits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(ax);
TVectorUInt64 yBits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(ay);
TVectorUInt64 shiftedExponentMask = TVectorUInt64.Create(double.ShiftedBiasedExponentMask);
TVectorUInt64 xExp = (xBits >> double.BiasedExponentShift) & shiftedExponentMask;
TVectorUInt64 yExp = (yBits >> double.BiasedExponentShift) & shiftedExponentMask;
TVectorUInt64 expDiff = xExp - yExp;
// Cover cases where x or y is insignifican compared to the other
TVectorDouble insignificanMask = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(
TVectorUInt64.GreaterThanOrEqual(expDiff, TVectorUInt64.Create(double.SignificandLength + 1)) &
TVectorUInt64.LessThanOrEqual(expDiff, TVectorUInt64.Create(unchecked((ulong)(-double.SignificandLength - 1))))
);
TVectorDouble insignificantResult = ax + ay;
// To prevent overflow, scale down by 2^+600
TVectorUInt64 expBiasP500 = TVectorUInt64.Create(double.ExponentBias + 500);
TVectorUInt64 scaleDownMask = TVectorUInt64.GreaterThan(xExp, expBiasP500) | TVectorUInt64.GreaterThan(yExp, expBiasP500);
TVectorDouble expFix = TVectorDouble.ConditionalSelect(Unsafe.BitCast<TVectorUInt64, TVectorDouble>(scaleDownMask), TVectorDouble.Create(4.149515568880993E+180), TVectorDouble.One);
TVectorUInt64 bitsFix = scaleDownMask & TVectorUInt64.Create(0xDA80000000000000);
// To prevent underflow, scale up by 2^-600, but only if we didn't scale down already
TVectorUInt64 expBiasM500 = TVectorUInt64.Create(double.ExponentBias - 500);
TVectorUInt64 scaleUpMask = TVectorUInt64.AndNot(TVectorUInt64.LessThan(xExp, expBiasM500) | TVectorUInt64.LessThan(yExp, expBiasM500), scaleDownMask);
expFix = TVectorDouble.ConditionalSelect(Unsafe.BitCast<TVectorUInt64, TVectorDouble>(scaleUpMask), TVectorDouble.Create(2.409919865102884E-181), expFix);
bitsFix = TVectorUInt64.ConditionalSelect(scaleUpMask, TVectorUInt64.Create(0x2580000000000000), bitsFix);
xBits += bitsFix;
yBits += bitsFix;
// For subnormal values when scaling up, do an additional fixing
// up changing the adjustment to scale up by 2^601 instead and then
// subtract a correction of 2^601 to account for the implicit bit.
TVectorDouble subnormalFix = TVectorDouble.Create(9.232978617785736E-128);
TVectorUInt64 subnormalBitsFix = TVectorUInt64.Create(0x0010000000000000);
TVectorUInt64 xSubnormalMask = TVectorUInt64.IsZero(xExp) & scaleUpMask;
xBits += subnormalBitsFix & xSubnormalMask;
ax = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(xBits);
ax -= subnormalFix & Unsafe.BitCast<TVectorUInt64, TVectorDouble>(xSubnormalMask);
TVectorUInt64 ySubnormalMask = TVectorUInt64.IsZero(yExp) & scaleUpMask;
yBits += subnormalBitsFix & ySubnormalMask;
ay = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(yBits);
ay -= subnormalFix & Unsafe.BitCast<TVectorUInt64, TVectorDouble>(ySubnormalMask);
xBits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(ax);
yBits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(ay);
// Sort so ax is greater than ay
TVectorDouble lessThanMask = TVectorDouble.LessThan(ax, ay);
TVectorDouble tmp = ax;
ax = TVectorDouble.ConditionalSelect(lessThanMask, ay, ax);
ay = TVectorDouble.ConditionalSelect(lessThanMask, tmp, ay);
TVectorUInt64 tmpBits = xBits;
xBits = TVectorUInt64.ConditionalSelect(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(lessThanMask), yBits, xBits);
yBits = TVectorUInt64.ConditionalSelect(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(lessThanMask), tmpBits, yBits);
Debug.Assert(TVectorDouble.GreaterThanOrEqualAll(ax, ay));
// Split ax and ay into a head and tail portion
TVectorUInt64 headMask = TVectorUInt64.Create(0xFFFF_FFFF_F800_0000);
TVectorDouble xHead = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(xBits & headMask);
TVectorDouble yHead = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(yBits & headMask);
TVectorDouble xTail = ax - xHead;
TVectorDouble yTail = ay - yHead;
// Compute (x * x) + (y * y) with extra precision
//
// This includes taking into account expFix which may
// cause an underflow or overflow, but if it does that
// will still be the correct result.
TVectorDouble xx = ax * ax;
TVectorDouble yy = ay * ay;
TVectorDouble rHead = xx + yy;
TVectorDouble rTail = (xx - rHead) + yy;
rTail += TVectorDouble.MultiplyAddEstimate(xHead, xHead, -xx);
rTail = TVectorDouble.MultiplyAddEstimate(xHead * 2, xTail, rTail);
rTail = TVectorDouble.MultiplyAddEstimate(xTail, xTail, rTail);
// We only need to do extra accounting when ax and ay have equal exponents
TVectorDouble equalExponentsMask = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(TVectorUInt64.IsZero(expDiff));
TVectorDouble rTailTmp = rTail;
rTailTmp += TVectorDouble.MultiplyAddEstimate(yHead, yHead, -yy);
rTailTmp = TVectorDouble.MultiplyAddEstimate(yHead * 2, yTail, rTailTmp);
rTailTmp = TVectorDouble.MultiplyAddEstimate(yTail, yTail, rTailTmp);
rTail = TVectorDouble.ConditionalSelect(equalExponentsMask, rTailTmp, rTail);
TVectorDouble result = TVectorDouble.Sqrt(rHead + rTail) * expFix;
// IEEE 754 requires that we return +Infinity
// if either input is Infinity, even if one of
// the inputs is NaN. Otherwise if either input
// is NaN, we return NaN
result = TVectorDouble.ConditionalSelect(insignificanMask, insignificantResult, result);
result = TVectorDouble.ConditionalSelect(nanMask, TVectorDouble.Create(double.NaN), result);
result = TVectorDouble.ConditionalSelect(infinityMask, TVectorDouble.Create(double.PositiveInfinity), result);
return result;
}
public static TVectorSingle HypotSingle<TVectorSingle, TVectorDouble>(TVectorSingle x, TVectorSingle y)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
// This code is based on `hypotf` from amd/aocl-libm-ose
// Copyright (C) 2008-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
TVectorSingle ax = TVectorSingle.Abs(x);
TVectorSingle ay = TVectorSingle.Abs(y);
TVectorSingle infinityMask = TVectorSingle.IsPositiveInfinity(ax) | TVectorSingle.IsPositiveInfinity(ay);
TVectorSingle nanMask = TVectorSingle.IsNaN(ax) | TVectorSingle.IsNaN(ay);
TVectorSingle result;
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(Widen<TVectorSingle, TVectorDouble>(ax), Widen<TVectorSingle, TVectorDouble>(ay))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(ax), WidenLower<TVectorSingle, TVectorDouble>(ay)),
CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(ax), WidenUpper<TVectorSingle, TVectorDouble>(ay))
);
}
// IEEE 754 requires that we return +Infinity
// if either input is Infinity, even if one of
// the inputs is NaN. Otherwise if either input
// is NaN, we return NaN
result = TVectorSingle.ConditionalSelect(nanMask, TVectorSingle.Create(float.NaN), result);
result = TVectorSingle.ConditionalSelect(infinityMask, TVectorSingle.Create(float.PositiveInfinity), result);
return result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static TVectorDouble CoreImpl(TVectorDouble x, TVectorDouble y)
{
return TVectorDouble.Sqrt(TVectorDouble.MultiplyAddEstimate(x, x, y * y));
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVectorSingle IsEvenIntegerSingle<TVectorSingle, TVectorUInt32>(TVectorSingle vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
{
TVectorUInt32 bits = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(TVectorSingle.Abs(vector));
TVectorUInt32 exponent = ((bits >> float.BiasedExponentShift) & TVectorUInt32.Create(float.ShiftedBiasedExponentMask)) - TVectorUInt32.Create(float.ExponentBias);
TVectorUInt32 fractionalBits = TVectorUInt32.Create(float.BiasedExponentShift) - exponent;
TVectorUInt32 firstIntegralBit = ShiftLeftUInt32(TVectorUInt32.One, fractionalBits);
TVectorUInt32 fractionalBitMask = firstIntegralBit - TVectorUInt32.One;
// We must be an integer in the range [1, 2^24) with the least significant integral bit clear
// or in the range [2^24, +Infinity) in which case we are known to be an even integer
TVectorUInt32 result = TVectorUInt32.GreaterThan(bits, TVectorUInt32.Create(0x3FFF_FFFF))
& TVectorUInt32.LessThan(bits, TVectorUInt32.Create(float.PositiveInfinityBits))
& ((TVectorUInt32.IsZero(bits & fractionalBitMask) & TVectorUInt32.IsZero(bits & firstIntegralBit))
| TVectorUInt32.GreaterThan(bits, TVectorUInt32.Create(0x4B7F_FFFF)));
// We are also an even integer if we are zero
result |= TVectorUInt32.IsZero(bits);
return Unsafe.BitCast<TVectorUInt32, TVectorSingle>(result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVectorDouble IsEvenIntegerDouble<TVectorDouble, TVectorUInt64>(TVectorDouble vector)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
TVectorUInt64 bits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(TVectorDouble.Abs(vector));
TVectorUInt64 exponent = ((bits >> double.BiasedExponentShift) & TVectorUInt64.Create(double.ShiftedBiasedExponentMask)) - TVectorUInt64.Create(double.ExponentBias);
TVectorUInt64 fractionalBits = TVectorUInt64.Create(double.BiasedExponentShift) - exponent;
TVectorUInt64 firstIntegralBit = ShiftLeftUInt64(TVectorUInt64.One, fractionalBits);
TVectorUInt64 fractionalBitMask = firstIntegralBit - TVectorUInt64.One;
// We must be an integer in the range [1, 2^53) with the least significant integral bit clear
// or in the range [2^53, +Infinity) in which case we are known to be an even integer
TVectorUInt64 result = TVectorUInt64.GreaterThan(bits, TVectorUInt64.Create(0x3FFF_FFFF_FFFF_FFFF))
& TVectorUInt64.LessThan(bits, TVectorUInt64.Create(double.PositiveInfinityBits))
& ((TVectorUInt64.IsZero(bits & fractionalBitMask) & TVectorUInt64.IsZero(bits & firstIntegralBit))
| TVectorUInt64.GreaterThan(bits, TVectorUInt64.Create(0x433F_FFFF_FFFF_FFFF)));
// We are also an even integer if we are zero
result |= TVectorUInt64.IsZero(bits);
return Unsafe.BitCast<TVectorUInt64, TVectorDouble>(result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVectorSingle IsOddIntegerSingle<TVectorSingle, TVectorUInt32>(TVectorSingle vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
{
TVectorUInt32 bits = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(TVectorSingle.Abs(vector));
TVectorUInt32 exponent = ((bits >> float.BiasedExponentShift) & TVectorUInt32.Create(float.ShiftedBiasedExponentMask)) - TVectorUInt32.Create(float.ExponentBias);
TVectorUInt32 fractionalBits = TVectorUInt32.Create(float.BiasedExponentShift) - exponent;
TVectorUInt32 firstIntegralBit = ShiftLeftUInt32(TVectorUInt32.One, fractionalBits);
TVectorUInt32 fractionalBitMask = firstIntegralBit - TVectorUInt32.One;
// We must be an integer in the range [1, 2^24) with the least significant integral bit set
TVectorUInt32 result = TVectorUInt32.GreaterThan(bits, TVectorUInt32.Create(0x3F7F_FFFF))
& TVectorUInt32.LessThan(bits, TVectorUInt32.Create(0x4B80_0000))
& TVectorUInt32.IsZero(bits & fractionalBitMask)
& ~TVectorUInt32.IsZero(bits & firstIntegralBit);
return Unsafe.BitCast<TVectorUInt32, TVectorSingle>(result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVectorDouble IsOddIntegerDouble<TVectorDouble, TVectorUInt64>(TVectorDouble vector)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
TVectorUInt64 bits = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(TVectorDouble.Abs(vector));
TVectorUInt64 exponent = ((bits >> double.BiasedExponentShift) & TVectorUInt64.Create(double.ShiftedBiasedExponentMask)) - TVectorUInt64.Create(double.ExponentBias);
TVectorUInt64 fractionalBits = TVectorUInt64.Create(double.BiasedExponentShift) - exponent;
TVectorUInt64 firstIntegralBit = ShiftLeftUInt64(TVectorUInt64.One, fractionalBits);
TVectorUInt64 fractionalBitMask = firstIntegralBit - TVectorUInt64.One;
// We must be an integer in the range [1, 2^53) with the least significant integral bit set
TVectorUInt64 result = TVectorUInt64.GreaterThan(bits, TVectorUInt64.Create(0x3FEF_FFFF_FFFF_FFFF))
& TVectorUInt64.LessThan(bits, TVectorUInt64.Create(0x4340_0000_0000_0000))
& TVectorUInt64.IsZero(bits & fractionalBitMask)
& ~TVectorUInt64.IsZero(bits & firstIntegralBit);
return Unsafe.BitCast<TVectorUInt64, TVectorDouble>(result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector Lerp<TVector, T>(TVector x, TVector y, TVector amount)
where TVector : unmanaged, ISimdVector<TVector, T>
{
return TVector.MultiplyAddEstimate(x, TVector.One - amount, y * amount);
}
public static TVectorDouble LogDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
// This code is based on `vrd2_log` from amd/aocl-libm-ose
// Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Reduce x into the form:
// x = (-1)^s*2^n*m
// s will be always zero, as log is defined for positive numbers
// n is an integer known as the exponent
// m is mantissa
//
// x is reduced such that the mantissa, m lies in [2/3,4/3]
// x = 2^n*m where m is in [2/3,4/3]
// log(x) = log(2^n*m) We have log(a*b) = log(a)+log(b)
// = log(2^n) + log(m) We have log(a^n) = n*log(a)
// = n*log(2) + log(m)
// = n*log(2) + log(1+(m-1))
// = n*log(2) + log(1+f) Where f = m-1
// = n*log(2) + log1p(f) f lies in [-1/3,+1/3]
//
// Thus we have :
// log(x) = n*log(2) + log1p(f)
// In the above, the first term n*log(2), n can be calculated by using right shift operator and the value of log(2)
// is known and is stored as a constant
// The second term log1p(F) is approximated by using a polynomial
const ulong V_MIN = double.SmallestNormalBits;
const ulong V_MAX = double.PositiveInfinityBits;
const ulong V_MSK = 0x000FFFFF_FFFFFFFF; // (1 << 52) - 1
const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0
const double LN2_HEAD = 0.693359375;
const double LN2_TAIL = -0.00021219444005469057;
const double C02 = -0.499999999999999560;
const double C03 = +0.333333333333414750;
const double C04 = -0.250000000000297430;
const double C05 = +0.199999999975985220;
const double C06 = -0.166666666608919500;
const double C07 = +0.142857145600277100;
const double C08 = -0.125000005127831270;
const double C09 = +0.111110952357159440;
const double C10 = -0.099999750495501240;
const double C11 = +0.090914349823462390;
const double C12 = -0.083340600527551860;
const double C13 = +0.076817603328311300;
const double C14 = -0.071296718946287310;
const double C15 = +0.067963465211535730;
const double C16 = -0.063995035098960040;
const double C17 = +0.049370587082412105;
const double C18 = -0.045370170994891980;
const double C19 = +0.088970636003577750;
const double C20 = -0.086906174116908760;
TVectorDouble specialResult = x;
// x is zero, subnormal, infinity, or NaN
TVectorUInt64 specialMask = TVectorUInt64.GreaterThanOrEqual(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x) - TVectorUInt64.Create(V_MIN), TVectorUInt64.Create(V_MAX - V_MIN));
if (specialMask != TVectorUInt64.Zero)
{
// double.IsNegative(x) ? double.NaN : x
TVectorDouble isNegativeMask = TVectorDouble.IsNegative(x);
specialResult = TVectorDouble.ConditionalSelect(
isNegativeMask,
TVectorDouble.Create(double.NaN),
specialResult
);
// double.IsZero(x) ? double.NegativeInfinity : x
TVectorDouble zeroMask = TVectorDouble.IsZero(x);
specialResult = TVectorDouble.ConditionalSelect(
zeroMask,
TVectorDouble.Create(double.NegativeInfinity),
specialResult
);
// double.IsZero(x) | double.IsNegative(x) | double.IsNaN(x) | double.IsPositiveInfinity(x)
TVectorDouble temp = zeroMask
| isNegativeMask
| TVectorDouble.IsNaN(x)
| TVectorDouble.IsPositiveInfinity(x);
// subnormal
TVectorDouble subnormalMask = TVectorDouble.AndNot(Unsafe.BitCast<TVectorUInt64, TVectorDouble>(specialMask), temp);
// multiply by 2^52, then normalize
x = TVectorDouble.ConditionalSelect(
subnormalMask,
Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x * 4503599627370496.0) - TVectorUInt64.Create(52ul << 52)),
x
);
specialMask = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(temp);
}
// Reduce the mantissa to [+2/3, +4/3]
TVectorUInt64 vx = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x) - TVectorUInt64.Create(V_OFF);
TVectorDouble n = ConvertToDouble<TVectorInt64, TVectorDouble>(Unsafe.BitCast<TVectorUInt64, TVectorInt64>(vx) >> 52);
vx = (vx & TVectorUInt64.Create(V_MSK)) + TVectorUInt64.Create(V_OFF);
// Adjust the mantissa to [-1/3, +1/3]
TVectorDouble r = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(vx) - TVectorDouble.One;
TVectorDouble r02 = r * r;
TVectorDouble r04 = r02 * r02;
TVectorDouble r08 = r04 * r04;
TVectorDouble r16 = r08 * r08;
// Compute log(x + 1) using Polynomial approximation
// C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
r16,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.Create(C20),
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C19), r, TVectorDouble.Create(C18)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C17), r, TVectorDouble.Create(C16)))),
TVectorDouble.MultiplyAddEstimate(
r08,
TVectorDouble.MultiplyAddEstimate(
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C15), r, TVectorDouble.Create(C14)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C13), r, TVectorDouble.Create(C12))),
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C11), r, TVectorDouble.Create(C10)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C09), r, TVectorDouble.Create(C08)))),
TVectorDouble.MultiplyAddEstimate(
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C07), r, TVectorDouble.Create(C06)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C05), r, TVectorDouble.Create(C04))),
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C03), r, TVectorDouble.Create(C02)),
r02,
r
)
)
)
);
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorUInt64, TVectorDouble>(specialMask),
specialResult,
TVectorDouble.MultiplyAddEstimate(
n,
TVectorDouble.Create(LN2_HEAD),
TVectorDouble.MultiplyAddEstimate(n, TVectorDouble.Create(LN2_TAIL), poly)
)
);
}
public static TVectorSingle LogSingle<TVectorSingle, TVectorInt32, TVectorUInt32>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
{
// This code is based on `vrs4_logf` from amd/aocl-libm-ose
// Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Spec:
// logf(x)
// = logf(x) if x ∈ F and x > 0
// = x if x = qNaN
// = 0 if x = 1
// = -inf if x = (-0, 0}
// = NaN otherwise
//
// Assumptions/Expectations
// - ULP is derived to be << 4 (always)
// - Some FPU Exceptions may not be available
// - Performance is at least 3x
//
// Implementation Notes:
// 1. Range Reduction:
// x = 2^n*(1+f) .... (1)
// where n is exponent and is an integer
// (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2 .... (2)
//
// From (1), taking log on both sides
// log(x) = log(2^n * (1+f))
// = log(2^n) + log(1+f)
// = n*log(2) + log(1+f) .... (3)
//
// let z = 1 + f
// log(z) = log(k) + log(z) - log(k)
// log(z) = log(kz) - log(k)
//
// From (2), range of z is [1, 2)
// by simply dividing range by 'k', z is in [1/k, 2/k) .... (4)
// Best choice of k is the one which gives equal and opposite values
// at extrema +- -+
// 1 | 2 |
// --- - 1 = - |--- - 1 |
// k | k | .... (5)
// +- -+
//
// Solving for k, k = 3/2,
// From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
//
// 2. Polynomial Approximation:
// More information refer to tools/sollya/vrs4_logf.sollya
//
// 7th Deg - Error abs: 0x1.04c4ac98p-22 rel: 0x1.2216e6f8p-19
// 6th Deg - Error abs: 0x1.179e97d8p-19 rel: 0x1.db676c1p-17
const uint V_MIN = 0x00800000;
const uint V_MAX = 0x7F800000;
const uint V_MASK = 0x007FFFFF;
const uint V_OFF = 0x3F2AAAAB;
const float V_LN2 = 0.6931472f;
const float C02 = -0.5000001f;
const float C03 = +0.33332965f;
const float C04 = -0.24999046f;
const float C05 = +0.20018855f;
const float C06 = -0.16700386f;
const float C07 = +0.13902695f;
const float C08 = -0.1197452f;
const float C09 = +0.14401625f;
const float C10 = -0.13657966f;
TVectorSingle specialResult = x;
// x is subnormal or infinity or NaN
TVectorUInt32 specialMask = TVectorUInt32.GreaterThanOrEqual(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x) - TVectorUInt32.Create(V_MIN), TVectorUInt32.Create(V_MAX - V_MIN));
if (specialMask != TVectorUInt32.Zero)
{
// float.IsNegative(x) ? float.NaN : x
TVectorSingle isNegativeMask = TVectorSingle.IsNegative(x);
specialResult = TVectorSingle.ConditionalSelect(
isNegativeMask,
TVectorSingle.Create(float.NaN),
specialResult
);
// float.IsZero(x) ? float.NegativeInfinity : x
TVectorSingle zeroMask = TVectorSingle.IsZero(x);
specialResult = TVectorSingle.ConditionalSelect(
zeroMask,
TVectorSingle.Create(float.NegativeInfinity),
specialResult
);
// float.IsZero(x) | float.IsNegative(x) | float.IsNaN(x) | float.IsPositiveInfinity(x)
TVectorSingle temp = zeroMask
| isNegativeMask
| TVectorSingle.IsNaN(x)
| TVectorSingle.IsPositiveInfinity(x);
// subnormal
TVectorSingle subnormalMask = TVectorSingle.AndNot(Unsafe.BitCast<TVectorUInt32, TVectorSingle>(specialMask), temp);
x = TVectorSingle.ConditionalSelect(
subnormalMask,
Unsafe.BitCast<TVectorUInt32, TVectorSingle>(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x * 8388608.0f) - TVectorUInt32.Create(23u << 23)),
x
);
specialMask = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(temp);
}
TVectorUInt32 vx = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x) - TVectorUInt32.Create(V_OFF);
TVectorSingle n = ConvertToSingle<TVectorInt32, TVectorSingle>(Unsafe.BitCast<TVectorUInt32, TVectorInt32>(vx) >> 23);
vx = (vx & TVectorUInt32.Create(V_MASK)) + TVectorUInt32.Create(V_OFF);
TVectorSingle r = Unsafe.BitCast<TVectorUInt32, TVectorSingle>(vx) - TVectorSingle.Create(1.0f);
TVectorSingle r2 = r * r;
TVectorSingle r4 = r2 * r2;
TVectorSingle r8 = r4 * r4;
TVectorSingle q = TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(
r2,
TVectorSingle.Create(C10),
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C09), r, TVectorSingle.Create(C08))),
r8,
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C07), r, TVectorSingle.Create(C06)),
r2,
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C05), r, TVectorSingle.Create(C04))),
r4,
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C03), r, TVectorSingle.Create(C02)),
r2,
r
)
)
);
return TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorUInt32, TVectorSingle>(specialMask),
specialResult,
TVectorSingle.MultiplyAddEstimate(n, TVectorSingle.Create(V_LN2), q)
);
}
public static TVectorDouble Log2Double<TVectorDouble, TVectorInt64, TVectorUInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
// This code is based on `vrd2_log2` from amd/aocl-libm-ose
// Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Reduce x into the form:
// x = (-1)^s*2^n*m
// s will be always zero, as log is defined for positive numbers
// n is an integer known as the exponent
// m is mantissa
//
// x is reduced such that the mantissa, m lies in [2/3,4/3]
// x = 2^n*m where m is in [2/3,4/3]
// log2(x) = log2(2^n*m) We have log(a*b) = log(a)+log(b)
// = log2(2^n) + log2(m) We have log(a^n) = n*log(a)
// = n + log2(m)
// = n + log2(1+(m-1))
// = n + ln(1+f) * log2(e) Where f = m-1
// = n + log1p(f) * log2(e) f lies in [-1/3,+1/3]
//
// Thus we have :
// log(x) = n + log1p(f) * log2(e)
// The second term log1p(F) is approximated by using a polynomial
const ulong V_MIN = double.SmallestNormalBits;
const ulong V_MAX = double.PositiveInfinityBits;
const ulong V_MSK = 0x000FFFFF_FFFFFFFF; // (1 << 52) - 1
const ulong V_OFF = 0x3FE55555_55555555; // 2.0 / 3.0
const double LN2_HEAD = 1.44269180297851562500E+00;
const double LN2_TAIL = 3.23791044778235969970E-06;
const double C02 = -0.499999999999999560;
const double C03 = +0.333333333333414750;
const double C04 = -0.250000000000297430;
const double C05 = +0.199999999975985220;
const double C06 = -0.166666666608919500;
const double C07 = +0.142857145600277100;
const double C08 = -0.125000005127831270;
const double C09 = +0.111110952357159440;
const double C10 = -0.099999750495501240;
const double C11 = +0.090914349823462390;
const double C12 = -0.083340600527551860;
const double C13 = +0.076817603328311300;
const double C14 = -0.071296718946287310;
const double C15 = +0.067963465211535730;
const double C16 = -0.063995035098960040;
const double C17 = +0.049370587082412105;
const double C18 = -0.045370170994891980;
const double C19 = +0.088970636003577750;
const double C20 = -0.086906174116908760;
TVectorDouble specialResult = x;
// x is zero, subnormal, infinity, or NaN
TVectorUInt64 specialMask = TVectorUInt64.GreaterThanOrEqual(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x) - TVectorUInt64.Create(V_MIN), TVectorUInt64.Create(V_MAX - V_MIN));
if (specialMask != TVectorUInt64.Zero)
{
// double.IsNegative(x) ? double.NaN : x
TVectorDouble isNegativeMask = TVectorDouble.IsNegative(x);
specialResult = TVectorDouble.ConditionalSelect(
isNegativeMask,
TVectorDouble.Create(double.NaN),
specialResult
);
// double.IsZero(x) ? double.NegativeInfinity : x
TVectorDouble zeroMask = TVectorDouble.IsZero(x);
specialResult = TVectorDouble.ConditionalSelect(
zeroMask,
TVectorDouble.Create(double.NegativeInfinity),
specialResult
);
// double.IsZero(x) | double.IsNegative(x) | double.IsNaN(x) | double.IsPositiveInfinity(x)
TVectorDouble temp = zeroMask
| isNegativeMask
| TVectorDouble.IsNaN(x)
| TVectorDouble.IsPositiveInfinity(x);
// subnormal
TVectorDouble subnormalMask = TVectorDouble.AndNot(Unsafe.BitCast<TVectorUInt64, TVectorDouble>(specialMask), temp);
// multiply by 2^52, then normalize
x = TVectorDouble.ConditionalSelect(
subnormalMask,
Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x * 4503599627370496.0) - TVectorUInt64.Create(52ul << 52)),
x
);
specialMask = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(temp);
}
// Reduce the mantissa to [+2/3, +4/3]
TVectorUInt64 vx = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(x) - TVectorUInt64.Create(V_OFF);
TVectorDouble n = ConvertToDouble<TVectorInt64, TVectorDouble>(Unsafe.BitCast<TVectorUInt64, TVectorInt64>(vx) >> 52);
vx = (vx & TVectorUInt64.Create(V_MSK)) + TVectorUInt64.Create(V_OFF);
// Adjust the mantissa to [-1/3, +1/3]
TVectorDouble r = Unsafe.BitCast<TVectorUInt64, TVectorDouble>(vx) - TVectorDouble.One;
TVectorDouble r02 = r * r;
TVectorDouble r04 = r02 * r02;
TVectorDouble r08 = r04 * r04;
TVectorDouble r16 = r08 * r08;
// Compute log(x + 1) using polynomial approximation
// C0 + (r * C1) + (r^2 * C2) + ... + (r^20 * C20)
TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
r16,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.Create(C20),
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C19), r, TVectorDouble.Create(C18)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C17), r, TVectorDouble.Create(C16)))),
TVectorDouble.MultiplyAddEstimate(
r08,
TVectorDouble.MultiplyAddEstimate(
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C15), r, TVectorDouble.Create(C14)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C13), r, TVectorDouble.Create(C12))),
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C11), r, TVectorDouble.Create(C10)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C09), r, TVectorDouble.Create(C08)))),
TVectorDouble.MultiplyAddEstimate(
r04,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C07), r, TVectorDouble.Create(C06)),
r02,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C05), r, TVectorDouble.Create(C04))),
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C03), r, TVectorDouble.Create(C02)),
r02,
r
)
)
)
);
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorUInt64, TVectorDouble>(specialMask),
specialResult,
TVectorDouble.MultiplyAddEstimate(
poly,
TVectorDouble.Create(LN2_HEAD),
TVectorDouble.MultiplyAddEstimate(poly, TVectorDouble.Create(LN2_TAIL), n)
)
);
}
public static TVectorSingle Log2Single<TVectorSingle, TVectorInt32, TVectorUInt32>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
{
// This code is based on `vrs4_log2f` from amd/aocl-libm-ose
// Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Spec:
// log2f(x)
// = log2f(x) if x ∈ F and x > 0
// = x if x = qNaN
// = 0 if x = 1
// = -inf if x = (-0, 0}
// = NaN otherwise
//
// Assumptions/Expectations
// - Maximum ULP is observed to be at 4
// - Some FPU Exceptions may not be available
// - Performance is at least 3x
//
// Implementation Notes:
// 1. Range Reduction:
// x = 2^n*(1+f) .... (1)
// where n is exponent and is an integer
// (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2 .... (2)
//
// From (1), taking log on both sides
// log2(x) = log2(2^n * (1+f))
// = n + log2(1+f) .... (3)
//
// let z = 1 + f
// log2(z) = log2(k) + log2(z) - log2(k)
// log2(z) = log2(kz) - log2(k)
//
// From (2), range of z is [1, 2)
// by simply dividing range by 'k', z is in [1/k, 2/k) .... (4)
// Best choice of k is the one which gives equal and opposite values
// at extrema +- -+
// 1 | 2 |
// --- - 1 = - |--- - 1 |
// k | k | .... (5)
// +- -+
//
// Solving for k, k = 3/2,
// From (4), using 'k' value, range is therefore [-0.3333, 0.3333]
//
// 2. Polynomial Approximation:
// More information refer to tools/sollya/vrs4_logf.sollya
//
// 7th Deg - Error abs: 0x1.04c4ac98p-22 rel: 0x1.2216e6f8p-19
const uint V_MIN = float.SmallestNormalBits;
const uint V_MAX = float.PositiveInfinityBits;
const uint V_MSK = 0x007F_FFFF; // (1 << 23) - 1
const uint V_OFF = 0x3F2A_AAAB; // 2.0 / 3.0
const float C1 = +1.44269510f;
const float C2 = -0.72134554f;
const float C3 = +0.48089063f;
const float C4 = -0.36084408f;
const float C5 = +0.28889710f;
const float C6 = -0.23594281f;
const float C7 = +0.19948183f;
const float C8 = -0.22616665f;
const float C9 = +0.21228963f;
TVectorSingle specialResult = x;
// x is zero, subnormal, infinity, or NaN
TVectorUInt32 specialMask = TVectorUInt32.GreaterThanOrEqual(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x) - TVectorUInt32.Create(V_MIN), TVectorUInt32.Create(V_MAX - V_MIN));
if (specialMask != TVectorUInt32.Zero)
{
// float.IsNegative(x) ? float.NaN : x
TVectorSingle isNegativeMask = TVectorSingle.IsNegative(x);
specialResult = TVectorSingle.ConditionalSelect(
isNegativeMask,
TVectorSingle.Create(float.NaN),
specialResult
);
// float.IsZero(x) ? float.NegativeInfinity : x
TVectorSingle zeroMask = TVectorSingle.IsZero(x);
specialResult = TVectorSingle.ConditionalSelect(
zeroMask,
TVectorSingle.Create(float.NegativeInfinity),
specialResult
);
// float.IsZero(x) | float.IsNegative(x) | float.IsNaN(x) | float.IsPositiveInfinity(x)
TVectorSingle temp = zeroMask
| isNegativeMask
| TVectorSingle.IsNaN(x)
| TVectorSingle.IsPositiveInfinity(x);
// subnormal
TVectorSingle subnormalMask = TVectorSingle.AndNot(Unsafe.BitCast<TVectorUInt32, TVectorSingle>(specialMask), temp);
// multiply by 2^23, then normalize
x = TVectorSingle.ConditionalSelect(
subnormalMask,
Unsafe.BitCast<TVectorUInt32, TVectorSingle>(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x * 8388608.0f) - TVectorUInt32.Create(23u << 23)),
x
);
specialMask = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(temp);
}
// Reduce the mantissa to [+2/3, +4/3]
TVectorUInt32 vx = Unsafe.BitCast<TVectorSingle, TVectorUInt32>(x) - TVectorUInt32.Create(V_OFF);
TVectorSingle n = ConvertToSingle<TVectorInt32, TVectorSingle>(Unsafe.BitCast<TVectorUInt32, TVectorInt32>(vx) >> 23);
vx = (vx & TVectorUInt32.Create(V_MSK)) + TVectorUInt32.Create(V_OFF);
// Adjust the mantissa to [-1/3, +1/3]
TVectorSingle r = Unsafe.BitCast<TVectorUInt32, TVectorSingle>(vx) - TVectorSingle.One;
// Compute log(x + 1) using polynomial approximation
// C0 + (r * C1) + (r^2 * C2) + ... + (r^9 * C9)
TVectorSingle r2 = r * r;
TVectorSingle r4 = r2 * r2;
TVectorSingle r8 = r4 * r4;
TVectorSingle poly = TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C9), r, TVectorSingle.Create(C8)),
r8,
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C7), r, TVectorSingle.Create(C6)),
r2,
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C5), r, TVectorSingle.Create(C4))),
r4,
TVectorSingle.MultiplyAddEstimate(
TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(C3), r, TVectorSingle.Create(C2)),
r2,
TVectorSingle.Create(C1) * r
)
)
);
return TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorUInt32, TVectorSingle>(specialMask),
specialResult,
n + poly
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector Max<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.LessThan(y, x) | TVector.IsNaN(x) | (TVector.Equals(x, y) & TVector.IsNegative(y)),
x,
y
);
}
return TVector.ConditionalSelect(TVector.GreaterThan(x, y), x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MaxMagnitude<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
TVector xMag = TVector.Abs(x);
TVector yMag = TVector.Abs(y);
return TVector.ConditionalSelect(
TVector.GreaterThan(xMag, yMag) | TVector.IsNaN(xMag) | (TVector.Equals(xMag, yMag) & TVector.IsPositive(x)),
x,
y
);
}
return MaxMagnitudeNumber<TVector, T>(x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MaxMagnitudeNumber<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(byte))
|| (typeof(T) == typeof(ushort))
|| (typeof(T) == typeof(uint))
|| (typeof(T) == typeof(ulong))
|| (typeof(T) == typeof(nuint)))
{
return TVector.Max(x, y);
}
TVector xMag = TVector.Abs(x);
TVector yMag = TVector.Abs(y);
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.GreaterThan(xMag, yMag) | TVector.IsNaN(yMag) | (TVector.Equals(xMag, yMag) & TVector.IsPositive(x)),
x,
y
);
}
Debug.Assert((typeof(T) == typeof(sbyte))
|| (typeof(T) == typeof(short))
|| (typeof(T) == typeof(int))
|| (typeof(T) == typeof(long))
|| (typeof(T) == typeof(nint)));
return TVector.ConditionalSelect(
(TVector.GreaterThan(xMag, yMag) & TVector.IsPositive(yMag)) | (TVector.Equals(xMag, yMag) & TVector.IsNegative(y)) | TVector.IsNegative(xMag),
x,
y
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MaxNumber<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.LessThan(y, x) | TVector.IsNaN(y) | (TVector.Equals(x, y) & TVector.IsNegative(y)),
x,
y
);
}
return TVector.Max(x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector Min<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.LessThan(x, y) | TVector.IsNaN(x) | (TVector.Equals(x, y) & TVector.IsNegative(x)),
x,
y
);
}
return TVector.ConditionalSelect(TVector.LessThan(x, y), x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MinMagnitude<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
TVector xMag = TVector.Abs(x);
TVector yMag = TVector.Abs(y);
return TVector.ConditionalSelect(
TVector.LessThan(xMag, yMag) | TVector.IsNaN(xMag) | (TVector.Equals(xMag, yMag) & TVector.IsNegative(x)),
x,
y
);
}
return MinMagnitudeNumber<TVector, T>(x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MinMagnitudeNumber<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(byte))
|| (typeof(T) == typeof(ushort))
|| (typeof(T) == typeof(uint))
|| (typeof(T) == typeof(ulong))
|| (typeof(T) == typeof(nuint)))
{
return TVector.Min(x, y);
}
TVector xMag = TVector.Abs(x);
TVector yMag = TVector.Abs(y);
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.LessThan(xMag, yMag) | TVector.IsNaN(yMag) | (TVector.Equals(xMag, yMag) & TVector.IsNegative(x)),
x,
y
);
}
Debug.Assert((typeof(T) == typeof(sbyte))
|| (typeof(T) == typeof(short))
|| (typeof(T) == typeof(int))
|| (typeof(T) == typeof(long))
|| (typeof(T) == typeof(nint)));
return TVector.ConditionalSelect(
(TVector.LessThan(xMag, yMag) & TVector.IsPositive(xMag)) | (TVector.Equals(xMag, yMag) & TVector.IsNegative(x)) | TVector.IsNegative(yMag),
x,
y
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector MinNumber<TVector, T>(TVector x, TVector y)
where TVector : unmanaged, ISimdVector<TVector, T>
{
if ((typeof(T) == typeof(float)) || (typeof(T) == typeof(double)))
{
return TVector.ConditionalSelect(
TVector.LessThan(x, y) | TVector.IsNaN(y) | (TVector.Equals(x, y) & TVector.IsNegative(x)),
x,
y
);
}
return TVector.Min(x, y);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TVector RadiansToDegrees<TVector, T>(TVector radians)
where TVector : unmanaged, ISimdVector<TVector, T>
where T : IFloatingPointIeee754<T>
{
// NOTE: Don't change the algorithm without consulting the DIM
// which elaborates on why this implementation was chosen
return (radians * TVector.Create(T.CreateTruncating(180))) / TVector.Create(T.Pi);
}
public static TVectorDouble RoundDouble<TVectorDouble>(TVectorDouble vector, MidpointRounding mode)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
switch (mode)
{
// Rounds to the nearest value; if the number falls midway,
// it is rounded to the nearest value above (for positive numbers) or below (for negative numbers)
case MidpointRounding.AwayFromZero:
{
// manually fold BitDecrement(0.5)
return TVectorDouble.Truncate(vector + CopySign<TVectorDouble, double>(TVectorDouble.Create(0.49999999999999994), vector));
}
// Rounds to the nearest value; if the number falls midway,
// it is rounded to the nearest value with an even least significant digit
case MidpointRounding.ToEven:
{
return TVectorDouble.Round(vector);
}
// Directed rounding: Round to the nearest value, toward to zero
case MidpointRounding.ToZero:
{
return TVectorDouble.Truncate(vector);
}
// Directed Rounding: Round down to the next value, toward negative infinity
case MidpointRounding.ToNegativeInfinity:
{
return TVectorDouble.Floor(vector);
}
// Directed rounding: Round up to the next value, toward positive infinity
case MidpointRounding.ToPositiveInfinity:
{
return TVectorDouble.Ceiling(vector);
}
default:
{
ThrowHelper.ThrowArgumentException_InvalidEnumValue(mode);
return default;
}
}
}
public static TVectorSingle RoundSingle<TVectorSingle>(TVectorSingle vector, MidpointRounding mode)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
{
switch (mode)
{
// Rounds to the nearest value; if the number falls midway,
// it is rounded to the nearest value above (for positive numbers) or below (for negative numbers)
case MidpointRounding.AwayFromZero:
{
// manually fold BitDecrement(0.5)
return TVectorSingle.Truncate(vector + CopySign<TVectorSingle, float>(TVectorSingle.Create(0.49999997f), vector));
}
// Rounds to the nearest value; if the number falls midway,
// it is rounded to the nearest value with an even least significant digit
case MidpointRounding.ToEven:
{
return TVectorSingle.Round(vector);
}
// Directed rounding: Round to the nearest value, toward to zero
case MidpointRounding.ToZero:
{
return TVectorSingle.Truncate(vector);
}
// Directed Rounding: Round down to the next value, toward negative infinity
case MidpointRounding.ToNegativeInfinity:
{
return TVectorSingle.Floor(vector);
}
// Directed rounding: Round up to the next value, toward positive infinity
case MidpointRounding.ToPositiveInfinity:
{
return TVectorSingle.Ceiling(vector);
}
default:
{
ThrowHelper.ThrowArgumentException_InvalidEnumValue(mode);
return default;
}
}
}
public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `sin` and `cos` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// See SinDouble and CosDouble for implementation details
const long ARG_HUGE = 0x415312D000000000; // 5e6
const long ARG_LARGE = 0x3FE921FB54442D18; // PI / 4
const long ARG_SMALL = 0x3F20000000000000; // 2^-13
const long ARG_SMALLER = 0x3E40000000000000; // 2^-27
TVectorDouble ax = TVectorDouble.Abs(x);
TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sinResult, cosResult;
if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
TVectorDouble x2 = x * x;
if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
sinResult = SinDoublePoly(x);
cosResult = TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(
CosDoublePoly(x),
x2,
TVectorDouble.Create(-0.5)),
x2,
TVectorDouble.One
);
}
else
{
// at least one element is: 2^-13 > |x|
TVectorDouble x3 = x2 * x;
sinResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
cosResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
}
}
else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
(TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinDoubleLarge(r, rr);
TVectorDouble cos = CosDoubleLarge(r, rr);
sinResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
sin, // region 0 or 2
cos // region 1 or 3
);
cosResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
cos, // region 0 or 2
sin // region 1 or 3
);
TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
sinResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
-sinResult, // negative in region 1 or 3, positive in region 0 or 2
+sinResult // negative in region 0 or 2, positive in region 1 or 3
);
cosResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+cosResult, // region 0 or 3
-cosResult // region 1 or 2
);
}
else
{
return ScalarFallback(x);
}
sinResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
sinResult, // for elements: |x| >= 2^-27, infinity, or NaN
x // for elements: 2^-27 > |x|
);
cosResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
cosResult, // for elements: |x| >= 2^-27, infinity, or NaN
TVectorDouble.One // for elements: 2^-27 > |x|
);
return (sinResult, cosResult);
static (TVectorDouble Sin, TVectorDouble Cos) ScalarFallback(TVectorDouble x)
{
TVectorDouble sinResult = TVectorDouble.Zero;
TVectorDouble cosResult = TVectorDouble.Zero;
for (int i = 0; i < TVectorDouble.ElementCount; i++)
{
(double sinScalar, double cosScalar) = double.SinCos(x[i]);
sinResult = sinResult.WithElement(i, sinScalar);
cosResult = cosResult.WithElement(i, cosScalar);
}
return (sinResult, cosResult);
}
}
public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `sinf` and `cosf` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// See SinSingle and CosSingle for implementation details
const int ARG_HUGE = 0x4A989680; // 5e6
const int ARG_LARGE = 0x3F490FDB; // PI / 4
const int ARG_SMALL = 0x3C000000; // 2^-13
const int ARG_SMALLER = 0x39000000; // 2^-27
TVectorSingle ax = TVectorSingle.Abs(x);
TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
TVectorSingle sinResult, cosResult;
if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
TVectorDouble dx = Widen<TVectorSingle, TVectorDouble>(x);
sinResult = Narrow<TVectorDouble, TVectorSingle>(
SinSinglePoly(dx)
);
cosResult = Narrow<TVectorDouble, TVectorSingle>(
CosSingleSmall(dx)
);
}
else
{
TVectorDouble dxLo = WidenLower<TVectorSingle, TVectorDouble>(x);
TVectorDouble dxHi = WidenUpper<TVectorSingle, TVectorDouble>(x);
sinResult = Narrow<TVectorDouble, TVectorSingle>(
SinSinglePoly(dxLo),
SinSinglePoly(dxHi)
);
cosResult = Narrow<TVectorDouble, TVectorSingle>(
CosSingleSmall(dxLo),
CosSingleSmall(dxHi)
);
}
}
else
{
// at least one element is: 2^-13 > |x|
TVectorSingle x2 = x * x;
TVectorSingle x3 = x2 * x;
sinResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
cosResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
}
}
else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
(TVectorDouble sin, TVectorDouble cos) = CoreImpl(Widen<TVectorSingle, TVectorDouble>(x));
sinResult = Narrow<TVectorDouble, TVectorSingle>(sin);
cosResult = Narrow<TVectorDouble, TVectorSingle>(cos);
}
else
{
(TVectorDouble sinLo, TVectorDouble cosLo) = CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x));
(TVectorDouble sinHi, TVectorDouble cosHi) = CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x));
sinResult = Narrow<TVectorDouble, TVectorSingle>(sinLo, sinHi);
cosResult = Narrow<TVectorDouble, TVectorSingle>(cosLo, cosHi);
}
}
else
{
return ScalarFallback(x);
}
sinResult = TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
sinResult, // for elements: |x| >= 2^-27, infinity, or NaN
x // for elements: 2^-27 > |x|
);
cosResult = TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
cosResult, // for elements: |x| >= 2^-27, infinity, or NaN
TVectorSingle.One // for elements: 2^-27 > |x|
);
return (sinResult, cosResult);
static (TVectorDouble Sin, TVectorDouble Cos) CoreImpl(TVectorDouble x)
{
TVectorDouble ax = TVectorDouble.Abs(x);
(TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinSinglePoly(r);
TVectorDouble cos = CosSingleLarge(r);
TVectorDouble sinResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
sin, // region 0 or 2
cos // region 1 or 3
);
TVectorDouble cosResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
cos, // region 0 or 2
sin // region 1 or 3
);
TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
sinResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
-sinResult, // negative in region 1 or 3, positive in region 0 or 2
+sinResult // negative in region 0 or 2, positive in region 1 or 3
);
cosResult = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+cosResult, // region 0 or 3
-cosResult // region 1 or 2
);
return (sinResult, cosResult);
}
static (TVectorSingle Sin, TVectorSingle Cos) ScalarFallback(TVectorSingle x)
{
TVectorSingle sinResult = TVectorSingle.Zero;
TVectorSingle cosResult = TVectorSingle.Zero;
for (int i = 0; i < TVectorSingle.ElementCount; i++)
{
(float sinScalar, float cosScalar) = float.SinCos(x[i]);
sinResult = sinResult.WithElement(i, sinScalar);
cosResult = cosResult.WithElement(i, cosScalar);
}
return (sinResult, cosResult);
}
}
public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `sin` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ---------------------
// checks for special cases
// if ( ux = infinity) raise overflow exception and return x
// if x is NaN then raise invalid FP operation exception and return x.
//
// 1. Argument reduction
// if |x| > 5e5 then
// __amd_remainder_piby2(x, &r, &rr, ®ion)
// else
// Argument reduction
// Let z = |x| * 2/pi
// z = dn + r, where dn = round(z)
// rhead = dn * pi/2_head
// rtail = dn * pi/2_tail
// r = z – dn = |x| - rhead – rtail
// expdiff = exp(dn) – exp(r)
// if(expdiff) > 15)
// rtail = |x| - dn*pi/2_tail2
// r = |x| - dn*pi/2_head - dn*pi/2_tail1 - dn*pi/2_tail2 - (((rhead + rtail) – rhead )-rtail)
// rr = (|x| – rhead) – r + rtail
//
// 2. Polynomial approximation
// if(dn is odd)
// rr = rr * r;
// x4 = x2 * x2;
// s = 0.5 * x2;
// t = s - 1.0;
// poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4 + x2 * (C5 + x2 * x6)))))
// r = (((1.0 + t) - s) - rr) + poly – t
// else
// x3 = x2 * r
// poly = S2 + (r2 * (S3 + (r2 * (S4 + (r2 * (S5 + S6 * r2))))))
// r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
// if(((sign & region) | ((~sign) & (~region))) & 1)
// return r
// else
// return -r;
//
// if |x| < pi/4 && |x| > 2.0^(-13)
// sin(x) = x + (x * (r2 * (S1 + r2 * (S2 + r2 * (S3 + r2 * (S4 + r2 * (S5 + r2 * S6)))))))
// if |x| < 2.0^(-13) && |x| > 2.0^(-27)
// sin(x) = x - (x * x * x * (1/6));
const long ARG_HUGE = 0x415312D000000000; // 5e6
const long ARG_LARGE = 0x3FE921FB54442D18; // PI / 4
const long ARG_SMALL = 0x3F20000000000000; // 2^-13
const long ARG_SMALLER = 0x3E40000000000000; // 2^-27
TVectorDouble ax = TVectorDouble.Abs(x);
TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
TVectorDouble result;
if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
TVectorDouble x2 = x * x;
if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
result = SinDoublePoly(x);
}
else
{
// at least one element is: 2^-13 > |x|
TVectorDouble x3 = x2 * x;
result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
}
}
else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
(TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinDoubleLarge(r, rr);
TVectorDouble cos = CosDoubleLarge(r, rr);
result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
sin, // region 0 or 2
cos // region 1 or 3
);
TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
-result, // negative in region 1 or 3, positive in region 0 or 2
+result // negative in region 0 or 2, positive in region 1 or 3
);
}
else
{
return ScalarFallback(x);
}
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
result, // for elements: |x| >= 2^-27, infinity, or NaN
x // for elements: 2^-27 > |x|
);
static TVectorDouble ScalarFallback(TVectorDouble x)
{
TVectorDouble result = TVectorDouble.Zero;
for (int i = 0; i < TVectorDouble.ElementCount; i++)
{
double scalar = double.Sin(x[i]);
result = result.WithElement(i, scalar);
}
return result;
}
}
public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// This code is based on `sinf` from amd/aocl-libm-ose
// Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
//
// Licensed under the BSD 3-Clause "New" or "Revised" License
// See THIRD-PARTY-NOTICES.TXT for the full license text
// Implementation Notes
// ---------------------
// checks for special cases
// if ( ux = infinity) raise overflow exception and return x
// if x is NaN then raise invalid FP operation exception and return x.
//
// 1. Argument reduction
// if |x| > 5e5 then
// __amd_remainder_piby2(x, &r, &rr, ®ion)
// else
// Argument reduction
// Let z = |x| * 2/pi
// z = dn + r, where dn = round(z)
// rhead = dn * pi/2_head
// rtail = dn * pi/2_tail
// r = z – dn = |x| - rhead – rtail
// expdiff = exp(dn) – exp(r)
// if(expdiff) > 15)
// rtail = |x| - dn*pi/2_tail2
// r = |x| - dn*pi/2_head - dn*pi/2_tail1 - dn*pi/2_tail2 - (((rhead + rtail) – rhead )-rtail)
// rr = (|x| – rhead) – r + rtail
//
// 2. Polynomial approximation
// if(dn is odd)
// rr = rr * r;
// x4 = x2 * x2;
// s = 0.5 * x2;
// t = s - 1.0;
// poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4))))
// r = (((1.0 + t) - s) - rr) + poly – t
// else
// x3 = x2 * r
// poly = S2 + (r2 * (S3 + (r2 * (S4))))
// r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
// if(((sign & region) | ((~sign) & (~region))) & 1)
// return r
// else
// return -r;
//
// if |x| < pi/4 && |x| > 2.0^(-13)
// sin(x) = x + (x * (r2 * (S1 + r2 * (S2 + r2 * (S3 + r2 * (S4)))))
// if |x| < 2.0^(-13) && |x| > 2.0^(-27)
// sin(x) = x - (x * x * x * (1/6));
const int ARG_HUGE = 0x4A989680; // 5e6
const int ARG_LARGE = 0x3F490FDB; // PI / 4
const int ARG_SMALL = 0x3C000000; // 2^-13
const int ARG_SMALLER = 0x39000000; // 2^-27
TVectorSingle ax = TVectorSingle.Abs(x);
TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
TVectorSingle result;
if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
{
// We must be a finite value: (pi / 4) >= |x|
if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
{
// at least one element is: |x| >= 2^-13
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
SinSinglePoly(Widen<TVectorSingle, TVectorDouble>(x))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
SinSinglePoly(WidenLower<TVectorSingle, TVectorDouble>(x)),
SinSinglePoly(WidenUpper<TVectorSingle, TVectorDouble>(x))
);
}
}
else
{
// at least one element is: 2^-13 > |x|
TVectorSingle x3 = (x * x) * x;
result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
}
}
else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
{
// at least one element is: |x| > (pi / 4) -or- infinite -or- nan
if (TVectorSingle.ElementCount == TVectorDouble.ElementCount)
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(Widen<TVectorSingle, TVectorDouble>(x))
);
}
else
{
result = Narrow<TVectorDouble, TVectorSingle>(
CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x)),
CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x))
);
}
}
else
{
return ScalarFallback(x);
}
return TVectorSingle.ConditionalSelect(
Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
result, // for elements: |x| >= 2^-27, infinity, or NaN
x // for elements: 2^-27 > |x|
);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static TVectorDouble CoreImpl(TVectorDouble x)
{
TVectorDouble ax = TVectorDouble.Abs(x);
(TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
TVectorDouble sin = SinSinglePoly(r);
TVectorDouble cos = CosSingleLarge(r);
TVectorDouble result = TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
sin, // region 0 or 2
cos // region 1 or 3
);
TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
return TVectorDouble.ConditionalSelect(
Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
-result, // negative in region 1 or 3, positive in region 0 or 2
+result // negative in region 0 or 2, positive in region 1 or 3
);
}
static TVectorSingle ScalarFallback(TVectorSingle x)
{
TVectorSingle result = TVectorSingle.Zero;
for (int i = 0; i < TVectorSingle.ElementCount; i++)
{
float scalar = float.Sin(x[i]);
result = result.WithElement(i, scalar);
}
return result;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble ConvertToDouble<TVectorInt64, TVectorDouble>(TVectorInt64 vector)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
Unsafe.SkipInit(out TVectorDouble result);
if (typeof(TVectorInt64) == typeof(Vector<long>))
{
result = (TVectorDouble)(object)Vector.ConvertToDouble((Vector<long>)(object)vector);
}
else if (typeof(TVectorInt64) == typeof(Vector64<long>))
{
result = (TVectorDouble)(object)Vector64.ConvertToDouble((Vector64<long>)(object)vector);
}
else if (typeof(TVectorInt64) == typeof(Vector128<long>))
{
result = (TVectorDouble)(object)Vector128.ConvertToDouble((Vector128<long>)(object)vector);
}
else if (typeof(TVectorInt64) == typeof(Vector256<long>))
{
result = (TVectorDouble)(object)Vector256.ConvertToDouble((Vector256<long>)(object)vector);
}
else if (typeof(TVectorInt64) == typeof(Vector512<long>))
{
result = (TVectorDouble)(object)Vector512.ConvertToDouble((Vector512<long>)(object)vector);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorSingle ConvertToSingle<TVectorInt32, TVectorSingle>(TVectorInt32 vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
{
Unsafe.SkipInit(out TVectorSingle result);
if (typeof(TVectorInt32) == typeof(Vector<int>))
{
result = (TVectorSingle)(object)Vector.ConvertToSingle((Vector<int>)(object)vector);
}
else if (typeof(TVectorInt32) == typeof(Vector64<int>))
{
result = (TVectorSingle)(object)Vector64.ConvertToSingle((Vector64<int>)(object)vector);
}
else if (typeof(TVectorInt32) == typeof(Vector128<int>))
{
result = (TVectorSingle)(object)Vector128.ConvertToSingle((Vector128<int>)(object)vector);
}
else if (typeof(TVectorInt32) == typeof(Vector256<int>))
{
result = (TVectorSingle)(object)Vector256.ConvertToSingle((Vector256<int>)(object)vector);
}
else if (typeof(TVectorInt32) == typeof(Vector512<int>))
{
result = (TVectorSingle)(object)Vector512.ConvertToSingle((Vector512<int>)(object)vector);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble CosDoubleLarge<TVectorDouble>(TVectorDouble r, TVectorDouble rr)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble s = r2 * 0.5;
TVectorDouble t = s - TVectorDouble.One;
return TVectorDouble.MultiplyAddEstimate(
CosDoublePoly(r),
r4,
TVectorDouble.MultiplyAddEstimate(r, rr, ((TVectorDouble.One + t) - s))
) - t;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble CosDoublePoly<TVectorDouble>(TVectorDouble r)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
const double C1 = +0.041666666666666664;
const double C2 = -0.0013888888888887398;
const double C3 = +2.4801587298767044E-05;
const double C4 = -2.755731727234489E-07;
const double C5 = +2.0876146382372144E-09;
const double C6 = -1.138263981623609E-11;
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble r8 = r4 * r4;
return TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C6), r2, TVectorDouble.Create(C5)),
r8,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r2, TVectorDouble.Create(C3)),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r2, TVectorDouble.Create(C1))
)
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble CosSingleLarge<TVectorDouble>(TVectorDouble r)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
return TVectorDouble.MultiplyAddEstimate(
CosSinglePoly(r),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), r2, TVectorDouble.One)
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble CosSinglePoly<TVectorDouble>(TVectorDouble r)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
const double C1 = +0.041666666666666664;
const double C2 = -0.0013888888888887398;
const double C3 = +2.4801587298767044E-05;
const double C4 = -2.755731727234489E-07;
TVectorDouble r2 = r * r;
TVectorDouble r4 = r2 * r2;
return TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r2, TVectorDouble.Create(C3)),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r2, TVectorDouble.Create(C1))
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble CosSingleSmall<TVectorDouble>(TVectorDouble x)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
TVectorDouble x2 = x * x;
TVectorDouble x4 = x2 * x2;
TVectorDouble r = x2 * 0.5;
TVectorDouble t = TVectorDouble.One - r;
TVectorDouble s = t + (TVectorDouble.One - t - r);
return TVectorDouble.MultiplyAddEstimate(CosSinglePoly(x), x4, s);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVector Create<TVector, T>(double value)
where TVector : unmanaged, ISimdVector<TVector, T>
{
Unsafe.SkipInit(out TVector result);
if (typeof(TVector) == typeof(Vector<double>))
{
result = (TVector)(object)Vector.Create(value);
}
else if (typeof(TVector) == typeof(Vector64<double>))
{
result = (TVector)(object)Vector64.Create(value);
}
else if (typeof(TVector) == typeof(Vector128<double>))
{
result = (TVector)(object)Vector128.Create(value);
}
else if (typeof(TVector) == typeof(Vector256<double>))
{
result = (TVector)(object)Vector256.Create(value);
}
else if (typeof(TVector) == typeof(Vector512<double>))
{
result = (TVector)(object)Vector512.Create(value);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVector Create<TVector, T>(float value)
where TVector : unmanaged, ISimdVector<TVector, T>
{
Unsafe.SkipInit(out TVector result);
if (typeof(TVector) == typeof(Vector<float>))
{
result = (TVector)(object)Vector.Create(value);
}
else if (typeof(TVector) == typeof(Vector64<float>))
{
result = (TVector)(object)Vector64.Create(value);
}
else if (typeof(TVector) == typeof(Vector128<float>))
{
result = (TVector)(object)Vector128.Create(value);
}
else if (typeof(TVector) == typeof(Vector256<float>))
{
result = (TVector)(object)Vector256.Create(value);
}
else if (typeof(TVector) == typeof(Vector512<float>))
{
result = (TVector)(object)Vector512.Create(value);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble vector)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
{
Unsafe.SkipInit(out TVectorSingle result);
if (typeof(TVectorDouble) == typeof(Vector128<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector64<float>));
if (AdvSimd.Arm64.IsSupported)
{
result = (TVectorSingle)(object)AdvSimd.Arm64.ConvertToSingleLower((Vector128<double>)(object)vector);
}
else
{
Vector128<double> value = (Vector128<double>)(object)vector;
result = (TVectorSingle)(object)Vector64.Narrow(value.GetLower(), value.GetUpper());
}
}
else if (typeof(TVectorDouble) == typeof(Vector256<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector128<float>));
if (Avx.IsSupported)
{
result = (TVectorSingle)(object)Avx.ConvertToVector128Single((Vector256<double>)(object)vector);
}
else
{
Vector256<double> value = (Vector256<double>)(object)vector;
result = (TVectorSingle)(object)Vector128.Narrow(value.GetLower(), value.GetUpper());
}
}
else if (typeof(TVectorDouble) == typeof(Vector512<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector256<float>));
if (Avx512F.IsSupported)
{
result = (TVectorSingle)(object)Avx512F.ConvertToVector256Single((Vector512<double>)(object)vector);
}
else
{
Vector512<double> value = (Vector512<double>)(object)vector;
result = (TVectorSingle)(object)Vector256.Narrow(value.GetLower(), value.GetUpper());
}
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble lower, TVectorDouble upper)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
{
Unsafe.SkipInit(out TVectorSingle result);
if (typeof(TVectorDouble) == typeof(Vector<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector<float>));
result = (TVectorSingle)(object)Vector.Narrow((Vector<double>)(object)lower, (Vector<double>)(object)upper);
}
else if (typeof(TVectorDouble) == typeof(Vector64<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector64<float>));
result = (TVectorSingle)(object)Vector64.Narrow((Vector64<double>)(object)lower, (Vector64<double>)(object)upper);
}
else if (typeof(TVectorDouble) == typeof(Vector128<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector128<float>));
result = (TVectorSingle)(object)Vector128.Narrow((Vector128<double>)(object)lower, (Vector128<double>)(object)upper);
}
else if (typeof(TVectorDouble) == typeof(Vector256<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector256<float>));
result = (TVectorSingle)(object)Vector256.Narrow((Vector256<double>)(object)lower, (Vector256<double>)(object)upper);
}
else if (typeof(TVectorDouble) == typeof(Vector512<double>))
{
Debug.Assert(typeof(TVectorSingle) == typeof(Vector512<float>));
result = (TVectorSingle)(object)Vector512.Narrow((Vector512<double>)(object)lower, (Vector512<double>)(object)upper);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorUInt32 ShiftLeftUInt32<TVectorUInt32>(TVectorUInt32 vector, TVectorUInt32 shiftAmount)
where TVectorUInt32 : unmanaged, ISimdVector<TVectorUInt32, uint>
{
Unsafe.SkipInit(out TVectorUInt32 result);
if (typeof(TVectorUInt32) == typeof(Vector<uint>))
{
result = (TVectorUInt32)(object)Vector.ShiftLeft(
(Vector<uint>)(object)vector,
(Vector<uint>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt32) == typeof(Vector64<uint>))
{
result = (TVectorUInt32)(object)Vector64.ShiftLeft(
(Vector64<uint>)(object)vector,
(Vector64<uint>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt32) == typeof(Vector128<uint>))
{
result = (TVectorUInt32)(object)Vector128.ShiftLeft(
(Vector128<uint>)(object)vector,
(Vector128<uint>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt32) == typeof(Vector256<uint>))
{
result = (TVectorUInt32)(object)Vector256.ShiftLeft(
(Vector256<uint>)(object)vector,
(Vector256<uint>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt32) == typeof(Vector512<uint>))
{
result = (TVectorUInt32)(object)Vector512.ShiftLeft(
(Vector512<uint>)(object)vector,
(Vector512<uint>)(object)shiftAmount
);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorUInt64 ShiftLeftUInt64<TVectorUInt64>(TVectorUInt64 vector, TVectorUInt64 shiftAmount)
where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
{
Unsafe.SkipInit(out TVectorUInt64 result);
if (typeof(TVectorUInt64) == typeof(Vector<ulong>))
{
result = (TVectorUInt64)(object)Vector.ShiftLeft(
(Vector<ulong>)(object)vector,
(Vector<ulong>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt64) == typeof(Vector64<ulong>))
{
result = (TVectorUInt64)(object)Vector64.ShiftLeft(
(Vector64<ulong>)(object)vector,
(Vector64<ulong>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt64) == typeof(Vector128<ulong>))
{
result = (TVectorUInt64)(object)Vector128.ShiftLeft(
(Vector128<ulong>)(object)vector,
(Vector128<ulong>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt64) == typeof(Vector256<ulong>))
{
result = (TVectorUInt64)(object)Vector256.ShiftLeft(
(Vector256<ulong>)(object)vector,
(Vector256<ulong>)(object)shiftAmount
);
}
else if (typeof(TVectorUInt64) == typeof(Vector512<ulong>))
{
result = (TVectorUInt64)(object)Vector512.ShiftLeft(
(Vector512<ulong>)(object)vector,
(Vector512<ulong>)(object)shiftAmount
);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble SinDoubleLarge<TVectorDouble>(TVectorDouble r, TVectorDouble rr)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
const double S1 = -0.16666666666666666;
const double S2 = +0.00833333333333095;
const double S3 = -0.00019841269836761127;
const double S4 = +2.7557316103728802E-06;
const double S5 = -2.5051132068021698E-08;
const double S6 = +1.5918144304485914E-10;
TVectorDouble r2 = r * r;
TVectorDouble r3 = r2 * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble r8 = r4 * r4;
TVectorDouble sinPoly = TVectorDouble.MultiplyAddEstimate(
TVectorDouble.Create(S6),
r8,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S5), r2, TVectorDouble.Create(S4)),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S3), r2, TVectorDouble.Create(S2))
)
);
return r - TVectorDouble.MultiplyAddEstimate(
TVectorDouble.Create(-S1),
r3,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(rr, TVectorDouble.Create(0.5), -(r3 * sinPoly)),
r2,
-rr
)
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble SinDoublePoly<TVectorDouble>(TVectorDouble r)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
const double S1 = -0.16666666666666666;
const double S2 = +0.00833333333333095;
const double S3 = -0.00019841269836761127;
const double S4 = +2.7557316103728802E-06;
const double S5 = -2.5051132068021698E-08;
const double S6 = +1.5918144304485914E-10;
TVectorDouble r2 = r * r;
TVectorDouble r3 = r2 * r;
TVectorDouble r4 = r2 * r2;
TVectorDouble r8 = r4 * r4;
TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S6), r2, TVectorDouble.Create(S5)),
r8,
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S4), r2, TVectorDouble.Create(S3)),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S2), r2, TVectorDouble.Create(S1)))
);
return TVectorDouble.MultiplyAddEstimate(poly, r3, r);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble SinSinglePoly<TVectorDouble>(TVectorDouble r)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
const double S1 = -0.16666666666666666;
const double S2 = +0.00833333333333095;
const double S3 = -0.00019841269836761127;
const double S4 = +2.7557316103728802E-06;
TVectorDouble r2 = r * r;
TVectorDouble r3 = r2 * r;
TVectorDouble r4 = r2 * r2;
return TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S4), r2, TVectorDouble.Create(S3)),
r4,
TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S2), r2, TVectorDouble.Create(S1))),
r3,
r
);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) SinCosReduce<TVectorDouble, TVectorInt64>(TVectorDouble ax)
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
{
// reduce the argument to be in a range from (-pi / 4) to (+pi / 4) by subtracting multiples of (pi / 2)
const double V_ALM_SHIFT = 6755399441055744.0;
const double V_TWO_BY_PI = 0.6366197723675814;
const double V_PI_BY_TWO_1 = 1.5707963267341256;
const double V_PI_BY_TWO_2 = 6.077100506303966E-11;
const double V_PI_BY_TWO_2_TAIL = 2.0222662487959506E-21;
// dn = (int)(|x| * 2 / pi)
TVectorDouble npi2 = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(V_TWO_BY_PI), ax, TVectorDouble.Create(V_ALM_SHIFT));
TVectorInt64 region = Unsafe.BitCast<TVectorDouble, TVectorInt64>(npi2);
npi2 -= TVectorDouble.Create(V_ALM_SHIFT);
TVectorDouble rhead = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-V_PI_BY_TWO_1), npi2, ax);
TVectorDouble rtail = npi2 * V_PI_BY_TWO_2;
TVectorDouble r = rhead - rtail;
rtail = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(V_PI_BY_TWO_2_TAIL), npi2, -(rhead - r - rtail));
rhead = r;
r -= rtail;
return (r, (rhead - r) - rtail, region);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble Widen<TVectorSingle, TVectorDouble>(TVectorSingle vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
Unsafe.SkipInit(out TVectorDouble result);
if (typeof(TVectorSingle) == typeof(Vector64<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
if (AdvSimd.Arm64.IsSupported)
{
result = (TVectorDouble)(object)AdvSimd.Arm64.ConvertToDouble((Vector64<float>)(object)vector);
}
else
{
Vector64<float> value = (Vector64<float>)(object)vector;
Vector64<double> lower = Vector64.WidenLower(value);
Vector64<double> upper = Vector64.WidenUpper(value);
result = (TVectorDouble)(object)Vector128.Create(lower, upper);
}
}
else if (typeof(TVectorSingle) == typeof(Vector128<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
if (Avx.IsSupported)
{
result = (TVectorDouble)(object)Avx.ConvertToVector256Double((Vector128<float>)(object)vector);
}
else
{
Vector128<float> value = (Vector128<float>)(object)vector;
Vector128<double> lower = Vector128.WidenLower(value);
Vector128<double> upper = Vector128.WidenUpper(value);
result = (TVectorDouble)(object)Vector256.Create(lower, upper);
}
}
else if (typeof(TVectorSingle) == typeof(Vector256<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
if (Avx512F.IsSupported)
{
result = (TVectorDouble)(object)Avx512F.ConvertToVector512Double((Vector256<float>)(object)vector);
}
else
{
Vector256<float> value = (Vector256<float>)(object)vector;
Vector256<double> lower = Vector256.WidenLower(value);
Vector256<double> upper = Vector256.WidenUpper(value);
result = (TVectorDouble)(object)Vector512.Create(lower, upper);
}
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble WidenLower<TVectorSingle, TVectorDouble>(TVectorSingle vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
Unsafe.SkipInit(out TVectorDouble result);
if (typeof(TVectorSingle) == typeof(Vector<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector<double>));
result = (TVectorDouble)(object)Vector.WidenLower((Vector<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector64<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector64<double>));
result = (TVectorDouble)(object)Vector64.WidenLower((Vector64<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector128<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
result = (TVectorDouble)(object)Vector128.WidenLower((Vector128<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector256<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
result = (TVectorDouble)(object)Vector256.WidenLower((Vector256<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector512<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
result = (TVectorDouble)(object)Vector512.WidenLower((Vector512<float>)(object)vector);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static TVectorDouble WidenUpper<TVectorSingle, TVectorDouble>(TVectorSingle vector)
where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
{
Unsafe.SkipInit(out TVectorDouble result);
if (typeof(TVectorSingle) == typeof(Vector<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector<double>));
result = (TVectorDouble)(object)Vector.WidenUpper((Vector<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector64<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector64<double>));
result = (TVectorDouble)(object)Vector64.WidenUpper((Vector64<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector128<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
result = (TVectorDouble)(object)Vector128.WidenUpper((Vector128<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector256<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
result = (TVectorDouble)(object)Vector256.WidenUpper((Vector256<float>)(object)vector);
}
else if (typeof(TVectorSingle) == typeof(Vector512<float>))
{
Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
result = (TVectorDouble)(object)Vector512.WidenUpper((Vector512<float>)(object)vector);
}
else
{
ThrowHelper.ThrowNotSupportedException();
}
return result;
}
}
}
|