System\Numerics\Tensors\netcore\Common\TensorPrimitives.IAggregationOperator.cs (104)
20static abstract T Invoke(Vector512<T> x);
35public Vector512<T> Invoke(Vector512<T> x) => TOperator.Invoke(x);
72if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable)
76if (remainder >= (uint)Vector512<T>.Count)
515Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
519Vector512<T> beg = transform.Invoke(Vector512.LoadUnsafe(ref xRef));
520Vector512<T> end = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
524if (remainder > (uint)(Vector512<T>.Count * 8))
550misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
554Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
564misalignment = (uint)Vector512<T>.Count;
569Vector512<T> vector1;
570Vector512<T> vector2;
571Vector512<T> vector3;
572Vector512<T> vector4;
576while (remainder >= (uint)(Vector512<T>.Count * 8))
580vector1 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
581vector2 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
582vector3 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
583vector4 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
592vector1 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
593vector2 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
594vector3 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
595vector4 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
605xPtr += (uint)(Vector512<T>.Count * 8);
607remainder -= (uint)(Vector512<T>.Count * 8);
628(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
636Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
643Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
650Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
657Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
664Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
671Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
678Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)));
1191if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
1195if (remainder >= (uint)Vector512<T>.Count)
1686Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
1690Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
1692Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
1693Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
1697if (remainder > (uint)(Vector512<T>.Count * 8))
1725misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
1730Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
1740misalignment = (uint)Vector512<T>.Count;
1748Vector512<T> vector1;
1749Vector512<T> vector2;
1750Vector512<T> vector3;
1751Vector512<T> vector4;
1755while (remainder >= (uint)(Vector512<T>.Count * 8))
1759vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
1760Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
1761vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
1762Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
1763vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
1764Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
1765vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
1766Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
1775vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
1776Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
1777vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
1778Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
1779vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
1780Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
1781vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
1782Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
1792xPtr += (uint)(Vector512<T>.Count * 8);
1793yPtr += (uint)(Vector512<T>.Count * 8);
1795remainder -= (uint)(Vector512<T>.Count * 8);
1817(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
1825Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
1826Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
1833Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
1834Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
1841Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
1842Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
1849Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
1850Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
1857Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
1858Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
1865Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
1866Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
1873Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)),
1874Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 1)));
2463private static Vector512<T> CreateAlignmentMaskVector512<T>(int count)
2571private static Vector512<T> CreateRemainderMaskVector512<T>(int count)
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBinaryOperator.cs (190)
20static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y);
85if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
87if (remainder >= (uint)Vector512<T>.Count)
644Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
646Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
647Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
649if (remainder > (uint)(Vector512<T>.Count * 8))
676nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
682Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
687Vector512<T> vector1;
688Vector512<T> vector2;
689Vector512<T> vector3;
690Vector512<T> vector4;
697while (remainder >= (uint)(Vector512<T>.Count * 8))
701vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
702Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
703vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
704Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
705vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
706Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
707vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
708Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
710vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
711vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
712vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
713vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
717vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
718Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
719vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
720Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
721vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
722Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
723vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
724Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
726vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
727vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
728vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
729vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
734xPtr += (uint)(Vector512<T>.Count * 8);
735yPtr += (uint)(Vector512<T>.Count * 8);
736dPtr += (uint)(Vector512<T>.Count * 8);
738remainder -= (uint)(Vector512<T>.Count * 8);
743while (remainder >= (uint)(Vector512<T>.Count * 8))
747vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
748Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
749vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
750Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
751vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
752Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
753vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
754Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
756vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
757vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
758vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
759vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
763vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
764Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
765vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
766Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
767vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
768Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
769vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
770Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
772vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
773vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
774vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
775vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
780xPtr += (uint)(Vector512<T>.Count * 8);
781yPtr += (uint)(Vector512<T>.Count * 8);
782dPtr += (uint)(Vector512<T>.Count * 8);
784remainder -= (uint)(Vector512<T>.Count * 8);
804remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
806switch (remainder / (uint)Vector512<T>.Count)
810Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
811Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)));
812vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
818Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
819Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
820vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
826Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
827Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
828vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
834Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
835Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
836vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
842Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
843Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
844vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
850Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
851Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
852vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
858Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
859Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
860vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
867end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
1417if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
1419if (remainder >= (uint)Vector512<T>.Count)
1968Vector512<T> yVec = Vector512.Create(y);
1970Vector512<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef)),
1972Vector512<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count)),
1975if (remainder > (uint)(Vector512<T>.Count * 8))
2000nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
2005Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
2010Vector512<T> vector1;
2011Vector512<T> vector2;
2012Vector512<T> vector3;
2013Vector512<T> vector4;
2020while (remainder >= (uint)(Vector512<T>.Count * 8))
2024vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
2026vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
2028vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
2030vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
2033vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
2034vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
2035vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
2036vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
2040vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
2042vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
2044vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
2046vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
2049vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
2050vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
2051vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
2052vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
2057xPtr += (uint)(Vector512<T>.Count * 8);
2058dPtr += (uint)(Vector512<T>.Count * 8);
2060remainder -= (uint)(Vector512<T>.Count * 8);
2065while (remainder >= (uint)(Vector512<T>.Count * 8))
2069vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
2071vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
2073vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
2075vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
2078vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
2079vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
2080vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
2081vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
2085vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
2087vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
2089vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
2091vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
2094vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
2095vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
2096vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
2097vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
2102xPtr += (uint)(Vector512<T>.Count * 8);
2103dPtr += (uint)(Vector512<T>.Count * 8);
2105remainder -= (uint)(Vector512<T>.Count * 8);
2124remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
2126switch (remainder / (uint)Vector512<T>.Count)
2130Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8))),
2132vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
2138Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7))),
2140vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
2146Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6))),
2148vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
2154Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5))),
2156vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
2162Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4))),
2164vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
2170Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3))),
2172vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
2178Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2))),
2180vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
2187end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
2730private static T HorizontalAggregate<T, TAggregate>(Vector512<T> x) where TAggregate : struct, IBinaryOperator<T> =>
2775public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBooleanUnaryOperator.cs (47)
22static abstract Vector512<T> Invoke(Vector512<T> x);
31static abstract bool ShouldEarlyExit(Vector512<T> result);
43public static bool ShouldEarlyExit(Vector512<T> result) => Vector512.AnyWhereAllBitsSet(result);
78public static bool ShouldEarlyExit(Vector512<T> result) =>
79typeof(T) == typeof(float) ? Vector512.EqualsAny(result.AsUInt32(), Vector512<uint>.Zero) :
80typeof(T) == typeof(double) ? Vector512.EqualsAny(result.AsUInt64(), Vector512<ulong>.Zero) :
81Vector512.EqualsAny(result, Vector512<T>.Zero);
101if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
103oneVectorFromEnd = x.Length - Vector512<T>.Count;
114i += Vector512<T>.Count;
120TAnyAll.ShouldEarlyExit(TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count)))))
235if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
237int vectorFromEnd = x.Length - Vector512<T>.Count;
244i += Vector512<T>.Count;
251i = x.Length - Vector512<T>.Count;
259Vector512<byte> v = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i)).AsByte();
261(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
343if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
345int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
352i += Vector512<T>.Count * sizeof(T);
359i = x.Length - (Vector512<T>.Count * sizeof(T));
367Vector512<byte> v =
370TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt16());
372(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
460if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
462int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
469i += Vector512<T>.Count * sizeof(T);
476i = x.Length - (Vector512<T>.Count * sizeof(T));
484Vector512<byte> v =
488TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt32()),
490TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector512<T>.Count)))).AsUInt32(),
491TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector512<T>.Count)))).AsUInt32()));
493(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
587if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
591int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
598i += Vector512<T>.Count * sizeof(T);
605i = x.Length - (Vector512<T>.Count * sizeof(T));
613Vector512<byte> v =
618TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt64()),
620TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector512<T>.Count)))).AsUInt64(),
621TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector512<T>.Count)))).AsUInt64())),
624TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (4 * Vector512<T>.Count)))).AsUInt64(),
625TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (5 * Vector512<T>.Count)))).AsUInt64()),
627TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (6 * Vector512<T>.Count)))).AsUInt64(),
628TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (7 * Vector512<T>.Count)))).AsUInt64())));
630(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IStatefulUnaryOperator.cs (80)
20Vector512<T> Invoke(Vector512<T> x);
51if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
53if (remainder >= (uint)Vector512<T>.Count)
547Vector512<T> beg = op.Invoke(Vector512.LoadUnsafe(ref xRef));
548Vector512<T> end = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
550if (remainder > (uint)(Vector512<T>.Count * 8))
575nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
580Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
585Vector512<T> vector1;
586Vector512<T> vector2;
587Vector512<T> vector3;
588Vector512<T> vector4;
595while (remainder >= (uint)(Vector512<T>.Count * 8))
599vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
600vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
601vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
602vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
604vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
605vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
606vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
607vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
611vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
612vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
613vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
614vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
616vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
617vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
618vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
619vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
624xPtr += (uint)(Vector512<T>.Count * 8);
625dPtr += (uint)(Vector512<T>.Count * 8);
627remainder -= (uint)(Vector512<T>.Count * 8);
632while (remainder >= (uint)(Vector512<T>.Count * 8))
636vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
637vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
638vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
639vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
641vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
642vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
643vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
644vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
648vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
649vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
650vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
651vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
653vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
654vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
655vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
656vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
661xPtr += (uint)(Vector512<T>.Count * 8);
662dPtr += (uint)(Vector512<T>.Count * 8);
664remainder -= (uint)(Vector512<T>.Count * 8);
683remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
685switch (remainder / (uint)Vector512<T>.Count)
689Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)));
690vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
696Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
697vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
703Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
704vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
710Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
711vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
717Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
718vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
724Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
725vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
731Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
732vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
739end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.ITernaryOperator.cs (323)
19static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z);
28public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => TOperator.Invoke(x, z, y);
70if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
72if (remainder >= (uint)Vector512<T>.Count)
692Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
695Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
696Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
697Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
699if (remainder > (uint)(Vector512<T>.Count * 8))
728nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
735Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
740Vector512<T> vector1;
741Vector512<T> vector2;
742Vector512<T> vector3;
743Vector512<T> vector4;
750while (remainder >= (uint)(Vector512<T>.Count * 8))
754vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
755Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
756Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
757vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
758Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
759Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
760vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
761Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
762Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
763vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
764Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
765Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
767vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
768vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
769vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
770vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
774vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
775Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
776Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
777vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
778Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
779Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
780vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
781Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
782Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
783vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
784Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
785Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
787vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
788vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
789vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
790vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
795xPtr += (uint)(Vector512<T>.Count * 8);
796yPtr += (uint)(Vector512<T>.Count * 8);
797zPtr += (uint)(Vector512<T>.Count * 8);
798dPtr += (uint)(Vector512<T>.Count * 8);
800remainder -= (uint)(Vector512<T>.Count * 8);
805while (remainder >= (uint)(Vector512<T>.Count * 8))
809vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
810Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
811Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
812vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
813Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
814Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
815vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
816Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
817Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
818vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
819Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
820Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
822vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
823vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
824vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
825vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
829vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
830Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
831Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
832vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
833Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
834Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
835vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
836Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
837Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
838vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
839Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
840Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
842vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
843vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
844vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
845vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
850xPtr += (uint)(Vector512<T>.Count * 8);
851yPtr += (uint)(Vector512<T>.Count * 8);
852zPtr += (uint)(Vector512<T>.Count * 8);
853dPtr += (uint)(Vector512<T>.Count * 8);
855remainder -= (uint)(Vector512<T>.Count * 8);
876remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
878switch (remainder / (uint)Vector512<T>.Count)
882Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
883Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
884Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
885vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
891Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
892Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
893Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
894vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
900Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
901Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
902Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
903vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
909Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
910Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
911Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
912vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
918Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
919Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
920Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
921vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
927Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
928Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
929Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
930vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
936Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
937Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
938Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
939vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
946end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
1545if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
1547if (remainder >= (uint)Vector512<T>.Count)
2159Vector512<T> zVec = Vector512.Create(z);
2161Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
2164Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
2165Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
2168if (remainder > (uint)(Vector512<T>.Count * 8))
2195nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
2201Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
2206Vector512<T> vector1;
2207Vector512<T> vector2;
2208Vector512<T> vector3;
2209Vector512<T> vector4;
2216while (remainder >= (uint)(Vector512<T>.Count * 8))
2220vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
2221Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
2223vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
2224Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
2226vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
2227Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
2229vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
2230Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
2233vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
2234vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
2235vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
2236vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
2240vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
2241Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
2243vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
2244Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
2246vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
2247Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
2249vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
2250Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
2253vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
2254vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
2255vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
2256vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
2261xPtr += (uint)(Vector512<T>.Count * 8);
2262yPtr += (uint)(Vector512<T>.Count * 8);
2263dPtr += (uint)(Vector512<T>.Count * 8);
2265remainder -= (uint)(Vector512<T>.Count * 8);
2270while (remainder >= (uint)(Vector512<T>.Count * 8))
2274vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
2275Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
2277vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
2278Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
2280vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
2281Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
2283vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
2284Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
2287vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
2288vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
2289vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
2290vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
2294vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
2295Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
2297vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
2298Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
2300vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
2301Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
2303vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
2304Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
2307vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
2308vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
2309vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
2310vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
2315xPtr += (uint)(Vector512<T>.Count * 8);
2316yPtr += (uint)(Vector512<T>.Count * 8);
2317dPtr += (uint)(Vector512<T>.Count * 8);
2319remainder -= (uint)(Vector512<T>.Count * 8);
2339remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
2341switch (remainder / (uint)Vector512<T>.Count)
2345Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
2346Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
2348vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
2354Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
2355Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
2357vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
2363Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
2364Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
2366vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
2372Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
2373Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
2375vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
2381Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
2382Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
2384vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
2390Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
2391Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
2393vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
2399Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
2400Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
2402vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
2409end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
3031if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
3033if (remainder >= (uint)Vector512<T>.Count)
3635Vector512<T> yVec = Vector512.Create(y);
3636Vector512<T> zVec = Vector512.Create(z);
3638Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
3641Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
3645if (remainder > (uint)(Vector512<T>.Count * 8))
3670nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
3675Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
3680Vector512<T> vector1;
3681Vector512<T> vector2;
3682Vector512<T> vector3;
3683Vector512<T> vector4;
3690while (remainder >= (uint)(Vector512<T>.Count * 8))
3694vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
3697vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
3700vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
3703vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
3707vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
3708vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
3709vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
3710vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
3714vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
3717vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
3720vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
3723vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
3727vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
3728vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
3729vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
3730vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
3735xPtr += (uint)(Vector512<T>.Count * 8);
3736dPtr += (uint)(Vector512<T>.Count * 8);
3738remainder -= (uint)(Vector512<T>.Count * 8);
3743while (remainder >= (uint)(Vector512<T>.Count * 8))
3747vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
3750vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
3753vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
3756vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
3760vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
3761vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
3762vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
3763vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
3767vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
3770vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
3773vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
3776vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
3780vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
3781vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
3782vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
3783vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
3788xPtr += (uint)(Vector512<T>.Count * 8);
3789dPtr += (uint)(Vector512<T>.Count * 8);
3791remainder -= (uint)(Vector512<T>.Count * 8);
3810remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
3812switch (remainder / (uint)Vector512<T>.Count)
3816Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
3819vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
3825Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
3828vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
3834Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
3837vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
3843Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
3846vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
3852Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
3855vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
3861Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
3864vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
3870Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
3873vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
3880end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryInputBinaryOutput.cs (40)
20static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x);
30static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x, Vector512<T> y);
41public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
71if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TUnaryOperator.Vectorizable)
73oneVectorFromEnd = x.Length - Vector512<T>.Count;
79(Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
83i += Vector512<T>.Count;
90i = x.Length - Vector512<T>.Count;
92(Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
206if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TOperator.Vectorizable)
208oneVectorFromEnd = x.Length - Vector512<T>.Count;
214(Vector512<T> first, Vector512<T> second) = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), Vector512.LoadUnsafe(ref yRef, (uint)i));
218i += Vector512<T>.Count;
225Vector512<T> mask = Vector512.Equals(CreateRemainderMaskVector512<T>(x.Length - i), Vector512<T>.Zero);
227i = x.Length - Vector512<T>.Count;
229Vector512<T> first = Vector512.ConditionalSelect(mask,
233Vector512<T> second = Vector512.ConditionalSelect(mask,
387if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TOperator.Vectorizable)
389oneVectorFromEnd = x.Length - Vector512<T>.Count;
392Vector512<T> yVec = Vector512.Create(y);
397(Vector512<T> first, Vector512<T> second) = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), yVec);
401i += Vector512<T>.Count;
408Vector512<T> mask = Vector512.Equals(CreateRemainderMaskVector512<T>(x.Length - i), Vector512<T>.Zero);
410i = x.Length - Vector512<T>.Count;
412Vector512<T> first = Vector512.ConditionalSelect(mask,
416Vector512<T> second = Vector512.ConditionalSelect(mask,
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOneToFourOperator.cs (24)
21static abstract (Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) Invoke(Vector512<TInput> x);
46Debug.Assert(Vector512<TInput>.IsSupported);
47Debug.Assert(Vector512<TOutput>.IsSupported);
49oneVectorFromEnd = x.Length - Vector512<TInput>.Count;
55(Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) results = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
57results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
58results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 2)));
59results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 3)));
61i += Vector512<TInput>.Count;
68i = x.Length - Vector512<TInput>.Count;
70(Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) results = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
72results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
73results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 2)));
74results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 3)));
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOperator.cs (83)
37static abstract Vector512<TOutput> Invoke(Vector512<TInput> x);
47public static Vector512<T> Invoke(Vector512<T> x) => x;
91if (Vector512.IsHardwareAccelerated && Vector512<TInput>.IsSupported && Vector512<TOutput>.IsSupported && TUnaryOperator.Vectorizable && sizeof(TInput) == sizeof(TOutput))
93if (remainder >= (uint)Vector512<TInput>.Count)
587Vector512<TOutput> beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
588Vector512<TOutput> end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<TInput>.Count));
590if (remainder > (uint)(Vector512<TInput>.Count * 8))
615nuint misalignment = ((uint)sizeof(Vector512<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector512<TInput>))) / (uint)sizeof(TInput);
620Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<TInput>)) == 0);
625Vector512<TOutput> vector1;
626Vector512<TOutput> vector2;
627Vector512<TOutput> vector3;
628Vector512<TOutput> vector4;
635while (remainder >= (uint)(Vector512<TInput>.Count * 8))
639vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
640vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
641vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
642vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
644vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 0));
645vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 1));
646vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 2));
647vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 3));
651vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
652vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
653vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
654vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
656vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 4));
657vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 5));
658vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 6));
659vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 7));
664xPtr += (uint)(Vector512<TInput>.Count * 8);
665dPtr += (uint)(Vector512<TInput>.Count * 8);
667remainder -= (uint)(Vector512<TInput>.Count * 8);
672while (remainder >= (uint)(Vector512<TInput>.Count * 8))
676vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
677vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
678vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
679vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
681vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 0));
682vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 1));
683vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 2));
684vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 3));
688vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
689vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
690vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
691vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
693vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 4));
694vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 5));
695vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 6));
696vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 7));
701xPtr += (uint)(Vector512<TInput>.Count * 8);
702dPtr += (uint)(Vector512<TOutput>.Count * 8);
704remainder -= (uint)(Vector512<TInput>.Count * 8);
723remainder = (remainder + (uint)(Vector512<TInput>.Count - 1)) & (nuint)(-Vector512<TInput>.Count);
725switch (remainder / (uint)Vector512<TInput>.Count)
729Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 8)));
730vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 8));
736Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 7)));
737vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 7));
743Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 6)));
744vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 6));
750Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 5)));
751vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 5));
757Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 4)));
758vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 4));
764Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 3)));
765vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 3));
771Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 2)));
772vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 2));
779end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<TOutput>.Count);
System\Numerics\Tensors\netcore\TensorPrimitives.Clamp.cs (12)
180public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> min, Vector512<T> max) => Vector512.Clamp(x, min, max);
228public static Vector512<T> Invoke(Vector512<T> min, Vector512<T> max, Vector512<T> x) => Vector512.Clamp(x, min, max);
277public static Vector512<T> Invoke(Vector512<T> max, Vector512<T> x, Vector512<T> min) => Vector512.Clamp(x, min, max);
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertHelpers.cs (77)
172public static Vector512<float> Invoke(Vector512<int> x) => Vector512.ConvertToSingle(x);
183public static Vector512<float> Invoke(Vector512<uint> x) => Vector512.ConvertToSingle(x);
194public static Vector512<double> Invoke(Vector512<ulong> x) => Vector512.ConvertToDouble(x);
205public static Vector512<double> Invoke(Vector512<long> x) => Vector512.ConvertToDouble(x);
216public static (Vector512<double> Lower, Vector512<double> Upper) Invoke(Vector512<float> x) => Vector512.Widen(x);
227public static Vector512<float> Invoke(Vector512<double> lower, Vector512<double> upper) => Vector512.Narrow(lower, upper);
238public static (Vector512<ushort> Lower, Vector512<ushort> Upper) Invoke(Vector512<byte> x) => Vector512.Widen(x);
267public static (Vector512<uint>, Vector512<uint>, Vector512<uint>, Vector512<uint>) Invoke(Vector512<byte> x)
269(Vector512<ushort> Lower, Vector512<ushort> Upper) ushorts = Vector512.Widen(x);
270(Vector512<uint> Lower, Vector512<uint> Upper) uintsLower = Vector512.Widen(ushorts.Lower);
271(Vector512<uint> Lower, Vector512<uint> Upper) uintsUpper = Vector512.Widen(ushorts.Upper);
308public static (Vector512<float>, Vector512<float>, Vector512<float>, Vector512<float>) Invoke(Vector512<byte> x)
328public static (Vector512<short> Lower, Vector512<short> Upper) Invoke(Vector512<sbyte> x) => Vector512.Widen(x);
339public static (Vector512<uint> Lower, Vector512<uint> Upper) Invoke(Vector512<ushort> x) => Vector512.Widen(x);
350public static (Vector512<int> Lower, Vector512<int> Upper) Invoke(Vector512<short> x) => Vector512.Widen(x);
361public static (Vector512<ulong> Lower, Vector512<ulong> Upper) Invoke(Vector512<uint> x) => Vector512.Widen(x);
372public static (Vector512<long> Lower, Vector512<long> Upper) Invoke(Vector512<int> x) => Vector512.Widen(x);
498public static (Vector512<float> Lower, Vector512<float> Upper) Invoke(Vector512<short> x)
500(Vector512<int> lowerInt32, Vector512<int> upperInt32) = Vector512.Widen(x);
505static Vector512<float> HalfAsWidenedUInt32ToSingle(Vector512<uint> value)
508Vector512<uint> sign = value & Vector512.Create(SingleSignMask);
511Vector512<uint> bitValueInProcess = value;
514Vector512<uint> offsetExponent = bitValueInProcess & Vector512.Create(HalfExponentMask);
517Vector512<uint> subnormalMask = Vector512.Equals(offsetExponent, Vector512<uint>.Zero);
520Vector512<uint> infinityOrNaNMask = Vector512.Equals(offsetExponent, Vector512.Create(HalfExponentMask));
523Vector512<uint> maskedExponentLowerBound = subnormalMask & Vector512.Create(ExponentLowerBound);
526Vector512<uint> offsetMaskedExponentLowerBound = Vector512.Create(ExponentOffset) | maskedExponentLowerBound;
532offsetMaskedExponentLowerBound = Vector512.ConditionalSelect(Vector512.Equals(infinityOrNaNMask, Vector512<uint>.Zero),
543Vector512<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
703public static Vector512<ushort> Invoke(Vector512<float> lower, Vector512<float> upper)
709static Vector512<uint> SingleToHalfAsWidenedUInt32(Vector512<float> value)
711Vector512<uint> bitValue = value.AsUInt32();
714Vector512<uint> sign = Vector512.ShiftRightLogical(bitValue & Vector512.Create(SingleSignMask), 16);
717Vector512<uint> realMask = Vector512.Equals(value, value).AsUInt32();
726Vector512<uint> exponentOffset0 = Vector512.Max(value, Vector512.Create(MinExp).AsSingle()).AsUInt32();
739Vector512<uint> maskedHalfExponentForNaN = ~realMask & Vector512.Create(ExponentMask);
745Vector512<uint> newExponent = Vector512.ShiftRightLogical(bitValue, 13);
757Vector512<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertTruncating.cs (28)
111public static Vector512<int> Invoke(Vector512<float> x) => Vector512.ConvertToInt32(x);
122public static Vector512<uint> Invoke(Vector512<float> x) => Vector512.ConvertToUInt32(x);
133public static Vector512<ulong> Invoke(Vector512<double> x) => Vector512.ConvertToUInt64(x);
144public static Vector512<long> Invoke(Vector512<double> x) => Vector512.ConvertToInt64(x);
155public static Vector512<byte> Invoke(Vector512<ushort> lower, Vector512<ushort> upper) => Vector512.Narrow(lower, upper);
166public static Vector512<sbyte> Invoke(Vector512<short> lower, Vector512<short> upper) => Vector512.Narrow(lower, upper);
177public static Vector512<ushort> Invoke(Vector512<uint> lower, Vector512<uint> upper) => Vector512.Narrow(lower, upper);
188public static Vector512<short> Invoke(Vector512<int> lower, Vector512<int> upper) => Vector512.Narrow(lower, upper);
199public static Vector512<uint> Invoke(Vector512<ulong> lower, Vector512<ulong> upper) => Vector512.Narrow(lower, upper);
210public static Vector512<int> Invoke(Vector512<long> lower, Vector512<long> upper) => Vector512.Narrow(lower, upper);
221public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMax.cs (39)
87public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
89Vector512<T> useResult = Vector512.GreaterThan(result, current);
90Vector512<T> equalMask = Vector512.Equals(result, current);
92if (equalMask != Vector512<T>.Zero)
94Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
98Vector512<T> currentNegative = IsNegative(current);
99Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
148if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
153static Vector512<T> CreateVector512T(int i) =>
160Vector512<T> resultIndex =
162sizeof(T) == sizeof(long) ? Vector512<long>.Indices.As<long, T>() :
163sizeof(T) == sizeof(int) ? Vector512<int>.Indices.As<int, T>() :
164sizeof(T) == sizeof(short) ? Vector512<short>.Indices.As<short, T>() :
165Vector512<byte>.Indices.As<byte, T>();
172Vector512<T> currentIndex = resultIndex;
173Vector512<T> increment = CreateVector512T(Vector512<T>.Count);
177Vector512<T> result = Vector512.LoadUnsafe(ref xRef);
178Vector512<T> current;
180Vector512<T> nanMask;
184if (nanMask != Vector512<T>.Zero)
190int oneVectorFromEnd = x.Length - Vector512<T>.Count;
191int i = Vector512<T>.Count;
203if (nanMask != Vector512<T>.Zero)
211i += Vector512<T>.Count;
217current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
223if (nanMask != Vector512<T>.Zero)
449private static int IndexOfFirstMatch<T>(Vector512<T> mask) =>
460private static unsafe Vector512<T> IndexLessThan<T>(Vector512<T> indices1, Vector512<T> indices2) =>
504private static unsafe Vector512<T> ElementWiseSelect<T>(Vector512<T> mask, Vector512<T> left, Vector512<T> right)
System\Numerics\Tensors\netcore\TensorPrimitives.Tan.cs (48)
87public static Vector512<T> Invoke(Vector512<T> x)
191public static Vector512<float> Invoke(Vector512<float> x)
193Vector512<float> uxMasked = Vector512.Abs(x);
199Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(2 / float.Pi), Vector512.Create(AlmHuge));
200Vector512<uint> odd = dn.AsUInt32() << 31;
203Vector512<float> f = uxMasked;
209Vector512<float> f2 = f * f;
210Vector512<float> f4 = f2 * f2;
211Vector512<float> f8 = f4 * f4;
212Vector512<float> f12 = f8 * f4;
213Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
214Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
215Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
216Vector512<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
217Vector512<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector512.Create(C7));
218Vector512<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
220Vector512<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask))).AsSingle();
221return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<uint>.Zero).AsSingle(),
348public static Vector512<double> Invoke(Vector512<double> x)
350Vector512<double> uxMasked = Vector512.Abs(x);
357Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
358Vector512<ulong> odd = dn.AsUInt64() << 63;
362Vector512<double> f = uxMasked;
368Vector512<double> g = f * f;
369Vector512<double> g2 = g * g;
370Vector512<double> g3 = g * g2;
371Vector512<double> g5 = g3 * g2;
372Vector512<double> g7 = g5 * g2;
373Vector512<double> g9 = g7 * g2;
374Vector512<double> g11 = g9 * g2;
375Vector512<double> g13 = g11 * g2;
376Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C3), g, Vector512.Create(C1));
377Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C7), g, Vector512.Create(C5));
378Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C11), g, Vector512.Create(C9));
379Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C15), g, Vector512.Create(C13));
380Vector512<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C19), g, Vector512.Create(C17));
381Vector512<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C23), g, Vector512.Create(C21));
382Vector512<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C27), g, Vector512.Create(C25));
383Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
384Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
385Vector512<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
386Vector512<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
387Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
389Vector512<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask))).AsDouble();
390return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<ulong>.Zero).AsDouble(),