System\Numerics\Tensors\netcore\Common\TensorPrimitives.IAggregationOperator.cs (104)
20static abstract T Invoke(Vector512<T> x);
35public Vector512<T> Invoke(Vector512<T> x) => TOperator.Invoke(x);
72if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable)
76if (remainder >= (uint)Vector512<T>.Count)
515Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
519Vector512<T> beg = transform.Invoke(Vector512.LoadUnsafe(ref xRef));
520Vector512<T> end = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
524if (remainder > (uint)(Vector512<T>.Count * 8))
550misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
554Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
564misalignment = (uint)Vector512<T>.Count;
569Vector512<T> vector1;
570Vector512<T> vector2;
571Vector512<T> vector3;
572Vector512<T> vector4;
576while (remainder >= (uint)(Vector512<T>.Count * 8))
580vector1 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
581vector2 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
582vector3 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
583vector4 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
592vector1 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
593vector2 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
594vector3 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
595vector4 = transform.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
605xPtr += (uint)(Vector512<T>.Count * 8);
607remainder -= (uint)(Vector512<T>.Count * 8);
628(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
636Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
643Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
650Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
657Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
664Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
671Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
678Vector512<T> vector = transform.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)));
1191if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
1195if (remainder >= (uint)Vector512<T>.Count)
1686Vector512<T> vresult = Vector512.Create(TAggregationOperator.IdentityValue);
1690Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
1692Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
1693Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
1697if (remainder > (uint)(Vector512<T>.Count * 8))
1725misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)xPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
1730Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector512<T>)) == 0);
1740misalignment = (uint)Vector512<T>.Count;
1748Vector512<T> vector1;
1749Vector512<T> vector2;
1750Vector512<T> vector3;
1751Vector512<T> vector4;
1755while (remainder >= (uint)(Vector512<T>.Count * 8))
1759vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
1760Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
1761vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
1762Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
1763vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
1764Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
1765vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
1766Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
1775vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
1776Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
1777vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
1778Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
1779vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
1780Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
1781vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
1782Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
1792xPtr += (uint)(Vector512<T>.Count * 8);
1793yPtr += (uint)(Vector512<T>.Count * 8);
1795remainder -= (uint)(Vector512<T>.Count * 8);
1817(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector512<T>.Count);
1825Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
1826Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
1833Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
1834Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
1841Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
1842Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
1849Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
1850Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
1857Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
1858Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
1865Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
1866Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
1873Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 1)),
1874Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 1)));
2463private static Vector512<T> CreateAlignmentMaskVector512<T>(int count)
2571private static Vector512<T> CreateRemainderMaskVector512<T>(int count)
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBinaryOperator.cs (190)
20static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y);
85if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TBinaryOperator.Vectorizable)
87if (remainder >= (uint)Vector512<T>.Count)
644Vector512<T> beg = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
646Vector512<T> end = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
647Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count));
649if (remainder > (uint)(Vector512<T>.Count * 8))
676nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
682Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
687Vector512<T> vector1;
688Vector512<T> vector2;
689Vector512<T> vector3;
690Vector512<T> vector4;
697while (remainder >= (uint)(Vector512<T>.Count * 8))
701vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
702Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
703vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
704Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
705vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
706Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
707vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
708Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
710vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
711vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
712vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
713vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
717vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
718Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
719vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
720Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
721vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
722Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
723vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
724Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
726vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
727vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
728vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
729vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
734xPtr += (uint)(Vector512<T>.Count * 8);
735yPtr += (uint)(Vector512<T>.Count * 8);
736dPtr += (uint)(Vector512<T>.Count * 8);
738remainder -= (uint)(Vector512<T>.Count * 8);
743while (remainder >= (uint)(Vector512<T>.Count * 8))
747vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
748Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)));
749vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
750Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)));
751vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
752Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)));
753vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
754Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)));
756vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
757vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
758vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
759vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
763vector1 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
764Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)));
765vector2 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
766Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)));
767vector3 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
768Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)));
769vector4 = TBinaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
770Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)));
772vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
773vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
774vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
775vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
780xPtr += (uint)(Vector512<T>.Count * 8);
781yPtr += (uint)(Vector512<T>.Count * 8);
782dPtr += (uint)(Vector512<T>.Count * 8);
784remainder -= (uint)(Vector512<T>.Count * 8);
804remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
806switch (remainder / (uint)Vector512<T>.Count)
810Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
811Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)));
812vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
818Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
819Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)));
820vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
826Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
827Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)));
828vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
834Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
835Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)));
836vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
842Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
843Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)));
844vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
850Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
851Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)));
852vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
858Vector512<T> vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
859Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)));
860vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
867end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
1417if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
1419if (remainder >= (uint)Vector512<T>.Count)
1968Vector512<T> yVec = Vector512.Create(y);
1970Vector512<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef)),
1972Vector512<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count)),
1975if (remainder > (uint)(Vector512<T>.Count * 8))
2000nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
2005Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
2010Vector512<T> vector1;
2011Vector512<T> vector2;
2012Vector512<T> vector3;
2013Vector512<T> vector4;
2020while (remainder >= (uint)(Vector512<T>.Count * 8))
2024vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
2026vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
2028vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
2030vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
2033vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
2034vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
2035vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
2036vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
2040vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
2042vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
2044vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
2046vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
2049vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
2050vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
2051vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
2052vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
2057xPtr += (uint)(Vector512<T>.Count * 8);
2058dPtr += (uint)(Vector512<T>.Count * 8);
2060remainder -= (uint)(Vector512<T>.Count * 8);
2065while (remainder >= (uint)(Vector512<T>.Count * 8))
2069vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0))),
2071vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1))),
2073vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2))),
2075vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3))),
2078vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
2079vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
2080vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
2081vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
2085vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4))),
2087vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5))),
2089vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6))),
2091vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7))),
2094vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
2095vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
2096vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
2097vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
2102xPtr += (uint)(Vector512<T>.Count * 8);
2103dPtr += (uint)(Vector512<T>.Count * 8);
2105remainder -= (uint)(Vector512<T>.Count * 8);
2124remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
2126switch (remainder / (uint)Vector512<T>.Count)
2130Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8))),
2132vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
2138Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7))),
2140vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
2146Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6))),
2148vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
2154Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5))),
2156vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
2162Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4))),
2164vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
2170Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3))),
2172vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
2178Vector512<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2))),
2180vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
2187end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
2730private static T HorizontalAggregate<T, TAggregate>(Vector512<T> x) where TAggregate : struct, IBinaryOperator<T> =>
2775public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBooleanUnaryOperator.cs (47)
22static abstract Vector512<T> Invoke(Vector512<T> x);
31static abstract bool ShouldEarlyExit(Vector512<T> result);
43public static bool ShouldEarlyExit(Vector512<T> result) => Vector512.AnyWhereAllBitsSet(result);
78public static bool ShouldEarlyExit(Vector512<T> result) =>
79typeof(T) == typeof(float) ? Vector512.EqualsAny(result.AsUInt32(), Vector512<uint>.Zero) :
80typeof(T) == typeof(double) ? Vector512.EqualsAny(result.AsUInt64(), Vector512<ulong>.Zero) :
81Vector512.EqualsAny(result, Vector512<T>.Zero);
101if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
103oneVectorFromEnd = x.Length - Vector512<T>.Count;
114i += Vector512<T>.Count;
120TAnyAll.ShouldEarlyExit(TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count)))))
235if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
237int vectorFromEnd = x.Length - Vector512<T>.Count;
244i += Vector512<T>.Count;
251i = x.Length - Vector512<T>.Count;
259Vector512<byte> v = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i)).AsByte();
261(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
343if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
345int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
352i += Vector512<T>.Count * sizeof(T);
359i = x.Length - (Vector512<T>.Count * sizeof(T));
367Vector512<byte> v =
370TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt16());
372(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
460if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
462int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
469i += Vector512<T>.Count * sizeof(T);
476i = x.Length - (Vector512<T>.Count * sizeof(T));
484Vector512<byte> v =
488TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt32()),
490TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector512<T>.Count)))).AsUInt32(),
491TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector512<T>.Count)))).AsUInt32()));
493(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
587if (Vector512.IsHardwareAccelerated && TOperator.Vectorizable && Vector512<T>.IsSupported)
591int vectorsFromEnd = x.Length - (Vector512<T>.Count * sizeof(T));
598i += Vector512<T>.Count * sizeof(T);
605i = x.Length - (Vector512<T>.Count * sizeof(T));
613Vector512<byte> v =
618TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512<T>.Count))).AsUInt64()),
620TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector512<T>.Count)))).AsUInt64(),
621TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector512<T>.Count)))).AsUInt64())),
624TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (4 * Vector512<T>.Count)))).AsUInt64(),
625TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (5 * Vector512<T>.Count)))).AsUInt64()),
627TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (6 * Vector512<T>.Count)))).AsUInt64(),
628TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(i + (7 * Vector512<T>.Count)))).AsUInt64())));
630(v & Vector512<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IStatefulUnaryOperator.cs (80)
20Vector512<T> Invoke(Vector512<T> x);
51if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
53if (remainder >= (uint)Vector512<T>.Count)
547Vector512<T> beg = op.Invoke(Vector512.LoadUnsafe(ref xRef));
548Vector512<T> end = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count));
550if (remainder > (uint)(Vector512<T>.Count * 8))
575nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
580Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
585Vector512<T> vector1;
586Vector512<T> vector2;
587Vector512<T> vector3;
588Vector512<T> vector4;
595while (remainder >= (uint)(Vector512<T>.Count * 8))
599vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
600vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
601vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
602vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
604vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
605vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
606vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
607vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
611vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
612vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
613vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
614vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
616vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
617vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
618vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
619vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
624xPtr += (uint)(Vector512<T>.Count * 8);
625dPtr += (uint)(Vector512<T>.Count * 8);
627remainder -= (uint)(Vector512<T>.Count * 8);
632while (remainder >= (uint)(Vector512<T>.Count * 8))
636vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)));
637vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)));
638vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)));
639vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)));
641vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
642vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
643vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
644vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
648vector1 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)));
649vector2 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)));
650vector3 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)));
651vector4 = op.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)));
653vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
654vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
655vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
656vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
661xPtr += (uint)(Vector512<T>.Count * 8);
662dPtr += (uint)(Vector512<T>.Count * 8);
664remainder -= (uint)(Vector512<T>.Count * 8);
683remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
685switch (remainder / (uint)Vector512<T>.Count)
689Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)));
690vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
696Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)));
697vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
703Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)));
704vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
710Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)));
711vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
717Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)));
718vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
724Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)));
725vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
731Vector512<T> vector = op.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)));
732vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
739end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.ITernaryOperator.cs (323)
20static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z);
30public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z) => TOperator.Invoke(x, z, y);
72if (TTernaryOperator.Vectorizable && Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
74if (remainder >= (uint)Vector512<T>.Count)
694Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
697Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
698Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
699Vector512.LoadUnsafe(ref zRef, remainder - (uint)Vector512<T>.Count));
701if (remainder > (uint)(Vector512<T>.Count * 8))
730nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
737Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
742Vector512<T> vector1;
743Vector512<T> vector2;
744Vector512<T> vector3;
745Vector512<T> vector4;
752while (remainder >= (uint)(Vector512<T>.Count * 8))
756vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
757Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
758Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
759vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
760Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
761Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
762vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
763Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
764Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
765vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
766Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
767Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
769vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
770vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
771vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
772vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
776vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
777Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
778Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
779vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
780Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
781Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
782vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
783Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
784Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
785vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
786Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
787Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
789vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
790vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
791vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
792vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
797xPtr += (uint)(Vector512<T>.Count * 8);
798yPtr += (uint)(Vector512<T>.Count * 8);
799zPtr += (uint)(Vector512<T>.Count * 8);
800dPtr += (uint)(Vector512<T>.Count * 8);
802remainder -= (uint)(Vector512<T>.Count * 8);
807while (remainder >= (uint)(Vector512<T>.Count * 8))
811vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
812Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
813Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 0)));
814vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
815Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
816Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 1)));
817vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
818Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
819Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 2)));
820vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
821Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
822Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 3)));
824vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
825vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
826vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
827vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
831vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
832Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
833Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 4)));
834vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
835Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
836Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 5)));
837vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
838Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
839Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 6)));
840vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
841Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
842Vector512.Load(zPtr + (uint)(Vector512<T>.Count * 7)));
844vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
845vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
846vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
847vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
852xPtr += (uint)(Vector512<T>.Count * 8);
853yPtr += (uint)(Vector512<T>.Count * 8);
854zPtr += (uint)(Vector512<T>.Count * 8);
855dPtr += (uint)(Vector512<T>.Count * 8);
857remainder -= (uint)(Vector512<T>.Count * 8);
878remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
880switch (remainder / (uint)Vector512<T>.Count)
884Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
885Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
886Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 8)));
887vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
893Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
894Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
895Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 7)));
896vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
902Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
903Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
904Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 6)));
905vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
911Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
912Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
913Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 5)));
914vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
920Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
921Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
922Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 4)));
923vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
929Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
930Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
931Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 3)));
932vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
938Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
939Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
940Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512<T>.Count * 2)));
941vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
948end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
1547if (TTernaryOperator.Vectorizable && Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
1549if (remainder >= (uint)Vector512<T>.Count)
2161Vector512<T> zVec = Vector512.Create(z);
2163Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
2166Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
2167Vector512.LoadUnsafe(ref yRef, remainder - (uint)Vector512<T>.Count),
2170if (remainder > (uint)(Vector512<T>.Count * 8))
2197nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
2203Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
2208Vector512<T> vector1;
2209Vector512<T> vector2;
2210Vector512<T> vector3;
2211Vector512<T> vector4;
2218while (remainder >= (uint)(Vector512<T>.Count * 8))
2222vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
2223Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
2225vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
2226Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
2228vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
2229Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
2231vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
2232Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
2235vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
2236vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
2237vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
2238vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
2242vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
2243Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
2245vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
2246Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
2248vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
2249Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
2251vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
2252Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
2255vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
2256vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
2257vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
2258vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
2263xPtr += (uint)(Vector512<T>.Count * 8);
2264yPtr += (uint)(Vector512<T>.Count * 8);
2265dPtr += (uint)(Vector512<T>.Count * 8);
2267remainder -= (uint)(Vector512<T>.Count * 8);
2272while (remainder >= (uint)(Vector512<T>.Count * 8))
2276vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
2277Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 0)),
2279vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
2280Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 1)),
2282vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
2283Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 2)),
2285vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
2286Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 3)),
2289vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
2290vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
2291vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
2292vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
2296vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
2297Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 4)),
2299vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
2300Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 5)),
2302vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
2303Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 6)),
2305vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
2306Vector512.Load(yPtr + (uint)(Vector512<T>.Count * 7)),
2309vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
2310vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
2311vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
2312vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
2317xPtr += (uint)(Vector512<T>.Count * 8);
2318yPtr += (uint)(Vector512<T>.Count * 8);
2319dPtr += (uint)(Vector512<T>.Count * 8);
2321remainder -= (uint)(Vector512<T>.Count * 8);
2341remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
2343switch (remainder / (uint)Vector512<T>.Count)
2347Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
2348Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 8)),
2350vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
2356Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
2357Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 7)),
2359vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
2365Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
2366Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 6)),
2368vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
2374Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
2375Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 5)),
2377vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
2383Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
2384Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 4)),
2386vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
2392Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
2393Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 3)),
2395vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
2401Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
2402Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512<T>.Count * 2)),
2404vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
2411end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
3033if (TTernaryOperator.Vectorizable && Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported)
3035if (remainder >= (uint)Vector512<T>.Count)
3637Vector512<T> yVec = Vector512.Create(y);
3638Vector512<T> zVec = Vector512.Create(z);
3640Vector512<T> beg = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef),
3643Vector512<T> end = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<T>.Count),
3647if (remainder > (uint)(Vector512<T>.Count * 8))
3672nuint misalignment = ((uint)sizeof(Vector512<T>) - ((nuint)dPtr % (uint)sizeof(Vector512<T>))) / (uint)sizeof(T);
3677Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<T>)) == 0);
3682Vector512<T> vector1;
3683Vector512<T> vector2;
3684Vector512<T> vector3;
3685Vector512<T> vector4;
3692while (remainder >= (uint)(Vector512<T>.Count * 8))
3696vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
3699vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
3702vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
3705vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
3709vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 0));
3710vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 1));
3711vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 2));
3712vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 3));
3716vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
3719vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
3722vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
3725vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
3729vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 4));
3730vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 5));
3731vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 6));
3732vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<T>.Count * 7));
3737xPtr += (uint)(Vector512<T>.Count * 8);
3738dPtr += (uint)(Vector512<T>.Count * 8);
3740remainder -= (uint)(Vector512<T>.Count * 8);
3745while (remainder >= (uint)(Vector512<T>.Count * 8))
3749vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 0)),
3752vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 1)),
3755vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 2)),
3758vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 3)),
3762vector1.Store(dPtr + (uint)(Vector512<T>.Count * 0));
3763vector2.Store(dPtr + (uint)(Vector512<T>.Count * 1));
3764vector3.Store(dPtr + (uint)(Vector512<T>.Count * 2));
3765vector4.Store(dPtr + (uint)(Vector512<T>.Count * 3));
3769vector1 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 4)),
3772vector2 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 5)),
3775vector3 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 6)),
3778vector4 = TTernaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<T>.Count * 7)),
3782vector1.Store(dPtr + (uint)(Vector512<T>.Count * 4));
3783vector2.Store(dPtr + (uint)(Vector512<T>.Count * 5));
3784vector3.Store(dPtr + (uint)(Vector512<T>.Count * 6));
3785vector4.Store(dPtr + (uint)(Vector512<T>.Count * 7));
3790xPtr += (uint)(Vector512<T>.Count * 8);
3791dPtr += (uint)(Vector512<T>.Count * 8);
3793remainder -= (uint)(Vector512<T>.Count * 8);
3812remainder = (remainder + (uint)(Vector512<T>.Count - 1)) & (nuint)(-Vector512<T>.Count);
3814switch (remainder / (uint)Vector512<T>.Count)
3818Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 8)),
3821vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 8));
3827Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 7)),
3830vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 7));
3836Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 6)),
3839vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 6));
3845Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 5)),
3848vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 5));
3854Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 4)),
3857vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 4));
3863Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 3)),
3866vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 3));
3872Vector512<T> vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<T>.Count * 2)),
3875vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<T>.Count * 2));
3882end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<T>.Count);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryInputBinaryOutput.cs (40)
20static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x);
30static abstract (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x, Vector512<T> y);
41public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x, Vector512<T> y) => TOperator.Invoke(y, x);
71if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TUnaryOperator.Vectorizable)
73oneVectorFromEnd = x.Length - Vector512<T>.Count;
79(Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
83i += Vector512<T>.Count;
90i = x.Length - Vector512<T>.Count;
92(Vector512<T> first, Vector512<T> second) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
206if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TOperator.Vectorizable)
208oneVectorFromEnd = x.Length - Vector512<T>.Count;
214(Vector512<T> first, Vector512<T> second) = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), Vector512.LoadUnsafe(ref yRef, (uint)i));
218i += Vector512<T>.Count;
225Vector512<T> mask = Vector512.Equals(CreateRemainderMaskVector512<T>(x.Length - i), Vector512<T>.Zero);
227i = x.Length - Vector512<T>.Count;
229Vector512<T> first = Vector512.ConditionalSelect(mask,
233Vector512<T> second = Vector512.ConditionalSelect(mask,
387if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && TOperator.Vectorizable)
389oneVectorFromEnd = x.Length - Vector512<T>.Count;
392Vector512<T> yVec = Vector512.Create(y);
397(Vector512<T> first, Vector512<T> second) = TOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), yVec);
401i += Vector512<T>.Count;
408Vector512<T> mask = Vector512.Equals(CreateRemainderMaskVector512<T>(x.Length - i), Vector512<T>.Zero);
410i = x.Length - Vector512<T>.Count;
412Vector512<T> first = Vector512.ConditionalSelect(mask,
416Vector512<T> second = Vector512.ConditionalSelect(mask,
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOneToFourOperator.cs (24)
21static abstract (Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) Invoke(Vector512<TInput> x);
46Debug.Assert(Vector512<TInput>.IsSupported);
47Debug.Assert(Vector512<TOutput>.IsSupported);
49oneVectorFromEnd = x.Length - Vector512<TInput>.Count;
55(Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) results = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
57results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
58results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 2)));
59results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 3)));
61i += Vector512<TInput>.Count;
68i = x.Length - Vector512<TInput>.Count;
70(Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>, Vector512<TOutput>) results = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i));
72results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector512<TOutput>.Count));
73results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 2)));
74results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector512<TOutput>.Count * 3)));
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOperator.cs (83)
37static abstract Vector512<TOutput> Invoke(Vector512<TInput> x);
47public static Vector512<T> Invoke(Vector512<T> x) => x;
91if (Vector512.IsHardwareAccelerated && Vector512<TInput>.IsSupported && Vector512<TOutput>.IsSupported && TUnaryOperator.Vectorizable && sizeof(TInput) == sizeof(TOutput))
93if (remainder >= (uint)Vector512<TInput>.Count)
587Vector512<TOutput> beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef));
588Vector512<TOutput> end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512<TInput>.Count));
590if (remainder > (uint)(Vector512<TInput>.Count * 8))
615nuint misalignment = ((uint)sizeof(Vector512<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector512<TInput>))) / (uint)sizeof(TInput);
620Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512<TInput>)) == 0);
625Vector512<TOutput> vector1;
626Vector512<TOutput> vector2;
627Vector512<TOutput> vector3;
628Vector512<TOutput> vector4;
635while (remainder >= (uint)(Vector512<TInput>.Count * 8))
639vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
640vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
641vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
642vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
644vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 0));
645vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 1));
646vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 2));
647vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 3));
651vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
652vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
653vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
654vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
656vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 4));
657vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 5));
658vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 6));
659vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512<TOutput>.Count * 7));
664xPtr += (uint)(Vector512<TInput>.Count * 8);
665dPtr += (uint)(Vector512<TInput>.Count * 8);
667remainder -= (uint)(Vector512<TInput>.Count * 8);
672while (remainder >= (uint)(Vector512<TInput>.Count * 8))
676vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 0)));
677vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 1)));
678vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 2)));
679vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 3)));
681vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 0));
682vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 1));
683vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 2));
684vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 3));
688vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 4)));
689vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 5)));
690vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 6)));
691vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512<TInput>.Count * 7)));
693vector1.Store(dPtr + (uint)(Vector512<TOutput>.Count * 4));
694vector2.Store(dPtr + (uint)(Vector512<TOutput>.Count * 5));
695vector3.Store(dPtr + (uint)(Vector512<TOutput>.Count * 6));
696vector4.Store(dPtr + (uint)(Vector512<TOutput>.Count * 7));
701xPtr += (uint)(Vector512<TInput>.Count * 8);
702dPtr += (uint)(Vector512<TOutput>.Count * 8);
704remainder -= (uint)(Vector512<TInput>.Count * 8);
723remainder = (remainder + (uint)(Vector512<TInput>.Count - 1)) & (nuint)(-Vector512<TInput>.Count);
725switch (remainder / (uint)Vector512<TInput>.Count)
729Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 8)));
730vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 8));
736Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 7)));
737vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 7));
743Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 6)));
744vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 6));
750Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 5)));
751vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 5));
757Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 4)));
758vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 4));
764Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 3)));
765vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 3));
771Vector512<TOutput> vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512<TInput>.Count * 2)));
772vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512<TOutput>.Count * 2));
779end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512<TOutput>.Count);
System\Numerics\Tensors\netcore\TensorPrimitives.Clamp.cs (12)
227public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> min, Vector512<T> max) => Vector512.Clamp(x, min, max);
275public static Vector512<T> Invoke(Vector512<T> min, Vector512<T> max, Vector512<T> x) => Vector512.Clamp(x, min, max);
324public static Vector512<T> Invoke(Vector512<T> max, Vector512<T> x, Vector512<T> min) => Vector512.Clamp(x, min, max);
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertHelpers.cs (77)
172public static Vector512<float> Invoke(Vector512<int> x) => Vector512.ConvertToSingle(x);
183public static Vector512<float> Invoke(Vector512<uint> x) => Vector512.ConvertToSingle(x);
194public static Vector512<double> Invoke(Vector512<ulong> x) => Vector512.ConvertToDouble(x);
205public static Vector512<double> Invoke(Vector512<long> x) => Vector512.ConvertToDouble(x);
216public static (Vector512<double> Lower, Vector512<double> Upper) Invoke(Vector512<float> x) => Vector512.Widen(x);
227public static Vector512<float> Invoke(Vector512<double> lower, Vector512<double> upper) => Vector512.Narrow(lower, upper);
238public static (Vector512<ushort> Lower, Vector512<ushort> Upper) Invoke(Vector512<byte> x) => Vector512.Widen(x);
267public static (Vector512<uint>, Vector512<uint>, Vector512<uint>, Vector512<uint>) Invoke(Vector512<byte> x)
269(Vector512<ushort> Lower, Vector512<ushort> Upper) ushorts = Vector512.Widen(x);
270(Vector512<uint> Lower, Vector512<uint> Upper) uintsLower = Vector512.Widen(ushorts.Lower);
271(Vector512<uint> Lower, Vector512<uint> Upper) uintsUpper = Vector512.Widen(ushorts.Upper);
308public static (Vector512<float>, Vector512<float>, Vector512<float>, Vector512<float>) Invoke(Vector512<byte> x)
328public static (Vector512<short> Lower, Vector512<short> Upper) Invoke(Vector512<sbyte> x) => Vector512.Widen(x);
339public static (Vector512<uint> Lower, Vector512<uint> Upper) Invoke(Vector512<ushort> x) => Vector512.Widen(x);
350public static (Vector512<int> Lower, Vector512<int> Upper) Invoke(Vector512<short> x) => Vector512.Widen(x);
361public static (Vector512<ulong> Lower, Vector512<ulong> Upper) Invoke(Vector512<uint> x) => Vector512.Widen(x);
372public static (Vector512<long> Lower, Vector512<long> Upper) Invoke(Vector512<int> x) => Vector512.Widen(x);
498public static (Vector512<float> Lower, Vector512<float> Upper) Invoke(Vector512<short> x)
500(Vector512<int> lowerInt32, Vector512<int> upperInt32) = Vector512.Widen(x);
505static Vector512<float> HalfAsWidenedUInt32ToSingle(Vector512<uint> value)
508Vector512<uint> sign = value & Vector512.Create(SingleSignMask);
511Vector512<uint> bitValueInProcess = value;
514Vector512<uint> offsetExponent = bitValueInProcess & Vector512.Create(HalfExponentMask);
517Vector512<uint> subnormalMask = Vector512.Equals(offsetExponent, Vector512<uint>.Zero);
520Vector512<uint> infinityOrNaNMask = Vector512.Equals(offsetExponent, Vector512.Create(HalfExponentMask));
523Vector512<uint> maskedExponentLowerBound = subnormalMask & Vector512.Create(ExponentLowerBound);
526Vector512<uint> offsetMaskedExponentLowerBound = Vector512.Create(ExponentOffset) | maskedExponentLowerBound;
532offsetMaskedExponentLowerBound = Vector512.ConditionalSelect(Vector512.Equals(infinityOrNaNMask, Vector512<uint>.Zero),
543Vector512<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
703public static Vector512<ushort> Invoke(Vector512<float> lower, Vector512<float> upper)
709static Vector512<uint> SingleToHalfAsWidenedUInt32(Vector512<float> value)
711Vector512<uint> bitValue = value.AsUInt32();
714Vector512<uint> sign = Vector512.ShiftRightLogical(bitValue & Vector512.Create(SingleSignMask), 16);
717Vector512<uint> realMask = Vector512.Equals(value, value).AsUInt32();
726Vector512<uint> exponentOffset0 = Vector512.Max(value, Vector512.Create(MinExp).AsSingle()).AsUInt32();
739Vector512<uint> maskedHalfExponentForNaN = ~realMask & Vector512.Create(ExponentMask);
745Vector512<uint> newExponent = Vector512.ShiftRightLogical(bitValue, 13);
757Vector512<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertTruncating.cs (28)
116public static Vector512<int> Invoke(Vector512<float> x) => Vector512.ConvertToInt32(x);
132public static Vector512<uint> Invoke(Vector512<float> x) => Vector512.ConvertToUInt32(x);
148public static Vector512<ulong> Invoke(Vector512<double> x) => Vector512.ConvertToUInt64(x);
164public static Vector512<long> Invoke(Vector512<double> x) => Vector512.ConvertToInt64(x);
175public static Vector512<byte> Invoke(Vector512<ushort> lower, Vector512<ushort> upper) => Vector512.Narrow(lower, upper);
186public static Vector512<sbyte> Invoke(Vector512<short> lower, Vector512<short> upper) => Vector512.Narrow(lower, upper);
197public static Vector512<ushort> Invoke(Vector512<uint> lower, Vector512<uint> upper) => Vector512.Narrow(lower, upper);
208public static Vector512<short> Invoke(Vector512<int> lower, Vector512<int> upper) => Vector512.Narrow(lower, upper);
219public static Vector512<uint> Invoke(Vector512<ulong> lower, Vector512<ulong> upper) => Vector512.Narrow(lower, upper);
230public static Vector512<int> Invoke(Vector512<long> lower, Vector512<long> upper) => Vector512.Narrow(lower, upper);
241public static Vector512<TTo> Invoke(Vector512<TFrom> x) => throw new NotSupportedException();
System\Numerics\Tensors\netcore\TensorPrimitives.CosineSimilarity.cs (41)
61if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
66Vector512<T> dotProductVector = Vector512<T>.Zero;
67Vector512<T> xSumOfSquaresVector = Vector512<T>.Zero;
68Vector512<T> ySumOfSquaresVector = Vector512<T>.Zero;
71int oneVectorFromEnd = x.Length - Vector512<T>.Count;
80i += Vector512<T>.Count;
87Vector512<T> remainderMask = CreateRemainderMaskVector512<T>(x.Length - i);
90Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count)) & remainderMask,
91Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512<T>.Count)) & remainderMask,
201if (Vector512.IsHardwareAccelerated && x.Length >= Vector512<short>.Count)
207Vector512<float> dotProductVector = Vector512<float>.Zero;
208Vector512<float> xSumOfSquaresVector = Vector512<float>.Zero;
209Vector512<float> ySumOfSquaresVector = Vector512<float>.Zero;
212int oneVectorFromEnd = x.Length - Vector512<short>.Count;
216(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i));
217(Vector512<float> yVecLower, Vector512<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(Vector512.LoadUnsafe(ref yRef, (uint)i));
222i += Vector512<short>.Count;
229Vector512<short> remainderMask = CreateRemainderMaskVector512<short>(x.Length - i);
231(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(
232Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<short>.Count)) & remainderMask);
233(Vector512<float> yVecLower, Vector512<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(
234Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512<short>.Count)) & remainderMask);
363private static void Update<T>(Vector512<T> xVec, Vector512<T> yVec, ref Vector512<T> dotProductVector, ref Vector512<T> xSumOfSquaresVector, ref Vector512<T> ySumOfSquaresVector) where T : INumberBase<T>
383private static T Finalize<T>(Vector512<T> dotProductVector, Vector512<T> xSumOfSquaresVector, Vector512<T> ySumOfSquaresVector) where T : IRootFunctions<T> =>
System\Numerics\Tensors\netcore\TensorPrimitives.Half.cs (31)
335public static Vector512<short> Invoke(Vector512<short> x)
337(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
373public static Vector512<short> Invoke(Vector512<short> x, Vector512<short> y)
375(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
376(Vector512<float> yVecLower, Vector512<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
412public static Vector512<short> Invoke(Vector512<short> x, Vector512<short> y)
414(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
415(Vector512<float> yVecLower, Vector512<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
437public static short Invoke(Vector512<short> x)
439(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
480public static Vector512<short> Invoke(Vector512<short> x, Vector512<short> y, Vector512<short> z)
482(Vector512<float> xVecLower, Vector512<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
483(Vector512<float> yVecLower, Vector512<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
484(Vector512<float> zVecLower, Vector512<float> zVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(z);
System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMax.cs (39)
87public static void Invoke(ref Vector512<T> result, Vector512<T> current, ref Vector512<T> resultIndex, Vector512<T> currentIndex)
89Vector512<T> useResult = Vector512.GreaterThan(result, current);
90Vector512<T> equalMask = Vector512.Equals(result, current);
92if (equalMask != Vector512<T>.Zero)
94Vector512<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
98Vector512<T> currentNegative = IsNegative(current);
99Vector512<T> sameSign = Vector512.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
148if (Vector512.IsHardwareAccelerated && Vector512<T>.IsSupported && x.Length >= Vector512<T>.Count)
153static Vector512<T> CreateVector512T(int i) =>
160Vector512<T> resultIndex =
162sizeof(T) == sizeof(long) ? Vector512<long>.Indices.As<long, T>() :
163sizeof(T) == sizeof(int) ? Vector512<int>.Indices.As<int, T>() :
164sizeof(T) == sizeof(short) ? Vector512<short>.Indices.As<short, T>() :
165Vector512<byte>.Indices.As<byte, T>();
172Vector512<T> currentIndex = resultIndex;
173Vector512<T> increment = CreateVector512T(Vector512<T>.Count);
177Vector512<T> result = Vector512.LoadUnsafe(ref xRef);
178Vector512<T> current;
180Vector512<T> nanMask;
184if (nanMask != Vector512<T>.Zero)
190int oneVectorFromEnd = x.Length - Vector512<T>.Count;
191int i = Vector512<T>.Count;
203if (nanMask != Vector512<T>.Zero)
211i += Vector512<T>.Count;
217current = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512<T>.Count));
223if (nanMask != Vector512<T>.Zero)
449private static int IndexOfFirstMatch<T>(Vector512<T> mask) =>
460private static unsafe Vector512<T> IndexLessThan<T>(Vector512<T> indices1, Vector512<T> indices2) =>
504private static unsafe Vector512<T> ElementWiseSelect<T>(Vector512<T> mask, Vector512<T> left, Vector512<T> right)
System\Numerics\Tensors\netcore\TensorPrimitives.Tan.cs (48)
94public static Vector512<T> Invoke(Vector512<T> x)
198public static Vector512<float> Invoke(Vector512<float> x)
200Vector512<float> uxMasked = Vector512.Abs(x);
206Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(2 / float.Pi), Vector512.Create(AlmHuge));
207Vector512<uint> odd = dn.AsUInt32() << 31;
210Vector512<float> f = uxMasked;
216Vector512<float> f2 = f * f;
217Vector512<float> f4 = f2 * f2;
218Vector512<float> f8 = f4 * f4;
219Vector512<float> f12 = f8 * f4;
220Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
221Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
222Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
223Vector512<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
224Vector512<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector512.Create(C7));
225Vector512<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
227Vector512<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask))).AsSingle();
228return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<uint>.Zero).AsSingle(),
355public static Vector512<double> Invoke(Vector512<double> x)
357Vector512<double> uxMasked = Vector512.Abs(x);
364Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
365Vector512<ulong> odd = dn.AsUInt64() << 63;
369Vector512<double> f = uxMasked;
375Vector512<double> g = f * f;
376Vector512<double> g2 = g * g;
377Vector512<double> g3 = g * g2;
378Vector512<double> g5 = g3 * g2;
379Vector512<double> g7 = g5 * g2;
380Vector512<double> g9 = g7 * g2;
381Vector512<double> g11 = g9 * g2;
382Vector512<double> g13 = g11 * g2;
383Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C3), g, Vector512.Create(C1));
384Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C7), g, Vector512.Create(C5));
385Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C11), g, Vector512.Create(C9));
386Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C15), g, Vector512.Create(C13));
387Vector512<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C19), g, Vector512.Create(C17));
388Vector512<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C23), g, Vector512.Create(C21));
389Vector512<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C27), g, Vector512.Create(C25));
390Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
391Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
392Vector512<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
393Vector512<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
394Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
396Vector512<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask))).AsDouble();
397return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<ulong>.Zero).AsDouble(),