System\Numerics\Tensors\netcore\Common\TensorPrimitives.IAggregationOperator.cs (148)
18static abstract T Invoke(Vector128<T> x);
33public Vector128<T> Invoke(Vector128<T> x) => TOperator.Invoke(x);
112if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable)
116if (remainder >= (uint)Vector128<T>.Count)
152Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
156Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
157Vector128<T> end = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
161if (remainder > (uint)(Vector128<T>.Count * 8))
187misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
190Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
200misalignment = (uint)Vector128<T>.Count;
205Vector128<T> vector1;
206Vector128<T> vector2;
207Vector128<T> vector3;
208Vector128<T> vector4;
212while (remainder >= (uint)(Vector128<T>.Count * 8))
216vector1 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
217vector2 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
218vector3 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
219vector4 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
228vector1 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
229vector2 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
230vector3 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
231vector4 = transform.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
241xPtr += (uint)(Vector128<T>.Count * 8);
243remainder -= (uint)(Vector128<T>.Count * 8);
264(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
272Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
279Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
286Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
293Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
300Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
307Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
314Vector128<T> vector = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)));
799Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
800Vector128<T> end = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
802end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
813Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
946Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
947Vector128<T> end = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
949end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
960Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
1046Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
1047Vector128<T> end = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
1049end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
1059Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
1129Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
1130Vector128<T> end = transform.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
1132end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
1142Vector128<T> beg = transform.Invoke(Vector128.LoadUnsafe(ref xRef));
1231if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
1235if (remainder >= (uint)Vector128<T>.Count)
1272Vector128<T> vresult = Vector128.Create(TAggregationOperator.IdentityValue);
1276Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1278Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1279Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
1283if (remainder > (uint)(Vector128<T>.Count * 8))
1311misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
1316Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
1326misalignment = (uint)Vector128<T>.Count;
1334Vector128<T> vector1;
1335Vector128<T> vector2;
1336Vector128<T> vector3;
1337Vector128<T> vector4;
1341while (remainder >= (uint)(Vector128<T>.Count * 8))
1345vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
1346Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
1347vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
1348Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
1349vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
1350Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
1351vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
1352Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
1361vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
1362Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
1363vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
1364Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
1365vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
1366Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
1367vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
1368Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
1378xPtr += (uint)(Vector128<T>.Count * 8);
1379yPtr += (uint)(Vector128<T>.Count * 8);
1381remainder -= (uint)(Vector128<T>.Count * 8);
1403(nuint blocks, nuint trailing) = Math.DivRem(remainder, (nuint)Vector128<T>.Count);
1411Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
1412Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
1419Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
1420Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
1427Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
1428Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
1435Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
1436Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
1443Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
1444Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
1451Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
1452Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
1459Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 1)),
1460Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 1)));
1998Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2000Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2001Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
2003end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
2014Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2151Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2153Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2154Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
2156end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
2167Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2257Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2259Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2260Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
2262end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
2272Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2348Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2350Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2351Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
2353end = Vector128.ConditionalSelect(CreateRemainderMaskVector128<T>((int)(remainder % (uint)Vector128<T>.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue));
2363Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2391private static Vector128<T> CreateAlignmentMaskVector128<T>(int count)
2499private static Vector128<T> CreateRemainderMaskVector128<T>(int count)
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBinaryOperator.cs (238)
18static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y);
121if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TBinaryOperator.Vectorizable)
123if (remainder >= (uint)Vector128<T>.Count)
160Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
162Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
163Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
165if (remainder > (uint)(Vector128<T>.Count * 8))
192nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
198Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
203Vector128<T> vector1;
204Vector128<T> vector2;
205Vector128<T> vector3;
206Vector128<T> vector4;
213while (remainder >= (uint)(Vector128<T>.Count * 8))
217vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
218Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
219vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
220Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
221vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
222Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
223vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
224Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
226vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
227vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
228vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
229vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
233vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
234Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
235vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
236Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
237vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
238Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
239vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
240Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
242vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
243vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
244vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
245vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
250xPtr += (uint)(Vector128<T>.Count * 8);
251yPtr += (uint)(Vector128<T>.Count * 8);
252dPtr += (uint)(Vector128<T>.Count * 8);
254remainder -= (uint)(Vector128<T>.Count * 8);
259while (remainder >= (uint)(Vector128<T>.Count * 8))
263vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
264Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)));
265vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
266Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)));
267vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
268Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)));
269vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
270Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)));
272vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
273vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
274vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
275vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
279vector1 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
280Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)));
281vector2 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
282Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)));
283vector3 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
284Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)));
285vector4 = TBinaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
286Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)));
288vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
289vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
290vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
291vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
296xPtr += (uint)(Vector128<T>.Count * 8);
297yPtr += (uint)(Vector128<T>.Count * 8);
298dPtr += (uint)(Vector128<T>.Count * 8);
300remainder -= (uint)(Vector128<T>.Count * 8);
320remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
322switch (remainder / (uint)Vector128<T>.Count)
326Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
327Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)));
328vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
334Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
335Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)));
336vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
342Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
343Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)));
344vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
350Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
351Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)));
352vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
358Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
359Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)));
360vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
366Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
367Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)));
368vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
374Vector128<T> vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
375Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)));
376vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
383end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
986Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
988Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
989Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
992end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1002Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1150Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1152Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1153Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
1156end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1166Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1259Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1261Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1262Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
1265end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1274Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1347Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1349Vector128<T> end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1350Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count));
1353end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1362Vector128<T> beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1453if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TTransformOperator.Vectorizable && TBinaryOperator.Vectorizable)
1455if (remainder >= (uint)Vector128<T>.Count)
1492Vector128<T> yVec = Vector128.Create(y);
1494Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
1496Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
1499if (remainder > (uint)(Vector128<T>.Count * 8))
1524nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
1529Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
1534Vector128<T> vector1;
1535Vector128<T> vector2;
1536Vector128<T> vector3;
1537Vector128<T> vector4;
1544while (remainder >= (uint)(Vector128<T>.Count * 8))
1548vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
1550vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
1552vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
1554vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
1557vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
1558vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
1559vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
1560vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
1564vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
1566vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
1568vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
1570vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
1573vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
1574vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
1575vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
1576vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
1581xPtr += (uint)(Vector128<T>.Count * 8);
1582dPtr += (uint)(Vector128<T>.Count * 8);
1584remainder -= (uint)(Vector128<T>.Count * 8);
1589while (remainder >= (uint)(Vector128<T>.Count * 8))
1593vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0))),
1595vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1))),
1597vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2))),
1599vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3))),
1602vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
1603vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
1604vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
1605vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
1609vector1 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4))),
1611vector2 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5))),
1613vector3 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6))),
1615vector4 = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7))),
1618vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
1619vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
1620vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
1621vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
1626xPtr += (uint)(Vector128<T>.Count * 8);
1627dPtr += (uint)(Vector128<T>.Count * 8);
1629remainder -= (uint)(Vector128<T>.Count * 8);
1648remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
1650switch (remainder / (uint)Vector128<T>.Count)
1654Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8))),
1656vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
1662Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7))),
1664vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
1670Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6))),
1672vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
1678Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5))),
1680vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
1686Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4))),
1688vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
1694Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3))),
1696vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
1702Vector128<T> vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2))),
1704vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
1711end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
2308Vector128<T> yVec = Vector128.Create(y);
2310Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2312Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
2316end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2326Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2476Vector128<T> yVec = Vector128.Create(y);
2478Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2480Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
2484end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2494Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2589Vector128<T> yVec = Vector128.Create(y);
2591Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2593Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
2597end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2606Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2681Vector128<T> yVec = Vector128.Create(y);
2683Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2685Vector128<T> end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count)),
2689end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2698Vector128<T> beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)),
2737private static T HorizontalAggregate<T, TAggregate>(Vector128<T> x) where TAggregate : struct, IBinaryOperator<T>
2773public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => TOperator.Invoke(y, x);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IBooleanUnaryOperator.cs (47)
19static abstract Vector128<T> Invoke(Vector128<T> x);
28static abstract bool ShouldEarlyExit(Vector128<T> result);
40public static bool ShouldEarlyExit(Vector128<T> result) => Vector128.AnyWhereAllBitsSet(result);
67public static bool ShouldEarlyExit(Vector128<T> result) =>
68typeof(T) == typeof(float) ? Vector128.EqualsAny(result.AsUInt32(), Vector128<uint>.Zero) :
69typeof(T) == typeof(double) ? Vector128.EqualsAny(result.AsUInt64(), Vector128<ulong>.Zero) :
70Vector128.EqualsAny(result, Vector128<T>.Zero);
156if (Vector128.IsHardwareAccelerated && TOperator.Vectorizable && Vector128<T>.IsSupported)
158oneVectorFromEnd = x.Length - Vector128<T>.Count;
169i += Vector128<T>.Count;
175TAnyAll.ShouldEarlyExit(TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count)))))
296if (Vector128.IsHardwareAccelerated && TOperator.Vectorizable && Vector128<T>.IsSupported)
298int vectorFromEnd = x.Length - Vector128<T>.Count;
305i += Vector128<T>.Count;
312i = x.Length - Vector128<T>.Count;
320Vector128<byte> v = TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i)).AsByte();
322(v & Vector128<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
410if (Vector128.IsHardwareAccelerated && TOperator.Vectorizable && Vector128<T>.IsSupported)
412int vectorsFromEnd = x.Length - (Vector128<T>.Count * sizeof(T));
419i += Vector128<T>.Count * sizeof(T);
426i = x.Length - (Vector128<T>.Count * sizeof(T));
434Vector128<byte> v =
437TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<T>.Count))).AsUInt16());
439(v & Vector128<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
535if (Vector128.IsHardwareAccelerated && TOperator.Vectorizable && Vector128<T>.IsSupported)
537int vectorsFromEnd = x.Length - (Vector128<T>.Count * sizeof(T));
544i += Vector128<T>.Count * sizeof(T);
551i = x.Length - (Vector128<T>.Count * sizeof(T));
559Vector128<byte> v =
563TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<T>.Count))).AsUInt32()),
565TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector128<T>.Count)))).AsUInt32(),
566TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector128<T>.Count)))).AsUInt32()));
568(v & Vector128<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
682if (Vector128.IsHardwareAccelerated && TOperator.Vectorizable && Vector128<T>.IsSupported)
686int vectorsFromEnd = x.Length - (Vector128<T>.Count * sizeof(T));
693i += Vector128<T>.Count * sizeof(T);
700i = x.Length - (Vector128<T>.Count * sizeof(T));
708Vector128<byte> v =
713TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128<T>.Count))).AsUInt64()),
715TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (2 * Vector128<T>.Count)))).AsUInt64(),
716TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (3 * Vector128<T>.Count)))).AsUInt64())),
719TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (4 * Vector128<T>.Count)))).AsUInt64(),
720TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (5 * Vector128<T>.Count)))).AsUInt64()),
722TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (6 * Vector128<T>.Count)))).AsUInt64(),
723TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(i + (7 * Vector128<T>.Count)))).AsUInt64())));
725(v & Vector128<byte>.One).StoreUnsafe(ref Unsafe.As<bool, byte>(ref destinationRef), (uint)i);
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IStatefulUnaryOperator.cs (100)
18Vector128<T> Invoke(Vector128<T> x);
87if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TStatefulUnaryOperator.Vectorizable)
89if (remainder >= (uint)Vector128<T>.Count)
125Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
126Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
128if (remainder > (uint)(Vector128<T>.Count * 8))
153nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
158Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
163Vector128<T> vector1;
164Vector128<T> vector2;
165Vector128<T> vector3;
166Vector128<T> vector4;
173while (remainder >= (uint)(Vector128<T>.Count * 8))
177vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
178vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
179vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
180vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
182vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
183vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
184vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
185vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
189vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
190vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
191vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
192vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
194vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
195vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
196vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
197vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
202xPtr += (uint)(Vector128<T>.Count * 8);
203dPtr += (uint)(Vector128<T>.Count * 8);
205remainder -= (uint)(Vector128<T>.Count * 8);
210while (remainder >= (uint)(Vector128<T>.Count * 8))
214vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)));
215vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)));
216vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)));
217vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)));
219vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
220vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
221vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
222vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
226vector1 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)));
227vector2 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)));
228vector3 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)));
229vector4 = op.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)));
231vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
232vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
233vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
234vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
239xPtr += (uint)(Vector128<T>.Count * 8);
240dPtr += (uint)(Vector128<T>.Count * 8);
242remainder -= (uint)(Vector128<T>.Count * 8);
261remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
263switch (remainder / (uint)Vector128<T>.Count)
267Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)));
268vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
274Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)));
275vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
281Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)));
282vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
288Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)));
289vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
295Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)));
296vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
302Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)));
303vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
309Vector128<T> vector = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)));
310vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
317end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
855Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
856Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
859end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
869Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
999Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
1000Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
1003end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1013Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
1096Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
1097Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
1100end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1109Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
1176Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
1177Vector128<T> end = op.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count));
1180end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1189Vector128<T> beg = op.Invoke(Vector128.LoadUnsafe(ref xRef));
System\Numerics\Tensors\netcore\Common\TensorPrimitives.ITernaryOperator.cs (407)
18static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z);
28public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z) => TOperator.Invoke(x, z, y);
108if (TTernaryOperator.Vectorizable && Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
110if (remainder >= (uint)Vector128<T>.Count)
148Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
151Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
152Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
153Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
155if (remainder > (uint)(Vector128<T>.Count * 8))
184nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
191Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
196Vector128<T> vector1;
197Vector128<T> vector2;
198Vector128<T> vector3;
199Vector128<T> vector4;
206while (remainder >= (uint)(Vector128<T>.Count * 8))
210vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
211Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
212Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
213vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
214Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
215Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
216vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
217Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
218Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
219vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
220Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
221Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
223vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
224vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
225vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
226vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
230vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
231Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
232Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
233vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
234Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
235Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
236vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
237Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
238Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
239vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
240Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
241Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
243vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
244vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
245vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
246vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
251xPtr += (uint)(Vector128<T>.Count * 8);
252yPtr += (uint)(Vector128<T>.Count * 8);
253zPtr += (uint)(Vector128<T>.Count * 8);
254dPtr += (uint)(Vector128<T>.Count * 8);
256remainder -= (uint)(Vector128<T>.Count * 8);
261while (remainder >= (uint)(Vector128<T>.Count * 8))
265vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
266Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
267Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 0)));
268vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
269Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
270Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 1)));
271vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
272Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
273Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 2)));
274vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
275Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
276Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 3)));
278vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
279vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
280vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
281vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
285vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
286Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
287Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 4)));
288vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
289Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
290Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 5)));
291vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
292Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
293Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 6)));
294vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
295Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
296Vector128.Load(zPtr + (uint)(Vector128<T>.Count * 7)));
298vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
299vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
300vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
301vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
306xPtr += (uint)(Vector128<T>.Count * 8);
307yPtr += (uint)(Vector128<T>.Count * 8);
308zPtr += (uint)(Vector128<T>.Count * 8);
309dPtr += (uint)(Vector128<T>.Count * 8);
311remainder -= (uint)(Vector128<T>.Count * 8);
332remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
334switch (remainder / (uint)Vector128<T>.Count)
338Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
339Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
340Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 8)));
341vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
347Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
348Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
349Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 7)));
350vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
356Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
357Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
358Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 6)));
359vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
365Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
366Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
367Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 5)));
368vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
374Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
375Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
376Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 4)));
377vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
383Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
384Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
385Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 3)));
386vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
392Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
393Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
394Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128<T>.Count * 2)));
395vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
402end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
1070Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1073Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1074Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
1075Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
1078end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1088Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1254Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1257Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1258Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
1259Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
1262end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1272Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1375Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1378Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1379Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
1380Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
1383end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1392Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1471Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1474Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1475Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
1476Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128<T>.Count));
1479end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
1488Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1583if (TTernaryOperator.Vectorizable && Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
1585if (remainder >= (uint)Vector128<T>.Count)
1623Vector128<T> zVec = Vector128.Create(z);
1625Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
1628Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
1629Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
1632if (remainder > (uint)(Vector128<T>.Count * 8))
1659nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
1665Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
1670Vector128<T> vector1;
1671Vector128<T> vector2;
1672Vector128<T> vector3;
1673Vector128<T> vector4;
1680while (remainder >= (uint)(Vector128<T>.Count * 8))
1684vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
1685Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
1687vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
1688Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
1690vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
1691Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
1693vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
1694Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
1697vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
1698vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
1699vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
1700vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
1704vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
1705Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
1707vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
1708Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
1710vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
1711Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
1713vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
1714Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
1717vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
1718vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
1719vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
1720vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
1725xPtr += (uint)(Vector128<T>.Count * 8);
1726yPtr += (uint)(Vector128<T>.Count * 8);
1727dPtr += (uint)(Vector128<T>.Count * 8);
1729remainder -= (uint)(Vector128<T>.Count * 8);
1734while (remainder >= (uint)(Vector128<T>.Count * 8))
1738vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
1739Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 0)),
1741vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
1742Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 1)),
1744vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
1745Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 2)),
1747vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
1748Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 3)),
1751vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
1752vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
1753vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
1754vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
1758vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
1759Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 4)),
1761vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
1762Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 5)),
1764vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
1765Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 6)),
1767vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
1768Vector128.Load(yPtr + (uint)(Vector128<T>.Count * 7)),
1771vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
1772vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
1773vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
1774vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
1779xPtr += (uint)(Vector128<T>.Count * 8);
1780yPtr += (uint)(Vector128<T>.Count * 8);
1781dPtr += (uint)(Vector128<T>.Count * 8);
1783remainder -= (uint)(Vector128<T>.Count * 8);
1803remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
1805switch (remainder / (uint)Vector128<T>.Count)
1809Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
1810Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 8)),
1812vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
1818Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
1819Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 7)),
1821vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
1827Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
1828Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 6)),
1830vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
1836Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
1837Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 5)),
1839vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
1845Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
1846Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 4)),
1848vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
1854Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
1855Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 3)),
1857vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
1863Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
1864Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128<T>.Count * 2)),
1866vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
1873end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
2535Vector128<T> zVec = Vector128.Create(z);
2537Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2540Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2541Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
2545end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2555Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2723Vector128<T> zVec = Vector128.Create(z);
2725Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2728Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2729Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
2733end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2743Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2848Vector128<T> zVec = Vector128.Create(z);
2850Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2853Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2854Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
2858end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2867Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2948Vector128<T> zVec = Vector128.Create(z);
2950Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
2953Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
2954Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128<T>.Count),
2958end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
2967Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
3069if (TTernaryOperator.Vectorizable && Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported)
3071if (remainder >= (uint)Vector128<T>.Count)
3109Vector128<T> yVec = Vector128.Create(y);
3110Vector128<T> zVec = Vector128.Create(z);
3112Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
3115Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
3119if (remainder > (uint)(Vector128<T>.Count * 8))
3144nuint misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)dPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
3149Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<T>)) == 0);
3154Vector128<T> vector1;
3155Vector128<T> vector2;
3156Vector128<T> vector3;
3157Vector128<T> vector4;
3164while (remainder >= (uint)(Vector128<T>.Count * 8))
3168vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
3171vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
3174vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
3177vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
3181vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 0));
3182vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 1));
3183vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 2));
3184vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 3));
3188vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
3191vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
3194vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
3197vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
3201vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 4));
3202vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 5));
3203vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 6));
3204vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<T>.Count * 7));
3209xPtr += (uint)(Vector128<T>.Count * 8);
3210dPtr += (uint)(Vector128<T>.Count * 8);
3212remainder -= (uint)(Vector128<T>.Count * 8);
3217while (remainder >= (uint)(Vector128<T>.Count * 8))
3221vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 0)),
3224vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 1)),
3227vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 2)),
3230vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 3)),
3234vector1.Store(dPtr + (uint)(Vector128<T>.Count * 0));
3235vector2.Store(dPtr + (uint)(Vector128<T>.Count * 1));
3236vector3.Store(dPtr + (uint)(Vector128<T>.Count * 2));
3237vector4.Store(dPtr + (uint)(Vector128<T>.Count * 3));
3241vector1 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 4)),
3244vector2 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 5)),
3247vector3 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 6)),
3250vector4 = TTernaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<T>.Count * 7)),
3254vector1.Store(dPtr + (uint)(Vector128<T>.Count * 4));
3255vector2.Store(dPtr + (uint)(Vector128<T>.Count * 5));
3256vector3.Store(dPtr + (uint)(Vector128<T>.Count * 6));
3257vector4.Store(dPtr + (uint)(Vector128<T>.Count * 7));
3262xPtr += (uint)(Vector128<T>.Count * 8);
3263dPtr += (uint)(Vector128<T>.Count * 8);
3265remainder -= (uint)(Vector128<T>.Count * 8);
3284remainder = (remainder + (uint)(Vector128<T>.Count - 1)) & (nuint)(-Vector128<T>.Count);
3286switch (remainder / (uint)Vector128<T>.Count)
3290Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 8)),
3293vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 8));
3299Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 7)),
3302vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 7));
3308Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 6)),
3311vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 6));
3317Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 5)),
3320vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 5));
3326Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 4)),
3329vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 4));
3335Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 3)),
3338vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 3));
3344Vector128<T> vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<T>.Count * 2)),
3347vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<T>.Count * 2));
3354end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<T>.Count);
4007Vector128<T> yVec = Vector128.Create(y);
4008Vector128<T> zVec = Vector128.Create(z);
4010Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4013Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
4018end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
4028Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4197Vector128<T> yVec = Vector128.Create(y);
4198Vector128<T> zVec = Vector128.Create(z);
4200Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4203Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
4208end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
4218Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4324Vector128<T> yVec = Vector128.Create(y);
4325Vector128<T> zVec = Vector128.Create(z);
4327Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4330Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
4335end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
4344Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4426Vector128<T> yVec = Vector128.Create(y);
4427Vector128<T> zVec = Vector128.Create(z);
4429Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
4432Vector128<T> end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<T>.Count),
4437end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<T>.Count);
4446Vector128<T> beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef),
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryInputBinaryOutput.cs (40)
17static abstract (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x);
27static abstract (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x, Vector128<T> y);
38public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x, Vector128<T> y) => TOperator.Invoke(y, x);
130if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TUnaryOperator.Vectorizable)
132oneVectorFromEnd = x.Length - Vector128<T>.Count;
138(Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
142i += Vector128<T>.Count;
149i = x.Length - Vector128<T>.Count;
151(Vector128<T> first, Vector128<T> second) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
301if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TOperator.Vectorizable)
303oneVectorFromEnd = x.Length - Vector128<T>.Count;
309(Vector128<T> first, Vector128<T> second) = TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i), Vector128.LoadUnsafe(ref yRef, (uint)i));
313i += Vector128<T>.Count;
320Vector128<T> mask = Vector128.Equals(CreateRemainderMaskVector128<T>(x.Length - i), Vector128<T>.Zero);
322i = x.Length - Vector128<T>.Count;
324Vector128<T> first = Vector128.ConditionalSelect(mask,
328Vector128<T> second = Vector128.ConditionalSelect(mask,
486if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && TOperator.Vectorizable)
488oneVectorFromEnd = x.Length - Vector128<T>.Count;
491Vector128<T> yVec = Vector128.Create(y);
496(Vector128<T> first, Vector128<T> second) = TOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i), yVec);
500i += Vector128<T>.Count;
507Vector128<T> mask = Vector128.Equals(CreateRemainderMaskVector128<T>(x.Length - i), Vector128<T>.Zero);
509i = x.Length - Vector128<T>.Count;
511Vector128<T> first = Vector128.ConditionalSelect(mask,
515Vector128<T> second = Vector128.ConditionalSelect(mask,
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOneToFourOperator.cs (24)
19static abstract (Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>) Invoke(Vector128<TInput> x);
120Debug.Assert(Vector128<TInput>.IsSupported);
121Debug.Assert(Vector128<TOutput>.IsSupported);
123oneVectorFromEnd = x.Length - Vector128<TInput>.Count;
129(Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>) results = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
131results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
132results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector128<TOutput>.Count * 2)));
133results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector128<TOutput>.Count * 3)));
135i += Vector128<TInput>.Count;
142i = x.Length - Vector128<TInput>.Count;
144(Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>, Vector128<TOutput>) results = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i));
146results.Item2.StoreUnsafe(ref destinationRef, (uint)(i + Vector128<TOutput>.Count));
147results.Item3.StoreUnsafe(ref destinationRef, (uint)(i + (Vector128<TOutput>.Count * 2)));
148results.Item4.StoreUnsafe(ref destinationRef, (uint)(i + (Vector128<TOutput>.Count * 3)));
System\Numerics\Tensors\netcore\Common\TensorPrimitives.IUnaryOperator.cs (103)
35static abstract Vector128<TOutput> Invoke(Vector128<TInput> x);
45public static Vector128<T> Invoke(Vector128<T> x) => x;
127if (Vector128.IsHardwareAccelerated && Vector128<TInput>.IsSupported && Vector128<TOutput>.IsSupported && TUnaryOperator.Vectorizable && sizeof(TInput) == sizeof(TOutput))
129if (remainder >= (uint)Vector128<TInput>.Count)
165Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
166Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
168if (remainder > (uint)(Vector128<TInput>.Count * 8))
193nuint misalignment = ((uint)sizeof(Vector128<TInput>) - ((nuint)dPtr % (uint)sizeof(Vector128<TInput>))) / (uint)sizeof(TInput);
198Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128<TInput>)) == 0);
203Vector128<TOutput> vector1;
204Vector128<TOutput> vector2;
205Vector128<TOutput> vector3;
206Vector128<TOutput> vector4;
213while (remainder >= (uint)(Vector128<TInput>.Count * 8))
217vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
218vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
219vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
220vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
222vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 0));
223vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 1));
224vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 2));
225vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 3));
229vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
230vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
231vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
232vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
234vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 4));
235vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 5));
236vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 6));
237vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128<TOutput>.Count * 7));
242xPtr += (uint)(Vector128<TInput>.Count * 8);
243dPtr += (uint)(Vector128<TOutput>.Count * 8);
245remainder -= (uint)(Vector128<TInput>.Count * 8);
250while (remainder >= (uint)(Vector128<TInput>.Count * 8))
254vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 0)));
255vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 1)));
256vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 2)));
257vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 3)));
259vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 0));
260vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 1));
261vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 2));
262vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 3));
266vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 4)));
267vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 5)));
268vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 6)));
269vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128<TInput>.Count * 7)));
271vector1.Store(dPtr + (uint)(Vector128<TOutput>.Count * 4));
272vector2.Store(dPtr + (uint)(Vector128<TOutput>.Count * 5));
273vector3.Store(dPtr + (uint)(Vector128<TOutput>.Count * 6));
274vector4.Store(dPtr + (uint)(Vector128<TOutput>.Count * 7));
279xPtr += (uint)(Vector128<TInput>.Count * 8);
280dPtr += (uint)(Vector128<TOutput>.Count * 8);
282remainder -= (uint)(Vector128<TInput>.Count * 8);
301remainder = (remainder + (uint)(Vector128<TInput>.Count - 1)) & (nuint)(-Vector128<TInput>.Count);
303switch (remainder / (uint)Vector128<TInput>.Count)
307Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 8)));
308vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 8));
314Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 7)));
315vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 7));
321Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 6)));
322vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 6));
328Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 5)));
329vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 5));
335Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 4)));
336vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 4));
342Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 3)));
343vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 3));
349Vector128<TOutput> vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128<TInput>.Count * 2)));
350vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128<TOutput>.Count * 2));
357end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128<TInput>.Count);
895Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
896Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
899end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
909Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1039Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1040Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
1043end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
1053Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1136Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1137Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
1140end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
1149Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1216Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
1217Vector128<TOutput> end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128<TInput>.Count));
1220end.StoreUnsafe(ref dRef, remainder - (uint)Vector128<TOutput>.Count);
1229Vector128<TOutput> beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef));
System\Numerics\Tensors\netcore\TensorPrimitives.Clamp.cs (12)
223public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> min, Vector128<T> max) => Vector128.Clamp(x, min, max);
271public static Vector128<T> Invoke(Vector128<T> min, Vector128<T> max, Vector128<T> x) => Vector128.Clamp(x, min, max);
320public static Vector128<T> Invoke(Vector128<T> max, Vector128<T> x, Vector128<T> min) => Vector128.Clamp(x, min, max);
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertHelpers.cs (77)
168public static Vector128<float> Invoke(Vector128<int> x) => Vector128.ConvertToSingle(x);
179public static Vector128<float> Invoke(Vector128<uint> x) => Vector128.ConvertToSingle(x);
190public static Vector128<double> Invoke(Vector128<ulong> x) => Vector128.ConvertToDouble(x);
201public static Vector128<double> Invoke(Vector128<long> x) => Vector128.ConvertToDouble(x);
212public static (Vector128<double> Lower, Vector128<double> Upper) Invoke(Vector128<float> x) => Vector128.Widen(x);
223public static Vector128<float> Invoke(Vector128<double> lower, Vector128<double> upper) => Vector128.Narrow(lower, upper);
234public static (Vector128<ushort> Lower, Vector128<ushort> Upper) Invoke(Vector128<byte> x) => Vector128.Widen(x);
247public static (Vector128<uint>, Vector128<uint>, Vector128<uint>, Vector128<uint>) Invoke(Vector128<byte> x)
249(Vector128<ushort> Lower, Vector128<ushort> Upper) ushorts = Vector128.Widen(x);
250(Vector128<uint> Lower, Vector128<uint> Upper) uintsLower = Vector128.Widen(ushorts.Lower);
251(Vector128<uint> Lower, Vector128<uint> Upper) uintsUpper = Vector128.Widen(ushorts.Upper);
282public static (Vector128<float>, Vector128<float>, Vector128<float>, Vector128<float>) Invoke(Vector128<byte> x)
324public static (Vector128<short> Lower, Vector128<short> Upper) Invoke(Vector128<sbyte> x) => Vector128.Widen(x);
335public static (Vector128<uint> Lower, Vector128<uint> Upper) Invoke(Vector128<ushort> x) => Vector128.Widen(x);
346public static (Vector128<int> Lower, Vector128<int> Upper) Invoke(Vector128<short> x) => Vector128.Widen(x);
357public static (Vector128<ulong> Lower, Vector128<ulong> Upper) Invoke(Vector128<uint> x) => Vector128.Widen(x);
368public static (Vector128<long> Lower, Vector128<long> Upper) Invoke(Vector128<int> x) => Vector128.Widen(x);
392public static (Vector128<float> Lower, Vector128<float> Upper) Invoke(Vector128<short> x)
394(Vector128<int> lowerInt32, Vector128<int> upperInt32) = Vector128.Widen(x);
399static Vector128<float> HalfAsWidenedUInt32ToSingle(Vector128<uint> value)
402Vector128<uint> sign = value & Vector128.Create(SingleSignMask);
405Vector128<uint> bitValueInProcess = value;
408Vector128<uint> offsetExponent = bitValueInProcess & Vector128.Create(HalfExponentMask);
411Vector128<uint> subnormalMask = Vector128.Equals(offsetExponent, Vector128<uint>.Zero);
414Vector128<uint> infinityOrNaNMask = Vector128.Equals(offsetExponent, Vector128.Create(HalfExponentMask));
417Vector128<uint> maskedExponentLowerBound = subnormalMask & Vector128.Create(ExponentLowerBound);
420Vector128<uint> offsetMaskedExponentLowerBound = Vector128.Create(ExponentOffset) | maskedExponentLowerBound;
426offsetMaskedExponentLowerBound = Vector128.ConditionalSelect(Vector128.Equals(infinityOrNaNMask, Vector128<uint>.Zero),
437Vector128<uint> absoluteValue = (bitValueInProcess.AsSingle() - maskedExponentLowerBound.AsSingle()).AsUInt32();
573public static Vector128<ushort> Invoke(Vector128<float> lower, Vector128<float> upper)
579static Vector128<uint> SingleToHalfAsWidenedUInt32(Vector128<float> value)
581Vector128<uint> bitValue = value.AsUInt32();
584Vector128<uint> sign = Vector128.ShiftRightLogical(bitValue & Vector128.Create(SingleSignMask), 16);
587Vector128<uint> realMask = Vector128.Equals(value, value).AsUInt32();
596Vector128<uint> exponentOffset0 = Vector128.Max(value, Vector128.Create(MinExp).AsSingle()).AsUInt32();
609Vector128<uint> maskedHalfExponentForNaN = ~realMask & Vector128.Create(ExponentMask);
615Vector128<uint> newExponent = Vector128.ShiftRightLogical(bitValue, 13);
627Vector128<uint> signAndMaskedExponent = maskedHalfExponentForNaN | sign;
System\Numerics\Tensors\netcore\TensorPrimitives.ConvertTruncating.cs (28)
114public static Vector128<int> Invoke(Vector128<float> x) => Vector128.ConvertToInt32(x);
130public static Vector128<uint> Invoke(Vector128<float> x) => Vector128.ConvertToUInt32(x);
146public static Vector128<ulong> Invoke(Vector128<double> x) => Vector128.ConvertToUInt64(x);
162public static Vector128<long> Invoke(Vector128<double> x) => Vector128.ConvertToInt64(x);
173public static Vector128<byte> Invoke(Vector128<ushort> lower, Vector128<ushort> upper) => Vector128.Narrow(lower, upper);
184public static Vector128<sbyte> Invoke(Vector128<short> lower, Vector128<short> upper) => Vector128.Narrow(lower, upper);
195public static Vector128<ushort> Invoke(Vector128<uint> lower, Vector128<uint> upper) => Vector128.Narrow(lower, upper);
206public static Vector128<short> Invoke(Vector128<int> lower, Vector128<int> upper) => Vector128.Narrow(lower, upper);
217public static Vector128<uint> Invoke(Vector128<ulong> lower, Vector128<ulong> upper) => Vector128.Narrow(lower, upper);
228public static Vector128<int> Invoke(Vector128<long> lower, Vector128<long> upper) => Vector128.Narrow(lower, upper);
239public static Vector128<TTo> Invoke(Vector128<TFrom> x) => throw new NotSupportedException();
System\Numerics\Tensors\netcore\TensorPrimitives.CosineSimilarity.cs (41)
135if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
140Vector128<T> dotProductVector = Vector128<T>.Zero;
141Vector128<T> xSumOfSquaresVector = Vector128<T>.Zero;
142Vector128<T> ySumOfSquaresVector = Vector128<T>.Zero;
145int oneVectorFromEnd = x.Length - Vector128<T>.Count;
154i += Vector128<T>.Count;
161Vector128<T> remainderMask = CreateRemainderMaskVector128<T>(x.Length - i);
164Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count)) & remainderMask,
165Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128<T>.Count)) & remainderMask,
285if (Vector128.IsHardwareAccelerated && x.Length >= Vector128<short>.Count)
291Vector128<float> dotProductVector = Vector128<float>.Zero;
292Vector128<float> xSumOfSquaresVector = Vector128<float>.Zero;
293Vector128<float> ySumOfSquaresVector = Vector128<float>.Zero;
296int oneVectorFromEnd = x.Length - Vector128<short>.Count;
300(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i));
301(Vector128<float> yVecLower, Vector128<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(Vector128.LoadUnsafe(ref yRef, (uint)i));
306i += Vector128<short>.Count;
313Vector128<short> remainderMask = CreateRemainderMaskVector128<short>(x.Length - i);
315(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(
316Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<short>.Count)) & remainderMask);
317(Vector128<float> yVecLower, Vector128<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(
318Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128<short>.Count)) & remainderMask);
347private static void Update<T>(Vector128<T> xVec, Vector128<T> yVec, ref Vector128<T> dotProductVector, ref Vector128<T> xSumOfSquaresVector, ref Vector128<T> ySumOfSquaresVector) where T : INumberBase<T>
375private static T Finalize<T>(Vector128<T> dotProductVector, Vector128<T> xSumOfSquaresVector, Vector128<T> ySumOfSquaresVector) where T : IRootFunctions<T> =>
System\Numerics\Tensors\netcore\TensorPrimitives.Half.cs (32)
307source.Length >= Vector128<short>.Count;
319public static Vector128<short> Invoke(Vector128<short> x)
321(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
355public static Vector128<short> Invoke(Vector128<short> x, Vector128<short> y)
357(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
358(Vector128<float> yVecLower, Vector128<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
394public static Vector128<short> Invoke(Vector128<short> x, Vector128<short> y)
396(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
397(Vector128<float> yVecLower, Vector128<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
421public static short Invoke(Vector128<short> x)
423(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
460public static Vector128<short> Invoke(Vector128<short> x, Vector128<short> y, Vector128<short> z)
462(Vector128<float> xVecLower, Vector128<float> xVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(x);
463(Vector128<float> yVecLower, Vector128<float> yVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(y);
464(Vector128<float> zVecLower, Vector128<float> zVecUpper) = WidenHalfAsInt16ToSingleOperator.Invoke(z);
System\Numerics\Tensors\netcore\TensorPrimitives.IndexOfMax.cs (36)
35public static void Invoke(ref Vector128<T> result, Vector128<T> current, ref Vector128<T> resultIndex, Vector128<T> currentIndex)
37Vector128<T> useResult = Vector128.GreaterThan(result, current);
38Vector128<T> equalMask = Vector128.Equals(result, current);
40if (equalMask != Vector128<T>.Zero)
42Vector128<T> lessThanIndexMask = IndexLessThan(resultIndex, currentIndex);
46Vector128<T> currentNegative = IsNegative(current);
47Vector128<T> sameSign = Vector128.Equals(IsNegative(result).AsInt32(), currentNegative.AsInt32()).As<int, T>();
330if (Vector128.IsHardwareAccelerated && Vector128<T>.IsSupported && x.Length >= Vector128<T>.Count)
335static Vector128<T> CreateVector128T(int i) =>
342Vector128<T> resultIndex =
344sizeof(T) == sizeof(long) ? Vector128<long>.Indices.As<long, T>() :
345sizeof(T) == sizeof(int) ? Vector128<int>.Indices.As<int, T>() :
346sizeof(T) == sizeof(short) ? Vector128<short>.Indices.As<short, T>() :
347Vector128<byte>.Indices.As<byte, T>();
354Vector128<T> currentIndex = resultIndex;
355Vector128<T> increment = CreateVector128T(Vector128<T>.Count);
359Vector128<T> result = Vector128.LoadUnsafe(ref xRef);
360Vector128<T> current;
362Vector128<T> nanMask;
366if (nanMask != Vector128<T>.Zero)
372int oneVectorFromEnd = x.Length - Vector128<T>.Count;
373int i = Vector128<T>.Count;
385if (nanMask != Vector128<T>.Zero)
393i += Vector128<T>.Count;
399current = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128<T>.Count));
405if (nanMask != Vector128<T>.Zero)
443private static int IndexOfFirstMatch<T>(Vector128<T> mask) =>
470private static unsafe Vector128<T> ElementWiseSelect<T>(Vector128<T> mask, Vector128<T> left, Vector128<T> right)
System\Numerics\Tensors\netcore\TensorPrimitives.LeadingZeroCount.cs (17)
39public static Vector128<T> Invoke(Vector128<T> x)
43Vector128<byte> lookupVectorLow = Vector128.Create((byte)8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
44Vector128<byte> lookupVectorHigh = Vector128.Create((byte)3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
45Vector128<byte> nibbleMask = Vector128.Create<byte>(0xF);
46Vector128<byte> permuteMask = Vector128.Create<byte>(0x80);
47Vector128<byte> lowNibble = x.AsByte() & nibbleMask;
48Vector128<byte> highNibble = Sse2.ShiftRightLogical(x.AsInt32(), 4).AsByte() & nibbleMask;
49Vector128<byte> nibbleSelectMask = Sse2.CompareEqual(highNibble, Vector128<byte>.Zero);
50Vector128<byte> indexVector = Sse41.BlendVariable(highNibble, lowNibble, nibbleSelectMask) +
60Vector128<uint> lowHalf = Vector128.Create((uint)0x0000FFFF);
61Vector128<uint> x_bot16 = Sse2.Or(Sse2.ShiftLeftLogical(x.AsUInt32(), 16), lowHalf);
62Vector128<uint> x_top16 = Sse2.Or(x.AsUInt32(), lowHalf);
63Vector128<uint> lz_bot16 = Avx512CD.VL.LeadingZeroCount(x_bot16);
64Vector128<uint> lz_top16 = Avx512CD.VL.LeadingZeroCount(x_top16);
65Vector128<uint> lz_top16_shift = Sse2.ShiftLeftLogical(lz_top16, 16);
System\Numerics\Tensors\netcore\TensorPrimitives.Tan.cs (48)
68public static Vector128<T> Invoke(Vector128<T> x)
128public static Vector128<float> Invoke(Vector128<float> x)
130Vector128<float> uxMasked = Vector128.Abs(x);
136Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector128.Create(2 / float.Pi), Vector128.Create(AlmHuge));
137Vector128<uint> odd = dn.AsUInt32() << 31;
140Vector128<float> f = uxMasked;
146Vector128<float> f2 = f * f;
147Vector128<float> f4 = f2 * f2;
148Vector128<float> f8 = f4 * f4;
149Vector128<float> f12 = f8 * f4;
150Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
151Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
152Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
153Vector128<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
154Vector128<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector128.Create(C7));
155Vector128<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
157Vector128<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask))).AsSingle();
158return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<uint>.Zero).AsSingle(),
261public static Vector128<double> Invoke(Vector128<double> x)
263Vector128<double> uxMasked = Vector128.Abs(x);
270Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge));
271Vector128<ulong> odd = dn.AsUInt64() << 63;
275Vector128<double> f = uxMasked;
281Vector128<double> g = f * f;
282Vector128<double> g2 = g * g;
283Vector128<double> g3 = g * g2;
284Vector128<double> g5 = g3 * g2;
285Vector128<double> g7 = g5 * g2;
286Vector128<double> g9 = g7 * g2;
287Vector128<double> g11 = g9 * g2;
288Vector128<double> g13 = g11 * g2;
289Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C3), g, Vector128.Create(C1));
290Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C7), g, Vector128.Create(C5));
291Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C11), g, Vector128.Create(C9));
292Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C15), g, Vector128.Create(C13));
293Vector128<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C19), g, Vector128.Create(C17));
294Vector128<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C23), g, Vector128.Create(C21));
295Vector128<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C27), g, Vector128.Create(C25));
296Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
297Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
298Vector128<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
299Vector128<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
300Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
302Vector128<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask))).AsDouble();
303return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<ulong>.Zero).AsDouble(),