1 type derived from Sse
System.Private.CoreLib (1)
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\X86\Sse2.cs (1)
12
public abstract class Sse2 :
Sse
476 references to Sse
Microsoft.ML.CpuMath (421)
AvxIntrinsics.cs (170)
93
Sse
.StoreScalar(dst + idx[0], tmp);
95
Sse
.StoreScalar(dst + idx[1], tmp);
97
Sse
.StoreScalar(dst + idx[2], tmp);
99
Sse
.StoreScalar(dst + idx[3], tmp);
101
Sse
.StoreScalar(dst + idx[4], tmp);
103
Sse
.StoreScalar(dst + idx[5], tmp);
105
Sse
.StoreScalar(dst + idx[6], tmp);
107
Sse
.StoreScalar(dst + idx[7], tmp);
235
Vector128<float> sum =
Sse
.Add(res0.GetLower(), GetHigh(in res0));
236
Sse
.StoreAligned(pDstCurrent, sum);
319
Vector128<float> h01 =
Sse
.LoadAlignedVector128(pSrcCurrent);
321
Vector128<float> h11 =
Sse
.Shuffle(h01, h01, 0x55); // B
322
Vector128<float> h21 =
Sse
.Shuffle(h01, h01, 0xAA); // C
323
Vector128<float> h31 =
Sse
.Shuffle(h01, h01, 0xFF); // D
324
h01 =
Sse
.Shuffle(h01, h01, 0x00); // A
367
h01 =
Sse
.LoadAlignedVector128(pSrcCurrent);
369
h11 =
Sse
.Shuffle(h01, h01, 0x55); // B
370
h21 =
Sse
.Shuffle(h01, h01, 0xAA); // C
371
h31 =
Sse
.Shuffle(h01, h01, 0xFF); // D
372
h01 =
Sse
.Shuffle(h01, h01, 0x00); // A
443
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
444
dstVector =
Sse
.Add(dstVector, scalarVector128);
445
Sse
.Store(pDstCurrent, dstVector);
452
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
453
dstVector =
Sse
.AddScalar(dstVector, scalarVector128);
454
Sse
.StoreScalar(pDstCurrent, dstVector);
596
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
597
srcVector =
Sse
.Multiply(srcVector, scaleVector128);
598
Sse
.Store(pDstCurrent, srcVector);
606
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
607
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector128);
608
Sse
.StoreScalar(pDstCurrent, srcVector);
643
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
644
dstVector =
Sse
.Add(dstVector, b128);
645
dstVector =
Sse
.Multiply(dstVector, a128);
646
Sse
.Store(pDstCurrent, dstVector);
653
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
654
dstVector =
Sse
.AddScalar(dstVector, b128);
655
dstVector =
Sse
.MultiplyScalar(dstVector, a128);
656
Sse
.StoreScalar(pDstCurrent, dstVector);
691
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
692
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
694
srcVector =
Sse
.Multiply(srcVector, scaleVector128);
695
dstVector =
Sse
.Add(dstVector, srcVector);
696
Sse
.Store(pDstCurrent, dstVector);
704
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
705
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
707
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector128);
708
dstVector =
Sse
.AddScalar(dstVector, srcVector);
709
Sse
.StoreScalar(pDstCurrent, dstVector);
748
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
749
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
750
srcVector =
Sse
.Multiply(srcVector, scaleVector128);
751
dstVector =
Sse
.Add(dstVector, srcVector);
752
Sse
.Store(pResCurrent, dstVector);
761
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
762
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
763
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector128);
764
dstVector =
Sse
.AddScalar(dstVector, srcVector);
765
Sse
.StoreScalar(pResCurrent, dstVector);
804
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
807
srcVector =
Sse
.Multiply(srcVector, scaleVector128);
808
dstVector =
Sse
.Add(dstVector, srcVector);
850
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
851
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
853
Vector128<float> result =
Sse
.Add(srcVector, dstVector);
854
Sse
.Store(pDstCurrent, result);
862
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
863
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
865
Vector128<float> result =
Sse
.AddScalar(srcVector, dstVector);
866
Sse
.StoreScalar(pDstCurrent, result);
903
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
905
dstVector =
Sse
.Add(dstVector, srcVector);
950
Vector128<float> src1Vector =
Sse
.LoadVector128(pSrc1Current);
951
Vector128<float> src2Vector =
Sse
.LoadVector128(pSrc2Current);
952
src2Vector =
Sse
.Multiply(src1Vector, src2Vector);
953
Sse
.Store(pDstCurrent, src2Vector);
962
Vector128<float> src1Vector =
Sse
.LoadScalarVector128(pSrc1Current);
963
Vector128<float> src2Vector =
Sse
.LoadScalarVector128(pSrc2Current);
964
src2Vector =
Sse
.MultiplyScalar(src1Vector, src2Vector);
965
Sse
.StoreScalar(pDstCurrent, src2Vector);
1058
return
Sse
.AddScalar(result.GetLower(), GetHigh(result)).ToScalar();
1080
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1086
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1087
result128 =
Sse
.Add(result128,
Sse
.Multiply(srcVector, srcVector));
1096
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1097
result128 =
Sse
.AddScalar(result128,
Sse
.MultiplyScalar(srcVector, srcVector));
1102
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1125
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1132
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1133
srcVector =
Sse
.Subtract(srcVector, meanVector128);
1134
result128 =
Sse
.Add(result128,
Sse
.Multiply(srcVector, srcVector));
1143
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1144
srcVector =
Sse
.SubtractScalar(srcVector, meanVector128);
1145
result128 =
Sse
.AddScalar(result128,
Sse
.MultiplyScalar(srcVector, srcVector));
1150
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1172
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1178
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1179
result128 =
Sse
.Add(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1188
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1189
result128 =
Sse
.AddScalar(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1194
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1218
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1225
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1226
srcVector =
Sse
.Subtract(srcVector, meanVector128);
1227
result128 =
Sse
.Add(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1236
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1237
srcVector =
Sse
.SubtractScalar(srcVector, meanVector128);
1238
result128 =
Sse
.AddScalar(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1243
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1265
Vector128<float> resultPadded =
Sse
.MaxScalar(result256.GetLower(), GetHigh(result256));
1271
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1272
result128 =
Sse
.Max(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1281
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1282
result128 =
Sse
.MaxScalar(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1287
return
Sse
.MaxScalar(result128, resultPadded).ToScalar();
1311
Vector128<float> resultPadded =
Sse
.MaxScalar(result256.GetLower(), GetHigh(result256));
1318
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1319
srcVector =
Sse
.Subtract(srcVector, meanVector128);
1320
result128 =
Sse
.Max(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1329
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1330
srcVector =
Sse
.SubtractScalar(srcVector, meanVector128);
1331
result128 =
Sse
.MaxScalar(result128,
Sse
.And(srcVector, SseIntrinsics.AbsMask128));
1336
return
Sse
.MaxScalar(result128, resultPadded).ToScalar();
1362
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1368
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1369
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
1371
result128 =
Sse
.Add(result128,
Sse
.Multiply(srcVector, dstVector));
1381
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1382
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
1384
result128 =
Sse
.AddScalar(result128,
Sse
.MultiplyScalar(srcVector, dstVector));
1390
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1419
Vector128<float> resultPadded =
Sse
.AddScalar(result256.GetLower(), GetHigh(result256));
1426
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
1428
result128 =
Sse
.Add(result128,
Sse
.Multiply(srcVector, dstVector));
1439
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
1441
result128 =
Sse
.AddScalar(result128,
Sse
.MultiplyScalar(srcVector, dstVector));
1447
return
Sse
.AddScalar(result128, resultPadded).ToScalar();
1474
Vector128<float> sqDistanceVectorPadded =
Sse
.AddScalar(sqDistanceVector256.GetLower(), GetHigh(sqDistanceVector256));
1480
Vector128<float> distanceVector =
Sse
.Subtract(
Sse
.LoadVector128(pSrcCurrent),
1481
Sse
.LoadVector128(pDstCurrent));
1482
sqDistanceVector128 =
Sse
.Add(sqDistanceVector128,
1483
Sse
.Multiply(distanceVector, distanceVector));
1491
float norm =
Sse
.AddScalar(sqDistanceVector128, sqDistanceVectorPadded).ToScalar();
1538
Vector128<float> xSrc =
Sse
.LoadVector128(pSrcCurrent);
1540
Vector128<float> xDst1 =
Sse
.LoadVector128(pDst1Current);
1541
xDst1 =
Sse
.Add(xDst1,
Sse
.Multiply(xSrc, xPrimal128));
1544
Sse
.Store(pDst1Current, xDst1);
1545
Sse
.Store(pDst2Current, xDst2);
1597
Vector128<float> xSrc =
Sse
.LoadVector128(pSrcCurrent);
1600
xDst1 =
Sse
.Add(xDst1,
Sse
.Multiply(xSrc, xPrimal128));
CpuMathUtils.netcoreapp.cs (13)
29
=> Avx.IsSupported ? Vector256Alignment : (
Sse
.IsSupported ? Vector128Alignment : FloatAlignment);
57
else if (
Sse
.IsSupported)
136
else if (
Sse
.IsSupported)
171
if (destination.Length < MinInputSize || !
Sse
.IsSupported)
207
if (count < MinInputSize || !
Sse
.IsSupported)
243
if (count < MinInputSize || !
Sse
.IsSupported)
272
if (source.Length < MinInputSize || !
Sse
.IsSupported)
302
if (source.Length < MinInputSize || !
Sse
.IsSupported)
331
if (source.Length < MinInputSize || !
Sse
.IsSupported)
365
if (source.Length < MinInputSize || !
Sse
.IsSupported)
407
if (count < MinInputSize || !
Sse
.IsSupported)
539
if (count < MinInputSize || !
Sse
.IsSupported)
581
if (count < MinInputSize || !
Sse
.IsSupported)
FactorizationMachine\AvxIntrinsics.cs (1)
143
Sse
.StoreScalar(&latentResponse, y.GetLower()); // The lowest slot is the response value.
SseIntrinsics.cs (237)
81
=>
Sse
.Shuffle(x, x, 0x39);
86
Sse
.StoreScalar(dst + idx[0], x);
88
Sse
.StoreScalar(dst + idx[1], rotated);
90
Sse
.StoreScalar(dst + idx[2], rotated);
92
Sse
.StoreScalar(dst + idx[3], rotated);
105
Vector128<float> partialSum =
Sse
.Add(vector,
Sse
.MoveHighToLow(vector, vector));
107
return
Sse
.Add(partialSum,
Sse
.Shuffle(partialSum, partialSum, 0xB1));
115
Vector128<float> x1 =
Sse
.Shuffle(vector, vector, 0xB1);
119
Vector128<float> partialMax =
Sse
.Max(vector, x1);
122
x1 =
Sse
.Shuffle(partialMax, partialMax, 0x02);
126
return
Sse
.MaxScalar(partialMax, x1);
133
Vector128<float> xSign =
Sse
.And(xDst1, signMask); // result = 0x8000 0000 if xDst1 is negative or 0x0000 0000 otherwise
134
Vector128<float> xDst1Abs =
Sse
.Xor(xDst1, xSign);
135
Vector128<float> xCond =
Sse
.CompareGreaterThan(xDst1Abs, xThreshold); // result = 0xFFFF FFFF if true
136
Vector128<float> x2 =
Sse
.Xor(xSign, xThreshold); // -xThreshold if xDst1 is negative and +xThreshold otherwise
137
return
Sse
.And(
Sse
.Subtract(xDst1, x2), xCond);
173
Vector128<float> x01 =
Sse
.LoadAlignedVector128(pMatTemp);
174
Vector128<float> x11 =
Sse
.LoadAlignedVector128(pMatTemp += ccol);
175
Vector128<float> x21 =
Sse
.LoadAlignedVector128(pMatTemp += ccol);
176
Vector128<float> x31 =
Sse
.LoadAlignedVector128(pMatTemp += ccol);
177
Vector128<float> x02 =
Sse
.LoadAlignedVector128(pSrcCurrent);
179
res0 =
Sse
.Add(res0,
Sse
.Multiply(x01, x02));
180
res1 =
Sse
.Add(res1,
Sse
.Multiply(x11, x02));
181
res2 =
Sse
.Add(res2,
Sse
.Multiply(x21, x02));
182
res3 =
Sse
.Add(res3,
Sse
.Multiply(x31, x02));
193
Sse
.StoreAligned(pDstCurrent, res0);
243
x2 =
Sse
.Multiply(x2, x1);
244
result =
Sse
.Add(result, x2);
249
Sse
.StoreAligned(pDstCurrent, result);
275
Vector128<float> x01 =
Sse
.LoadAlignedVector128(pSrcCurrent);
277
Vector128<float> x11 =
Sse
.Shuffle(x01, x01, 0x55); // B
278
Vector128<float> x21 =
Sse
.Shuffle(x01, x01, 0xAA); // C
279
Vector128<float> x31 =
Sse
.Shuffle(x01, x01, 0xFF); // D
280
x01 =
Sse
.Shuffle(x01, x01, 0x00); // A
289
Vector128<float> x02 =
Sse
.LoadAlignedVector128(pMatTemp);
290
Vector128<float> x12 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
291
Vector128<float> x22 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
292
Vector128<float> x32 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
294
x02 =
Sse
.Multiply(x01, x02);
295
x12 =
Sse
.Multiply(x11, x12);
296
x22 =
Sse
.Multiply(x21, x22);
297
x32 =
Sse
.Multiply(x31, x32);
299
x02 =
Sse
.Add(x02, x12);
300
x22 =
Sse
.Add(x22, x32);
301
x02 =
Sse
.Add(x02, x22);
303
Sse
.StoreAligned(pDstCurrent, x02);
313
x01 =
Sse
.LoadAlignedVector128(pSrcCurrent);
315
x11 =
Sse
.Shuffle(x01, x01, 0x55); // B
316
x21 =
Sse
.Shuffle(x01, x01, 0xAA); // C
317
x31 =
Sse
.Shuffle(x01, x01, 0xFF); // D
318
x01 =
Sse
.Shuffle(x01, x01, 0x00); // A
326
Vector128<float> x02 =
Sse
.LoadAlignedVector128(pMatTemp);
327
Vector128<float> x12 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
328
Vector128<float> x22 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
329
Vector128<float> x32 =
Sse
.LoadAlignedVector128(pMatTemp += crow);
330
Vector128<float> x3 =
Sse
.LoadAlignedVector128(pDstCurrent);
332
x02 =
Sse
.Multiply(x01, x02);
333
x12 =
Sse
.Multiply(x11, x12);
334
x22 =
Sse
.Multiply(x21, x22);
335
x32 =
Sse
.Multiply(x31, x32);
337
x02 =
Sse
.Add(x02, x12);
338
x22 =
Sse
.Add(x22, x32);
339
x02 =
Sse
.Add(x02, x22);
340
x3 =
Sse
.Add(x02, x3);
342
Sse
.StoreAligned(pDstCurrent, x3);
367
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
368
dstVector =
Sse
.Add(dstVector, scalarVector);
369
Sse
.Store(pDstCurrent, dstVector);
376
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
377
dstVector =
Sse
.AddScalar(dstVector, scalarVector);
378
Sse
.StoreScalar(pDstCurrent, dstVector);
406
Vector128<float> temp =
Sse
.LoadVector128(pDstCurrent);
407
temp =
Sse
.Multiply(scaleVector128, temp);
408
Sse
.Store(pDstCurrent, temp);
421
Vector128<float> result =
Sse
.LoadVector128(pDstCurrent);
423
Vector128<float> leadingMask =
Sse
.LoadVector128(((float*)(pLeadingAlignmentMask)) + (misalignment * 4));
424
Vector128<float> trailingMask =
Sse
.LoadVector128(((float*)(pTrailingAlignmentMask)) + ((4 - misalignment) * 4));
426
Vector128<float> temp =
Sse
.And(result, trailingMask);
427
result =
Sse
.Multiply(scaleVector128, result);
430
result =
Sse
.And(result, leadingMask);
431
result =
Sse
.Or(result, temp);
433
Sse
.Store(pDstCurrent, result);
449
Vector128<float> temp =
Sse
.LoadAlignedVector128(pDstCurrent);
450
temp =
Sse
.Multiply(scaleVector128, temp);
451
Sse
.Store(pDstCurrent, temp);
470
Vector128<float> result =
Sse
.LoadVector128(pDstCurrent);
472
Vector128<float> trailingMask =
Sse
.LoadVector128(((float*)(pTrailingAlignmentMask)) + (remainder * 4));
473
Vector128<float> leadingMask =
Sse
.LoadVector128(((float*)(pLeadingAlignmentMask)) + ((4 - remainder) * 4));
475
Vector128<float> temp =
Sse
.And(result, leadingMask);
476
result =
Sse
.Multiply(scaleVector128, result);
479
result =
Sse
.And(result, trailingMask);
480
result =
Sse
.Or(result, temp);
482
Sse
.Store(pDstCurrent, result);
503
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
504
srcVector =
Sse
.Multiply(srcVector, scaleVector);
505
Sse
.Store(pDstCurrent, srcVector);
513
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
514
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector);
515
Sse
.StoreScalar(pDstCurrent, srcVector);
537
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
538
dstVector =
Sse
.Add(dstVector, bVector);
539
dstVector =
Sse
.Multiply(dstVector, aVector);
540
Sse
.Store(pDstCurrent, dstVector);
547
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
548
dstVector =
Sse
.AddScalar(dstVector, bVector);
549
dstVector =
Sse
.MultiplyScalar(dstVector, aVector);
550
Sse
.StoreScalar(pDstCurrent, dstVector);
572
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
573
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
575
srcVector =
Sse
.Multiply(srcVector, scaleVector);
576
dstVector =
Sse
.Add(dstVector, srcVector);
577
Sse
.Store(pDstCurrent, dstVector);
585
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
586
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
588
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector);
589
dstVector =
Sse
.AddScalar(dstVector, srcVector);
590
Sse
.StoreScalar(pDstCurrent, dstVector);
616
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
617
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
618
srcVector =
Sse
.Multiply(srcVector, scaleVector);
619
dstVector =
Sse
.Add(dstVector, srcVector);
620
Sse
.Store(pResCurrent, dstVector);
629
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
630
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
631
srcVector =
Sse
.MultiplyScalar(srcVector, scaleVector);
632
dstVector =
Sse
.AddScalar(dstVector, srcVector);
633
Sse
.StoreScalar(pResCurrent, dstVector);
660
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
663
srcVector =
Sse
.Multiply(srcVector, scaleVector);
664
dstVector =
Sse
.Add(dstVector, srcVector);
694
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
695
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
697
Vector128<float> result =
Sse
.Add(srcVector, dstVector);
698
Sse
.Store(pDstCurrent, result);
706
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
707
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
709
Vector128<float> result =
Sse
.AddScalar(srcVector, dstVector);
710
Sse
.StoreScalar(pDstCurrent, result);
735
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
737
dstVector =
Sse
.Add(dstVector, srcVector);
770
Vector128<float> src1Vector =
Sse
.LoadVector128(pSrc1Current);
771
Vector128<float> src2Vector =
Sse
.LoadVector128(pSrc2Current);
772
src2Vector =
Sse
.Multiply(src1Vector, src2Vector);
773
Sse
.Store(pDstCurrent, src2Vector);
782
Vector128<float> src1Vector =
Sse
.LoadScalarVector128(pSrc1Current);
783
Vector128<float> src2Vector =
Sse
.LoadScalarVector128(pSrc2Current);
784
src2Vector =
Sse
.MultiplyScalar(src1Vector, src2Vector);
785
Sse
.StoreScalar(pDstCurrent, src2Vector);
816
result =
Sse
.Add(result,
Sse
.LoadVector128(pValues));
829
Vector128<float> mask =
Sse
.LoadVector128(((float*)(pLeadingAlignmentMask)) + (misalignment * 4));
830
Vector128<float> temp =
Sse
.And(mask,
Sse
.LoadVector128(pValues));
831
result =
Sse
.Add(result, temp);
849
result =
Sse
.Add(result,
Sse
.LoadAlignedVector128(pValues));
868
Vector128<float> mask =
Sse
.LoadVector128(((float*)(pTrailingAlignmentMask)) + (remainder * 4));
869
Vector128<float> temp =
Sse
.And(mask,
Sse
.LoadVector128(pValues));
870
result =
Sse
.Add(result, temp);
890
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
891
result =
Sse
.Add(result,
Sse
.Multiply(srcVector, srcVector));
900
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
901
result =
Sse
.AddScalar(result,
Sse
.MultiplyScalar(srcVector, srcVector));
922
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
923
srcVector =
Sse
.Subtract(srcVector, meanVector);
924
result =
Sse
.Add(result,
Sse
.Multiply(srcVector, srcVector));
933
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
934
srcVector =
Sse
.SubtractScalar(srcVector, meanVector);
935
result =
Sse
.AddScalar(result,
Sse
.MultiplyScalar(srcVector, srcVector));
955
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
956
result =
Sse
.Add(result,
Sse
.And(srcVector, AbsMask128));
965
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
966
result =
Sse
.AddScalar(result,
Sse
.And(srcVector, AbsMask128));
987
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
988
srcVector =
Sse
.Subtract(srcVector, meanVector);
989
result =
Sse
.Add(result,
Sse
.And(srcVector, AbsMask128));
998
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
999
srcVector =
Sse
.SubtractScalar(srcVector, meanVector);
1000
result =
Sse
.AddScalar(result,
Sse
.And(srcVector, AbsMask128));
1020
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1021
result =
Sse
.Max(result,
Sse
.And(srcVector, AbsMask128));
1030
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1031
result =
Sse
.MaxScalar(result,
Sse
.And(srcVector, AbsMask128));
1052
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1053
srcVector =
Sse
.Subtract(srcVector, meanVector);
1054
result =
Sse
.Max(result,
Sse
.And(srcVector, AbsMask128));
1063
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1064
srcVector =
Sse
.SubtractScalar(srcVector, meanVector);
1065
result =
Sse
.MaxScalar(result,
Sse
.And(srcVector, AbsMask128));
1089
Vector128<float> srcVector =
Sse
.LoadVector128(pSrcCurrent);
1090
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
1092
result =
Sse
.Add(result,
Sse
.Multiply(srcVector, dstVector));
1102
Vector128<float> srcVector =
Sse
.LoadScalarVector128(pSrcCurrent);
1103
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
1105
result =
Sse
.AddScalar(result,
Sse
.MultiplyScalar(srcVector, dstVector));
1134
Vector128<float> dstVector =
Sse
.LoadVector128(pDstCurrent);
1136
result =
Sse
.Add(result,
Sse
.Multiply(srcVector, dstVector));
1147
Vector128<float> dstVector =
Sse
.LoadScalarVector128(pDstCurrent);
1149
result =
Sse
.AddScalar(result,
Sse
.MultiplyScalar(srcVector, dstVector));
1174
Vector128<float> distanceVector =
Sse
.Subtract(
Sse
.LoadVector128(pSrcCurrent),
1175
Sse
.LoadVector128(pDstCurrent));
1176
sqDistanceVector =
Sse
.Add(sqDistanceVector,
1177
Sse
.Multiply(distanceVector, distanceVector));
1217
Vector128<float> xSrc =
Sse
.LoadVector128(pSrcCurrent);
1219
Vector128<float> xDst1 =
Sse
.LoadVector128(pDst1Current);
1220
xDst1 =
Sse
.Add(xDst1,
Sse
.Multiply(xSrc, xPrimal));
1223
Sse
.Store(pDst1Current, xDst1);
1224
Sse
.Store(pDst2Current, xDst2);
1262
Vector128<float> xSrc =
Sse
.LoadVector128(pSrcCurrent);
1265
xDst1 =
Sse
.Add(xDst1,
Sse
.Multiply(xSrc, xPrimal));
Microsoft.ML.CpuMath.UnitTests (3)
UnitTests.cs (3)
123
Assert.True(System.Runtime.Intrinsics.X86.
Sse
.IsSupported);
128
Assert.True(System.Runtime.Intrinsics.X86.
Sse
.IsSupported);
133
Assert.False(System.Runtime.Intrinsics.X86.
Sse
.IsSupported);
System.Numerics.Tensors (4)
System\Numerics\Tensors\netcore\TensorPrimitives.Reciprocal.cs (4)
106
if (
Sse
.IsSupported)
108
if (typeof(T) == typeof(float)) return
Sse
.Reciprocal(x.AsSingle()).As<float, T>();
172
if (
Sse
.IsSupported)
174
if (typeof(T) == typeof(float)) return
Sse
.ReciprocalSqrt(x.AsSingle()).As<float, T>();
System.Private.CoreLib (47)
src\libraries\System.Private.CoreLib\src\System\Numerics\Matrix4x4.Impl.cs (44)
1242
if (
Sse
.IsSupported)
1249
[CompExactlyDependsOn(typeof(
Sse
))]
1252
if (!
Sse
.IsSupported)
1265
Vector128<float> vTemp1 =
Sse
.Shuffle(row1, row2, 0b01_00_01_00); //_MM_SHUFFLE(1, 0, 1, 0)
1266
Vector128<float> vTemp3 =
Sse
.Shuffle(row1, row2, 0b11_10_11_10); //_MM_SHUFFLE(3, 2, 3, 2)
1267
Vector128<float> vTemp2 =
Sse
.Shuffle(row3, row4, 0b01_00_01_00); //_MM_SHUFFLE(1, 0, 1, 0)
1268
Vector128<float> vTemp4 =
Sse
.Shuffle(row3, row4, 0b11_10_11_10); //_MM_SHUFFLE(3, 2, 3, 2)
1270
row1 =
Sse
.Shuffle(vTemp1, vTemp2, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0)
1271
row2 =
Sse
.Shuffle(vTemp1, vTemp2, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1)
1272
row3 =
Sse
.Shuffle(vTemp3, vTemp4, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0)
1273
row4 =
Sse
.Shuffle(vTemp3, vTemp4, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1)
1279
Vector128<float> V02 =
Sse
.Shuffle(row3, row1, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0)
1280
Vector128<float> V12 =
Sse
.Shuffle(row4, row2, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1)
1290
V02 =
Sse
.Shuffle(row3, row1, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1)
1291
V12 =
Sse
.Shuffle(row4, row2, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0)
1298
V11 =
Sse
.Shuffle(D0, D2, 0b01_01_11_01); //_MM_SHUFFLE(1, 1, 3, 1)
1300
V10 =
Sse
.Shuffle(V11, D0, 0b00_11_00_10); //_MM_SHUFFLE(0, 3, 0, 2)
1302
V11 =
Sse
.Shuffle(V11, D0, 0b10_01_10_01); //_MM_SHUFFLE(2, 1, 2, 1)
1305
Vector128<float> V13 =
Sse
.Shuffle(D1, D2, 0b11_11_11_01); //_MM_SHUFFLE(3, 3, 3, 1)
1307
V12 =
Sse
.Shuffle(V13, D1, 0b00_11_00_10); //_MM_SHUFFLE(0, 3, 0, 2)
1309
V13 =
Sse
.Shuffle(V13, D1, 0b10_01_10_01); //_MM_SHUFFLE(2, 1, 2, 1)
1317
V11 =
Sse
.Shuffle(D0, D2, 0b00_00_01_00); //_MM_SHUFFLE(0, 0, 1, 0)
1319
V10 =
Sse
.Shuffle(D0, V11, 0b10_01_00_11); //_MM_SHUFFLE(2, 1, 0, 3)
1321
V11 =
Sse
.Shuffle(D0, V11, 0b00_10_01_10); //_MM_SHUFFLE(0, 2, 1, 2)
1324
V13 =
Sse
.Shuffle(D1, D2, 0b10_10_01_00); //_MM_SHUFFLE(2, 2, 1, 0)
1326
V12 =
Sse
.Shuffle(D1, V13, 0b10_01_00_11); //_MM_SHUFFLE(2, 1, 0, 3)
1328
V13 =
Sse
.Shuffle(D1, V13, 0b_00_10_01_10); //_MM_SHUFFLE(0, 2, 1, 2)
1338
V10 =
Sse
.Shuffle(D0, D2, 0b01_00_10_10); //_MM_SHUFFLE(1, 0, 2, 2)
1343
V11 =
Sse
.Shuffle(D0, D2, 0b01_00_11_00); //_MM_SHUFFLE(1, 0, 3, 0)
1348
V12 =
Sse
.Shuffle(D1, D2, 0b11_10_10_10); //_MM_SHUFFLE(3, 2, 2, 2)
1353
V13 =
Sse
.Shuffle(D1, D2, 0b11_10_11_00); //_MM_SHUFFLE(3, 2, 3, 0)
1373
C0 =
Sse
.Shuffle(C0, C1, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0)
1374
C2 =
Sse
.Shuffle(C2, C3, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0)
1375
C4 =
Sse
.Shuffle(C4, C5, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0)
1376
C6 =
Sse
.Shuffle(C6, C7, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0)
1677
else if (
Sse
.IsSupported)
1684
Vector128<float> lowerXZ =
Sse
.UnpackLow(x, z); // x[0], z[0], x[1], z[1]
1685
Vector128<float> lowerYW =
Sse
.UnpackLow(y, w); // y[0], w[0], y[1], w[1]
1686
Vector128<float> upperXZ =
Sse
.UnpackHigh(x, z); // x[2], z[2], x[3], z[3]
1687
Vector128<float> upperYW =
Sse
.UnpackHigh(y, w); // y[2], w[2], y[3], w[3]
1689
result.X =
Sse
.UnpackLow(lowerXZ, lowerYW).AsVector4(); // x[0], y[0], z[0], w[0]
1690
result.Y =
Sse
.UnpackHigh(lowerXZ, lowerYW).AsVector4(); // x[1], y[1], z[1], w[1]
1691
result.Z =
Sse
.UnpackLow(upperXZ, upperYW).AsVector4(); // x[2], y[2], z[2], w[2]
1692
result.W =
Sse
.UnpackHigh(upperXZ, upperYW).AsVector4(); // x[3], y[3], z[3], w[3]
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\Vector128.cs (2)
534
Vector128<float> result =
Sse
.Multiply(upper, Create(65536.0f));
535
return
Sse
.Add(result, lower);
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\X86\Sse2.cs (1)
23
public new abstract class X64 :
Sse
.X64
System.Runtime.Intrinsics (1)
artifacts\obj\System.Runtime.Intrinsics\Debug\net10.0\System.Runtime.Intrinsics.Forwards.cs (1)
43
[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.Intrinsics.X86.
Sse
))]