1 type derived from Sse
System.Private.CoreLib (1)
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\X86\Sse2.cs (1)
12public abstract class Sse2 : Sse
476 references to Sse
Microsoft.ML.CpuMath (421)
AvxIntrinsics.cs (170)
93Sse.StoreScalar(dst + idx[0], tmp); 95Sse.StoreScalar(dst + idx[1], tmp); 97Sse.StoreScalar(dst + idx[2], tmp); 99Sse.StoreScalar(dst + idx[3], tmp); 101Sse.StoreScalar(dst + idx[4], tmp); 103Sse.StoreScalar(dst + idx[5], tmp); 105Sse.StoreScalar(dst + idx[6], tmp); 107Sse.StoreScalar(dst + idx[7], tmp); 235Vector128<float> sum = Sse.Add(res0.GetLower(), GetHigh(in res0)); 236Sse.StoreAligned(pDstCurrent, sum); 319Vector128<float> h01 = Sse.LoadAlignedVector128(pSrcCurrent); 321Vector128<float> h11 = Sse.Shuffle(h01, h01, 0x55); // B 322Vector128<float> h21 = Sse.Shuffle(h01, h01, 0xAA); // C 323Vector128<float> h31 = Sse.Shuffle(h01, h01, 0xFF); // D 324h01 = Sse.Shuffle(h01, h01, 0x00); // A 367h01 = Sse.LoadAlignedVector128(pSrcCurrent); 369h11 = Sse.Shuffle(h01, h01, 0x55); // B 370h21 = Sse.Shuffle(h01, h01, 0xAA); // C 371h31 = Sse.Shuffle(h01, h01, 0xFF); // D 372h01 = Sse.Shuffle(h01, h01, 0x00); // A 443Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 444dstVector = Sse.Add(dstVector, scalarVector128); 445Sse.Store(pDstCurrent, dstVector); 452Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 453dstVector = Sse.AddScalar(dstVector, scalarVector128); 454Sse.StoreScalar(pDstCurrent, dstVector); 596Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 597srcVector = Sse.Multiply(srcVector, scaleVector128); 598Sse.Store(pDstCurrent, srcVector); 606Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 607srcVector = Sse.MultiplyScalar(srcVector, scaleVector128); 608Sse.StoreScalar(pDstCurrent, srcVector); 643Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 644dstVector = Sse.Add(dstVector, b128); 645dstVector = Sse.Multiply(dstVector, a128); 646Sse.Store(pDstCurrent, dstVector); 653Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 654dstVector = Sse.AddScalar(dstVector, b128); 655dstVector = Sse.MultiplyScalar(dstVector, a128); 656Sse.StoreScalar(pDstCurrent, dstVector); 691Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 692Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 694srcVector = Sse.Multiply(srcVector, scaleVector128); 695dstVector = Sse.Add(dstVector, srcVector); 696Sse.Store(pDstCurrent, dstVector); 704Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 705Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 707srcVector = Sse.MultiplyScalar(srcVector, scaleVector128); 708dstVector = Sse.AddScalar(dstVector, srcVector); 709Sse.StoreScalar(pDstCurrent, dstVector); 748Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 749Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 750srcVector = Sse.Multiply(srcVector, scaleVector128); 751dstVector = Sse.Add(dstVector, srcVector); 752Sse.Store(pResCurrent, dstVector); 761Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 762Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 763srcVector = Sse.MultiplyScalar(srcVector, scaleVector128); 764dstVector = Sse.AddScalar(dstVector, srcVector); 765Sse.StoreScalar(pResCurrent, dstVector); 804Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 807srcVector = Sse.Multiply(srcVector, scaleVector128); 808dstVector = Sse.Add(dstVector, srcVector); 850Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 851Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 853Vector128<float> result = Sse.Add(srcVector, dstVector); 854Sse.Store(pDstCurrent, result); 862Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 863Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 865Vector128<float> result = Sse.AddScalar(srcVector, dstVector); 866Sse.StoreScalar(pDstCurrent, result); 903Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 905dstVector = Sse.Add(dstVector, srcVector); 950Vector128<float> src1Vector = Sse.LoadVector128(pSrc1Current); 951Vector128<float> src2Vector = Sse.LoadVector128(pSrc2Current); 952src2Vector = Sse.Multiply(src1Vector, src2Vector); 953Sse.Store(pDstCurrent, src2Vector); 962Vector128<float> src1Vector = Sse.LoadScalarVector128(pSrc1Current); 963Vector128<float> src2Vector = Sse.LoadScalarVector128(pSrc2Current); 964src2Vector = Sse.MultiplyScalar(src1Vector, src2Vector); 965Sse.StoreScalar(pDstCurrent, src2Vector); 1058return Sse.AddScalar(result.GetLower(), GetHigh(result)).ToScalar(); 1080Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1086Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1087result128 = Sse.Add(result128, Sse.Multiply(srcVector, srcVector)); 1096Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1097result128 = Sse.AddScalar(result128, Sse.MultiplyScalar(srcVector, srcVector)); 1102return Sse.AddScalar(result128, resultPadded).ToScalar(); 1125Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1132Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1133srcVector = Sse.Subtract(srcVector, meanVector128); 1134result128 = Sse.Add(result128, Sse.Multiply(srcVector, srcVector)); 1143Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1144srcVector = Sse.SubtractScalar(srcVector, meanVector128); 1145result128 = Sse.AddScalar(result128, Sse.MultiplyScalar(srcVector, srcVector)); 1150return Sse.AddScalar(result128, resultPadded).ToScalar(); 1172Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1178Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1179result128 = Sse.Add(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1188Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1189result128 = Sse.AddScalar(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1194return Sse.AddScalar(result128, resultPadded).ToScalar(); 1218Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1225Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1226srcVector = Sse.Subtract(srcVector, meanVector128); 1227result128 = Sse.Add(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1236Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1237srcVector = Sse.SubtractScalar(srcVector, meanVector128); 1238result128 = Sse.AddScalar(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1243return Sse.AddScalar(result128, resultPadded).ToScalar(); 1265Vector128<float> resultPadded = Sse.MaxScalar(result256.GetLower(), GetHigh(result256)); 1271Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1272result128 = Sse.Max(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1281Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1282result128 = Sse.MaxScalar(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1287return Sse.MaxScalar(result128, resultPadded).ToScalar(); 1311Vector128<float> resultPadded = Sse.MaxScalar(result256.GetLower(), GetHigh(result256)); 1318Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1319srcVector = Sse.Subtract(srcVector, meanVector128); 1320result128 = Sse.Max(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1329Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1330srcVector = Sse.SubtractScalar(srcVector, meanVector128); 1331result128 = Sse.MaxScalar(result128, Sse.And(srcVector, SseIntrinsics.AbsMask128)); 1336return Sse.MaxScalar(result128, resultPadded).ToScalar(); 1362Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1368Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1369Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 1371result128 = Sse.Add(result128, Sse.Multiply(srcVector, dstVector)); 1381Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1382Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 1384result128 = Sse.AddScalar(result128, Sse.MultiplyScalar(srcVector, dstVector)); 1390return Sse.AddScalar(result128, resultPadded).ToScalar(); 1419Vector128<float> resultPadded = Sse.AddScalar(result256.GetLower(), GetHigh(result256)); 1426Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 1428result128 = Sse.Add(result128, Sse.Multiply(srcVector, dstVector)); 1439Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 1441result128 = Sse.AddScalar(result128, Sse.MultiplyScalar(srcVector, dstVector)); 1447return Sse.AddScalar(result128, resultPadded).ToScalar(); 1474Vector128<float> sqDistanceVectorPadded = Sse.AddScalar(sqDistanceVector256.GetLower(), GetHigh(sqDistanceVector256)); 1480Vector128<float> distanceVector = Sse.Subtract(Sse.LoadVector128(pSrcCurrent), 1481Sse.LoadVector128(pDstCurrent)); 1482sqDistanceVector128 = Sse.Add(sqDistanceVector128, 1483Sse.Multiply(distanceVector, distanceVector)); 1491float norm = Sse.AddScalar(sqDistanceVector128, sqDistanceVectorPadded).ToScalar(); 1538Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent); 1540Vector128<float> xDst1 = Sse.LoadVector128(pDst1Current); 1541xDst1 = Sse.Add(xDst1, Sse.Multiply(xSrc, xPrimal128)); 1544Sse.Store(pDst1Current, xDst1); 1545Sse.Store(pDst2Current, xDst2); 1597Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent); 1600xDst1 = Sse.Add(xDst1, Sse.Multiply(xSrc, xPrimal128));
CpuMathUtils.netcoreapp.cs (13)
29=> Avx.IsSupported ? Vector256Alignment : (Sse.IsSupported ? Vector128Alignment : FloatAlignment); 57else if (Sse.IsSupported) 136else if (Sse.IsSupported) 171if (destination.Length < MinInputSize || !Sse.IsSupported) 207if (count < MinInputSize || !Sse.IsSupported) 243if (count < MinInputSize || !Sse.IsSupported) 272if (source.Length < MinInputSize || !Sse.IsSupported) 302if (source.Length < MinInputSize || !Sse.IsSupported) 331if (source.Length < MinInputSize || !Sse.IsSupported) 365if (source.Length < MinInputSize || !Sse.IsSupported) 407if (count < MinInputSize || !Sse.IsSupported) 539if (count < MinInputSize || !Sse.IsSupported) 581if (count < MinInputSize || !Sse.IsSupported)
FactorizationMachine\AvxIntrinsics.cs (1)
143Sse.StoreScalar(&latentResponse, y.GetLower()); // The lowest slot is the response value.
SseIntrinsics.cs (237)
81=> Sse.Shuffle(x, x, 0x39); 86Sse.StoreScalar(dst + idx[0], x); 88Sse.StoreScalar(dst + idx[1], rotated); 90Sse.StoreScalar(dst + idx[2], rotated); 92Sse.StoreScalar(dst + idx[3], rotated); 105Vector128<float> partialSum = Sse.Add(vector, Sse.MoveHighToLow(vector, vector)); 107return Sse.Add(partialSum, Sse.Shuffle(partialSum, partialSum, 0xB1)); 115Vector128<float> x1 = Sse.Shuffle(vector, vector, 0xB1); 119Vector128<float> partialMax = Sse.Max(vector, x1); 122x1 = Sse.Shuffle(partialMax, partialMax, 0x02); 126return Sse.MaxScalar(partialMax, x1); 133Vector128<float> xSign = Sse.And(xDst1, signMask); // result = 0x8000 0000 if xDst1 is negative or 0x0000 0000 otherwise 134Vector128<float> xDst1Abs = Sse.Xor(xDst1, xSign); 135Vector128<float> xCond = Sse.CompareGreaterThan(xDst1Abs, xThreshold); // result = 0xFFFF FFFF if true 136Vector128<float> x2 = Sse.Xor(xSign, xThreshold); // -xThreshold if xDst1 is negative and +xThreshold otherwise 137return Sse.And(Sse.Subtract(xDst1, x2), xCond); 173Vector128<float> x01 = Sse.LoadAlignedVector128(pMatTemp); 174Vector128<float> x11 = Sse.LoadAlignedVector128(pMatTemp += ccol); 175Vector128<float> x21 = Sse.LoadAlignedVector128(pMatTemp += ccol); 176Vector128<float> x31 = Sse.LoadAlignedVector128(pMatTemp += ccol); 177Vector128<float> x02 = Sse.LoadAlignedVector128(pSrcCurrent); 179res0 = Sse.Add(res0, Sse.Multiply(x01, x02)); 180res1 = Sse.Add(res1, Sse.Multiply(x11, x02)); 181res2 = Sse.Add(res2, Sse.Multiply(x21, x02)); 182res3 = Sse.Add(res3, Sse.Multiply(x31, x02)); 193Sse.StoreAligned(pDstCurrent, res0); 243x2 = Sse.Multiply(x2, x1); 244result = Sse.Add(result, x2); 249Sse.StoreAligned(pDstCurrent, result); 275Vector128<float> x01 = Sse.LoadAlignedVector128(pSrcCurrent); 277Vector128<float> x11 = Sse.Shuffle(x01, x01, 0x55); // B 278Vector128<float> x21 = Sse.Shuffle(x01, x01, 0xAA); // C 279Vector128<float> x31 = Sse.Shuffle(x01, x01, 0xFF); // D 280x01 = Sse.Shuffle(x01, x01, 0x00); // A 289Vector128<float> x02 = Sse.LoadAlignedVector128(pMatTemp); 290Vector128<float> x12 = Sse.LoadAlignedVector128(pMatTemp += crow); 291Vector128<float> x22 = Sse.LoadAlignedVector128(pMatTemp += crow); 292Vector128<float> x32 = Sse.LoadAlignedVector128(pMatTemp += crow); 294x02 = Sse.Multiply(x01, x02); 295x12 = Sse.Multiply(x11, x12); 296x22 = Sse.Multiply(x21, x22); 297x32 = Sse.Multiply(x31, x32); 299x02 = Sse.Add(x02, x12); 300x22 = Sse.Add(x22, x32); 301x02 = Sse.Add(x02, x22); 303Sse.StoreAligned(pDstCurrent, x02); 313x01 = Sse.LoadAlignedVector128(pSrcCurrent); 315x11 = Sse.Shuffle(x01, x01, 0x55); // B 316x21 = Sse.Shuffle(x01, x01, 0xAA); // C 317x31 = Sse.Shuffle(x01, x01, 0xFF); // D 318x01 = Sse.Shuffle(x01, x01, 0x00); // A 326Vector128<float> x02 = Sse.LoadAlignedVector128(pMatTemp); 327Vector128<float> x12 = Sse.LoadAlignedVector128(pMatTemp += crow); 328Vector128<float> x22 = Sse.LoadAlignedVector128(pMatTemp += crow); 329Vector128<float> x32 = Sse.LoadAlignedVector128(pMatTemp += crow); 330Vector128<float> x3 = Sse.LoadAlignedVector128(pDstCurrent); 332x02 = Sse.Multiply(x01, x02); 333x12 = Sse.Multiply(x11, x12); 334x22 = Sse.Multiply(x21, x22); 335x32 = Sse.Multiply(x31, x32); 337x02 = Sse.Add(x02, x12); 338x22 = Sse.Add(x22, x32); 339x02 = Sse.Add(x02, x22); 340x3 = Sse.Add(x02, x3); 342Sse.StoreAligned(pDstCurrent, x3); 367Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 368dstVector = Sse.Add(dstVector, scalarVector); 369Sse.Store(pDstCurrent, dstVector); 376Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 377dstVector = Sse.AddScalar(dstVector, scalarVector); 378Sse.StoreScalar(pDstCurrent, dstVector); 406Vector128<float> temp = Sse.LoadVector128(pDstCurrent); 407temp = Sse.Multiply(scaleVector128, temp); 408Sse.Store(pDstCurrent, temp); 421Vector128<float> result = Sse.LoadVector128(pDstCurrent); 423Vector128<float> leadingMask = Sse.LoadVector128(((float*)(pLeadingAlignmentMask)) + (misalignment * 4)); 424Vector128<float> trailingMask = Sse.LoadVector128(((float*)(pTrailingAlignmentMask)) + ((4 - misalignment) * 4)); 426Vector128<float> temp = Sse.And(result, trailingMask); 427result = Sse.Multiply(scaleVector128, result); 430result = Sse.And(result, leadingMask); 431result = Sse.Or(result, temp); 433Sse.Store(pDstCurrent, result); 449Vector128<float> temp = Sse.LoadAlignedVector128(pDstCurrent); 450temp = Sse.Multiply(scaleVector128, temp); 451Sse.Store(pDstCurrent, temp); 470Vector128<float> result = Sse.LoadVector128(pDstCurrent); 472Vector128<float> trailingMask = Sse.LoadVector128(((float*)(pTrailingAlignmentMask)) + (remainder * 4)); 473Vector128<float> leadingMask = Sse.LoadVector128(((float*)(pLeadingAlignmentMask)) + ((4 - remainder) * 4)); 475Vector128<float> temp = Sse.And(result, leadingMask); 476result = Sse.Multiply(scaleVector128, result); 479result = Sse.And(result, trailingMask); 480result = Sse.Or(result, temp); 482Sse.Store(pDstCurrent, result); 503Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 504srcVector = Sse.Multiply(srcVector, scaleVector); 505Sse.Store(pDstCurrent, srcVector); 513Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 514srcVector = Sse.MultiplyScalar(srcVector, scaleVector); 515Sse.StoreScalar(pDstCurrent, srcVector); 537Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 538dstVector = Sse.Add(dstVector, bVector); 539dstVector = Sse.Multiply(dstVector, aVector); 540Sse.Store(pDstCurrent, dstVector); 547Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 548dstVector = Sse.AddScalar(dstVector, bVector); 549dstVector = Sse.MultiplyScalar(dstVector, aVector); 550Sse.StoreScalar(pDstCurrent, dstVector); 572Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 573Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 575srcVector = Sse.Multiply(srcVector, scaleVector); 576dstVector = Sse.Add(dstVector, srcVector); 577Sse.Store(pDstCurrent, dstVector); 585Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 586Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 588srcVector = Sse.MultiplyScalar(srcVector, scaleVector); 589dstVector = Sse.AddScalar(dstVector, srcVector); 590Sse.StoreScalar(pDstCurrent, dstVector); 616Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 617Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 618srcVector = Sse.Multiply(srcVector, scaleVector); 619dstVector = Sse.Add(dstVector, srcVector); 620Sse.Store(pResCurrent, dstVector); 629Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 630Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 631srcVector = Sse.MultiplyScalar(srcVector, scaleVector); 632dstVector = Sse.AddScalar(dstVector, srcVector); 633Sse.StoreScalar(pResCurrent, dstVector); 660Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 663srcVector = Sse.Multiply(srcVector, scaleVector); 664dstVector = Sse.Add(dstVector, srcVector); 694Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 695Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 697Vector128<float> result = Sse.Add(srcVector, dstVector); 698Sse.Store(pDstCurrent, result); 706Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 707Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 709Vector128<float> result = Sse.AddScalar(srcVector, dstVector); 710Sse.StoreScalar(pDstCurrent, result); 735Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 737dstVector = Sse.Add(dstVector, srcVector); 770Vector128<float> src1Vector = Sse.LoadVector128(pSrc1Current); 771Vector128<float> src2Vector = Sse.LoadVector128(pSrc2Current); 772src2Vector = Sse.Multiply(src1Vector, src2Vector); 773Sse.Store(pDstCurrent, src2Vector); 782Vector128<float> src1Vector = Sse.LoadScalarVector128(pSrc1Current); 783Vector128<float> src2Vector = Sse.LoadScalarVector128(pSrc2Current); 784src2Vector = Sse.MultiplyScalar(src1Vector, src2Vector); 785Sse.StoreScalar(pDstCurrent, src2Vector); 816result = Sse.Add(result, Sse.LoadVector128(pValues)); 829Vector128<float> mask = Sse.LoadVector128(((float*)(pLeadingAlignmentMask)) + (misalignment * 4)); 830Vector128<float> temp = Sse.And(mask, Sse.LoadVector128(pValues)); 831result = Sse.Add(result, temp); 849result = Sse.Add(result, Sse.LoadAlignedVector128(pValues)); 868Vector128<float> mask = Sse.LoadVector128(((float*)(pTrailingAlignmentMask)) + (remainder * 4)); 869Vector128<float> temp = Sse.And(mask, Sse.LoadVector128(pValues)); 870result = Sse.Add(result, temp); 890Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 891result = Sse.Add(result, Sse.Multiply(srcVector, srcVector)); 900Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 901result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, srcVector)); 922Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 923srcVector = Sse.Subtract(srcVector, meanVector); 924result = Sse.Add(result, Sse.Multiply(srcVector, srcVector)); 933Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 934srcVector = Sse.SubtractScalar(srcVector, meanVector); 935result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, srcVector)); 955Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 956result = Sse.Add(result, Sse.And(srcVector, AbsMask128)); 965Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 966result = Sse.AddScalar(result, Sse.And(srcVector, AbsMask128)); 987Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 988srcVector = Sse.Subtract(srcVector, meanVector); 989result = Sse.Add(result, Sse.And(srcVector, AbsMask128)); 998Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 999srcVector = Sse.SubtractScalar(srcVector, meanVector); 1000result = Sse.AddScalar(result, Sse.And(srcVector, AbsMask128)); 1020Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1021result = Sse.Max(result, Sse.And(srcVector, AbsMask128)); 1030Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1031result = Sse.MaxScalar(result, Sse.And(srcVector, AbsMask128)); 1052Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1053srcVector = Sse.Subtract(srcVector, meanVector); 1054result = Sse.Max(result, Sse.And(srcVector, AbsMask128)); 1063Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1064srcVector = Sse.SubtractScalar(srcVector, meanVector); 1065result = Sse.MaxScalar(result, Sse.And(srcVector, AbsMask128)); 1089Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent); 1090Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 1092result = Sse.Add(result, Sse.Multiply(srcVector, dstVector)); 1102Vector128<float> srcVector = Sse.LoadScalarVector128(pSrcCurrent); 1103Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 1105result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, dstVector)); 1134Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent); 1136result = Sse.Add(result, Sse.Multiply(srcVector, dstVector)); 1147Vector128<float> dstVector = Sse.LoadScalarVector128(pDstCurrent); 1149result = Sse.AddScalar(result, Sse.MultiplyScalar(srcVector, dstVector)); 1174Vector128<float> distanceVector = Sse.Subtract(Sse.LoadVector128(pSrcCurrent), 1175Sse.LoadVector128(pDstCurrent)); 1176sqDistanceVector = Sse.Add(sqDistanceVector, 1177Sse.Multiply(distanceVector, distanceVector)); 1217Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent); 1219Vector128<float> xDst1 = Sse.LoadVector128(pDst1Current); 1220xDst1 = Sse.Add(xDst1, Sse.Multiply(xSrc, xPrimal)); 1223Sse.Store(pDst1Current, xDst1); 1224Sse.Store(pDst2Current, xDst2); 1262Vector128<float> xSrc = Sse.LoadVector128(pSrcCurrent); 1265xDst1 = Sse.Add(xDst1, Sse.Multiply(xSrc, xPrimal));
Microsoft.ML.CpuMath.UnitTests (3)
UnitTests.cs (3)
123Assert.True(System.Runtime.Intrinsics.X86.Sse.IsSupported); 128Assert.True(System.Runtime.Intrinsics.X86.Sse.IsSupported); 133Assert.False(System.Runtime.Intrinsics.X86.Sse.IsSupported);
System.Numerics.Tensors (4)
System\Numerics\Tensors\netcore\TensorPrimitives.Reciprocal.cs (4)
106if (Sse.IsSupported) 108if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>(); 172if (Sse.IsSupported) 174if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
System.Private.CoreLib (47)
src\libraries\System.Private.CoreLib\src\System\Numerics\Matrix4x4.Impl.cs (44)
1242if (Sse.IsSupported) 1249[CompExactlyDependsOn(typeof(Sse))] 1252if (!Sse.IsSupported) 1265Vector128<float> vTemp1 = Sse.Shuffle(row1, row2, 0b01_00_01_00); //_MM_SHUFFLE(1, 0, 1, 0) 1266Vector128<float> vTemp3 = Sse.Shuffle(row1, row2, 0b11_10_11_10); //_MM_SHUFFLE(3, 2, 3, 2) 1267Vector128<float> vTemp2 = Sse.Shuffle(row3, row4, 0b01_00_01_00); //_MM_SHUFFLE(1, 0, 1, 0) 1268Vector128<float> vTemp4 = Sse.Shuffle(row3, row4, 0b11_10_11_10); //_MM_SHUFFLE(3, 2, 3, 2) 1270row1 = Sse.Shuffle(vTemp1, vTemp2, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0) 1271row2 = Sse.Shuffle(vTemp1, vTemp2, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1) 1272row3 = Sse.Shuffle(vTemp3, vTemp4, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0) 1273row4 = Sse.Shuffle(vTemp3, vTemp4, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1) 1279Vector128<float> V02 = Sse.Shuffle(row3, row1, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0) 1280Vector128<float> V12 = Sse.Shuffle(row4, row2, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1) 1290V02 = Sse.Shuffle(row3, row1, 0b11_01_11_01); //_MM_SHUFFLE(3, 1, 3, 1) 1291V12 = Sse.Shuffle(row4, row2, 0b10_00_10_00); //_MM_SHUFFLE(2, 0, 2, 0) 1298V11 = Sse.Shuffle(D0, D2, 0b01_01_11_01); //_MM_SHUFFLE(1, 1, 3, 1) 1300V10 = Sse.Shuffle(V11, D0, 0b00_11_00_10); //_MM_SHUFFLE(0, 3, 0, 2) 1302V11 = Sse.Shuffle(V11, D0, 0b10_01_10_01); //_MM_SHUFFLE(2, 1, 2, 1) 1305Vector128<float> V13 = Sse.Shuffle(D1, D2, 0b11_11_11_01); //_MM_SHUFFLE(3, 3, 3, 1) 1307V12 = Sse.Shuffle(V13, D1, 0b00_11_00_10); //_MM_SHUFFLE(0, 3, 0, 2) 1309V13 = Sse.Shuffle(V13, D1, 0b10_01_10_01); //_MM_SHUFFLE(2, 1, 2, 1) 1317V11 = Sse.Shuffle(D0, D2, 0b00_00_01_00); //_MM_SHUFFLE(0, 0, 1, 0) 1319V10 = Sse.Shuffle(D0, V11, 0b10_01_00_11); //_MM_SHUFFLE(2, 1, 0, 3) 1321V11 = Sse.Shuffle(D0, V11, 0b00_10_01_10); //_MM_SHUFFLE(0, 2, 1, 2) 1324V13 = Sse.Shuffle(D1, D2, 0b10_10_01_00); //_MM_SHUFFLE(2, 2, 1, 0) 1326V12 = Sse.Shuffle(D1, V13, 0b10_01_00_11); //_MM_SHUFFLE(2, 1, 0, 3) 1328V13 = Sse.Shuffle(D1, V13, 0b_00_10_01_10); //_MM_SHUFFLE(0, 2, 1, 2) 1338V10 = Sse.Shuffle(D0, D2, 0b01_00_10_10); //_MM_SHUFFLE(1, 0, 2, 2) 1343V11 = Sse.Shuffle(D0, D2, 0b01_00_11_00); //_MM_SHUFFLE(1, 0, 3, 0) 1348V12 = Sse.Shuffle(D1, D2, 0b11_10_10_10); //_MM_SHUFFLE(3, 2, 2, 2) 1353V13 = Sse.Shuffle(D1, D2, 0b11_10_11_00); //_MM_SHUFFLE(3, 2, 3, 0) 1373C0 = Sse.Shuffle(C0, C1, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0) 1374C2 = Sse.Shuffle(C2, C3, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0) 1375C4 = Sse.Shuffle(C4, C5, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0) 1376C6 = Sse.Shuffle(C6, C7, 0b11_01_10_00); //_MM_SHUFFLE(3, 1, 2, 0) 1677else if (Sse.IsSupported) 1684Vector128<float> lowerXZ = Sse.UnpackLow(x, z); // x[0], z[0], x[1], z[1] 1685Vector128<float> lowerYW = Sse.UnpackLow(y, w); // y[0], w[0], y[1], w[1] 1686Vector128<float> upperXZ = Sse.UnpackHigh(x, z); // x[2], z[2], x[3], z[3] 1687Vector128<float> upperYW = Sse.UnpackHigh(y, w); // y[2], w[2], y[3], z[3] 1689result.X = Sse.UnpackLow(lowerXZ, lowerYW).AsVector4(); // x[0], y[0], z[0], w[0] 1690result.Y = Sse.UnpackHigh(lowerXZ, lowerYW).AsVector4(); // x[1], y[1], z[1], w[1] 1691result.Z = Sse.UnpackLow(upperXZ, upperYW).AsVector4(); // x[2], y[2], z[2], w[2] 1692result.W = Sse.UnpackHigh(upperXZ, upperYW).AsVector4(); // x[3], y[3], z[3], w[3]
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\Vector128.cs (2)
534Vector128<float> result = Sse.Multiply(upper, Create(65536.0f)); 535return Sse.Add(result, lower);
src\libraries\System.Private.CoreLib\src\System\Runtime\Intrinsics\X86\Sse2.cs (1)
23public new abstract class X64 : Sse.X64
System.Runtime.Intrinsics (1)
artifacts\obj\System.Runtime.Intrinsics\Debug\net10.0\System.Runtime.Intrinsics.Forwards.cs (1)
43[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.Intrinsics.X86.Sse))]