|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace System
{
// This is a separate class instead of 'partial SpanHelpers' to hide the private helpers
// included in this file which are specific to the packed implementation.
internal static partial class PackedSpanHelpers
{
// We only do this optimization if we have support for X86 intrinsics (Sse2) as the packing is noticeably cheaper compared to ARM (AdvSimd).
// While the impact on the worst-case (match at the start) is minimal on X86, it's prohibitively large on ARM.
public static bool PackedIndexOfIsSupported => Sse2.IsSupported;
// Not all values can benefit from packing the searchSpace. See comments in PackSources below.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe bool CanUsePackedIndexOf<T>(T value)
{
Debug.Assert(PackedIndexOfIsSupported);
Debug.Assert(RuntimeHelpers.IsBitwiseEquatable<T>());
Debug.Assert(sizeof(T) == sizeof(ushort));
return Unsafe.BitCast<T, ushort>(value) - 1u < 254u;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOf(ref char searchSpace, char value, int length) =>
IndexOf<SpanHelpers.DontNegate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExcept(ref char searchSpace, char value, int length) =>
IndexOf<SpanHelpers.Negate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAny(ref char searchSpace, char value0, char value1, int length) =>
IndexOfAny<SpanHelpers.DontNegate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExcept(ref char searchSpace, char value0, char value1, int length) =>
IndexOfAny<SpanHelpers.Negate<short>, NopTransform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, int length) =>
IndexOfAny<SpanHelpers.DontNegate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, (short)value2, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExcept(ref char searchSpace, char value0, char value1, char value2, int length) =>
IndexOfAny<SpanHelpers.Negate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, (short)value2, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyIgnoreCase(ref char searchSpace, char value, int length)
{
Debug.Assert((value | 0x20) == value);
return IndexOf<SpanHelpers.DontNegate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExceptIgnoreCase(ref char searchSpace, char value, int length)
{
Debug.Assert((value | 0x20) == value);
return IndexOf<SpanHelpers.Negate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value, length);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyIgnoreCase(ref char searchSpace, char value0, char value1, int length)
{
Debug.Assert((value0 | 0x20) == value0);
Debug.Assert((value1 | 0x20) == value1);
return IndexOfAny<SpanHelpers.DontNegate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExceptIgnoreCase(ref char searchSpace, char value0, char value1, int length)
{
Debug.Assert((value0 | 0x20) == value0);
Debug.Assert((value1 | 0x20) == value1);
return IndexOfAny<SpanHelpers.Negate<short>, Or20Transform>(ref Unsafe.As<char, short>(ref searchSpace), (short)value0, (short)value1, length);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyInRange(ref char searchSpace, char lowInclusive, char rangeInclusive, int length) =>
IndexOfAnyInRange<SpanHelpers.DontNegate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)lowInclusive, (short)rangeInclusive, length);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
public static int IndexOfAnyExceptInRange(ref char searchSpace, char lowInclusive, char rangeInclusive, int length) =>
IndexOfAnyInRange<SpanHelpers.Negate<short>>(ref Unsafe.As<char, short>(ref searchSpace), (short)lowInclusive, (short)rangeInclusive, length);
[CompExactlyDependsOn(typeof(Sse2))]
public static bool Contains(ref short searchSpace, short value, int length)
{
Debug.Assert(CanUsePackedIndexOf(value));
if (length < Vector128<short>.Count)
{
nuint offset = 0;
if (length >= 4)
{
length -= 4;
if (searchSpace == value ||
Unsafe.Add(ref searchSpace, 1) == value ||
Unsafe.Add(ref searchSpace, 2) == value ||
Unsafe.Add(ref searchSpace, 3) == value)
{
return true;
}
offset = 4;
}
while (length > 0)
{
length -= 1;
if (Unsafe.Add(ref searchSpace, offset) == value)
{
return true;
}
offset += 1;
}
}
else
{
ref short currentSearchSpace = ref searchSpace;
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
if (Avx512BW.IsSupported && Vector512.IsHardwareAccelerated && length > Vector512<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector512<byte> packedValue = Vector512.Create((byte)value);
if (length > 2 * Vector512<short>.Count)
{
// Process the input in chunks of 64 characters (2 * Vector512<short>).
// If the input length is a multiple of 64, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector512<short>.Count));
do
{
Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
Vector512<byte> packedSource = PackSources(source0, source1);
if (Vector512.EqualsAny(packedValue, packedSource))
{
return true;
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we're only interested in whether any value matched.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector512<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector512<byte> packedSource = PackSources(source0, source1);
if (Vector512.EqualsAny(packedValue, packedSource))
{
return true;
}
}
}
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
else if (Avx2.IsSupported && length > Vector256<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector256<byte> packedValue = Vector256.Create((byte)value);
if (length > 2 * Vector256<short>.Count)
{
// Process the input in chunks of 32 characters (2 * Vector256<short>).
// If the input length is a multiple of 32, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector256<short>.Count));
do
{
Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
if (result != Vector256<byte>.Zero)
{
return true;
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we're only interested in whether any value matched.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector256<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
if (result != Vector256<byte>.Zero)
{
return true;
}
}
}
else
{
Vector128<byte> packedValue = Vector128.Create((byte)value);
#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer, but in this case, it is only used in combination
// with the check above of Avx2.IsSupported && length > Vector256<short>.Count which makes the logic
// in this if statement dead code when Avx2.IsSupported. Presumably this negated IsSupported check is to assist the JIT in
// not generating dead code.
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // This is paired with the check above, and since these if statements are contained in 1 function, the code
// may take a dependence on the JIT compiler producing a consistent value for the result of a call to IsSupported
// This logic MUST NOT be extracted to a helper function
if (!Avx2.IsSupported && length > 2 * Vector128<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing
{
// Process the input in chunks of 16 characters (2 * Vector128<short>).
// If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector128<short>.Count));
do
{
Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
if (result != Vector128<byte>.Zero)
{
return true;
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we're only interested in whether any value matched.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
if (result != Vector128<byte>.Zero)
{
return true;
}
}
}
}
return false;
}
[CompExactlyDependsOn(typeof(Sse2))]
private static int IndexOf<TNegator, TTransform>(ref short searchSpace, short value, int length)
where TNegator : struct, SpanHelpers.INegator<short>
where TTransform : struct, ITransform
{
Debug.Assert(CanUsePackedIndexOf(value));
if (length < Vector128<short>.Count)
{
nuint offset = 0;
if (length >= 4)
{
length -= 4;
if (TNegator.NegateIfNeeded(TTransform.TransformInput(searchSpace) == value)) return 0;
if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 1)) == value)) return 1;
if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 2)) == value)) return 2;
if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, 3)) == value)) return 3;
offset = 4;
}
while (length > 0)
{
length -= 1;
if (TNegator.NegateIfNeeded(TTransform.TransformInput(Unsafe.Add(ref searchSpace, offset)) == value)) return (int)offset;
offset += 1;
}
}
else
{
ref short currentSearchSpace = ref searchSpace;
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
if (Avx512BW.IsSupported && Vector512.IsHardwareAccelerated && length > Vector512<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector512<byte> packedValue = Vector512.Create((byte)value);
if (length > 2 * Vector512<short>.Count)
{
// Process the input in chunks of 64 characters (2 * Vector512<short>).
// If the input length is a multiple of 64, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector512<short>.Count));
do
{
Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
if (HasMatch<TNegator>(packedValue, packedSource))
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, GetMatchMask<TNegator>(packedValue, packedSource));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector512<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
if (HasMatch<TNegator>(packedValue, packedSource))
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, GetMatchMask<TNegator>(packedValue, packedSource));
}
}
}
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
else if (Avx2.IsSupported && length > Vector256<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector256<byte> packedValue = Vector256.Create((byte)value);
if (length > 2 * Vector256<short>.Count)
{
// Process the input in chunks of 32 characters (2 * Vector256<short>).
// If the input length is a multiple of 32, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector256<short>.Count));
do
{
Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector256<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector256<byte> result = Vector256.Equals(packedValue, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
else
{
Vector128<byte> packedValue = Vector128.Create((byte)value);
#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer, but in this case, it is only used in combination
// with the check above of Avx2.IsSupported && length > Vector256<short>.Count which makes the logic
// in this if statement dead code when Avx2.IsSupported. Presumably this negated IsSupported check is to assist the JIT in
// not generating dead code.
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // This is paired with the check above, and since these if statements are contained in 1 function, the code
// may take a dependence on the JIT compiler producing a consistent value for the result of a call to IsSupported
// This logic MUST NOT be extracted to a helper function
if (!Avx2.IsSupported && length > 2 * Vector128<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing
{
// Process the input in chunks of 16 characters (2 * Vector128<short>).
// If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector128<short>.Count));
do
{
Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector128<byte> result = Vector128.Equals(packedValue, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
}
return -1;
}
[CompExactlyDependsOn(typeof(Sse2))]
private static int IndexOfAny<TNegator, TTransform>(ref short searchSpace, short value0, short value1, int length)
where TNegator : struct, SpanHelpers.INegator<short>
where TTransform : struct, ITransform
{
Debug.Assert(CanUsePackedIndexOf(value0));
Debug.Assert(CanUsePackedIndexOf(value1));
if (length < Vector128<short>.Count)
{
nuint offset = 0;
short lookUp;
if (length >= 4)
{
length -= 4;
lookUp = TTransform.TransformInput(searchSpace);
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 0;
lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 1));
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 1;
lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 2));
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 2;
lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, 3));
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return 3;
offset = 4;
}
while (length > 0)
{
length -= 1;
lookUp = TTransform.TransformInput(Unsafe.Add(ref searchSpace, offset));
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset;
offset += 1;
}
}
else
{
ref short currentSearchSpace = ref searchSpace;
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
if (Avx512BW.IsSupported && Vector512.IsHardwareAccelerated && length > Vector512<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector512<byte> packedValue0 = Vector512.Create((byte)value0);
Vector512<byte> packedValue1 = Vector512.Create((byte)value1);
if (length > 2 * Vector512<short>.Count)
{
// Process the input in chunks of 64 characters (2 * Vector512<short>).
// If the input length is a multiple of 64, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector512<short>.Count));
do
{
Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource));
if (result != Vector512<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector512<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector512<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource));
if (result != Vector512<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
else if (Avx2.IsSupported && length > Vector256<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector256<byte> packedValue0 = Vector256.Create((byte)value0);
Vector256<byte> packedValue1 = Vector256.Create((byte)value1);
if (length > 2 * Vector256<short>.Count)
{
// Process the input in chunks of 32 characters (2 * Vector256<short>).
// If the input length is a multiple of 32, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector256<short>.Count));
do
{
Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector256<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector256<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
else
{
Vector128<byte> packedValue0 = Vector128.Create((byte)value0);
Vector128<byte> packedValue1 = Vector128.Create((byte)value1);
#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer, but in this case, it is only used in combination
// with the check above of Avx2.IsSupported && length > Vector256<short>.Count which makes the logic
// in this if statement dead code when Avx2.IsSupported. Presumably this negated IsSupported check is to assist the JIT in
// not generating dead code.
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // This is paired with the check above, and since these if statements are contained in 1 function, the code
// may take a dependence on the JIT compiler producing a consistent value for the result of a call to IsSupported
// This logic MUST NOT be extracted to a helper function
if (!Avx2.IsSupported && length > 2 * Vector128<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing
{
// Process the input in chunks of 16 characters (2 * Vector128<short>).
// If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector128<short>.Count));
do
{
Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector128<byte> packedSource = TTransform.TransformInput(PackSources(source0, source1));
Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
}
return -1;
}
[CompExactlyDependsOn(typeof(Sse2))]
private static int IndexOfAny<TNegator>(ref short searchSpace, short value0, short value1, short value2, int length)
where TNegator : struct, SpanHelpers.INegator<short>
{
Debug.Assert(CanUsePackedIndexOf(value0));
Debug.Assert(CanUsePackedIndexOf(value1));
Debug.Assert(CanUsePackedIndexOf(value2));
if (length < Vector128<short>.Count)
{
nuint offset = 0;
short lookUp;
if (length >= 4)
{
length -= 4;
lookUp = searchSpace;
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return 0;
lookUp = Unsafe.Add(ref searchSpace, 1);
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return 1;
lookUp = Unsafe.Add(ref searchSpace, 2);
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return 2;
lookUp = Unsafe.Add(ref searchSpace, 3);
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return 3;
offset = 4;
}
while (length > 0)
{
length -= 1;
lookUp = Unsafe.Add(ref searchSpace, offset);
if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset;
offset += 1;
}
}
else
{
ref short currentSearchSpace = ref searchSpace;
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
if (Avx512BW.IsSupported && Vector512.IsHardwareAccelerated && length > Vector512<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector512<byte> packedValue0 = Vector512.Create((byte)value0);
Vector512<byte> packedValue1 = Vector512.Create((byte)value1);
Vector512<byte> packedValue2 = Vector512.Create((byte)value2);
if (length > 2 * Vector512<short>.Count)
{
// Process the input in chunks of 64 characters (2 * Vector512<short>).
// If the input length is a multiple of 64, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector512<short>.Count));
do
{
Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
Vector512<byte> packedSource = PackSources(source0, source1);
Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource) | Vector512.Equals(packedValue2, packedSource));
if (result != Vector512<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector512<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector512<byte> packedSource = PackSources(source0, source1);
Vector512<byte> result = NegateIfNeeded<TNegator>(Vector512.Equals(packedValue0, packedSource) | Vector512.Equals(packedValue1, packedSource) | Vector512.Equals(packedValue2, packedSource));
if (result != Vector512<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
else if (Avx2.IsSupported && length > Vector256<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector256<byte> packedValue0 = Vector256.Create((byte)value0);
Vector256<byte> packedValue1 = Vector256.Create((byte)value1);
Vector256<byte> packedValue2 = Vector256.Create((byte)value2);
if (length > 2 * Vector256<short>.Count)
{
// Process the input in chunks of 32 characters (2 * Vector256<short>).
// If the input length is a multiple of 32, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector256<short>.Count));
do
{
Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector256<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
else
{
Vector128<byte> packedValue0 = Vector128.Create((byte)value0);
Vector128<byte> packedValue1 = Vector128.Create((byte)value1);
Vector128<byte> packedValue2 = Vector128.Create((byte)value2);
#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer, but in this case, it is only used in combination
// with the check above of Avx2.IsSupported && length > Vector256<short>.Count which makes the logic
// in this if statement dead code when Avx2.IsSupported. Presumably this negated IsSupported check is to assist the JIT in
// not generating dead code.
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // This is paired with the check above, and since these if statements are contained in 1 function, the code
// may take a dependence on the JIT compiler producing a consistent value for the result of a call to IsSupported
// This logic MUST NOT be extracted to a helper function
if (!Avx2.IsSupported && length > 2 * Vector128<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing
{
// Process the input in chunks of 16 characters (2 * Vector128<short>).
// If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector128<short>.Count));
do
{
Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource) | Vector128.Equals(packedValue2, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.Equals(packedValue0, packedSource) | Vector128.Equals(packedValue1, packedSource) | Vector128.Equals(packedValue2, packedSource);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
}
return -1;
}
[CompExactlyDependsOn(typeof(Sse2))]
private static int IndexOfAnyInRange<TNegator>(ref short searchSpace, short lowInclusive, short rangeInclusive, int length)
where TNegator : struct, SpanHelpers.INegator<short>
{
Debug.Assert(CanUsePackedIndexOf(lowInclusive));
Debug.Assert(CanUsePackedIndexOf((short)(lowInclusive + rangeInclusive)));
Debug.Assert(rangeInclusive >= 0);
if (length < Vector128<short>.Count)
{
uint lowInclusiveUint = (uint)lowInclusive;
uint rangeInclusiveUint = (uint)rangeInclusive;
for (int i = 0; i < length; i++)
{
uint current = (uint)Unsafe.Add(ref searchSpace, i);
if (TNegator.NegateIfNeeded((current - lowInclusiveUint) <= rangeInclusiveUint))
{
return i;
}
}
}
else
{
ref short currentSearchSpace = ref searchSpace;
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
if (Avx512BW.IsSupported && Vector512.IsHardwareAccelerated && length > Vector512<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector512<byte> lowVector = Vector512.Create((byte)lowInclusive);
Vector512<byte> rangeVector = Vector512.Create((byte)rangeInclusive);
if (length > 2 * Vector512<short>.Count)
{
// Process the input in chunks of 64 characters (2 * Vector512<short>).
// If the input length is a multiple of 64, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector512<short>.Count));
do
{
Vector512<short> source0 = Vector512.LoadUnsafe(ref currentSearchSpace);
Vector512<short> source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512<short>.Count);
Vector512<byte> packedSource = PackSources(source0, source1) - lowVector;
if (HasMatchInRange<TNegator>(packedSource, rangeVector))
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, GetMatchInRangeMask<TNegator>(packedSource, rangeVector));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector512<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector512<short> source0 = Vector512.LoadUnsafe(ref firstVector);
Vector512<short> source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector512<byte> packedSource = PackSources(source0, source1) - lowVector;
if (HasMatchInRange<TNegator>(packedSource, rangeVector))
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, GetMatchInRangeMask<TNegator>(packedSource, rangeVector));
}
}
}
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else condition for this if statement is identical in semantics to Avx2 specific code
else if (Avx2.IsSupported && length > Vector256<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
{
Vector256<byte> lowVector = Vector256.Create((byte)lowInclusive);
Vector256<byte> rangeVector = Vector256.Create((byte)rangeInclusive);
if (length > 2 * Vector256<short>.Count)
{
// Process the input in chunks of 32 characters (2 * Vector256<short>).
// If the input length is a multiple of 32, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector256<short>.Count));
do
{
Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-32 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector256<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector256<byte> packedSource = PackSources(source0, source1);
Vector256<byte> result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector256<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
else
{
Vector128<byte> lowVector = Vector128.Create((byte)lowInclusive);
Vector128<byte> rangeVector = Vector128.Create((byte)rangeInclusive);
#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer, but in this case, it is only used in combination
// with the check above of Avx2.IsSupported && length > Vector256<short>.Count which makes the logic
// in this if statement dead code when Avx2.IsSupported. Presumably this negated IsSupported check is to assist the JIT in
// not generating dead code.
#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // This is paired with the check above, and since these if statements are contained in 1 function, the code
// may take a dependence on the JIT compiler producing a consistent value for the result of a call to IsSupported
// This logic MUST NOT be extracted to a helper function
if (!Avx2.IsSupported && length > 2 * Vector128<short>.Count)
#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough
#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing
{
// Process the input in chunks of 16 characters (2 * Vector128<short>).
// If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
// Let the fallback below handle it instead. This is why the condition is
// ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "IsAddressLessThanOrEqualTo".
ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - (2 * Vector128<short>.Count));
do
{
Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
// They may overlap, but we'll handle that in the index calculation if we do get a match.
{
ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128<short>.Count);
ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
? ref oneVectorAwayFromEnd
: ref currentSearchSpace;
Vector128<short> source0 = Vector128.LoadUnsafe(ref firstVector);
Vector128<short> source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd);
Vector128<byte> packedSource = PackSources(source0, source1);
Vector128<byte> result = Vector128.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded<TNegator>(result);
if (result != Vector128<byte>.Zero)
{
return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
}
}
}
}
return -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx512BW))]
private static Vector512<byte> PackSources(Vector512<short> source0, Vector512<short> source1)
{
Debug.Assert(Avx512BW.IsSupported);
// Pack two vectors of characters into bytes. While the type is Vector256<short>, these are really UInt16 characters.
// X86: Downcast every character using saturation.
// - Values <= 32767 result in min(value, 255).
// - Values > 32767 result in 0. Because of this we can't accept needles that contain 0.
return Avx512BW.PackUnsignedSaturate(source0, source1).AsByte();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx2))]
private static Vector256<byte> PackSources(Vector256<short> source0, Vector256<short> source1)
{
Debug.Assert(Avx2.IsSupported);
// Pack two vectors of characters into bytes. While the type is Vector256<short>, these are really UInt16 characters.
// X86: Downcast every character using saturation.
// - Values <= 32767 result in min(value, 255).
// - Values > 32767 result in 0. Because of this we can't accept needles that contain 0.
return Avx2.PackUnsignedSaturate(source0, source1).AsByte();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Sse2))]
private static Vector128<byte> PackSources(Vector128<short> source0, Vector128<short> source1)
{
Debug.Assert(Sse2.IsSupported);
// Pack two vectors of characters into bytes. While the type is Vector128<short>, these are really UInt16 characters.
// X86: Downcast every character using saturation.
// - Values <= 32767 result in min(value, 255).
// - Values > 32767 result in 0. Because of this we can't accept needles that contain 0.
return Sse2.PackUnsignedSaturate(source0, source1).AsByte();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool NegateIfNeeded<TNegator>(bool result)
where TNegator : struct, SpanHelpers.INegator<short> =>
typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>) ? result : !result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> NegateIfNeeded<TNegator>(Vector128<byte> result)
where TNegator : struct, SpanHelpers.INegator<short> =>
typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>) ? result : ~result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> NegateIfNeeded<TNegator>(Vector256<byte> result)
where TNegator : struct, SpanHelpers.INegator<short> =>
typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>) ? result : ~result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector512<byte> NegateIfNeeded<TNegator>(Vector512<byte> result)
where TNegator : struct, SpanHelpers.INegator<short> =>
typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>) ? result : ~result;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool HasMatch<TNegator>(Vector512<byte> left, Vector512<byte> right)
where TNegator : struct, SpanHelpers.INegator<short>
{
return (typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>))
? Vector512.EqualsAny(left, right) : !Vector512.EqualsAll(left, right);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector512<byte> GetMatchMask<TNegator>(Vector512<byte> left, Vector512<byte> right)
where TNegator : struct, SpanHelpers.INegator<short>
{
return (typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>))
? Vector512.Equals(left, right) : ~Vector512.Equals(left, right);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool HasMatchInRange<TNegator>(Vector512<byte> left, Vector512<byte> right)
where TNegator : struct, SpanHelpers.INegator<short>
{
return (typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>))
? Vector512.LessThanOrEqualAny(left, right) : !Vector512.LessThanOrEqualAll(left, right);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector512<byte> GetMatchInRangeMask<TNegator>(Vector512<byte> left, Vector512<byte> right)
where TNegator : struct, SpanHelpers.INegator<short>
{
return (typeof(TNegator) == typeof(SpanHelpers.DontNegate<short>))
? Vector512.LessThanOrEqual(left, right) : ~Vector512.LessThanOrEqual(left, right);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector128<byte> equals)
{
uint notEqualsElements = equals.ExtractMostSignificantBits();
int index = BitOperations.TrailingZeroCount(notEqualsElements);
return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx2))]
private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector256<byte> equals)
{
uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits();
int index = BitOperations.TrailingZeroCount(notEqualsElements);
return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx512F))]
private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector512<byte> equals)
{
ulong notEqualsElements = FixUpPackedVector512Result(equals).ExtractMostSignificantBits();
int index = BitOperations.TrailingZeroCount(notEqualsElements);
return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector128<byte> equals)
{
uint notEqualsElements = equals.ExtractMostSignificantBits();
int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
if (offsetInVector >= Vector128<short>.Count)
{
// We matched within the second vector
current0 = ref current1;
offsetInVector -= Vector128<short>.Count;
}
return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx2))]
private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector256<byte> equals)
{
uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits();
int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
if (offsetInVector >= Vector256<short>.Count)
{
// We matched within the second vector
current0 = ref current1;
offsetInVector -= Vector256<short>.Count;
}
return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx512F))]
private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector512<byte> equals)
{
ulong notEqualsElements = FixUpPackedVector512Result(equals).ExtractMostSignificantBits();
int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
if (offsetInVector >= Vector512<short>.Count)
{
// We matched within the second vector
current0 = ref current1;
offsetInVector -= Vector512<short>.Count;
}
return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx2))]
internal static Vector256<byte> FixUpPackedVector256Result(Vector256<byte> result)
{
Debug.Assert(Avx2.IsSupported);
// Avx2.PackUnsignedSaturate(Vector256<short>.One, Vector256.Create<short>(2)) will result in
// 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
// We want to swap the X and Y bits
// 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2
return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Avx512F))]
internal static Vector512<byte> FixUpPackedVector512Result(Vector512<byte> result)
{
Debug.Assert(Avx512F.IsSupported);
// Avx512BW.PackUnsignedSaturate will interleave the inputs in 8-byte blocks.
// We want to preserve the order of the two input vectors, so we deinterleave the packed value.
return Avx512F.PermuteVar8x64(result.AsInt64(), Vector512.Create(0, 2, 4, 6, 1, 3, 5, 7)).AsByte();
}
private interface ITransform
{
static abstract short TransformInput(short input);
static abstract Vector128<byte> TransformInput(Vector128<byte> input);
static abstract Vector256<byte> TransformInput(Vector256<byte> input);
static abstract Vector512<byte> TransformInput(Vector512<byte> input);
}
private readonly struct NopTransform : ITransform
{
public static short TransformInput(short input) => input;
public static Vector128<byte> TransformInput(Vector128<byte> input) => input;
public static Vector256<byte> TransformInput(Vector256<byte> input) => input;
public static Vector512<byte> TransformInput(Vector512<byte> input) => input;
}
private readonly struct Or20Transform : ITransform
{
public static short TransformInput(short input) => (short)(input | 0x20);
public static Vector128<byte> TransformInput(Vector128<byte> input) => input | Vector128.Create((byte)0x20);
public static Vector256<byte> TransformInput(Vector256<byte> input) => input | Vector256.Create((byte)0x20);
public static Vector512<byte> TransformInput(Vector512<byte> input) => input | Vector512.Create((byte)0x20);
}
}
}
|