// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. #nullable enable // NOTE: This code is derived from an implementation originally in dotnet/runtime: // https://github.com/dotnet/runtime/blob/v8.0.3/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs // // See the commentary in https://github.com/dotnet/roslyn/pull/50156 for notes on incorporating changes made to the // reference implementation. using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Threading; using Microsoft.CodeAnalysis.Collections.Internal; namespace Microsoft.CodeAnalysis.Collections { [DebuggerTypeProxy(typeof(ICollectionDebugView<>))] [DebuggerDisplay("Count = {Count}")] internal class SegmentedHashSet<T> : ICollection<T>, ISet<T>, IReadOnlyCollection<T> #if NET5_0_OR_GREATER , IReadOnlySet<T> #endif { private const bool SupportsComparerDevirtualization #if NET = true; #else = false; #endif // This uses the same array-based implementation as Dictionary<TKey, TValue>. /// <summary>Cutoff point for stackallocs. This corresponds to the number of ints.</summary> private const int StackAllocThreshold = 100; /// <summary> /// When constructing a hashset from an existing collection, it may contain duplicates, /// so this is used as the max acceptable excess ratio of capacity to count. Note that /// this is only used on the ctor and not to automatically shrink if the hashset has, e.g, /// a lot of adds followed by removes. Users must explicitly shrink by calling TrimExcess. /// This is set to 3 because capacity is acceptable as 2x rounded up to nearest prime. /// </summary> private const int ShrinkThreshold = 3; private const int StartOfFreeList = -3; private static IEnumerator<T>? s_emptyEnumerator; private SegmentedArray<int> _buckets; private SegmentedArray<Entry> _entries; private ulong _fastModMultiplier; private int _count; private int _freeList; private int _freeCount; private int _version; #if NET private readonly IEqualityComparer<T>? _comparer; #else /// <summary> /// <see cref="EqualityComparer{T}.Default"/> doesn't devirtualize on .NET Framework, so we always ensure /// <see cref="_comparer"/> is initialized to a non-<see langword="null"/> value. /// </summary> private readonly IEqualityComparer<T> _comparer; #endif #region Constructors public SegmentedHashSet() : this((IEqualityComparer<T>?)null) { } public SegmentedHashSet(IEqualityComparer<T>? comparer) { // For reference types, we always want to store a comparer instance, either // the one provided, or if one wasn't provided, the default (accessing // EqualityComparer<TKey>.Default with shared generics on every dictionary // access can add measurable overhead). For value types, if no comparer is // provided, or if the default is provided, we'd prefer to use // EqualityComparer<TKey>.Default.Equals on every use, enabling the JIT to // devirtualize and possibly inline the operation. if (!typeof(T).IsValueType) { _comparer = comparer ?? EqualityComparer<T>.Default; } else if (comparer is not null && // first check for null to avoid forcing default comparer instantiation unnecessarily comparer != EqualityComparer<T>.Default) { _comparer = comparer; } #if !NETCOREAPP // .NET Framework doesn't support devirtualization, so we always initialize comparer to a non-null value _comparer ??= EqualityComparer<T>.Default; #endif } public SegmentedHashSet(int capacity) : this(capacity, null) { } public SegmentedHashSet(IEnumerable<T> collection) : this(collection, null) { } public SegmentedHashSet(IEnumerable<T> collection, IEqualityComparer<T>? comparer) : this(comparer) { if (collection == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collection); } if (collection is SegmentedHashSet<T> otherAsHashSet && EqualityComparersAreEqual(this, otherAsHashSet)) { ConstructFrom(otherAsHashSet); } else { // To avoid excess resizes, first set size based on collection's count. The collection may // contain duplicates, so call TrimExcess if resulting SegmentedHashSet is larger than the threshold. if (collection is ICollection<T> coll) { var count = coll.Count; if (count > 0) { Initialize(count); } } UnionWith(collection); if (_count > 0 && _entries.Length / _count > ShrinkThreshold) { TrimExcess(); } } } public SegmentedHashSet(int capacity, IEqualityComparer<T>? comparer) : this(comparer) { if (capacity < 0) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); } if (capacity > 0) { Initialize(capacity); } } /// <summary>Initializes the SegmentedHashSet from another SegmentedHashSet with the same element type and equality comparer.</summary> private void ConstructFrom(SegmentedHashSet<T> source) { if (source.Count == 0) { // As well as short-circuiting on the rest of the work done, // this avoids errors from trying to access source._buckets // or source._entries when they aren't initialized. return; } var capacity = source._buckets.Length; var threshold = HashHelpers.ExpandPrime(source.Count + 1); if (threshold >= capacity) { _buckets = (SegmentedArray<int>)source._buckets.Clone(); _entries = (SegmentedArray<Entry>)source._entries.Clone(); _freeList = source._freeList; _freeCount = source._freeCount; _count = source._count; _fastModMultiplier = source._fastModMultiplier; } else { Initialize(source.Count); var entries = source._entries; for (var i = 0; i < source._count; i++) { ref var entry = ref entries[i]; if (entry._next >= -1) { AddIfNotPresent(entry._value, out _); } } } Debug.Assert(Count == source.Count); } #endregion #region ICollection<T> methods void ICollection<T>.Add(T item) => AddIfNotPresent(item, out _); /// <summary>Removes all elements from the <see cref="SegmentedHashSet{T}"/> object.</summary> public void Clear() { var count = _count; if (count > 0) { Debug.Assert(_buckets.Length > 0, "_buckets should be non-empty"); Debug.Assert(_entries.Length > 0, "_entries should be non-empty"); SegmentedArray.Clear(_buckets); _count = 0; _freeList = -1; _freeCount = 0; SegmentedArray.Clear(_entries, 0, count); } } /// <summary>Determines whether the <see cref="SegmentedHashSet{T}"/> contains the specified element.</summary> /// <param name="item">The element to locate in the <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object contains the specified element; otherwise, false.</returns> public bool Contains(T item) => FindItemIndex(item) >= 0; /// <summary>Gets the index of the item in <see cref="_entries"/>, or -1 if it's not in the set.</summary> private int FindItemIndex(T item) { var buckets = _buckets; if (buckets.Length > 0) { var entries = _entries; Debug.Assert(entries.Length > 0, "Expected _entries to be initialized"); uint collisionCount = 0; var comparer = _comparer; if (SupportsComparerDevirtualization && typeof(T).IsValueType && // comparer can only be null for value types; enable JIT to eliminate entire if block for ref types comparer == null) { // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic var hashCode = item!.GetHashCode(); var i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based while (i >= 0) { ref var entry = ref entries[i]; if (entry._hashCode == hashCode && EqualityComparer<T>.Default.Equals(entry._value, item)) { return i; } i = entry._next; collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop, which means a concurrent update has happened. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } else { Debug.Assert(comparer is not null); var hashCode = item != null ? comparer!.GetHashCode(item) : 0; var i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based while (i >= 0) { ref var entry = ref entries[i]; if (entry._hashCode == hashCode && comparer!.Equals(entry._value, item)) { return i; } i = entry._next; collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop, which means a concurrent update has happened. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } } return -1; } /// <summary>Gets a reference to the specified hashcode's bucket, containing an index into <see cref="_entries"/>.</summary> [MethodImpl(MethodImplOptions.AggressiveInlining)] private ref int GetBucketRef(int hashCode) { var buckets = _buckets; return ref buckets[(int)HashHelpers.FastMod((uint)hashCode, (uint)buckets.Length, _fastModMultiplier)]; } public bool Remove(T item) { if (_buckets.Length > 0) { var entries = _entries; Debug.Assert(entries.Length > 0, "entries should be non-empty"); uint collisionCount = 0; var last = -1; IEqualityComparer<T>? comparer = _comparer; Debug.Assert((SupportsComparerDevirtualization && typeof(T).IsValueType) || comparer is not null); int hashCode = SupportsComparerDevirtualization && typeof(T).IsValueType && comparer == null ? item!.GetHashCode() : item is not null ? comparer!.GetHashCode(item) : 0; ref var bucket = ref GetBucketRef(hashCode); var i = bucket - 1; // Value in buckets is 1-based while (i >= 0) { ref var entry = ref entries[i]; if (entry._hashCode == hashCode && (comparer?.Equals(entry._value, item) ?? EqualityComparer<T>.Default.Equals(entry._value, item))) { if (last < 0) { bucket = entry._next + 1; // Value in buckets is 1-based } else { entries[last]._next = entry._next; } Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); entry._next = StartOfFreeList - _freeList; #if NET if (RuntimeHelpers.IsReferenceOrContainsReferences<T>()) #endif { entry._value = default!; } _freeList = i; _freeCount++; return true; } last = i; i = entry._next; collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } return false; } /// <summary>Gets the number of elements that are contained in the set.</summary> public int Count => _count - _freeCount; bool ICollection<T>.IsReadOnly => false; #endregion #region IEnumerable methods public Enumerator GetEnumerator() => new(this); IEnumerator<T> IEnumerable<T>.GetEnumerator() => Count == 0 ? GetEmptyEnumerator() : GetEnumerator(); IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable<T>)this).GetEnumerator(); private static IEnumerator<T> GetEmptyEnumerator() { return LazyInitializer.EnsureInitialized(ref s_emptyEnumerator, static () => new Enumerator(new SegmentedHashSet<T>()))!; } #endregion #region SegmentedHashSet methods /// <summary>Adds the specified element to the <see cref="SegmentedHashSet{T}"/>.</summary> /// <param name="item">The element to add to the set.</param> /// <returns>true if the element is added to the <see cref="SegmentedHashSet{T}"/> object; false if the element is already present.</returns> public bool Add(T item) => AddIfNotPresent(item, out _); /// <summary>Searches the set for a given value and returns the equal value it finds, if any.</summary> /// <param name="equalValue">The value to search for.</param> /// <param name="actualValue">The value from the set that the search found, or the default value of <typeparamref name="T"/> when the search yielded no match.</param> /// <returns>A value indicating whether the search was successful.</returns> /// <remarks> /// This can be useful when you want to reuse a previously stored reference instead of /// a newly constructed one (so that more sharing of references can occur) or to look up /// a value that has more complete data than the value you currently have, although their /// comparer functions indicate they are equal. /// </remarks> public bool TryGetValue(T equalValue, [MaybeNullWhen(false)] out T actualValue) { if (_buckets.Length > 0) { var index = FindItemIndex(equalValue); if (index >= 0) { actualValue = _entries[index]._value; return true; } } actualValue = default; return false; } /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain all elements that are present in itself, the specified collection, or both.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> public void UnionWith(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } foreach (var item in other) { AddIfNotPresent(item, out _); } } /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain only elements that are present in that object and in the specified collection.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> public void IntersectWith(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // Intersection of anything with empty set is empty set, so return if count is 0. // Same if the set intersecting with itself is the same set. if (Count == 0 || other == this) { return; } // If other is known to be empty, intersection is empty set; remove all elements, and we're done. if (other is ICollection<T> otherAsCollection) { if (otherAsCollection.Count == 0) { Clear(); return; } // Faster if other is a hashset using same equality comparer; so check // that other is a hashset using the same equality comparer. if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { IntersectWithHashSetWithSameComparer(otherAsSet); return; } } IntersectWithEnumerable(other); } /// <summary>Removes all elements in the specified collection from the current <see cref="SegmentedHashSet{T}"/> object.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> public void ExceptWith(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // This is already the empty set; return. if (Count == 0) { return; } // Special case if other is this; a set minus itself is the empty set. if (other == this) { Clear(); return; } // Remove every element in other from this. foreach (var element in other) { Remove(element); } } /// <summary>Modifies the current <see cref="SegmentedHashSet{T}"/> object to contain only elements that are present either in that object or in the specified collection, but not both.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> public void SymmetricExceptWith(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // If set is empty, then symmetric difference is other. if (Count == 0) { UnionWith(other); return; } // Special-case this; the symmetric difference of a set with itself is the empty set. if (other == this) { Clear(); return; } // If other is a SegmentedHashSet, it has unique elements according to its equality comparer, // but if they're using different equality comparers, then assumption of uniqueness // will fail. So first check if other is a hashset using the same equality comparer; // symmetric except is a lot faster and avoids bit array allocations if we can assume // uniqueness. if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { SymmetricExceptWithUniqueHashSet(otherAsSet); } else { SymmetricExceptWithEnumerable(other); } } /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a subset of the specified collection.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a subset of <paramref name="other"/>; otherwise, false.</returns> public bool IsSubsetOf(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // The empty set is a subset of any set, and a set is a subset of itself. // Set is always a subset of itself if (Count == 0 || other == this) { return true; } // Faster if other has unique elements according to this equality comparer; so check // that other is a hashset using the same equality comparer. if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { // if this has more elements then it can't be a subset if (Count > otherAsSet.Count) { return false; } // already checked that we're using same equality comparer. simply check that // each element in this is contained in other. return IsSubsetOfHashSetWithSameComparer(otherAsSet); } (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); return uniqueCount == Count && unfoundCount >= 0; } /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper subset of the specified collection.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a proper subset of <paramref name="other"/>; otherwise, false.</returns> public bool IsProperSubsetOf(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // No set is a proper subset of itself. if (other == this) { return false; } if (other is ICollection<T> otherAsCollection) { // No set is a proper subset of an empty set. if (otherAsCollection.Count == 0) { return false; } // The empty set is a proper subset of anything but the empty set. if (Count == 0) { return otherAsCollection.Count > 0; } // Faster if other is a hashset (and we're using same equality comparer). if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { if (Count >= otherAsSet.Count) { return false; } // This has strictly less than number of items in other, so the following // check suffices for proper subset. return IsSubsetOfHashSetWithSameComparer(otherAsSet); } } (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); return uniqueCount == Count && unfoundCount > 0; } /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper superset of the specified collection.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a superset of <paramref name="other"/>; otherwise, false.</returns> public bool IsSupersetOf(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // A set is always a superset of itself. if (other == this) { return true; } // Try to fall out early based on counts. if (other is ICollection<T> otherAsCollection) { // If other is the empty set then this is a superset. if (otherAsCollection.Count == 0) { return true; } // Try to compare based on counts alone if other is a hashset with same equality comparer. if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet) && otherAsSet.Count > Count) { return false; } } foreach (T element in other) { if (!Contains(element)) { return false; } } return true; } /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object is a proper superset of the specified collection.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is a proper superset of <paramref name="other"/>; otherwise, false.</returns> public bool IsProperSupersetOf(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // The empty set isn't a proper superset of any set, and a set is never a strict superset of itself. if (Count == 0 || other == this) { return false; } if (other is ICollection<T> otherAsCollection) { // If other is the empty set then this is a superset. if (otherAsCollection.Count == 0) { // Note that this has at least one element, based on above check. return true; } // Faster if other is a hashset with the same equality comparer if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { if (otherAsSet.Count >= Count) { return false; } // Now perform element check. return otherAsSet.IsSubsetOfHashSetWithSameComparer(this); } } // Couldn't fall out in the above cases; do it the long way (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true); return uniqueCount < Count && unfoundCount == 0; } /// <summary>Determines whether the current <see cref="SegmentedHashSet{T}"/> object and a specified collection share common elements.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object and <paramref name="other"/> share at least one common element; otherwise, false.</returns> public bool Overlaps(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } if (Count == 0) { return false; } // Set overlaps itself if (other == this) { return true; } foreach (var element in other) { if (Contains(element)) { return true; } } return false; } /// <summary>Determines whether a <see cref="SegmentedHashSet{T}"/> object and the specified collection contain the same elements.</summary> /// <param name="other">The collection to compare to the current <see cref="SegmentedHashSet{T}"/> object.</param> /// <returns>true if the <see cref="SegmentedHashSet{T}"/> object is equal to <paramref name="other"/>; otherwise, false.</returns> public bool SetEquals(IEnumerable<T> other) { if (other == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.other); } // A set is equal to itself. if (other == this) { return true; } // Faster if other is a hashset and we're using same equality comparer. if (other is SegmentedHashSet<T> otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { // Attempt to return early: since both contain unique elements, if they have // different counts, then they can't be equal. if (Count != otherAsSet.Count) { return false; } // Already confirmed that the sets have the same number of distinct elements, so if // one is a subset of the other then they must be equal. return IsSubsetOfHashSetWithSameComparer(otherAsSet); } else { // If this count is 0 but other contains at least one element, they can't be equal. if (Count == 0 && other is ICollection<T> otherAsCollection && otherAsCollection.Count > 0) { return false; } (var uniqueCount, var unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: true); return uniqueCount == Count && unfoundCount == 0; } } public void CopyTo(T[] array) => CopyTo(array, 0, Count); /// <summary>Copies the elements of a <see cref="SegmentedHashSet{T}"/> object to an array, starting at the specified array index.</summary> /// <param name="array">The destination array.</param> /// <param name="arrayIndex">The zero-based index in array at which copying begins.</param> public void CopyTo(T[] array, int arrayIndex) => CopyTo(array, arrayIndex, Count); public void CopyTo(T[] array, int arrayIndex, int count) { if (array == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); } #if NET8_0_OR_GREATER ArgumentOutOfRangeException.ThrowIfNegative(arrayIndex); ArgumentOutOfRangeException.ThrowIfNegative(count); #else // Check array index valid index into array. if (arrayIndex < 0) { throw new ArgumentOutOfRangeException(nameof(arrayIndex), arrayIndex, SR.ArgumentOutOfRange_NeedNonNegNum); } // Also throw if count less than 0. if (count < 0) { throw new ArgumentOutOfRangeException(nameof(count), count, SR.ArgumentOutOfRange_NeedNonNegNum); } #endif // Will the array, starting at arrayIndex, be able to hold elements? Note: not // checking arrayIndex >= array.Length (consistency with list of allowing // count of 0; subsequent check takes care of the rest) if (arrayIndex > array.Length || count > array.Length - arrayIndex) { ThrowHelper.ThrowArgumentException(ExceptionResource.Arg_ArrayPlusOffTooSmall); } var entries = _entries; for (var i = 0; i < _count && count != 0; i++) { ref var entry = ref entries[i]; if (entry._next >= -1) { array[arrayIndex++] = entry._value; count--; } } } /// <summary>Removes all elements that match the conditions defined by the specified predicate from a <see cref="SegmentedHashSet{T}"/> collection.</summary> public int RemoveWhere(Predicate<T> match) { if (match == null) { ThrowHelper.ThrowArgumentNullException(ExceptionArgument.match); } var entries = _entries; var numRemoved = 0; for (var i = 0; i < _count; i++) { ref var entry = ref entries[i]; if (entry._next >= -1) { // Cache value in case delegate removes it var value = entry._value; if (match(value)) { // Check again that remove actually removed it. if (Remove(value)) { numRemoved++; } } } } return numRemoved; } /// <summary>Gets the <see cref="IEqualityComparer"/> object that is used to determine equality for the values in the set.</summary> public IEqualityComparer<T> Comparer { get { return _comparer ?? EqualityComparer<T>.Default; } } /// <summary>Ensures that this hash set can hold the specified number of elements without growing.</summary> public int EnsureCapacity(int capacity) { if (capacity < 0) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity); } var currentCapacity = _entries.Length; if (currentCapacity >= capacity) { return currentCapacity; } if (_buckets.Length == 0) { return Initialize(capacity); } var newSize = HashHelpers.GetPrime(capacity); Resize(newSize); return newSize; } private void Resize() => Resize(HashHelpers.ExpandPrime(_count)); private void Resize(int newSize) { Debug.Assert(_entries.Length > 0, "_entries should be non-empty"); Debug.Assert(newSize >= _entries.Length); var count = _count; // Rather than creating a copy of _entries, instead reuse as much of it's data as possible. var entries = CreateNewSegmentedArrayReusingOldSegments(_entries, newSize); // Assign member variables after both arrays allocated to guard against corruption from OOM if second fails _buckets = new SegmentedArray<int>(newSize); _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)newSize); for (var i = 0; i < count; i++) { ref var entry = ref entries[i]; if (entry._next >= -1) { ref var bucket = ref GetBucketRef(entry._hashCode); entry._next = bucket - 1; // Value in _buckets is 1-based bucket = i + 1; } } _entries = entries; } private static SegmentedArray<Entry> CreateNewSegmentedArrayReusingOldSegments(SegmentedArray<Entry> oldArray, int newSize) { var segments = SegmentedCollectionsMarshal.AsSegments(oldArray); var oldSegmentCount = segments.Length; var newSegmentCount = (newSize + SegmentedArrayHelper.GetSegmentSize<Entry>() - 1) >> SegmentedArrayHelper.GetSegmentShift<Entry>(); // Grow the array of segments, if necessary Array.Resize(ref segments, newSegmentCount); // Resize all segments to full segment size from the last old segment to the next to last // new segment. for (var i = oldSegmentCount - 1; i < newSegmentCount - 1; i++) Array.Resize(ref segments[i], SegmentedArrayHelper.GetSegmentSize<Entry>()); // Resize the last segment var lastSegmentSize = newSize - ((newSegmentCount - 1) << SegmentedArrayHelper.GetSegmentShift<Entry>()); Array.Resize(ref segments[newSegmentCount - 1], lastSegmentSize); return SegmentedCollectionsMarshal.AsSegmentedArray(newSize, segments); } /// <summary> /// Sets the capacity of a <see cref="SegmentedHashSet{T}"/> object to the actual number of elements it contains, /// rounded up to a nearby, implementation-specific value. /// </summary> public void TrimExcess() { var capacity = Count; var newSize = HashHelpers.GetPrime(capacity); var oldEntries = _entries; var currentCapacity = oldEntries.Length; if (newSize >= currentCapacity) { return; } var oldCount = _count; _version++; Initialize(newSize); var entries = _entries; var count = 0; for (var i = 0; i < oldCount; i++) { var hashCode = oldEntries[i]._hashCode; // At this point, we know we have entries. if (oldEntries[i]._next >= -1) { ref var entry = ref entries[count]; entry = oldEntries[i]; ref var bucket = ref GetBucketRef(hashCode); entry._next = bucket - 1; // Value in _buckets is 1-based bucket = count + 1; count++; } } _count = capacity; _freeCount = 0; } #endregion #region Helper methods /// <summary>Returns an <see cref="IEqualityComparer"/> object that can be used for equality testing of a <see cref="SegmentedHashSet{T}"/> object.</summary> public static IEqualityComparer<SegmentedHashSet<T>> CreateSetComparer() => new SegmentedHashSetEqualityComparer<T>(); /// <summary> /// Initializes buckets and slots arrays. Uses suggested capacity by finding next prime /// greater than or equal to capacity. /// </summary> private int Initialize(int capacity) { var size = HashHelpers.GetPrime(capacity); var buckets = new SegmentedArray<int>(size); var entries = new SegmentedArray<Entry>(size); // Assign member variables after both arrays are allocated to guard against corruption from OOM if second fails. _freeList = -1; _buckets = buckets; _entries = entries; _fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)size); return size; } /// <summary>Adds the specified element to the set if it's not already contained.</summary> /// <param name="value">The element to add to the set.</param> /// <param name="location">The index into <see cref="_entries"/> of the element.</param> /// <returns>true if the element is added to the <see cref="SegmentedHashSet{T}"/> object; false if the element is already present.</returns> private bool AddIfNotPresent(T value, out int location) { if (_buckets.Length == 0) { Initialize(0); } Debug.Assert(_buckets.Length > 0); var entries = _entries; Debug.Assert(entries.Length > 0, "expected entries to be non-empty"); var comparer = _comparer; int hashCode; uint collisionCount = 0; ref var bucket = ref RoslynUnsafe.NullRef<int>(); if (SupportsComparerDevirtualization && typeof(T).IsValueType && // comparer can only be null for value types; enable JIT to eliminate entire if block for ref types comparer == null) { hashCode = value!.GetHashCode(); bucket = ref GetBucketRef(hashCode); var i = bucket - 1; // Value in _buckets is 1-based // ValueType: Devirtualize with EqualityComparer<TValue>.Default intrinsic while (i >= 0) { ref var entry = ref entries[i]; if (entry._hashCode == hashCode && EqualityComparer<T>.Default.Equals(entry._value, value)) { location = i; return false; } i = entry._next; collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop, which means a concurrent update has happened. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } else { Debug.Assert(comparer is not null); hashCode = value != null ? comparer!.GetHashCode(value) : 0; bucket = ref GetBucketRef(hashCode); var i = bucket - 1; // Value in _buckets is 1-based while (i >= 0) { ref var entry = ref entries[i]; if (entry._hashCode == hashCode && comparer!.Equals(entry._value, value)) { location = i; return false; } i = entry._next; collisionCount++; if (collisionCount > (uint)entries.Length) { // The chain of entries forms a loop, which means a concurrent update has happened. ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); } } } int index; if (_freeCount > 0) { index = _freeList; _freeCount--; Debug.Assert((StartOfFreeList - entries[_freeList]._next) >= -1, "shouldn't overflow because `next` cannot underflow"); _freeList = StartOfFreeList - entries[_freeList]._next; } else { var count = _count; if (count == entries.Length) { Resize(); bucket = ref GetBucketRef(hashCode); } index = count; _count = count + 1; entries = _entries; } { ref var entry = ref entries[index]; entry._hashCode = hashCode; entry._next = bucket - 1; // Value in _buckets is 1-based entry._value = value; bucket = index + 1; _version++; location = index; } return true; } /// <summary> /// Implementation Notes: /// If other is a hashset and is using same equality comparer, then checking subset is /// faster. Simply check that each element in this is in other. /// /// Note: if other doesn't use same equality comparer, then Contains check is invalid, /// which is why callers must take are of this. /// /// If callers are concerned about whether this is a proper subset, they take care of that. /// </summary> internal bool IsSubsetOfHashSetWithSameComparer(SegmentedHashSet<T> other) { foreach (var item in this) { if (!other.Contains(item)) { return false; } } return true; } /// <summary> /// If other is a hashset that uses same equality comparer, intersect is much faster /// because we can use other's Contains /// </summary> private void IntersectWithHashSetWithSameComparer(SegmentedHashSet<T> other) { var entries = _entries; for (var i = 0; i < _count; i++) { ref var entry = ref entries[i]; if (entry._next >= -1) { var item = entry._value; if (!other.Contains(item)) { Remove(item); } } } } /// <summary> /// Iterate over other. If contained in this, mark an element in bit array corresponding to /// its position in _slots. If anything is unmarked (in bit array), remove it. /// /// This attempts to allocate on the stack, if below StackAllocThreshold. /// </summary> private unsafe void IntersectWithEnumerable(IEnumerable<T> other) { Debug.Assert(_buckets.Length > 0, "_buckets shouldn't be empty; callers should check first"); // Keep track of current last index; don't want to move past the end of our bit array // (could happen if another thread is modifying the collection). var originalCount = _count; int intArrayLength = BitHelper.ToIntArrayLength(originalCount); Span<int> span = stackalloc int[StackAllocThreshold]; var bitHelper = intArrayLength <= StackAllocThreshold ? new BitHelper(span.Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); // Mark if contains: find index of in slots array and mark corresponding element in bit array. foreach (var item in other) { var index = FindItemIndex(item); if (index >= 0) { bitHelper.MarkBit(index); } } // If anything unmarked, remove it. Perf can be optimized here if BitHelper had a // FindFirstUnmarked method. for (var i = 0; i < originalCount; i++) { ref var entry = ref _entries[i]; if (entry._next >= -1 && !bitHelper.IsMarked(i)) { Remove(entry._value); } } } /// <summary> /// if other is a set, we can assume it doesn't have duplicate elements, so use this /// technique: if can't remove, then it wasn't present in this set, so add. /// /// As with other methods, callers take care of ensuring that other is a hashset using the /// same equality comparer. /// </summary> /// <param name="other"></param> private void SymmetricExceptWithUniqueHashSet(SegmentedHashSet<T> other) { foreach (var item in other) { if (!Remove(item)) { AddIfNotPresent(item, out _); } } } /// <summary> /// Implementation notes: /// /// Used for symmetric except when other isn't a SegmentedHashSet. This is more tedious because /// other may contain duplicates. SegmentedHashSet technique could fail in these situations: /// 1. Other has a duplicate that's not in this: SegmentedHashSet technique would add then /// remove it. /// 2. Other has a duplicate that's in this: SegmentedHashSet technique would remove then add it /// back. /// In general, its presence would be toggled each time it appears in other. /// /// This technique uses bit marking to indicate whether to add/remove the item. If already /// present in collection, it will get marked for deletion. If added from other, it will /// get marked as something not to remove. /// /// </summary> /// <param name="other"></param> private unsafe void SymmetricExceptWithEnumerable(IEnumerable<T> other) { var originalCount = _count; int intArrayLength = BitHelper.ToIntArrayLength(originalCount); Span<int> itemsToRemoveSpan = stackalloc int[StackAllocThreshold / 2]; var itemsToRemove = intArrayLength <= StackAllocThreshold / 2 ? new BitHelper(itemsToRemoveSpan.Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); Span<int> itemsAddedFromOtherSpan = stackalloc int[StackAllocThreshold / 2]; var itemsAddedFromOther = intArrayLength <= StackAllocThreshold / 2 ? new BitHelper(itemsAddedFromOtherSpan.Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); foreach (var item in other) { if (AddIfNotPresent(item, out var location)) { // wasn't already present in collection; flag it as something not to remove // *NOTE* if location is out of range, we should ignore. BitHelper will // detect that it's out of bounds and not try to mark it. But it's // expected that location could be out of bounds because adding the item // will increase _lastIndex as soon as all the free spots are filled. itemsAddedFromOther.MarkBit(location); } else { // already there...if not added from other, mark for remove. // *NOTE* Even though BitHelper will check that location is in range, we want // to check here. There's no point in checking items beyond originalCount // because they could not have been in the original collection if (location < originalCount && !itemsAddedFromOther.IsMarked(location)) { itemsToRemove.MarkBit(location); } } } // if anything marked, remove it for (var i = 0; i < originalCount; i++) { if (itemsToRemove.IsMarked(i)) { Remove(_entries[i]._value); } } } /// <summary> /// Determines counts that can be used to determine equality, subset, and superset. This /// is only used when other is an IEnumerable and not a SegmentedHashSet. If other is a SegmentedHashSet /// these properties can be checked faster without use of marking because we can assume /// other has no duplicates. /// /// The following count checks are performed by callers: /// 1. Equals: checks if unfoundCount = 0 and uniqueFoundCount = _count; i.e. everything /// in other is in this and everything in this is in other /// 2. Subset: checks if unfoundCount >= 0 and uniqueFoundCount = _count; i.e. other may /// have elements not in this and everything in this is in other /// 3. Proper subset: checks if unfoundCount > 0 and uniqueFoundCount = _count; i.e /// other must have at least one element not in this and everything in this is in other /// 4. Proper superset: checks if unfound count = 0 and uniqueFoundCount strictly less /// than _count; i.e. everything in other was in this and this had at least one element /// not contained in other. /// /// An earlier implementation used delegates to perform these checks rather than returning /// an ElementCount struct; however this was changed due to the perf overhead of delegates. /// </summary> /// <param name="other"></param> /// <param name="returnIfUnfound">Allows us to finish faster for equals and proper superset /// because unfoundCount must be 0.</param> private (int UniqueCount, int UnfoundCount) CheckUniqueAndUnfoundElements(IEnumerable<T> other, bool returnIfUnfound) { // Need special case in case this has no elements. if (_count == 0) { var numElementsInOther = 0; foreach (var item in other) { numElementsInOther++; break; // break right away, all we want to know is whether other has 0 or 1 elements } return (UniqueCount: 0, UnfoundCount: numElementsInOther); } Debug.Assert((_buckets.Length > 0) && (_count > 0), "_buckets was empty but count greater than 0"); var originalCount = _count; int intArrayLength = BitHelper.ToIntArrayLength(originalCount); Span<int> span = stackalloc int[StackAllocThreshold]; var bitHelper = intArrayLength <= StackAllocThreshold ? new BitHelper(span.Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); var unfoundCount = 0; // count of items in other not found in this var uniqueFoundCount = 0; // count of unique items in other found in this foreach (var item in other) { var index = FindItemIndex(item); if (index >= 0) { if (!bitHelper.IsMarked(index)) { // Item hasn't been seen yet. bitHelper.MarkBit(index); uniqueFoundCount++; } } else { unfoundCount++; if (returnIfUnfound) { break; } } } return (uniqueFoundCount, unfoundCount); } /// <summary> /// Checks if equality comparers are equal. This is used for algorithms that can /// speed up if it knows the other item has unique elements. I.e. if they're using /// different equality comparers, then uniqueness assumption between sets break. /// </summary> internal static bool EqualityComparersAreEqual(SegmentedHashSet<T> set1, SegmentedHashSet<T> set2) => set1.Comparer.Equals(set2.Comparer); #endregion private struct Entry { public int _hashCode; /// <summary> /// 0-based index of next entry in chain: -1 means end of chain /// also encodes whether this entry _itself_ is part of the free list by changing sign and subtracting 3, /// so -2 means end of free list, -3 means index 0 but on free list, -4 means index 1 but on free list, etc. /// </summary> public int _next; public T _value; } public struct Enumerator : IEnumerator<T> { private readonly SegmentedHashSet<T> _hashSet; private readonly int _version; private int _index; private T _current; internal Enumerator(SegmentedHashSet<T> hashSet) { _hashSet = hashSet; _version = hashSet._version; _index = 0; _current = default!; } public bool MoveNext() { if (_version != _hashSet._version) { ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); } // Use unsigned comparison since we set index to dictionary.count+1 when the enumeration ends. // dictionary.count+1 could be negative if dictionary.count is int.MaxValue while ((uint)_index < (uint)_hashSet._count) { ref var entry = ref _hashSet._entries[_index++]; if (entry._next >= -1) { _current = entry._value; return true; } } _index = _hashSet._count + 1; _current = default!; return false; } public readonly T Current => _current; public readonly void Dispose() { } readonly object? IEnumerator.Current { get { if (_index == 0 || (_index == _hashSet._count + 1)) { ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumOpCantHappen(); } return _current; } } void IEnumerator.Reset() { if (_version != _hashSet._version) { ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); } _index = 0; _current = default!; } } } } |