|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections;
using System.Collections.Generic;
using Microsoft.ML.Runtime;
namespace Microsoft.ML.Internal.Utilities
{
/// <summary>
/// An array-like data structure that supports storing more than
/// <see cref="Utils.ArrayMaxSize"/> many entries, up to 0x7FEFFFFF00000L.
/// The entries are indexed by 64-bit integers, and a single entry can be accessed by
/// the indexer if no modifications to the entries is desired, or the <see cref="ApplyAt"/>
/// method. Efficient looping can be accomplished by calling the <see cref="ApplyRange"/> method.
/// This data structure employs the "length and capacity" pattern. The logical length
/// can be retrieved from the <see cref="Length"/> property, which can possibly be strictly less
/// than the total capacity.
/// </summary>
/// <typeparam name="T">The type of entries.</typeparam>
[BestFriend]
internal sealed class BigArray<T> : IEnumerable<T>
{
// REVIEW: This class merges and replaces the original private BigArray implementation in CacheDataView.
// There the block size was 25 bits. Need to understand the performance implication of this 32x change.
private const int BlockSizeBits = 20;
private const int BlockSize = 1 << BlockSizeBits;
// This field works both as the largest valid index in the big array and
// a bit mask used to determine block index. This is only valid if BlockSize
// is a power of two.
private const int BlockSizeMinusOne = BlockSize - 1;
private const long MaxSize = (long)Utils.ArrayMaxSize << BlockSizeBits;
// In the first few iterations, we are conservative with our
// memory allocations, but beyond this many subarrays, we go
// for full BlockSize allocation.
private const int FullAllocationBeyond = 4;
// The 2-D jagged array containing the entries.
// Its total size is larger than or equal to _length, but
// less than Length + BlockSize.
// Each one-dimension subarray has length equal to BlockSize,
// except for the last one, which has a positive length
// less than or equal to BlockSize.
private T[][] _entries;
// The logical length of the array. May be strictly less than
// the actual total size of _entries.
private long _length;
/// <summary>
/// Gets the logical length of the big array.
/// </summary>
public long Length { get { return _length; } }
/// <summary>
/// Gets or sets the entry at <paramref name="index"/>.
/// </summary>
/// <remarks>
/// This indexer is not efficient for looping. If looping access to entries is desired,
/// use the <see cref="ApplyRange"/> method instead.
/// Note that unlike a normal array, the value returned from this indexer getter cannot be modified
/// (for example, by ++ operator or passing into a method as a ref parameter). To modify an entry, use
/// the <see cref="ApplyAt"/> method instead.
/// </remarks>
public T this[long index]
{
get
{
Contracts.CheckParam(0 <= index && index < _length, nameof(index), "Index out of range.");
int bI = (int)(index >> BlockSizeBits);
int idx = (int)(index & BlockSizeMinusOne);
return _entries[bI][idx];
}
set
{
Contracts.CheckParam(0 <= index && index < _length, nameof(index), "Index out of range.");
int bI = (int)(index >> BlockSizeBits);
int idx = (int)(index & BlockSizeMinusOne);
_entries[bI][idx] = value;
}
}
/// <summary>
/// Initializes a new instance of the <see cref="BigArray{T}"/> class with a specified size.
/// </summary>
public BigArray(long size = 0)
{
// Verifies the preconditional invariant that BlockSize is a power of two.
Contracts.Assert(BlockSize > 1 && (BlockSize & (BlockSize - 1)) == 0, "Block size is not a power of two.");
Contracts.CheckParam(size >= 0, nameof(size), "Must be non-negative.");
if (size == 0)
{
_entries = new T[0][];
return;
}
Contracts.CheckParam(size <= MaxSize, nameof(size), "Size of BigArray is too large.");
var longBlockCount = ((size - 1) >> BlockSizeBits) + 1;
Contracts.Assert(longBlockCount <= Utils.ArrayMaxSize);
int blockCount = (int)longBlockCount;
int lastBlockSize = (int)(((size - 1) & BlockSizeMinusOne) + 1);
Contracts.Assert(blockCount > 0);
Contracts.Assert(0 < lastBlockSize && lastBlockSize <= BlockSize);
_length = size;
_entries = new T[blockCount][];
for (int i = 0; i < blockCount - 1; i++)
_entries[i] = new T[BlockSize];
_entries[blockCount - 1] = new T[lastBlockSize];
}
public delegate void Visitor(long index, ref T item);
/// <summary>
/// Applies a <see cref="Visitor"/> method at a given <paramref name="index"/>.
/// </summary>
public void ApplyAt(long index, Visitor manip)
{
Contracts.CheckValue(manip, nameof(manip));
Contracts.CheckParam(0 <= index && index < _length, nameof(index), "Index out of range.");
int bI = (int)(index >> BlockSizeBits);
int idx = (int)(index & BlockSizeMinusOne);
manip(index, ref _entries[bI][idx]);
}
/// <summary>
/// Implements a more efficient way to loop over index range in [min, lim) and apply
/// the specified method delegate.
/// </summary>
public void ApplyRange(long min, long lim, Visitor manip)
{
Contracts.CheckValue(manip, nameof(manip));
Contracts.CheckParam(min >= 0, nameof(min), "Specified minimum index must be non-negative.");
Contracts.CheckParam(lim <= _length, nameof(lim), "Specified limit index must be no more than length of the array.");
if (min >= lim)
return;
long max = lim - 1;
int minBlockIndex = (int)(min >> BlockSizeBits);
int minIndexInBlock = (int)(min & BlockSizeMinusOne);
int maxBlockIndex = (int)(max >> BlockSizeBits);
int maxIndexInBlock = (int)(max & BlockSizeMinusOne);
long index = min;
for (int bI = minBlockIndex; bI <= maxBlockIndex; bI++)
{
int idxMin = bI == minBlockIndex ? minIndexInBlock : 0;
int idxMax = bI == maxBlockIndex ? maxIndexInBlock : BlockSizeMinusOne;
var block = _entries[bI];
for (int idx = idxMin; idx <= idxMax; idx++)
manip(index++, ref block[idx]);
}
Contracts.Assert(index == lim);
}
/// <summary>
/// Fills the entries with index in [min, lim) with the given value.
/// </summary>
public void FillRange(long min, long lim, T value)
{
Contracts.CheckParam(min >= 0, nameof(min), "Specified minimum index must be non-negative.");
Contracts.CheckParam(lim <= _length, nameof(lim), "Specified limit index must be no more than length of the array.");
if (min >= lim)
return;
long max = lim - 1;
int minBlockIndex = (int)(min >> BlockSizeBits);
int minIndexInBlock = (int)(min & BlockSizeMinusOne);
int maxBlockIndex = (int)(max >> BlockSizeBits);
int maxIndexInBlock = (int)(max & BlockSizeMinusOne);
#if DEBUG
long index = min;
#endif
for (int bI = minBlockIndex; bI <= maxBlockIndex; bI++)
{
int idxMin = bI == minBlockIndex ? minIndexInBlock : 0;
int idxMax = bI == maxBlockIndex ? maxIndexInBlock : BlockSizeMinusOne;
var block = _entries[bI];
for (int idx = idxMin; idx <= idxMax; idx++)
{
block[idx] = value;
#if DEBUG
index++;
#endif
}
}
#if DEBUG
Contracts.Assert(index == lim);
#endif
}
/// <summary>
/// Resizes the array so that its logical length equals <paramref name="newLength"/>. This method
/// is more efficient than initialize another array and copy the entries because it preserves
/// existing blocks. The actual capacity of the array may become larger than <paramref name="newLength"/>.
/// If <paramref name="newLength"/> equals <see cref="Length"/>, then no operation is done.
/// If <paramref name="newLength"/> is less than <see cref="Length"/>, the array shrinks in size
/// so that both its length and its capacity equal <paramref name="newLength"/>.
/// If <paramref name="newLength"/> is larger than <see cref="Length"/>, the array capacity grows
/// to the smallest integral multiple of <see cref="BlockSize"/> that is larger than <paramref name="newLength"/>,
/// unless <paramref name="newLength"/> is less than <see cref="BlockSize"/>, in which case the capacity
/// grows to double its current capacity or <paramref name="newLength"/>, which ever is larger,
/// but up to <see cref="BlockSize"/>.
/// </summary>
public void Resize(long newLength)
{
Contracts.CheckParam(newLength >= 0, nameof(newLength), "Specified new size must be non-negative.");
Contracts.CheckParam(newLength <= MaxSize, nameof(newLength), "Specified new size is too large.");
if (newLength == _length)
return;
if (newLength == 0)
{
// Shrink to empty.
_entries = new T[0][];
_length = newLength;
return;
}
var longBlockCount = ((newLength - 1) >> BlockSizeBits) + 1;
Contracts.Assert(0 < longBlockCount && longBlockCount <= Utils.ArrayMaxSize);
int newBlockCount = (int)longBlockCount;
int newLastBlockLength = (int)(((newLength - 1) & BlockSizeMinusOne) + 1);
Contracts.Assert(0 < newLastBlockLength && newLastBlockLength <= BlockSize);
if (_length == 0)
{
_entries = new T[newBlockCount][];
for (int i = 0; i < newBlockCount - 1; i++)
_entries[i] = new T[BlockSize];
_entries[newBlockCount - 1] = new T[newLastBlockLength];
_length = newLength;
return;
}
int curBlockCount = _entries.GetLength(0);
Contracts.Assert(curBlockCount > 0);
int curLastBlockSize = Utils.Size(_entries[curBlockCount - 1]);
int curLastBlockLength = (int)(((_length - 1) & BlockSizeMinusOne) + 1);
Contracts.Assert(0 < curLastBlockLength && curLastBlockLength <= curLastBlockSize && curLastBlockSize <= BlockSize);
if (newLength < _length)
{
// Shrink to a smaller array
Contracts.Assert(newBlockCount < curBlockCount || (newBlockCount == curBlockCount && newLastBlockLength < curLastBlockLength));
Array.Resize(ref _entries, newBlockCount);
Array.Resize(ref _entries[newBlockCount - 1], newLastBlockLength);
}
else if (newLength <= ((long)curBlockCount) << BlockSizeBits)
{
// Grow to a larger array, but with the same number of blocks.
// So only need to grow the size of the last block if necessary.
Contracts.Assert(curBlockCount == newBlockCount);
if (newLastBlockLength > curLastBlockSize)
{
if (newBlockCount == 1)
{
Contracts.Assert(_length == curLastBlockLength);
Contracts.Assert(newLength == newLastBlockLength);
Contracts.Assert(_entries.Length == 1);
Array.Resize(ref _entries[0], Math.Min(BlockSize, Math.Max(2 * curLastBlockSize, newLastBlockLength)));
}
else
{
// Grow the last block to full size if there are more than one blocks.
Array.Resize(ref _entries[newBlockCount - 1], BlockSize);
}
}
}
else
{
// Need more blocks.
Contracts.Assert(newBlockCount > curBlockCount);
Array.Resize(ref _entries, newBlockCount);
Array.Resize(ref _entries[curBlockCount - 1], BlockSize);
for (int bI = curBlockCount; bI < newBlockCount; bI++)
_entries[bI] = new T[BlockSize];
}
_length = newLength;
}
/// <summary>
/// Trims the capacity to logical length.
/// </summary>
public void TrimCapacity()
{
if (_length == 0)
{
Contracts.Assert(Utils.Size(_entries) == 0);
return;
}
int maMax;
int miLim;
LongLimToMajorMaxMinorLim(_length, out maMax, out miLim);
Contracts.Assert(maMax >= 0);
Contracts.Assert(0 < miLim && miLim <= Utils.Size(_entries[maMax]));
if (Utils.Size(_entries[maMax]) != miLim)
Array.Resize(ref _entries[maMax], miLim);
Array.Resize(ref _entries, maMax + 1);
}
/// <summary>
/// Appends the elements of <paramref name="src"/> to the end.
/// This method is thread safe related to calls to <see cref="M:CopyTo"/> (assuming those copy operations
/// are happening over ranges already added), but concurrent calls to
/// <see cref="M:AddRange"/> should not be attempted. Intended usage is that
/// one thread will call this method, while multiple threads may access
/// previously added ranges from <see cref="M:CopyTo"/>, concurrently with
/// this method or themselves.
/// </summary>
public void AddRange(ReadOnlySpan<T> src)
{
if (src.IsEmpty)
return;
int maMin;
int miMin;
int maMax;
int miLim;
LongMinToMajorMinorMin(_length, out maMin, out miMin);
LongLimToMajorMaxMinorLim(_length + src.Length, out maMax, out miLim);
Contracts.Assert(maMin <= maMax); // Could be violated if length == 0, but we already took care of this.
Utils.EnsureSize(ref _entries, maMax + 1, BlockSize);
switch (maMax - maMin)
{
case 0:
// Spans only one subarray, most common case and simplest implementation.
Contracts.Assert(miLim - miMin == src.Length);
Utils.EnsureSize(ref _entries[maMax], maMax >= FullAllocationBeyond ? BlockSize : miLim, BlockSize);
src.CopyTo(_entries[maMax].AsSpan(miMin));
break;
case 1:
// Spans two subarrays.
Contracts.Assert((BlockSize - miMin) + miLim == src.Length);
Utils.EnsureSize(ref _entries[maMin], BlockSize, BlockSize);
int firstSubArrayCapacity = BlockSize - miMin;
src.Slice(0, firstSubArrayCapacity).CopyTo(_entries[maMin].AsSpan(miMin));
Contracts.Assert(_entries[maMax] == null);
Utils.EnsureSize(ref _entries[maMax], maMax >= FullAllocationBeyond ? BlockSize : miLim, BlockSize);
src.Slice(firstSubArrayCapacity, miLim).CopyTo(_entries[maMax]);
break;
default:
// Spans three or more subarrays. Very rare.
int miSubMin = miMin;
// Copy the first segment.
Utils.EnsureSize(ref _entries[maMin], BlockSize, BlockSize);
int srcSoFar = BlockSize - miMin;
src.Slice(0, srcSoFar).CopyTo(_entries[maMin].AsSpan(miMin));
// Copy the internal segments.
for (int major = maMin + 1; major < maMax; ++major)
{
Contracts.Assert(_entries[major] == null);
_entries[major] = new T[BlockSize];
src.Slice(srcSoFar, BlockSize).CopyTo(_entries[major]);
srcSoFar += BlockSize;
Contracts.Assert(srcSoFar < src.Length);
}
// Copy the last segment.
Contracts.Assert(src.Length - srcSoFar == miLim);
Contracts.Assert(_entries[maMax] == null);
Utils.EnsureSize(ref _entries[maMax], maMax >= FullAllocationBeyond ? BlockSize : miLim, BlockSize);
src.Slice(srcSoFar, miLim).CopyTo(_entries[maMax]);
break;
}
_length += src.Length;
}
/// <summary>
/// Copies the subarray starting from index <paramref name="idx"/> of length
/// <paramref name="length"/> to the destination array <paramref name="dst"/>.
/// Concurrent calls to this method is valid even with one single concurrent call
/// to <see cref="M:AddRange"/>.
/// </summary>
public void CopyTo(long idx, Span<T> dst, int length)
{
// Accesses on the internal arrays of this class should be valid even if
// some other thread is utilizing AddRange, since Utils.EnsureSize(...) will
// not replace the array until any allocation or copying has already happened.
Contracts.Assert(0 <= length && length <= dst.Length);
Contracts.Assert(idx <= Length && length <= Length - idx);
if (length == 0)
return;
int maMin;
int miMin;
int maMax;
int miLim;
LongMinToMajorMinorMin(idx, out maMin, out miMin);
LongLimToMajorMaxMinorLim(idx + length, out maMax, out miLim);
Contracts.Assert(maMin <= maMax); // Could happen if length == 0, but we already took care of this.
switch (maMax - maMin)
{
case 0:
// Spans only one subarray, most common case and simplest implementation.
Contracts.Assert(miLim - miMin == length);
Contracts.Assert(miLim <= Utils.Size(_entries[maMax]));
_entries[maMax].AsSpan(miMin, length).CopyTo(dst);
break;
case 1:
// Spans two subarrays.
Contracts.Assert((BlockSize - miMin) + miLim == length);
Contracts.Assert(BlockSize <= Utils.Size(_entries[maMin]));
_entries[maMin].AsSpan(miMin, BlockSize - miMin).CopyTo(dst);
Contracts.Assert(miLim <= Utils.Size(_entries[maMax]));
_entries[maMax].AsSpan(0, miLim).CopyTo(dst.Slice(BlockSize - miMin));
break;
default:
// Spans three or more subarrays. Very rare.
int miSubMin = miMin;
// Copy the first segment.
Contracts.Assert(BlockSize <= Utils.Size(_entries[maMin]));
int dstSoFar = BlockSize - miMin;
_entries[maMin].AsSpan(miMin, dstSoFar).CopyTo(dst);
// Copy the internal segments.
for (int major = maMin + 1; major < maMax; ++major)
{
Contracts.Assert(BlockSize <= Utils.Size(_entries[major]));
_entries[major].AsSpan(0, BlockSize).CopyTo(dst.Slice(dstSoFar));
dstSoFar += BlockSize;
Contracts.Assert(dstSoFar < length);
}
// Copy the last segment.
Contracts.Assert(length - dstSoFar == miLim);
Contracts.Assert(miLim <= Utils.Size(_entries[maMax]));
_entries[maMax].AsSpan(0, miLim).CopyTo(dst.Slice(dstSoFar));
break;
}
}
private static void LongMinToMajorMinorMin(long min, out int major, out int minor)
{
Contracts.Assert(min >= 0);
Contracts.Assert((min >> BlockSizeBits) < int.MaxValue);
major = (int)(min >> BlockSizeBits);
minor = (int)(min & BlockSizeMinusOne);
Contracts.Assert((long)major * BlockSize + minor == min);
}
private static void LongLimToMajorMaxMinorLim(long lim, out int major, out int minor)
{
Contracts.Assert(lim > 0);
Contracts.Assert((lim >> BlockSizeBits) < int.MaxValue);
// Note that lim below this point is the original lim minus 1.
major = (int)((--lim) >> BlockSizeBits);
minor = (int)((lim & BlockSizeMinusOne) + 1);
Contracts.Assert((long)major * BlockSize + minor == lim + 1);
}
public IEnumerator<T> GetEnumerator()
{
long cur = 0;
while (cur < _length)
{
yield return this[cur];
cur++;
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
}
|