// File: System\Linq\Parallel\QueryOperators\AssociativeAggregationOperator.cs
// Web Access
// Project: src\src\libraries\System.Linq.Parallel\src\System.Linq.Parallel.csproj (System.Linq.Parallel)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
//
// AssociativeAggregationOperator.cs
//
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
 
namespace System.Linq.Parallel
{
    /// <summary>
    /// The aggregation operator is a little unique, in that the enumerators it returns
    /// yield intermediate results instead of the final results. That's because there is
    /// one last Aggregate operation that must occur in order to perform the final reduction
    /// over the intermediate streams. In other words, the intermediate enumerators produced
    /// by this operator are never seen by other query operators or consumers directly.
    ///
    /// An aggregation performs parallel prefixing internally. Given a binary operator O,
    /// it will generate intermediate results by folding O across partitions; then it
    /// performs a final reduction by folding O across the intermediate results. The
    /// analysis engine knows about associativity and commutativity, and will ensure the
    /// style of partitioning inserted into the tree is compatible with the operator.
    ///
    /// For instance, say O is + (meaning it is AC), our input is {1,2,...,8}, and we
    /// use 4 partitions to calculate the aggregation. Sequentially this would look
    /// like this O(O(O(1,2),...),8), in other words ((1+2)+...)+8. The parallel prefix
    /// of this (w/ 4 partitions) instead calculates the intermediate aggregations, i.e.:
    /// t1 = O(1,2), t2 = O(3,4), ... t4 = O(7,8), aka t1 = 1+2, t2 = 3+4, t4 = 7+8.
    /// The final step is to aggregate O over these intermediaries, i.e.
    /// O(O(O(t1,t2),t3),t4), or ((t1+t2)+t3)+t4. This generalizes to any binary operator.
    ///
    /// Because some aggregations use different input, intermediate, and output types,
    /// we support an even more generalized aggregation type. In this model, we have
    /// three operators, an intermediate (used for the incremental aggregations), a
    /// final (used for the final summary of intermediate results), and a result selector
    /// (used to perform whatever transformation is needed on the final summary).
    /// </summary>
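    /// <typeparam name="TInput">The type of the elements in the child data source.</typeparam>
    /// <typeparam name="TIntermediate">The type of the accumulator and of each per-partition intermediate result.</typeparam>
    /// <typeparam name="TOutput">The type of the final value produced by the result selector.</typeparam>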
    internal sealed class AssociativeAggregationOperator<TInput, TIntermediate, TOutput> : UnaryQueryOperator<TInput, TIntermediate>
    {
        private readonly TIntermediate _seed; // The seed value; used wherever a seed is needed and no seed factory was supplied.
        private readonly bool _seedIsSpecified; // Whether a seed was specified. If not, each partition starts from its first element.
        private readonly bool _throwIfEmpty; // Whether Aggregate() throws when no partition produced a result.
 
        // The intermediate reduction function: folds one input element into a
        // partition's running accumulator.
        private readonly Func<TIntermediate, TInput, TIntermediate> _intermediateReduce;
 
        // The final reduction function: combines two intermediate (per-partition)
        // results into one. Invoked serially from Aggregate().
        private readonly Func<TIntermediate, TIntermediate, TIntermediate> _finalReduce;
 
        // The result selector function: maps the fully reduced accumulator to the output value.
        private readonly Func<TIntermediate, TOutput> _resultSelector;
 
        // An optional function that constructs seed instances. When non-null it is
        // invoked in place of reading _seed.
        private readonly Func<TIntermediate>? _seedFactory;
 
        //---------------------------------------------------------------------------------------
        // Creates an associative aggregation operator over the given child data source and
        // records the seed, the reduction delegates, and the empty-source policy.
        //
        // Assumptions:
        //     The reduction must be associative; debug builds assert that the Associative
        //     option is set.
        //     A seed must be supplied whenever TIntermediate differs from TInput.
        //

        internal AssociativeAggregationOperator(
            IEnumerable<TInput> child,
            TIntermediate seed,
            Func<TIntermediate>? seedFactory,
            bool seedIsSpecified,
            Func<TIntermediate, TInput, TIntermediate> intermediateReduce,
            Func<TIntermediate, TIntermediate, TIntermediate> finalReduce,
            Func<TIntermediate, TOutput> resultSelector,
            bool throwIfEmpty,
            QueryAggregationOptions options)
            : base(child)
        {
            // Debug-only validation of the arguments; 'options' is not used beyond these checks.
            Debug.Assert(child != null, "child data source cannot be null");
            Debug.Assert(intermediateReduce != null, "need an intermediate reduce function");
            Debug.Assert(finalReduce != null, "need a final reduce function");
            Debug.Assert(resultSelector != null, "need a result selector function");
            Debug.Assert(options.IsValidQueryAggregationOption(), "enum out of valid range");
            Debug.Assert(options.HasFlag(QueryAggregationOptions.Associative), "expected an associative operator");
            Debug.Assert(seedIsSpecified || typeof(TInput) == typeof(TIntermediate), "seed must be specified if TIntermediate differs from TInput");

            // Reduction pipeline: per-partition fold, cross-partition fold, final projection.
            _intermediateReduce = intermediateReduce;
            _finalReduce = finalReduce;
            _resultSelector = resultSelector;

            // Seeding and empty-source behavior.
            _seedIsSpecified = seedIsSpecified;
            _seed = seed;
            _seedFactory = seedFactory;
            _throwIfEmpty = throwIfEmpty;
        }
 
        //---------------------------------------------------------------------------------------
        // Runs the query, folds the per-partition intermediate results together with the
        // final reduction function, and projects the outcome through the result selector.
        //
        // Return Value:
        //     The single result of aggregation.
        //
        // Exceptions:
        //     InvalidOperationException if no partition produced a result and the operator
        //     was created with throwIfEmpty. Anything thrown by the final reduction or by
        //     the result selector is rethrown wrapped in an AggregateException.
        //

        internal TOutput Aggregate()
        {
            Debug.Assert(_finalReduce != null);
            Debug.Assert(_resultSelector != null);

            // The merged enumerator is fully consumed and disposed inside the helper,
            // i.e. before the result selector runs.
            TIntermediate reduced = ReducePartitionResults();

            try
            {
                return _resultSelector(reduced);
            }
            catch (Exception ex)
            {
                // Exceptions from the selector are surfaced as an aggregate.
                throw new AggregateException(ex);
            }
        }

        //---------------------------------------------------------------------------------------
        // Serially folds the intermediate results yielded by the partitions. Pipelining is
        // turned off (FullyBuffered): each partition does a lot of work to produce a single
        // element, while the final reduction over those elements is typically cheap.
        //
        // Return Value:
        //     The combined intermediate result, or a seed value when there were no
        //     intermediate results and the operator does not throw on empty input.
        //

        private TIntermediate ReducePartitionResults()
        {
            using IEnumerator<TIntermediate> partials = GetEnumerator(ParallelMergeOptions.FullyBuffered, true);

            if (!partials.MoveNext())
            {
                // Nothing was produced by any partition: either fail or fall back to a seed.
                if (_throwIfEmpty)
                {
                    throw new InvalidOperationException(SR.NoElements);
                }

                // NOTE(review): unlike the final reduction and the result selector, an
                // exception thrown by the seed factory here is not wrapped in an
                // AggregateException - confirm that this is intended.
                return _seedFactory != null ? _seedFactory() : _seed;
            }

            // The first intermediate result becomes the starting accumulator as-is.
            TIntermediate total = partials.Current;

            // Fold every remaining intermediate result into the accumulator. This yields the
            // correct answer only because the operation is associative.
            while (partials.MoveNext())
            {
                try
                {
                    total = _finalReduce(total, partials.Current);
                }
                catch (Exception ex)
                {
                    // Exceptions from the reduction are surfaced as an aggregate.
                    throw new AggregateException(ex);
                }
            }

            return total;
        }
 
        //---------------------------------------------------------------------------------------
        // Opens this operator: the child operator is opened first, and its results are then
        // wrapped in a unary-operator results object bound to this operator.
        //

        internal override QueryResults<TIntermediate> Open(QuerySettings settings, bool preferStriping) =>
            new UnaryQueryOperatorResults(Child.Open(settings, preferStriping), this, settings, preferStriping);
 
        //---------------------------------------------------------------------------------------
        // Builds an output stream with the same number of partitions as the input stream,
        // wraps each input partition in an aggregating enumerator, and hands the resulting
        // stream to the recipient. Output keys are ints (the partition index).
        //

        internal override void WrapPartitionedStream<TKey>(
            PartitionedStream<TInput, TKey> inputStream, IPartitionedStreamRecipient<TIntermediate> recipient,
            bool preferStriping, QuerySettings settings)
        {
            int count = inputStream.PartitionCount;
            var aggregated = new PartitionedStream<TIntermediate, int>(
                count, Util.GetDefaultComparer<int>(), OrdinalIndexState.Correct);

            for (int partition = 0; partition < count; partition++)
            {
                QueryOperatorEnumerator<TInput, TKey> source = inputStream[partition];
                aggregated[partition] = new AssociativeAggregationOperatorEnumerator<TKey>(
                    source, this, partition, settings.CancellationState.MergedCancellationToken);
            }

            recipient.Receive(aggregated);
        }
 
        //---------------------------------------------------------------------------------------
        // Sequential execution is not supported for this operator: the method fails a debug
        // assertion and then throws NotSupportedException.
        //

        [ExcludeFromCodeCoverage(Justification = "This method should never be called. Associative aggregation can always be parallelized")]
        internal override IEnumerable<TIntermediate> AsSequentialQuery(CancellationToken token)
        {
            const string unexpectedCall = "This method should never be called. Associative aggregation can always be parallelized.";

            Debug.Fail(unexpectedCall);
            throw new NotSupportedException();
        }
 
 
        //---------------------------------------------------------------------------------------
        // Reports whether this operator performs a premature merge that a comparable
        // sequential operation (i.e., in LINQ to Objects) would not perform. Always false here.
        //

        internal override bool LimitsParallelism => false;
 
 
        //---------------------------------------------------------------------------------------
        // Enumerator that performs the intermediate aggregation over a single partition of
        // the underlying data source. It yields at most one element: the partition's
        // intermediate result, keyed by the partition index.
        //

        private sealed class AssociativeAggregationOperatorEnumerator<TKey> : QueryOperatorEnumerator<TIntermediate, int>
        {
            private readonly QueryOperatorEnumerator<TInput, TKey> _source; // The partition's input data.
            private readonly AssociativeAggregationOperator<TInput, TIntermediate, TOutput> _reduceOperator; // The owning operator (seed and delegates).
            private readonly int _partitionIndex; // The index of this partition; reported as the result's key.
            private readonly CancellationToken _cancellationToken; // Polled periodically while scanning the source.
            private bool _accumulated; // Set on the first MoveNext call. (false-sharing risk, but only written once)


            //---------------------------------------------------------------------------------------
            // Instantiates a new aggregation enumerator over one partition.
            //

            internal AssociativeAggregationOperatorEnumerator(QueryOperatorEnumerator<TInput, TKey> source,
                                                              AssociativeAggregationOperator<TInput, TIntermediate, TOutput> reduceOperator, int partitionIndex,
                                                              CancellationToken cancellationToken)
            {
                Debug.Assert(source != null);
                Debug.Assert(reduceOperator != null);

                _reduceOperator = reduceOperator;
                _source = source;
                _cancellationToken = cancellationToken;
                _partitionIndex = partitionIndex;
            }

            //---------------------------------------------------------------------------------------
            // The first call drains the whole source partition. The accumulator starts from the
            // operator's seed (or, when no seed was specified, from the partition's first
            // element), and every remaining element is folded in with the intermediate
            // reduction function. The result is handed back through currentElement, with the
            // partition index as its key.
            //
            // Return Value:
            //     True on the first call if there is a result to report; false on every later
            //     call, and also when the source partition yielded no elements at all.
            //

            internal override bool MoveNext([MaybeNullWhen(false), AllowNull] ref TIntermediate currentElement, ref int currentKey)
            {
                Debug.Assert(_reduceOperator != null);
                Debug.Assert(_reduceOperator._intermediateReduce != null, "expected a compiled operator");

                // This enumerator yields at most one element, so any call after the first is a no-op.
                if (_accumulated)
                {
                    return false;
                }

                _accumulated = true;

                AssociativeAggregationOperator<TInput, TIntermediate, TOutput> op = _reduceOperator;
                Func<TIntermediate, TInput, TIntermediate> fold = op._intermediateReduce;

                TIntermediate running;
                bool sawInput;

                if (op._seedIsSpecified)
                {
                    // Start from the seed, preferring a factory-made instance when a factory exists.
                    running = op._seedFactory != null ? op._seedFactory() : op._seed;
                    sawInput = false;
                }
                else
                {
                    // Without a seed the first element plays that role, which is only possible
                    // when TInput and TIntermediate are the same type.
                    Debug.Assert(typeof(TInput) == typeof(TIntermediate));

                    TInput first = default(TInput)!;
                    TKey firstKeyUnused = default(TKey)!;
                    if (!_source.MoveNext(ref first!, ref firstKeyUnused))
                    {
                        return false;
                    }

                    running = (TIntermediate)((object?)first!);
                    sawInput = true;
                }

                // Fold the remaining elements into the accumulator, checking for cancellation
                // whenever the iteration counter masked with POLL_INTERVAL is zero.
                TInput item = default(TInput)!;
                TKey itemKeyUnused = default(TKey)!;
                for (int scanned = 0; _source.MoveNext(ref item!, ref itemKeyUnused); scanned++)
                {
                    if ((scanned & CancellationState.POLL_INTERVAL) == 0)
                    {
                        _cancellationToken.ThrowIfCancellationRequested();
                    }

                    sawInput = true;
                    running = fold(running, item);
                }

                // A seed alone, with no input elements behind it, does not count as a result.
                if (!sawInput)
                {
                    return false;
                }

                currentElement = running;
                currentKey = _partitionIndex; // A reduction's "index" is just its partition number.
                return true;
            }

            //---------------------------------------------------------------------------------------
            // Disposes the underlying source enumerator.
            //

            protected override void Dispose(bool disposing)
            {
                Debug.Assert(_source != null);
                _source.Dispose();
            }
        }
    }
}