File: System\Diagnostics\Metrics\ExponentialHistogramAggregator.cs
Web Access
Project: src\src\libraries\System.Diagnostics.DiagnosticSource\src\System.Diagnostics.DiagnosticSource.csproj (System.Diagnostics.DiagnosticSource)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections.Generic;
 
namespace System.Diagnostics.Metrics
{
    internal sealed class QuantileAggregation
    {
        public QuantileAggregation(params double[] quantiles)
        {
            Quantiles = quantiles;
            Array.Sort(Quantiles);
        }
 
        public double[] Quantiles { get; set; }
        public double MaxRelativeError { get; set; } = 0.001;
    }
 
 
 
    // This histogram ensures that the quantiles reported from the histogram are within some bounded % error of the correct
    // value. More mathematically, if we have a set of measurements where quantile X = Y, the histogram should always report a
    // value Y` where Y*(1-E) <= Y` <= Y*(1+E). E is our allowable error, so if E = 0.01 then the reported value Y` is
    // between 0.99*Y and 1.01*Y. We achieve this by ensuring that if a bucket holds measurements from M_min to M_max
    // then M_max - M_min <= M_min*E. We can determine which bucket must hold quantile X and we know that all values assigned to
    // the bucket are within the error bound if we approximate the result as M_min.
    // Note: we should be able to refine this to return the bucket midpoint rather than bucket lower bound, halving the number of
    // buckets to achieve the same error bound.
    //
    // Implementation: The histogram buckets are implemented as an array of arrays (a tree). The top level has a fixed 4096 entries
    // corresponding to every possible sign+exponent in the encoding of a double (IEEE 754 spec). The 2nd level has variable size
    // depending on how many buckets are needed to achieve the error bounds. For ease of insertion we round the 2nd level size up to
    // the nearest power of 2. This lets us mask off the first k bits in the mantissa to map a measurement to one of 2^k 2nd level
    // buckets. The top level array is pre-allocated but the 2nd level arrays are created on demand.
    //
    // PERF Note: This histogram has a fast Update() but the _counters array has a sizable memory footprint (32KB+ on 64 bit)
    // It is probably well suited for tracking 10s or maybe 100s of histograms but if we wanted to go higher
    // we probably want to trade a little more CPU cost in Update() + code complexity to avoid eagerly allocating 4096
    // top level entries.
    internal sealed class ExponentialHistogramAggregator : Aggregator
    {
        private const int ExponentArraySize = 4096;
        private const int ExponentShift = 52;
        private const double MinRelativeError = 0.0001;
        private const int PositiveIntAndNan = ExponentArraySize / 2 - 1;
        private const int NegativeIntAndNan = ExponentArraySize - 1;
 
        private readonly QuantileAggregation _config;
        private int[]?[] _counters;
        private int _count;
        private double _sum;
        private readonly int _mantissaMax;
        private readonly int _mantissaMask;
        private readonly int _mantissaShift;
 
        private struct Bucket
        {
            public Bucket(double value, int count)
            {
                Value = value;
                Count = count;
            }
            public double Value;
            public int Count;
        }
 
        public ExponentialHistogramAggregator(QuantileAggregation config)
        {
            _config = config;
            _counters = new int[ExponentArraySize][];
            if (_config.MaxRelativeError < MinRelativeError)
            {
                // Ensure that we don't create enormous histograms trying to get overly high precision
                throw new ArgumentException();
            }
            int mantissaBits = (int)Math.Ceiling(Math.Log(1 / _config.MaxRelativeError, 2)) - 1;
            _mantissaShift = 52 - mantissaBits;
            _mantissaMax = 1 << mantissaBits;
            _mantissaMask = _mantissaMax - 1;
        }
 
        public override IAggregationStatistics Collect()
        {
            int[]?[] counters;
            int count;
            double sum;
            lock (this)
            {
                counters = _counters;
                count = _count;
                sum = _sum;
                _counters = new int[ExponentArraySize][];
                _count = 0;
                _sum = 0;
            }
 
            QuantileValue[] quantiles = new QuantileValue[_config.Quantiles.Length];
            int nextQuantileIndex = 0;
            if (nextQuantileIndex == _config.Quantiles.Length)
            {
                return new HistogramStatistics(quantiles, count, sum);
            }
 
            // Consider each bucket to have N entries in it, and each entry has value GetBucketCanonicalValue().
            // If all these entries were inserted in a sorted array, we are trying to find the value of the entry with
            // index=target.
            int target = QuantileToRank(_config.Quantiles[nextQuantileIndex], count);
 
            // the total number of entries in all buckets iterated so far
            int cur = 0;
            foreach (Bucket b in IterateBuckets(counters))
            {
                cur += b.Count;
                while (cur > target)
                {
                    quantiles[nextQuantileIndex] = new QuantileValue(
                        _config.Quantiles[nextQuantileIndex], b.Value);
                    nextQuantileIndex++;
                    if (nextQuantileIndex == _config.Quantiles.Length)
                    {
                        return new HistogramStatistics(quantiles, count, sum);
                    }
                    target = QuantileToRank(_config.Quantiles[nextQuantileIndex], count);
                }
            }
 
            Debug.Assert(count == 0);
            return new HistogramStatistics(Array.Empty<QuantileValue>(), count, sum);
        }
 
        private IEnumerable<Bucket> IterateBuckets(int[]?[] counters)
        {
            // iterate over the negative exponent buckets
            const int LowestNegativeOffset = ExponentArraySize / 2;
            // exponent = ExponentArraySize-1 encodes infinity and NaN, which we want to ignore
            for (int exponent = ExponentArraySize - 2; exponent >= LowestNegativeOffset; exponent--)
            {
                int[]? mantissaCounts = counters[exponent];
                if (mantissaCounts == null)
                {
                    continue;
                }
                for (int mantissa = _mantissaMax - 1; mantissa >= 0; mantissa--)
                {
                    int count = mantissaCounts[mantissa];
                    if (count > 0)
                    {
                        yield return new Bucket(GetBucketCanonicalValue(exponent, mantissa), count);
                    }
                }
            }
 
            // iterate over the positive exponent buckets
            // exponent = lowestNegativeOffset-1 encodes infinity and NaN, which we want to ignore
            for (int exponent = 0; exponent < LowestNegativeOffset - 1; exponent++)
            {
                int[]? mantissaCounts = counters[exponent];
                if (mantissaCounts == null)
                {
                    continue;
                }
                for (int mantissa = 0; mantissa < _mantissaMax; mantissa++)
                {
                    int count = mantissaCounts[mantissa];
                    if (count > 0)
                    {
                        yield return new Bucket(GetBucketCanonicalValue(exponent, mantissa), count);
                    }
                }
            }
        }
 
        public override void Update(double measurement)
        {
            lock (this)
            {
                // This is relying on the bit representation of IEEE 754 to decompose
                // the double. The sign bit + exponent bits land in exponent, the
                // remainder lands in mantissa.
                // the bucketing precision comes entirely from how many significant
                // bits of the mantissa are preserved.
                ulong bits = (ulong)BitConverter.DoubleToInt64Bits(measurement);
                int exponent = (int)(bits >> ExponentShift);
                int mantissa = (int)(bits >> _mantissaShift) & _mantissaMask;
                ref int[]? mantissaCounts = ref _counters[exponent];
                mantissaCounts ??= new int[_mantissaMax];
                mantissaCounts[mantissa]++;
 
                // Don't increase the count if there are any NaN or +/-Infinity values that were logged
                if (exponent != PositiveIntAndNan && exponent != NegativeIntAndNan)
                {
                    _count++;
                    _sum += measurement;
                }
            }
        }
 
        private static int QuantileToRank(double quantile, int count)
        {
            return Math.Min(Math.Max(0, (int)(quantile * count)), count - 1);
        }
 
        // This is the upper bound for negative valued buckets and the
        // lower bound for positive valued buckets
        private double GetBucketCanonicalValue(int exponent, int mantissa)
        {
            long bits = ((long)exponent << ExponentShift) | ((long)mantissa << _mantissaShift);
            return BitConverter.Int64BitsToDouble(bits);
        }
    }
}