|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using Microsoft.ML.Runtime;
namespace Microsoft.ML.Internal.Utilities
{
/// <summary>
/// A class containing common statistical functions
/// </summary>
[BestFriend]
internal static class Stats
{
/// <summary>
/// Returns a number uniformly sampled from 0...(rangeSize-1)
/// </summary>
/// <param name="rangeSize">Size of range to sample from, between 0 and int.MaxValue^2</param>
/// <param name="rand">Random number generator</param>
/// <returns>Sampled value</returns>
public static long SampleLong(long rangeSize, Random rand)
{
Contracts.CheckParam(rangeSize > 0, nameof(rangeSize), "rangeSize must be positive.");
if (rangeSize < int.MaxValue)
return rand.Next((int)rangeSize);
Contracts.Check(rangeSize <= (long)int.MaxValue * int.MaxValue,
"rangeSize must be no more than int.MaxValue^2");
int max = (int)Math.Ceiling(Math.Sqrt(rangeSize));
while ((long)max * max < rangeSize)
max++; // might happen due to rounding
long result;
do
{
result = (long)max * rand.Next(max) + rand.Next(max);
} while (result >= rangeSize);
return result;
}
private static readonly double _vScale = 2 * Math.Sqrt(2 / Math.E);
/// <summary>
/// Returns a number sampled from a zero-mean, unit variance Gaussian
/// </summary>
/// <param name="rand">A Random to use for the sampling</param>
/// <returns>a sample</returns>
/// <remarks>uses Joseph L. Leva's algorithm from "A fast normal random number generator", 1992</remarks>
public static double SampleFromGaussian(Random rand)
{
double u;
double v;
double q;
do
{
u = rand.NextDouble();
v = _vScale * (rand.NextDouble() - 0.5);
double x = u - 0.449871;
double y = Math.Abs(v) + 0.386595;
q = x * x + y * (0.19600 * y - 0.25472 * x);
} while (q > 0.27597 && (q > 0.27846 || v * v > -4 * u * u * Math.Log(u)));
return v / u;
}
/// <summary>
/// Returns a sample from the gamma distribution with scale parameter 1, shape parameter alpha
/// </summary>
/// <param name="alpha">Shape parameter</param>
/// <param name="r">The random number generator to use</param>
/// <returns>Sample from gamma distribution</returns>
/// <remarks>Uses Marsaglia and Tsang's fast algorithm</remarks>
public static double SampleFromGamma(Random r, double alpha)
{
Contracts.CheckParam(alpha > 0, nameof(alpha), "alpha must be positive");
if (alpha < 1)
return SampleFromGamma(r, alpha + 1) * Math.Pow(r.NextDouble(), 1.0 / alpha);
double d = alpha - 1.0 / 3;
double c = 1 / Math.Sqrt(9 * d);
double x;
double u;
double v;
while (true)
{
do
{
x = SampleFromGaussian(r);
v = Math.Pow(1.0 + c * x, 3);
} while (v <= 0);
u = r.NextDouble();
double xSqr = x * x;
if (u < 1.0 - 0.0331 * xSqr * xSqr ||
Math.Log(u) < 0.5 * xSqr + d * (1.0 - v + Math.Log(v)))
{
return d * v;
}
}
}
/// <summary>
/// Generates a beta-distributed random variable
/// </summary>
/// <param name="rand">Random generator to use</param>
/// <param name="alpha1">first parameter</param>
/// <param name="alpha2">second parameter</param>
/// <returns>Sample from distribution</returns>
public static double SampleFromBeta(Random rand, double alpha1, double alpha2)
{
double gamma1 = SampleFromGamma(rand, alpha1);
double gamma2 = SampleFromGamma(rand, alpha2);
return gamma1 / (gamma1 + gamma2);
}
/// <summary>
/// Generates a dirichlet-distributed random variable
/// </summary>
/// <param name="rand">Random generator to use</param>
/// <param name="alphas">array of parameters</param>
/// <param name="result">array in which to store resulting sample</param>
public static void SampleFromDirichlet(Random rand, double[] alphas, double[] result)
{
Contracts.Check(alphas.Length == result.Length,
"Dirichlet parameters must have the same dimensionality as sample space.");
double total = 0;
for (int i = 0; i < alphas.Length; i++)
{
total += result[i] = SampleFromGamma(rand, alphas[i]);
}
for (int i = 0; i < alphas.Length; i++)
{
result[i] /= total;
}
}
public static int SampleFromPoisson(Random rand, double lambda)
{
if (lambda < 5)
{
double expLam = Math.Exp(-lambda);
int k = 0;
double t = rand.NextDouble();
while (t > expLam)
{
k++;
t *= rand.NextDouble();
}
return k;
}
else
{
double sqrtLam = Math.Sqrt(lambda);
double logLam = double.NaN;
while (true)
{
double u = 0.64 * rand.NextDouble();
double v = -0.68 + 1.28 * rand.NextDouble();
double sqrV = 0;
if (lambda > 13.5)
{
sqrV = v * v;
if (v >= 0)
{
if (sqrV > 6.5 * u * (0.64 - u) * (u + 0.2))
continue;
}
else if (sqrV > 9.6 * u * (0.66 - u) * (u + 0.07))
continue;
}
int k = (int)(sqrtLam * v / u + lambda + 0.5);
if (k < 0)
continue;
double sqrU = u * u;
if (lambda > 13.5)
{
if (v >= 0)
{
if (sqrV < 15.2 * sqrU * (0.61 - u) * (0.8 - u))
return k;
}
else if (sqrV < 6.76 * sqrU * (0.62 - u) * (1.4 - u))
return k;
}
double logkFac = MathUtils.LogFactorial(k);
if (double.IsNaN(logLam))
logLam = Math.Log(lambda);
double p = sqrtLam * Math.Exp(-lambda + k * logLam - logkFac);
if (sqrU < p)
return k;
}
}
}
// Mean refers to the mu parameter. Scale refers to the b parameter.
// https://en.wikipedia.org/wiki/Laplace_distribution
public static float SampleFromLaplacian(Random rand, float mean, float scale)
{
float u = rand.NextSingle();
u = u - 0.5f;
float ret = mean;
if (u >= 0)
ret -= scale * MathUtils.Log(1 - 2 * u);
else
ret += scale * MathUtils.Log(1 + 2 * u);
return ret;
}
/// <summary>
/// Sample from a standard Cauchy distribution:
/// https://en.wikipedia.org/wiki/Lorentzian_function
/// </summary>
/// <param name="rand"></param>
/// <returns></returns>
public static float SampleFromCauchy(Random rand)
{
return (float)Math.Tan(Math.PI * (rand.NextSingle() - 0.5));
}
/// <summary>
/// Returns a number sampled from the binomial distribution with parameters n and p
/// </summary>
/// <param name="r">Random generator to use</param>
/// <param name="n">Parameter N of binomial</param>
/// <param name="p">Parameter p of binomial</param>
/// <returns></returns>
/// <remarks>Should be robust for all values of n, p</remarks>
public static int SampleFromBinomial(Random r, int n, double p)
{
return BinoRand.Next(r, n, p);
}
private static class BinoRand
{
// n*p at which we switch algorithms
private const int NPThresh = 10;
private static readonly double[] _fctab = new double[] {
0.08106146679532726, 0.04134069595540929, 0.02767792568499834,
0.02079067210376509, 0.01664469118982119, 0.01387612882307075,
0.01189670994589177, 0.01041126526197209, 0.009255462182712733,
0.008330563433362871
};
private const double One12 = 1.0 / 12;
private const double One360 = 1.0 / 360;
private const double One1260 = 1.0 / 1260;
private static double Fc(int k)
{
if (k < _fctab.Length)
return _fctab[k];
else
{
long k1 = k + 1;
long k1Sq = k1 * k1;
return (One12 - (One360 - One1260 / k1Sq) / k1Sq) / k1;
}
}
public static int Next(Random rand, int n, double p)
{
int x;
double pin = Math.Min(p, 1 - p);
if (n * pin < NPThresh)
x = InvTransform(n, pin, rand);
else
x = GenerateLarge(n, pin, rand);
if (pin != p)
return n - x;
else
return x;
}
// For small n
// Inverse transformation algorithm
// Described in Kachitvichyanukul and Schmeiser: "Binomial Random Variate Generation"
private static int InvTransform(int n, double p, Random rn)
{
int x = 0;
double u = rn.NextDouble();
double q = 1 - p;
double s = Math.Pow(q, n);
double r = p / q;
double a = (n + 1) * r;
for (; ; )
{
if (u <= s)
break;
u -= s;
x++;
s *= ((a / x) - r);
}
return x;
}
// For large n
// Algorithm from W. Hormann: "The Generation of Binomial Random Variables"
// This is algorithm BTRD
private static int GenerateLarge(int n, double p, Random rn)
{
double np = n * p;
double q = 1 - p;
double npq = np * q;
double spq = Math.Sqrt(npq);
double b = 1.15 + 2.53 * spq;
double vr = 0.92 - 4.2 / b;
double urvr = 0.86 * vr;
double a = -0.0873 + 0.0248 * b + 0.01 * p;
double a2 = 2.0 * a;
double c = np + 0.5;
// don't initialize these yet, because we may not need them
double alpha = double.NaN;
double r = double.NaN;
double nr = double.NaN;
int m = 0;
// especially this one: it requires a log
double h = double.NaN;
// Step 1
for (; ; )
{
double v = rn.NextDouble();
double u;
if (v <= urvr)
{
u = v / vr - 0.43;
return (int)Math.Floor((a2 / (0.5 - Math.Abs(u)) + b) * u + c);
}
// Step 2
if (v >= vr)
{
u = rn.NextDouble() - 0.5;
}
else
{
u = v / vr - 0.93;
u = Math.Sign(u) * 0.5 - u;
v = rn.NextDouble() * vr;
}
// Step 3.0
double us = 0.5 - Math.Abs(u);
double kd = Math.Floor((a2 / us + b) * u + c);
if (kd < 0 || kd > n)
continue;
int k = (int)kd;
if (double.IsNaN(alpha))
{
alpha = (2.83 + 5.1 / b) * spq;
m = (int)Math.Floor((n + 1) * p);
r = p / q;
nr = (n + 1) * r;
}
v *= alpha / (a / (us * us) + b);
int km = Math.Abs(k - m);
if (km <= 15)
{
// Step 3.1
// Recursive evaluation of f(k)
double f = 1;
if (m < k)
{
for (int i = m + 1; i <= k; ++i)
{
f *= (nr / (double)i - r);
}
}
else
{
for (int i = k + 1; i <= m; ++i)
{
v *= (nr / (double)i - r);
}
}
if (v <= f)
{
return k;
}
else
continue;
}
// Step 3.2: squeeze-acceptance or rejection
v = Math.Log(v);
double rho = (km / npq) * (((km / 3.0 + 0.625) * km + 0.1666666667) / npq + 0.5);
double t = -0.5 * km * km / npq;
if (v < t - rho)
{
return k;
}
if (v > t + rho)
continue;
// Step 3.3: setup for 3.4, moved to initialization
// Step 3.4: final acceptance-rejection test
int nm = (n - m + 1);
if (double.IsNaN(h))
h = (m + 0.5) * Math.Log((m + 1) / (r * nm)) + Fc(m) + Fc(n - m);
int nk = n - k + 1;
double vval = h + (n + 1) * Math.Log((double)nm / (double)nk) + (k + 0.5) * Math.Log(nk * r / (double)(k + 1)) - Fc(k) - Fc(n - k);
if (v <= vval)
{
return k;
}
}
}
}
}
}
|