// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.ML.Data;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
using Microsoft.ML.SearchSpace;
using Microsoft.ML.Trainers.FastTree;
namespace Microsoft.ML.AutoML
{
/// <summary>
/// SMAC is based on SMBO (sequential model-based optimization). This implementation uses a random forest as the
/// regressor and Expected Improvement (EI) as the acquisition function. In practice, SMAC works well on search
/// spaces whose dimension is no larger than 20.
/// </summary>
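/// <remarks>
/// High-level loop, as implemented below: propose random configurations until the initial population is filled,
/// then repeatedly fit a random forest surrogate on the observed (parameter, loss) pairs and refill the candidate
/// queue with configurations that maximize EI, interleaved with random ones.
/// </remarks>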
internal class SmacTuner : ITuner
{
private readonly ITuner _randomTuner;
private readonly MLContext _context;
private readonly SearchSpace.SearchSpace _searchSpace;
private readonly List<TrialResult> _histories;
private Queue<Parameter> _candidates;
private readonly Random _rnd = new Random();
private readonly IChannel _channel;
#region Smac Tuner arguments
// Number of points to use for random initialization
private readonly int _numberInitialPopulation;
private readonly int _fitModelEveryNTrials;
// Number of regression trees in forest
private readonly int _numOfTrees;
// Minimum number of data points required to be in a node if it is to be split further
private readonly int _nMinForSplit;
// Fraction of eligible dimensions to split on (i.e., split ratio)
private readonly float _splitRatio;
// Number of search parents to use for local search in maximizing EI acquisition function
private readonly int _localSearchParentCount;
// Number of random configurations when maximizing EI acquisition function
private readonly int _numRandomEISearchConfigurations;
// Minimum EI improvement required for local search to keep going
private readonly double _epsilon;
// Number of neighbors to sample for locally searching each numerical parameter
private readonly int _numNeighborsForNumericalParams;
#endregion
public SmacTuner(MLContext context,
SearchSpace.SearchSpace searchSpace,
int numberInitialPopulation = 20,
int fitModelEveryNTrials = 10,
int numberOfTrees = 10,
int nMinForSplit = 2,
float splitRatio = 0.8f,
int localSearchParentCount = 5,
int numRandomEISearchConfigurations = 5000,
double epsilon = 1e-5,
int numNeighborsForNumericalParams = 4,
int? seed = null,
IChannel channel = null)
{
_context = context;
_searchSpace = searchSpace;
_randomTuner = new RandomSearchTuner(_searchSpace, seed);
_histories = new List<TrialResult>();
_candidates = new Queue<Parameter>();
_fitModelEveryNTrials = fitModelEveryNTrials;
_numberInitialPopulation = numberInitialPopulation;
if (seed.HasValue)
{
_rnd = new Random(seed.Value);
}
_numOfTrees = numberOfTrees;
_nMinForSplit = nMinForSplit;
_splitRatio = splitRatio;
_localSearchParentCount = localSearchParentCount;
_numRandomEISearchConfigurations = numRandomEISearchConfigurations;
_epsilon = epsilon;
_numNeighborsForNumericalParams = numNeighborsForNumericalParams;
_channel = channel;
}
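/// <summary>
/// Proposes the next configuration to try: random search while the initial population is being gathered, then
/// candidates drawn from a queue that is refilled, by fitting the forest and maximizing EI, whenever it empties.
/// </summary>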
public Parameter Propose(TrialSettings settings)
{
var trialCount = _histories.Count + _candidates.Count;
if (trialCount <= _numberInitialPopulation)
{
return _randomTuner.Propose(settings);
}
else
{
if (_candidates.Count <= 0)
{
var model = FitModel(_histories);
_candidates = new Queue<Parameter>(GenerateCandidateConfigurations(_fitModelEveryNTrials, _histories, model));
}
return _candidates.Dequeue();
}
}
// Exposed for testing.
internal Queue<Parameter> Candidates => _candidates;
// Exposed for testing.
internal List<TrialResult> Histories => _histories;
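/// <summary>
/// Fits a random forest regressor that maps each trial's parameters, projected into the search space's feature
/// space, to its observed loss. The forest serves as the surrogate model for EI computation.
/// </summary>
/// <param name="history">Previously completed trials used as training data.</param>
/// <returns>The trained forest model parameters.</returns>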
private FastForestRegressionModelParameters FitModel(IEnumerable<TrialResult> history)
{
var count = history.Count();
float[] losses = new float[count];
float[][] features = new float[count][];
int i = 0;
foreach (var r in history)
{
features[i] = _searchSpace.MappingToFeatureSpace(r.TrialSettings.Parameter).Select(f => Convert.ToSingle(f)).ToArray();
losses[i] = Convert.ToSingle(r.Loss);
i++;
}
ArrayDataViewBuilder dvBuilder = new ArrayDataViewBuilder(_context);
dvBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, losses);
dvBuilder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single, features);
_channel?.Trace("SMAC - start fitting random forest regressor");
IDataView data = dvBuilder.GetDataView();
// Set relevant random forest arguments.
// Train random forest.
var trainer = _context.Regression.Trainers.FastForest(new FastForestRegressionTrainer.Options()
{
FeatureFraction = _splitRatio,
NumberOfTrees = _numOfTrees,
MinimumExampleCountPerLeaf = _nMinForSplit,
});
var trainTestSplit = _context.Data.TrainTestSplit(data);
var model = trainer.Fit(trainTestSplit.TrainSet);
var predictor = model.Model;
var test = model.Transform(trainTestSplit.TestSet);
var eval = _context.Regression.Evaluate(test);
_channel?.Trace("SMAC - end fitting random forest regressor");
_channel?.Trace($"SMAC - fitting metric: rsquare {eval.RSquared}");
// Return random forest predictor.
return predictor;
}
/// <summary>
/// Generates a set of candidate configurations to sweep through, based on a combination of random and local
/// search, as outlined in Hutter et al. - Sequential Model-Based Optimization for General Algorithm Configuration.
/// Makes use of class private members which determine how many candidates are returned. Half of the requested
/// candidates come from local search maximizing EI; the remainder are random configurations interleaved with
/// them (per the paper).
/// </summary>
/// <param name="numOfCandidates">Number of candidate solutions to return.</param>
/// <param name="previousRuns">History of previously evaluated points, with their empirical performance values.</param>
/// <param name="forest">Trained random forest ensemble. Used in evaluating the candidates.</param>
/// <returns>An array of Parameters which are the candidate configurations to sweep.</returns>
private Parameter[] GenerateCandidateConfigurations(int numOfCandidates, IEnumerable<TrialResult> previousRuns, FastForestRegressionModelParameters forest)
{
// Get k best previous runs ParameterSets.
var bestKParamSets = _histories.OrderBy(i => i.Loss).Take(_localSearchParentCount).Select(r => r.TrialSettings.Parameter);
// Perform local searches using the k best previous run configurations.
var eiChallengers = GreedyPlusRandomSearch(bestKParamSets, forest, numOfCandidates >> 1);
// Generate another set of random configurations to interleave.
var randomChallengers = Enumerable.Range(0, numOfCandidates - eiChallengers.Length).Select(i => _randomTuner.Propose(new TrialSettings()));
// Return the EI challengers interleaved with the random candidates. Either set may contain fewer entries than
// requested, since only unique candidates are generated and the yield of each method can vary considerably.
return eiChallengers.Concat(randomChallengers).ToArray();
}
/// <summary>
/// Does a mix of greedy local search around best performing parameter sets, while throwing random parameter sets into the mix.
/// </summary>
/// <param name="parents">Beginning locations for local greedy search.</param>
/// <param name="forest">Trained random forest, used later for evaluating parameters.</param>
/// <param name="numOfCandidates">Number of candidate configurations returned by the method (top K).</param>
/// <returns>Array of parameter sets, which will then be evaluated.</returns>
private Parameter[] GreedyPlusRandomSearch(IEnumerable<Parameter> parents, FastForestRegressionModelParameters forest, int numOfCandidates)
{
double bestLoss = _histories.Min(t => t.Loss);
var configurations = new HashSet<Tuple<double, Parameter>>();
// Perform local search.
foreach (var c in parents)
{
var bestChildKvp = LocalSearch(c, forest, bestLoss);
configurations.Add(bestChildKvp);
}
// Additional set of random configurations to choose from during local search.
var randomParameters = Enumerable.Range(0, _numRandomEISearchConfigurations).Select(i => _randomTuner.Propose(new TrialSettings()));
var randomConfigurations = randomParameters.Select(parameter => new Tuple<double, Parameter>(EvaluateConfigurationsByEI(forest, bestLoss, parameter), parameter));
var orderedConfigurations = configurations.Concat(randomConfigurations).OrderByDescending(p => p.Item1);
// Parameter implements IEqualityComparer<Parameter>, so a dummy instance can serve as the set's comparer.
var comparer = Parameter.FromInt(0);
var retainedConfigs = new HashSet<Parameter>(orderedConfigurations.Select(x => x.Item2), comparer);
// Remove configurations that match a previously evaluated run.
foreach (var previousRun in _histories)
{
retainedConfigs.Remove(previousRun.TrialSettings.Parameter);
}
return retainedConfigs.Take(numOfCandidates).ToArray();
}
/// <summary>
/// Performs a local one-mutation neighborhood greedy search.
/// </summary>
/// <param name="startParameter">Starting parameter set configuration.</param>
/// <param name="forest">Trained forest, for evaluation of points.</param>
/// <param name="bestLoss">Best performance seen thus far.</param>
/// <returns>The best EI value found, paired with its parameter configuration.</returns>
private Tuple<double, Parameter> LocalSearch(Parameter startParameter, FastForestRegressionModelParameters forest, double bestLoss)
{
try
{
double currentBestEI = EvaluateConfigurationsByEI(forest, bestLoss, startParameter);
var currentBestConfig = startParameter;
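// Hill-climb: repeatedly move to the best one-mutation neighbor, stopping once the best neighbor no longer
// improves EI by at least _epsilon.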
while (true)
{
Parameter[] neighborhood = GetOneMutationNeighborhood(currentBestConfig);
var eis = neighborhood.Select(p => EvaluateConfigurationsByEI(forest, bestLoss, p)).ToArray();
var maxIndex = eis.ArgMax();
// Stop once the best neighbor fails to improve EI by at least epsilon; never move to a worse neighbor.
if (eis[maxIndex] - currentBestEI < _epsilon)
break;
else
{
currentBestConfig = neighborhood[maxIndex];
currentBestEI = eis[maxIndex];
}
}
return new Tuple<double, Parameter>(currentBestEI, currentBestConfig);
}
catch (Exception e)
{
throw new InvalidOperationException("SMAC tuner local search threw an exception", e);
}
}
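/// <summary>
/// Builds the one-mutation neighborhood of a configuration: for each dimension of the feature space, a discrete
/// parameter is advanced by one step (wrapping around at 1), while a numeric parameter gets
/// <see cref="_numNeighborsForNumericalParams"/> Gaussian-perturbed neighbors kept inside [0, 1] by rejection sampling.
/// </summary>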
private Parameter[] GetOneMutationNeighborhood(Parameter currentBestConfig)
{
var neighborhood = new List<Parameter>();
var features = _searchSpace.MappingToFeatureSpace(currentBestConfig);
for (int d = 0; d != _searchSpace.FeatureSpaceDim; ++d)
{
// Copy the feature vector so each neighbor mutates its own copy.
var newFeatures = features.ToArray();
if (_searchSpace.Step[d] is int step)
{
// A non-null step means the parameter at this index is discrete.
// To get a neighbor, advance the feature value by 1/step, wrapping around when it exceeds 1.
var nextStep = features[d] + (1.0 / step);
if (nextStep > 1)
{
nextStep -= 1;
}
newFeatures[d] = nextStep;
neighborhood.Add(_searchSpace.SampleFromFeatureSpace(newFeatures));
}
else
{
// A null step means the parameter at this index is numeric.
// Create k Gaussian-perturbed neighbors around the current value.
for (int j = 0; j != _numNeighborsForNumericalParams; ++j)
{
var mu = features[d];
var sigma = 0.2;
while (true)
{
// Sample from Normal(mu, sigma) via the Box-Muller transform, rejecting draws that fall outside [0, 1].
var u1 = _rnd.NextDouble();
var u2 = _rnd.NextDouble();
var newFeature = mu + sigma * Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
if (newFeature >= 0 && newFeature <= 1)
{
newFeatures[d] = newFeature;
break;
}
}
neighborhood.Add(_searchSpace.SampleFromFeatureSpace(newFeatures));
}
}
}
return neighborhood.ToArray();
}
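/// <summary>
/// Scores a configuration by EI: runs it through the forest, summarizes the per-tree leaf predictions into a
/// mean and standard deviation, and plugs those into the EI formula against the best loss seen so far.
/// </summary>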
private double EvaluateConfigurationsByEI(FastForestRegressionModelParameters forest, double bestVal, Parameter parameter)
{
double[] leafPredictions = GetForestRegressionLeafValues(forest, parameter);
double[] forestStatistics = ComputeForestStats(leafPredictions);
return ComputeEI(bestVal, forestStatistics);
}
/// <summary>
/// Runs the given configuration through every tree in the forest and collects the leaf value it falls into.
/// </summary>
/// <param name="forest">Trained forest predictor used to score the configuration.</param>
/// <param name="parameter">Parameter configuration.</param>
/// <returns>Array with one predicted leaf value per tree in the forest.</returns>
private double[] GetForestRegressionLeafValues(FastForestRegressionModelParameters forest, Parameter parameter)
{
List<double> leafValues = new List<double>();
var transformedParams = _searchSpace.MappingToFeatureSpace(parameter).Select(p => Convert.ToSingle(p)).ToArray();
var features = new VBuffer<float>(transformedParams.Length, transformedParams);
for (var treeId = 0; treeId < forest.TrainedTreeEnsemble.Trees.Count; treeId++)
{
List<int> path = null;
var leafId = forest.GetLeaf(treeId, features, ref path);
var leafValue = forest.GetLeafValue(treeId, leafId);
leafValues.Add(leafValue);
}
return leafValues.ToArray();
}
/// <summary>
/// Computes the empirical mean and standard deviation of the per-tree leaf values for a configuration.
/// </summary>
/// <param name="leafValues">The leaf values from which the mean and standard deviation are computed.</param>
/// <returns>A two-element array containing the mean and the standard deviation, respectively.</returns>
private double[] ComputeForestStats(double[] leafValues)
{
double[] meansAndStdDevs = new double[2];
// Compute the empirical mean and standard deviation from the leaf prediction values.
meansAndStdDevs[0] = VectorUtils.GetMean(leafValues);
meansAndStdDevs[1] = VectorUtils.GetStandardDeviation(leafValues);
return meansAndStdDevs;
}
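/// <summary>
/// Closed-form Expected Improvement for minimization: with surrogate mean mu, standard deviation sigma, and
/// incumbent best loss f*, EI = (f* - mu) * Phi(z) + sigma * phi(z), where z = (f* - mu) / sigma and Phi/phi are
/// the standard normal CDF/PDF. With sigma = 0 the forest is certain and EI degenerates to the raw improvement.
/// </summary>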
private double ComputeEI(double bestLoss, double[] forestStatistics)
{
double empMean = forestStatistics[0];
double empStdDev = forestStatistics[1];
double centered = bestLoss - empMean;
// Zero predictive variance: EI degenerates to the raw improvement.
if (empStdDev == 0)
{
return centered;
}
double ztrans = centered / empStdDev;
return centered * SweeperProbabilityUtils.StdNormalCdf(ztrans) + empStdDev * SweeperProbabilityUtils.StdNormalPdf(ztrans);
}
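/// <summary>
/// Records a completed trial. Results with NaN or infinite loss are discarded so they cannot corrupt the
/// surrogate model.
/// </summary>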
public void Update(TrialResult result)
{
if (!double.IsNaN(result.Loss) && !double.IsInfinity(result.Loss))
{
_histories.Add(result);
}
}
}
}