// SweepableLightGBMBinaryExperiment.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Data;
using Microsoft.ML.SearchSpace;
 
namespace Microsoft.ML.AutoML.Samples
{
    public static class SweepableLightGBMBinaryExperiment
    {
        // Options type behind the customized search space: RunAsync builds a
        // SearchSpace<LightGBMOption> from it and forwards each sampled value to the
        // LightGBM trainer factory.
        class LightGBMOption
        {
            // Passed to the LightGbm trainer as numberOfLeaves.
            // Range arguments presumably mean min = 4 and max = 32768, with init: 4 as the
            // starting value and logBase: false -- confirm against Microsoft.ML.SearchSpace.
            [Range(4, 32768, init: 4, logBase: false)]
            public int NumberOfLeaves { get; set; } = 4;
 
            // Passed to the LightGbm trainer as numberOfIterations.
            // Declared with the same range arguments as NumberOfLeaves.
            [Range(4, 32768, init: 4, logBase: false)]
            public int NumberOfTrees { get; set; } = 4;
        }
 
        /// <summary>
        /// Sample entry point: uses the Sweepable API to run hyper-parameter optimization
        /// over a LightGBM binary trainer with a customized search space, then evaluates
        /// the best model on a held-out test set and prints its metrics.
        /// </summary>
        /// <returns>A task that completes after the metrics have been written to the console.</returns>
        public static async Task RunAsync()
        {
            // One fixed seed feeds both the MLContext and the synthetic data generator so
            // that the sample's output is deterministic.
            var seed = 0;
            var mlContext = new MLContext(seed);

            // Generate 100 synthetic examples and wrap them in an IDataView.
            var samples = GenerateRandomBinaryClassificationDataPoints(100, seed);
            var allData = mlContext.Data.LoadFromEnumerable(samples);

            // Keep 10% of the rows aside for the final evaluation.
            var split = mlContext.Data.TrainTestSplit(allData, testFraction: 0.1);

            // Customized search space, described by the LightGBMOption class.
            var searchSpace = new SearchSpace<LightGBMOption>();

            // Sweepable LightGBM estimator: the factory turns one sampled option set
            // into a concrete trainer.
            var sweepableTrainer = mlContext.Auto().CreateSweepableEstimator(
                (ctx, candidate) => ctx.BinaryClassification.Trainers.LightGbm(
                    "Label",
                    "Features",
                    numberOfLeaves: candidate.NumberOfLeaves,
                    numberOfIterations: candidate.NumberOfTrees),
                searchSpace);

            // The sweepable pipeline consists of the trainer only.
            var sweepablePipeline = new EstimatorChain<ITransformer>().Append(sweepableTrainer);

            // The AutoML experiment that will drive the search.
            var experiment = mlContext.Auto().CreateExperiment();

            // Forwards AutoML log messages above Trace level to the console; everything
            // else is ignored.
            void ForwardAutoMLLog(object sender, LoggingEventArgs args)
            {
                if (args.Source != nameof(AutoMLExperiment) || args.Kind <= Runtime.ChannelMessageKind.Trace)
                {
                    return;
                }

                Console.WriteLine(args.RawMessage);
            }

            mlContext.Log += ForwardAutoMLLog;

            // Optimize the "Accuracy" metric of the pipeline on the training split:
            // 5-fold cross validation per trial, 100 trials in total.
            experiment
                .SetPipeline(sweepablePipeline)
                .SetDataset(split.TrainSet, fold: 5)
                .SetBinaryClassificationMetric(BinaryClassificationMetric.Accuracy, "Label")
                .SetMaxModelToExplore(100);

            // Run the search.
            var outcome = await experiment.RunAsync();

            // Expected output samples during training. The pipeline is reported as unknown because it is built
            // from a customized sweepable estimator, so AutoML does not know the exact type of the estimator.
            //      Update Running Trial - Id: 0
            //      Update Completed Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown - Duration: 616 - Peak CPU: 0.00% - Peak Memory in MB: 35.54
            //      Update Best Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown

            // Score the held-out test set with the best model and report the metrics.
            var scoredTestSet = outcome.Model.Transform(split.TestSet);
            PrintMetrics(mlContext.BinaryClassification.Evaluate(scoredTestSet));

            // Expected output:
            //  Accuracy: 0.67
            //  AUC: 0.75
            //  F1 Score: 0.33
            //  Negative Precision: 0.88
            //  Negative Recall: 0.70
            //  Positive Precision: 0.25
            //  Positive Recall: 0.50

            //  TEST POSITIVE RATIO: 0.1667(2.0 / (2.0 + 10.0))
            //  Confusion table
            //            ||======================
            //  PREDICTED || positive | negative | Recall
            //  TRUTH     ||======================
            //   positive || 1 | 1 | 0.5000
            //   negative || 3 | 7 | 0.7000
            //            ||======================
            //  Precision || 0.2500 | 0.8750 |
        }
 
        /// <summary>
        /// Lazily yields <paramref name="count"/> synthetic examples, each with a random
        /// boolean label and 50 random feature values correlated with that label.
        /// </summary>
        /// <param name="count">Number of examples to yield; nothing is yielded when it is not positive.</param>
        /// <param name="seed">Seed for the random number generator.</param>
        /// <returns>
        /// A deferred sequence. The generator is created inside the iterator, so every
        /// enumeration starts again from <paramref name="seed"/>.
        /// </returns>
        private static IEnumerable<BinaryClassificationDataPoint> GenerateRandomBinaryClassificationDataPoints(int count,
            int seed = 0)
        {
            const int featureCount = 50;

            var rng = new Random(seed);
            float NextSample() => (float)rng.NextDouble();

            var produced = 0;
            while (produced < count)
            {
                // The label is drawn before the features of the same example.
                bool isPositive = NextSample() > 0.5f;

                // Features are correlated with the label: examples with a false label
                // get every feature value shifted up by a constant 0.1.
                var features = new float[featureCount];
                for (int slot = 0; slot < features.Length; slot++)
                {
                    float draw = NextSample();
                    features[slot] = isPositive ? draw : draw + 0.1f;
                }

                yield return new BinaryClassificationDataPoint
                {
                    Label = isPositive,
                    Features = features
                };

                produced++;
            }
        }
 
        // Example with label and 50 feature values. A data set is a collection of
        // such examples. Instances are produced by
        // GenerateRandomBinaryClassificationDataPoints and loaded into an IDataView
        // in RunAsync.
        private class BinaryClassificationDataPoint
        {
            // Boolean class label; RunAsync refers to this column as "Label".
            public bool Label { get; set; }
 
            // Feature vector; RunAsync refers to this column as "Features".
            // The declared size of 50 matches the number of values the generator creates.
            [VectorType(50)]
            public float[] Features { get; set; }
        }
 
        // Class used to capture predictions.
        // NOTE(review): nothing in the visible part of this file references this class;
        // confirm whether it is still needed.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }
            // Predicted label from the trainer.
            public bool PredictedLabel { get; set; }
        }
 
        /// <summary>
        /// Writes the headline binary classification metrics to the console, each with
        /// two decimals, followed by the formatted confusion table.
        /// </summary>
        /// <param name="metrics">Evaluation result to print.</param>
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            // One console line per entry; the last entry ends with an explicit newline so
            // that a blank line separates the summary from the confusion table.
            string[] summaryLines =
            {
                $"Accuracy: {metrics.Accuracy:F2}",
                $"AUC: {metrics.AreaUnderRocCurve:F2}",
                $"F1 Score: {metrics.F1Score:F2}",
                $"Negative Precision: {metrics.NegativePrecision:F2}",
                $"Negative Recall: {metrics.NegativeRecall:F2}",
                $"Positive Precision: {metrics.PositivePrecision:F2}",
                $"Positive Recall: {metrics.PositiveRecall:F2}\n",
            };

            foreach (string line in summaryLines)
            {
                Console.WriteLine(line);
            }

            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
        }
    }
}
// File: Sweepable\SweepableLightGBMBinaryExperiment.cs (Web Access)
// Project: src\docs\samples\Microsoft.ML.AutoML.Samples\Microsoft.ML.AutoML.Samples.csproj (Microsoft.ML.AutoML.Samples)