|
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.LightGbm;
namespace Samples.Dynamic.Trainers.Regression
{
class LightGbmWithOptionsAdvanced
{
    /// <summary>
    /// Trains a LightGBM regression model configured through
    /// <see cref="LightGbmRegressionTrainer.Options"/> (with a GOSS booster)
    /// on the housing dataset, prints two of the learned feature weights and
    /// then prints regression metrics computed on a held-out test split.
    /// </summary>
    /// <remarks>
    /// This example requires installation of the additional NuGet package
    /// for Microsoft.ML.LightGBM, available at
    /// https://www.nuget.org/packages/Microsoft.ML.LightGbm/
    /// </remarks>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for
        // exception tracking and logging, as well as the source of randomness.
        // NOTE(review): no seed is supplied here, so the numbers quoted in the
        // comments below may vary from run to run.
        var mlContext = new MLContext();

        // Download and load the housing dataset into an IDataView.
        var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext);

        //////////////////// Data Preview ////////////////////
        // Only the first 8 columns are displayed here.
        // MedianHomeValue  CrimesPerCapita  PercentResidental  PercentNonRetail  CharlesRiver  NitricOxides  RoomsPerDwelling  PercentPre40s  ...
        // 24.00            0.00632          18.00              2.310             0             0.5380        6.5750            65.20          ...
        // 21.60            0.02731          00.00              7.070             0             0.4690        6.4210            78.90          ...
        // 34.70            0.02729          00.00              7.070             0             0.4690        7.1850            61.10          ...

        // Hold out 10% of the rows for evaluation.
        var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

        // Create a pipeline with LightGbm estimator with advanced options.
        // Here we only need LightGbm trainer as data is already processed
        // in a form consumable by the trainer.
        var labelName = "MedianHomeValue";

        // Every column except the label becomes a feature. The order follows
        // the schema order, which is what makes the fixed weight indices used
        // further down meaningful.
        var featureNames = dataView.Schema
            .Select(column => column.Name) // Get the column names
            .Where(name => name != labelName) // Drop the Label
            .ToArray();

        var pipeline = mlContext.Transforms.Concatenate(
                "Features", featureNames)
            .Append(mlContext.Regression.Trainers.LightGbm(
                new LightGbmRegressionTrainer.Options
                {
                    LabelColumnName = labelName,
                    NumberOfLeaves = 4,
                    MinimumExampleCountPerLeaf = 6,
                    LearningRate = 0.001,
                    Booster = new GossBooster.Options()
                    {
                        TopRate = 0.3,
                        OtherRate = 0.2
                    }
                }));

        // Fit this pipeline to the training data.
        var model = pipeline.Fit(split.TrainSet);

        // Get the feature importance based on the information gain used during
        // training.
        VBuffer<float> weights = default;
        model.LastTransformer.Model.GetFeatureWeights(ref weights);
        var weightsValues = weights.DenseValues().ToArray();

        // Index 0 and index 5 of the feature vector correspond to
        // CrimesPerCapita and RoomsPerDwelling respectively, given the column
        // order shown in the data preview with the label removed.
        Console.WriteLine($"weight 0 - {weightsValues[0]}");
        // CrimesPerCapita (weight 0) = 0.1898361
        Console.WriteLine($"weight 5 - {weightsValues[5]}");
        // RoomsPerDwelling (weight 5) = 1

        // Evaluate how the model is doing on the test data.
        var dataWithPredictions = model.Transform(split.TestSet);
        var metrics = mlContext.Regression.Evaluate(
            dataWithPredictions,
            labelColumnName: labelName);

        PrintMetrics(metrics);

        // Expected output (values shown rounded to two decimals):
        //   Mean Absolute Error: 4.97
        //   Mean Squared Error: 51.37
        //   Root Mean Squared Error: 7.17
        //   RSquared: 0.08
    }

    /// <summary>
    /// Writes the mean absolute error, mean squared error, root mean squared
    /// error and R-squared of <paramref name="metrics"/> to the console, one
    /// value per line.
    /// </summary>
    /// <param name="metrics">The regression metrics to print.</param>
    public static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError}");
        Console.WriteLine(
            $"Root Mean Squared Error: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
}
|