// File: Training.cs
// Web Access
// Project: src\test\Microsoft.ML.IntegrationTests\Microsoft.ML.IntegrationTests.csproj (Microsoft.ML.IntegrationTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.IntegrationTests.Datasets;
using Microsoft.ML.TestFramework.Attributes;
using Microsoft.ML.TestFrameworkCommon;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.FastTree;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.IntegrationTests
{
    public class Training : IntegrationTestBaseClass
    {
        /// <summary>
        /// Creates the test class, forwarding the xUnit output helper to the integration test base class.
        /// </summary>
        /// <param name="output">The xUnit output helper handed to the base class.</param>
        public Training(ITestOutputHelper output)
            : base(output)
        { }
 
        /// <summary>
        /// Training: It is easy to compare trainer evaluations on the same dataset.
        /// Three binary classifiers (SDCA logistic regression, FastTree, and a field-aware
        /// factorization machine) are fit on the same featurized training split, and each
        /// one is evaluated on the same featurized test split.
        /// </summary>
        [FieldAwareFactorizationMachineFact]
        public void CompareTrainerEvaluations()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);
            var trainTestSplit = mlContext.Data.TrainTestSplit(data);
            var trainData = trainTestSplit.TrainSet;
            var testData = trainTestSplit.TestSet;

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            // Create a selection of learners.
            var sdcaTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                    new SdcaLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 });

            var fastTreeTrainer = mlContext.BinaryClassification.Trainers.FastTree(
                    new FastTreeBinaryTrainer.Options { NumberOfThreads = 1 });

            var ffmTrainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine();

            // Fit the data transformation pipeline once so every trainer sees identical features.
            var featurization = featurizationPipeline.Fit(trainData);
            var featurizedTrain = featurization.Transform(trainData);
            var featurizedTest = featurization.Transform(testData);

            // Fit the trainers.
            var sdca = sdcaTrainer.Fit(featurizedTrain);
            var fastTree = fastTreeTrainer.Fit(featurizedTrain);
            var ffm = ffmTrainer.Fit(featurizedTrain);

            // Evaluate the trainers, each with its own model.
            var sdcaPredictions = sdca.Transform(featurizedTest);
            var sdcaMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(sdcaPredictions);
            var fastTreePredictions = fastTree.Transform(featurizedTest);
            var fastTreeMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(fastTreePredictions);
            // Score with the FFM model itself; scoring with the SDCA model here would leave FFM unevaluated.
            var ffmPredictions = ffm.Transform(featurizedTest);
            var ffmMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(ffmPredictions);

            // Validate the results.
            Common.AssertMetrics(sdcaMetrics);
            Common.AssertMetrics(fastTreeMetrics);
            Common.AssertMetrics(ffmMetrics);
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits an averaged perceptron twice from scratch and once starting from the first model's
        /// parameters, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [Fact]
        public void ContinueTrainingAveragePerceptron()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                new AveragedPerceptronTrainer.Options { NumberOfIterations = 1 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.Weights;

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
            var secondModelWeights = secondModel.Model.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits a field-aware factorization machine twice from scratch and once starting from the
        /// first model's parameters, then compares the linear weights of the resulting models.
        /// </summary>
        [FieldAwareFactorizationMachineFact]
        public void ContinueTrainingFieldAwareFactorizationMachine()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(
                new FieldAwareFactorizationMachineTrainer.Options { NumberOfIterations = 100 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.GetLinearWeights();

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.GetLinearWeights();

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, modelParameters: firstModel.Model);
            var secondModelWeights = secondModel.Model.GetLinearWeights();

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Assert.Equal(firstModelWeights, firstModelWeightsPrime);
            // Continued training should create a different model.
            Assert.NotEqual(firstModelWeights, secondModelWeights);
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits a linear SVM twice from scratch and once starting from the first model's
        /// parameters, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [Fact]
        public void ContinueTrainingLinearSupportVectorMachine()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.BinaryClassification.Trainers.LinearSvm(
                new LinearSvmTrainer.Options { NumberOfIterations = 1 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.Weights;

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
            var secondModelWeights = secondModel.Model.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits an L-BFGS logistic regression twice from scratch and once starting from the first
        /// model's sub-model, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [Fact]
        public void ContinueTrainingLogisticRegression()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
                new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.SubModel.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.SubModel.Weights;

            // Fit the second trainer, warm-started from the first model's linear sub-model.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel);
            var secondModelWeights = secondModel.Model.SubModel.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits an L-BFGS maximum entropy multiclass trainer twice from scratch and once starting from
        /// the first model, then compares the per-class weight vectors of the resulting models.
        /// </summary>
        [Fact]
        public void ContinueTrainingLogisticRegressionMulticlass()
        {
            var mlContext = new MLContext(seed: 1);

            var data = mlContext.Data.LoadFromTextFile<Iris>(TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename),
                hasHeader: TestDatasets.iris.fileHasHeader,
                separatorChar: TestDatasets.iris.fileSeparator);

            // Create a training pipeline.
            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(
                new LbfgsMaximumEntropyMulticlassTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            VBuffer<float>[] firstModelWeights = null;
            firstModel.Model.GetWeights(ref firstModelWeights, out int firstModelNumClasses);

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            VBuffer<float>[] firstModelWeightsPrime = null;
            firstModelPrime.Model.GetWeights(ref firstModelWeightsPrime, out int firstModelNumClassesPrime);

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
            VBuffer<float>[] secondModelWeights = null;
            secondModel.Model.GetWeights(ref secondModelWeights, out int secondModelNumClasses);

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Assert.Equal(firstModelNumClasses, firstModelNumClassesPrime);
            for (int i = 0; i < firstModelNumClasses; i++)
            {
                Common.AssertEqual(firstModelWeights[i].DenseValues().ToArray(), firstModelWeightsPrime[i].DenseValues().ToArray());
            }

            // Continued training should create a different model.
            Assert.Equal(firstModelNumClasses, secondModelNumClasses);
            for (int i = 0; i < firstModelNumClasses; i++)
            {
                Common.AssertNotEqual(firstModelWeights[i].DenseValues().ToArray(), secondModelWeights[i].DenseValues().ToArray());
            }
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits an online gradient descent regressor twice from scratch and once starting from the first
        /// model's parameters, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [Fact]
        public void ContinueTrainingOnlineGradientDescent()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename),
                separatorChar: TestDatasets.housing.fileSeparator,
                hasHeader: TestDatasets.housing.fileHasHeader);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(
                new OnlineGradientDescentTrainer.Options { NumberOfIterations = 10 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.Weights;

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
            var secondModelWeights = secondModel.Model.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits an L-BFGS Poisson regressor twice from scratch and once starting from the first
        /// model's parameters, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [Fact]
        public void ContinueTrainingPoissonRegression()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename),
                separatorChar: TestDatasets.housing.fileSeparator,
                hasHeader: TestDatasets.housing.fileHasHeader);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(
                new LbfgsPoissonRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 100 });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.Weights;

            // Fit the second trainer, warm-started from the first model's parameters.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
            var secondModelWeights = secondModel.Model.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Models can be trained starting from an existing model.
        /// Fits a symbolic SGD logistic regression twice from scratch and once starting from the first
        /// model's sub-model, then checks that the from-scratch fits agree and the continued fit differs.
        /// </summary>
        [NativeDependencyFact("MklImports")]
        public void ContinueTrainingSymbolicStochasticGradientDescent()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
                separatorChar: TestDatasets.Sentiment.fileSeparator,
                hasHeader: TestDatasets.Sentiment.fileHasHeader,
                allowQuoting: TestDatasets.Sentiment.allowQuoting);

            // Create a transformation pipeline.
            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                .AppendCacheCheckpoint(mlContext);

            var trainer = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(
                new SymbolicSgdLogisticRegressionBinaryTrainer.Options
                {
                    NumberOfThreads = 1,
                    NumberOfIterations = 10
                });

            // Fit the data transformation pipeline.
            var featurization = featurizationPipeline.Fit(data);
            var featurizedData = featurization.Transform(data);

            // Fit the first trainer.
            var firstModel = trainer.Fit(featurizedData);
            var firstModelWeights = firstModel.Model.SubModel.Weights;

            // Fit the first trainer again and read the weights of that second fit.
            // NOTE(review): the equality check below assumes a repeated Fit on the same trainer
            // instance is deterministic (no shared random state consumed between fits) — confirm.
            var firstModelPrime = trainer.Fit(featurizedData);
            var firstModelWeightsPrime = firstModelPrime.Model.SubModel.Weights;

            // Fit the second trainer, warm-started from the first model's linear sub-model.
            var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel);
            var secondModelWeights = secondModel.Model.SubModel.Weights;

            // Validate that continued training occurred.
            // Training from the same initial condition, same seed should create the same model.
            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
            // Continued training should create a different model.
            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
        }
 
        /// <summary>
        /// Training: Meta-components function as expected. For OVA (one-versus-all), a user will be able to specify only
        /// binary classifier trainers. If they specify a different model class there should be a compile error.
        /// This test wraps an L-BFGS logistic regression in a one-versus-all trainer, fits it on the iris data,
        /// and checks that the multiclass evaluation yields well-formed metrics.
        /// </summary>
        [Fact]
        public void MetacomponentsFunctionAsExpectedOva()
        {
            var mlContext = new MLContext(seed: 1);

            var data = mlContext.Data.LoadFromTextFile<Iris>(TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename),
                hasHeader: TestDatasets.iris.fileHasHeader,
                separatorChar: TestDatasets.iris.fileSeparator);

            // Create a model training an OVA trainer with a binary classifier.
            var binaryClassificationTrainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
                new LbfgsLogisticRegressionBinaryTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, });
            var binaryClassificationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
                .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryClassificationTrainer));

            // Fit the binary classification pipeline.
            var binaryClassificationModel = binaryClassificationPipeline.Fit(data);

            // Transform the data
            var binaryClassificationPredictions = binaryClassificationModel.Transform(data);

            // Evaluate the model.
            var binaryClassificationMetrics = mlContext.MulticlassClassification.Evaluate(binaryClassificationPredictions);

            // Validate the results: without these checks the evaluation output was never inspected.
            Assert.InRange(binaryClassificationMetrics.MicroAccuracy, 0.0, 1.0);
            Assert.InRange(binaryClassificationMetrics.MacroAccuracy, 0.0, 1.0);
            // Upper bound of MaxValue rejects NaN and infinity while allowing any finite non-negative loss.
            Assert.InRange(binaryClassificationMetrics.LogLoss, 0.0, double.MaxValue);
        }
 
        /// <summary>
        /// Training: Meta-components function as expected when key-typed columns are converted back to values.
        /// A one-versus-all trainer wrapping a binary L-BFGS logistic regression is fit on the iris data, the
        /// "PredictedLabel" column is mapped from key back to value, and the multiclass log-loss is checked.
        /// </summary>
        [Fact]
        public void MetacomponentsFunctionWithKeyHandling()
        {
            var context = new MLContext(seed: 1);

            // Load the iris training set.
            var irisPath = TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename);
            var irisData = context.Data.LoadFromTextFile<Iris>(irisPath,
                hasHeader: TestDatasets.iris.fileHasHeader,
                separatorChar: TestDatasets.iris.fileSeparator);

            // Binary learner that the one-versus-all meta-trainer will wrap.
            var lbfgsOptions = new LbfgsLogisticRegressionBinaryTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, };
            var binaryTrainer = context.BinaryClassification.Trainers.LbfgsLogisticRegression(lbfgsOptions);

            // Assemble the pipeline one stage at a time, in the same order the estimators were created before.
            var concatenated = context.Transforms.Concatenate("Features", Iris.Features)
                .AppendCacheCheckpoint(context);
            var withKeyLabel = concatenated.Append(context.Transforms.Conversion.MapValueToKey("Label"));
            var withOva = withKeyLabel.Append(context.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer));
            var pipeline = withOva.Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // Fit on the data and score the same data.
            var predictions = pipeline.Fit(irisData).Transform(irisData);

            // Evaluate and pin the expected log-loss.
            var metrics = context.MulticlassClassification.Evaluate(predictions);

            Assert.Equal(0.4367, metrics.LogLoss, 0.0001);
        }
    }
}