ONNX.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.IO;
using Microsoft.ML.Data;
using Microsoft.ML.IntegrationTests.Datasets;
using Microsoft.ML.TestFrameworkCommon;
using Microsoft.ML.TestFrameworkCommon.Attributes;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.FastTree;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.IntegrationTests
{
    public class ONNX : IntegrationTestBaseClass
    {
        // These two members are meant to be changed
        // Only when manually testing the Onnx GPU nuggets
        private const bool _fallbackToCpu = true;
        private static int? _gpuDeviceId = null;
 
        public ONNX(ITestOutputHelper output) : base(output)
        {
        }
 
        /// <summary>
        /// ONNX: Models can be serialized to ONNX, deserialized back to ML.NET, and used a pipeline.
        /// </summary>
        [OnnxFactAttribute]
        public void SaveOnnxModelLoadAndScoreFastTree()
        {
            var mlContext = new MLContext(seed: 1);
 
            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);
 
            // Create a pipeline to train on the housing data.
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Regression.Trainers.FastTree(
                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
 
            // Fit the pipeline.
            var model = pipeline.Fit(data);
 
            // Serialize the pipeline to a file.
            var modelFileName = "SaveOnnxLoadAndScoreFastTreeModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var file = File.Create(modelPath))
                mlContext.Model.ConvertToOnnx(model, data, file);
 
            // Load the model as a transform.
            // ONNX uses tensors and will return an output of a tensor with the dimension of [1,1] for a single float.
            // Therefore the VectorScoreColumn class (which contains a float [] field called Score) is used for the return
            // type on the Prediction engine.
            // See #2980 and #2981 for more information.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
            var onnxModel = onnxEstimator.Fit(data);
 
            // Create prediction engine and test predictions.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxModel);
 
            // Take a handful of examples out of the dataset and compute predictions.
            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
            foreach (var row in dataEnumerator)
            {
                var originalPrediction = originalPredictionEngine.Predict(row);
                var onnxPrediction = onnxPredictionEngine.Predict(row);
                // Check that the predictions are identical.
                Assert.Equal(originalPrediction.Score, onnxPrediction.Score[0], 0.0001);
            }
        }
 
        /// <summary>
        /// ONNX: Models can be serialized to ONNX, deserialized back to ML.NET, and used a pipeline.
        /// </summary>
        [OnnxFactAttribute]
        public void SaveOnnxModelLoadAndScoreKMeans()
        {
            var mlContext = new MLContext(seed: 1);
 
            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);
 
            // Create a pipeline to train on the housing data.
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Clustering.Trainers.KMeans(
                    new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
 
            // Fit the pipeline.
            var model = pipeline.Fit(data);
 
            // Serialize the pipeline to a file.
            var modelFileName = "SaveOnnxLoadAndScoreKMeansModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var file = File.Create(modelPath))
                mlContext.Model.ConvertToOnnx(model, data, file);
 
            // Load the model as a transform.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
            var onnxModel = onnxEstimator.Fit(data);
 
            // TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
            // TODO #2981: ONNX models cannot be fit as part of a pipeline, so we must use a workaround like this.
            var onnxWorkaroundPipeline = onnxModel.Append(
                mlContext.Transforms.CopyColumns("Score", "Score").Fit(onnxModel.Transform(data)));
 
            // Create prediction engine and test predictions.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(model);
            // TODO #2982: ONNX produces vector types and not the original output type.
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxWorkaroundPipeline);
 
            // Take a handful of examples out of the dataset and compute predictions.
            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
            foreach (var row in dataEnumerator)
            {
                var originalPrediction = originalPredictionEngine.Predict(row);
                var onnxPrediction = onnxPredictionEngine.Predict(row);
                // Check that the predictions are identical.
                Common.AssertEqual(originalPrediction.Score, onnxPrediction.Score, precision: 4);
            }
        }
 
        /// <summary>
        /// ONNX: Models can be serialized to ONNX, deserialized back to ML.NET, and used a pipeline.
        /// </summary>
        [OnnxFactAttribute]
        public void SaveOnnxModelLoadAndScoreSDCA()
        {
            var mlContext = new MLContext(seed: 1);
 
            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);
 
            // Create a pipeline to train on the housing data.
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Regression.Trainers.Sdca(
                    new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
 
            // Fit the pipeline.
            var model = pipeline.Fit(data);
 
            // Serialize the pipeline to a file.
            var modelFileName = "SaveOnnxLoadAndScoreSdcaModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var file = File.Create(modelPath))
                mlContext.Model.ConvertToOnnx(model, data, file);
 
            // Load the model as a transform.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
            var onnxModel = onnxEstimator.Fit(data);
 
            // Create prediction engine and test predictions.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            // TODO #2982: ONNX produces vector types and not the original output type.
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxModel);
 
            // Take a handful of examples out of the dataset and compute predictions.
            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);
            foreach (var row in dataEnumerator)
            {
                var originalPrediction = originalPredictionEngine.Predict(row);
                var onnxPrediction = onnxPredictionEngine.Predict(row);
                // Check that the predictions are identical.
                Assert.Equal(originalPrediction.Score, onnxPrediction.Score[0], 0.0001);
            }
        }
    }
}
File: ONNX.cs	Web Access
Project: src\test\Microsoft.ML.IntegrationTests\Microsoft.ML.IntegrationTests.csproj (Microsoft.ML.IntegrationTests)