|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML.TestFramework.Attributes;
using Microsoft.ML.TestFrameworkCommon.Attributes;
namespace Microsoft.ML.RunTests
{
using System.Linq;
using System.Runtime.InteropServices;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
using Microsoft.ML.TestFramework;
using Microsoft.ML.TestFrameworkCommon;
using Microsoft.ML.TestFrameworkCommon.Attributes;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers.LightGbm;
using Xunit;
using Xunit.Abstractions;
using TestLearners = TestLearnersBase;
/// <summary>
/// Tests that exercise predictors through maml commands (IDV functionality).
/// </summary>
public sealed partial class TestPredictors : BaseTestPredictors
{
protected override void Initialize()
{
base.Initialize();
InitializeEnvironment(Env);
}
protected override void InitializeEnvironment(IHostEnvironment environment)
{
base.InitializeEnvironment(environment);
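// Register the assemblies containing the LightGBM and SymSGD components so that the
// component catalog can instantiate them by name when maml commands reference them.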
environment.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryModelParameters).Assembly);
environment.ComponentCatalog.RegisterAssembly(typeof(SymbolicSgdLogisticRegressionBinaryTrainer).Assembly);
}
/// <summary>
/// Get a list of datasets for binary classifier base test.
/// </summary>
public IList<TestDataset> GetDatasetsForBinaryClassifierBaseTest()
{
// MSM dataset is not yet ported.
return new[] {
TestDatasets.breastCancer,
/* TestDatasets.msm */
};
}
public IList<TestDataset> GetDatasetsForMulticlassClassificationTest()
{
return new[] {
TestDatasets.breastCancer,
TestDatasets.iris
};
}
/// <summary>
/// Get a list of datasets for regressor test.
/// </summary>
public IList<TestDataset> GetDatasetsForRegressorTest()
{
return new[] { TestDatasets.housing };
}
/// <summary>
/// Get a list of datasets for ranking test.
/// </summary>
public IList<TestDataset> GetDatasetsForRankingTest()
{
return new[] { TestDatasets.rankingText };
}
/// <summary>
/// Get a list of additional datasets for the binary classifier tests.
/// </summary>
public IList<TestDataset> GetDatasetsForBinaryClassifierMoreTest()
{
return new[] {
TestDatasets.breastCancerBoolLabel,
TestDatasets.breastCancerPipeMissing,
TestDatasets.breastCancerPipeMissingFilter,
TestDatasets.msm
};
}
/// <summary>
/// Get a list of datasets for the classification weighting predictors tests.
/// </summary>
public IList<TestDataset> GetDatasetsForClassificationWeightingPredictorsTest()
{
return new[] { TestDatasets.breastCancerWeighted };
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierPerceptronTest()
{
var binaryPredictors = new[] { TestLearners.perceptron };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 6);
Done();
}
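// For reference, a minimal sketch (not used by the maml-driven tests above) of training the
// same kind of learner through the modern API; the "Label"/"Features" column names are assumptions.
private static ITransformer TrainAveragedPerceptronSketch(MLContext mlContext, IDataView trainData)
{
// AveragedPerceptron is the modern counterpart of the perceptron learner exercised above.
var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
labelColumnName: "Label", featureColumnName: "Features");
return trainer.Fit(trainData);
}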
[Fact]
[TestCategory("Binary")]
[TestCategory("SimpleLearners")]
public void BinaryPriorTest()
{
var predictors = new[] {
TestLearners.binaryPrior};
RunAllTests(predictors, new[] { TestDatasets.breastCancerBoolLabel });
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastRank")]
public void EarlyStoppingTest()
{
RunMTAThread(() =>
{
var dataset = TestDatasets.msm.Clone();
dataset.validFilename = dataset.testFilename;
var predictor = TestLearners.fastRankClassificationPruning;
Run_TrainTest(predictor, dataset);
});
Done();
}
/// <summary>
/// Multiclass Logistic Regression test.
/// </summary>
[X86X64Fact("Currently flaky on non x86/x64 devices. Disabling until we figure it out. See https://github.com/dotnet/machinelearning/issues/6684")]
[TestCategory("Multiclass")]
[TestCategory("Logistic Regression")]
public void MulticlassLRTest()
{
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.iris, digitsOfPrecision: 3);
Done();
}
/// <summary>
/// Multiclass Logistic Regression with non-negative coefficients test.
/// </summary>
[Fact]
[TestCategory("Multiclass")]
[TestCategory("Logistic Regression")]
public void MulticlassLRNonNegativeTest()
{
// [TEST_STABILITY]: use lower digit precision as dotnet core 3.1 generates slightly different results
#if NETCOREAPP3_1_OR_GREATER
RunOneAllTests(TestLearners.multiclassLogisticRegressionNonNegative, TestDatasets.iris, digitsOfPrecision: 3);
#else
RunOneAllTests(TestLearners.multiclassLogisticRegressionNonNegative, TestDatasets.iris, digitsOfPrecision: 4);
#endif
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Multiclass")]
[TestCategory("SDCA")]
public void MulticlassSdcaTest()
{
var predictors = new[] {
TestLearners.multiclassSdca, TestLearners.multiclassSdcaL1, TestLearners.multiclassSdcaSmoothedHinge };
var datasets = GetDatasetsForMulticlassClassificationTest();
RunAllTests(predictors, datasets);
Done();
}
/// <summary>
/// Multiclass Logistic Regression test with a tree featurizer.
/// </summary>
[Fact]
[TestCategory("Multiclass")]
[TestCategory("Logistic Regression")]
[TestCategory("FastTree")]
// Note: this test was previously skipped temporarily pending investigation of failures.
public void MulticlassTreeFeaturizedLRTest()
{
RunMTAThread(() =>
{
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturized, digitsOfPrecision: 4);
RunOneAllTests(TestLearners.multiclassLogisticRegression, TestDatasets.irisTreeFeaturizedPermuted, digitsOfPrecision: 4);
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Multiclass")]
[TestCategory("Evaluators")]
public void MulticlassCVTest()
{
var predictor = new PredictorAndArgs
{
Trainer = new SubComponent("MulticlassLogisticRegression", "ot=1e-3 nt=1"),
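// The maml arguments below load the label as a text column, map label values greater
// than 4 to missing, filter those rows out, and stratify the cross-validation folds on
// the label, so folds can end up with different class counts (hence the extra tag below).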
MamlArgs = new[]
{
"loader=text{col=TextLabel:TX:0 col=Features:Num:~}",
"prexf=expr{col=Label:TextLabel expr={x=>single(x)>4?na(4):single(x)}}",
"prexf=missingvaluefilter{col=Label}",
"prexf=Term{col=Strat:TextLabel}",
"strat=Strat",
"evaluator=multiclass{opcs+}"
}
};
Run_CV(predictor, TestDatasets.mnistTiny28, extraTag: "DifferentClassCounts");
Done();
}
[Fact]
[TestCategory("Multiclass")]
public void MulticlassReductionTest()
{
RunOneAllTests(TestLearners.Ova, TestDatasets.iris, digitsOfPrecision: 6);
RunOneAllTests(TestLearners.OvaWithFastForest, TestDatasets.iris, digitsOfPrecision: 6);
RunOneAllTests(TestLearners.Pkpd, TestDatasets.iris, digitsOfPrecision: 6);
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Clustering")]
[TestCategory("KMeans")]
public void KMeansClusteringTest()
{
var predictors = new[] { TestLearners.KMeansDefault, TestLearners.KMeansInitPlusPlus, TestLearners.KMeansInitRandom };
var datasets = new[] { TestDatasets.adult, TestDatasets.mnistTiny28 };
RunAllTests(predictors, datasets);
Done();
}
[Fact]
[TestCategory("Binary")]
[TestCategory("SDCA")]
public void LinearClassifierTest()
{
var binaryPredictors = new[]
{
TestLearners.binarySdca,
TestLearners.binarySdcaL1,
TestLearners.binarySdcaSmoothedHinge,
TestLearners.binarySgd,
TestLearners.binarySgdHinge
};
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 5);
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierLogisticRegressionTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegression };
RunOneAllTests(TestLearners.logisticRegression, TestDatasets.breastCancer, summary: true, digitsOfPrecision: 3);
// RunOneAllTests(TestLearners.logisticRegression, TestDatasets.msm);
Done();
}
[NativeDependencyFact("MklImports")]
[TestCategory("Binary")]
public void BinaryClassifierSymSgdTest()
{
// Skipping test temporarily on Linux. This test will be re-enabled once the cause of failure has been determined.
if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
return;
RunOneAllTests(TestLearners.symSGD, TestDatasets.breastCancer, summary: true, digitsOfPrecision: 4);
Done();
}
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierTesterThresholdingTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegression };
var binaryClassificationDatasets = new[] { TestDatasets.breastCancer };
RunAllTests(binaryPredictors, binaryClassificationDatasets, new[] { "eval=BinaryClassifier{threshold=0.95 useRawScore=-}" }, "withThreshold", digitsOfPrecision: 3);
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierLogisticRegressionNormTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegressionNorm };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 5);
Done();
}
/// <summary>
/// A test for binary classifiers with non-negative coefficients.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierLogisticRegressionNonNegativeTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegressionNonNegative };
var binaryClassificationDatasets = new[] { TestDatasets.breastCancer };
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 4);
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierLogisticRegressionBinNormTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegressionBinNorm };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
// [TEST_STABILITY]: dotnet core 3.1 generates slightly different results
#if NETCOREAPP3_1_OR_GREATER
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 4);
#else
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 6);
#endif
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
public void BinaryClassifierLogisticRegressionGaussianNormTest()
{
var binaryPredictors = new[] { TestLearners.logisticRegressionGaussianNorm };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 2);
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
[TestCategory("FastRank")]
public void BinaryClassifierFastRankClassificationTest()
{
RunMTAThread(() =>
{
var learner = TestLearners.fastRankClassification;
var data = TestDatasets.breastCancer;
string dir = learner.Trainer.Kind;
string prName = "prcurve-breast-cancer-prcurve.txt";
string prPath = DeleteOutputPath(dir, prName);
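// Ask the binary classification evaluator to also dump a precision-recall curve to prPath.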
string eval = $"eval=Binary{{pr={{{prPath} }}}}";
Run_TrainTest(learner, data, new[] { eval });
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // PR curves are only generated on Windows.
CheckEqualityNormalized(dir, prName);
Run_CV(learner, data);
});
Done();
}
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact]
[TestCategory("Binary")]
[TestCategory("FastForest")]
public void FastForestClassificationTest()
{
RunMTAThread(() =>
{
var binaryPredictors = new[] { TestLearners.FastForestClassification };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets);
});
Done();
}
/// <summary>
/// A test for regressors.
/// </summary>
[Fact]
[TestCategory("Regressor")]
[TestCategory("FastForest")]
public void FastForestRegressionTest()
{
RunMTAThread(() =>
{
var regressionPredictors = new[] {
TestLearners.FastForestRegression,
TestLearners.QuantileRegressionScorer,
};
var regressionDatasets = GetDatasetsForRegressorTest();
RunAllTests(regressionPredictors, regressionDatasets, parseOption: NumberParseOption.UseSingle);
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("FastForest")]
public void WeightingFastForestClassificationPredictorsTest()
{
RunMTAThread(() =>
{
RunAllTests(
new[] { TestLearners.FastForestClassification },
new[] { TestDatasets.breastCancerDifferentlyWeighted });
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("FastForest")]
public void WeightingFastForestRegressionPredictorsTest()
{
RunMTAThread(() =>
{
var regressionPredictors = new[] {
TestLearners.FastForestRegression,
TestLearners.QuantileRegressionScorer,
};
RunAllTests(
regressionPredictors,
new[] { TestDatasets.housingDifferentlyWeightedRep });
});
Done();
}
[Fact]
[TestCategory("Binary")]
[TestCategory("FastTree")]
public void FastTreeBinaryClassificationTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeClassfier, TestLearners.FastTreeDropoutClassfier,
TestLearners.FastTreeBsrClassfier, TestLearners.FastTreeClassfierDisk };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerPipe };
foreach (var learner in learners)
{
foreach (TestDataset dataset in binaryClassificationDatasets)
Run_TrainTest(learner, dataset);
}
});
Done();
}
[LightGBMFact]
[TestCategory("Binary")]
[TestCategory("LightGBM")]
public void LightGBMClassificationTest()
{
var learners = new[] { TestLearners.LightGBMClassifier };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerPipe };
foreach (var learner in learners)
{
foreach (TestDataset dataset in binaryClassificationDatasets)
Run_TrainTest(learner, dataset);
}
Done();
}
/// <summary>
/// This test checks that changing the flags used by LightGBM to <see cref="CursOpt.AllFeatures"/>
/// does not alter its run-time behavior, and that the change does not affect the features
/// extracted during validation. This is done by checking that an older LightGBM model trained
/// with <see cref="CursOpt.Features"/> produces the same baselines as it did before the change.
/// </summary>
[LightGBMFact]
[TestCategory("Binary")]
[TestCategory("LightGBM")]
public void LightGBMPreviousModelBaselineTest()
{
// The path of previously trained LightGBM model:
// "machinelearning/data/test/LightGBM-Train-breast-cancer-model.zip"
// The path of the expected baseline output:
// "machinelearning/test/BaselineOutput/Common/LightGBMBinary/LightGBM-Test-breast-cancer-out.txt"
string previousBaselineModelPath = GetDataPath("LightGBM-Train-breast-cancer-model.zip");
Run_Test(TestLearners.LightGBMClassifier, TestDatasets.breastCancerPipeWithoutMamlExtraSettings, previousBaselineModelPath);
Done();
}
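// A minimal sketch (not used by the test above) of how a persisted model .zip could be loaded
// and scored through the modern API. Whether this particular maml-produced zip is loadable this
// way is an assumption; Model.Load restores the transformer chain saved in the file.
private static IDataView ScorePersistedModelSketch(string modelPath, IDataView data)
{
var mlContext = new MLContext(seed: 0);
// Load the saved model and apply it to the supplied data.
ITransformer model = mlContext.Model.Load(modelPath, out DataViewSchema inputSchema);
return model.Transform(data);
}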
[LightGBMFact]
[TestCategory("Binary"), TestCategory("LightGBM")]
public void GossLightGBMTest()
{
var binaryPredictors = new[] { TestLearners.LightGBMGoss };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerPipe };
RunAllTests(binaryPredictors, binaryClassificationDatasets, extraTag: "goss");
Done();
}
[LightGBMFact]
[TestCategory("Binary")]
[TestCategory("LightGBM")]
public void DartLightGBMTest()
{
var binaryPredictors = new[] { TestLearners.LightGBMDart };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerPipe };
RunAllTests(binaryPredictors, binaryClassificationDatasets, extraTag: "dart");
Done();
}
/// <summary>
/// A test for multiclass classifiers.
/// </summary>
[LightGBMFact]
[TestCategory("Multiclass")]
[TestCategory("LightGBM")]
public void MulticlassifierLightGBMKeyLabelTest()
{
var multiPredictors = new[] { TestLearners.LightGBMMC };
var multiClassificationDatasets = new[] { TestDatasets.irisLoader };
RunAllTests(multiPredictors, multiClassificationDatasets, extraTag: "key");
Done();
}
/// <summary>
/// A test for multiclass classifiers.
/// </summary>
[LightGBMFact]
[TestCategory("Multiclass")]
[TestCategory("LightGBM")]
public void MulticlassifierLightGBMKeyLabelU404Test()
{
var multiPredictors = new[] { TestLearners.LightGBMMC };
var multiClassificationDatasets = new[] { TestDatasets.irisLoaderU404 };
RunAllTests(multiPredictors, multiClassificationDatasets, extraTag: "keyU404");
Done();
}
/// <summary>
/// A test for regression.
/// </summary>
[LightGBMFact]
[TestCategory("Regression")]
[TestCategory("LightGBM")]
public void RegressorLightGBMTest()
{
var regPredictors = new[] { TestLearners.LightGBMReg };
var regDatasets = new[] { TestDatasets.generatedRegressionDataset };
RunAllTests(regPredictors, regDatasets, parseOption: NumberParseOption.UseSingle);
Done();
}
/// <summary>
/// A test for regression.
/// </summary>
[LightGBMFact]
[TestCategory("Regression")]
[TestCategory("LightGBM")]
public void RegressorLightGBMMAETest()
{
var regPredictors = new[] { TestLearners.LightGBMRegMae };
var regDatasets = new[] { TestDatasets.generatedRegressionDataset };
RunAllTests(regPredictors, regDatasets, extraTag: "MAE", parseOption: NumberParseOption.UseSingle);
Done();
}
/// <summary>
/// A test for regression.
/// </summary>
[LightGBMFact]
[TestCategory("Regression")]
[TestCategory("LightGBM")]
public void RegressorLightGBMRMSETest()
{
var regPredictors = new[] { TestLearners.LightGBMRegRmse };
var regDatasets = new[] { TestDatasets.generatedRegressionDataset };
RunAllTests(regPredictors, regDatasets, extraTag: "RMSE", parseOption: NumberParseOption.UseSingle);
Done();
}
/// <summary>
/// A test for ranking. The training does not seem to be accurate.
/// The evaluation is still based on NDCG, which is not well suited to pair-wise ranking.
/// </summary>
[Fact(Skip = "Need to find ranking dataset.")]
[TestCategory("Ranking")]
[TestCategory("LightGBM")]
public void RankingLightGBMTest()
{
var args = new PredictorAndArgs
{
Trainer = new SubComponent("LightGBMRank",
"nt=1 iter=20 v=+ mil=20 nl=20 lr=0.2")
};
var rankPredictors = new[] { args };
var rankDatasets = new[] { TestDatasets.MQ2008 };
RunAllTests(rankPredictors, rankDatasets);
Done();
}
[NotArm32Fact("RyuJit codegen issue https://github.com/dotnet/runtime/issues/7970")]
public void TestTreeEnsembleCombiner()
{
var dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
var dataView = ML.Data.LoadFromTextFile(dataPath);
var fastTrees = new PredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryTrainer.Options
{
FeatureColumnName = "Features",
NumberOfTrees = 5,
NumberOfLeaves = 4,
LabelColumnName = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel;
}
CombineAndTestTreeEnsembles(dataView, fastTrees);
}
[NotArm32Fact("RyuJit codegen issue https://github.com/dotnet/runtime/issues/7970")]
public void TestTreeEnsembleCombinerWithCategoricalSplits()
{
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
var dataView = ML.Data.LoadFromTextFile(dataPath);
var cat = ML.Transforms.Categorical.OneHotEncoding("Features", "Categories").Fit(dataView).Transform(dataView);
var fastTrees = new PredictorModel[3];
for (int i = 0; i < 3; i++)
{
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryTrainer.Options
{
FeatureColumnName = "Features",
NumberOfTrees = 5,
NumberOfLeaves = 4,
CategoricalSplit = true,
LabelColumnName = DefaultColumnNames.Label,
TrainingData = cat
}).PredictorModel;
}
CombineAndTestTreeEnsembles(cat, fastTrees);
}
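// Scores the combined tree ensemble and each individual model over the same data and checks,
// row by row, that the combined score is the scaled (0.4x) average of the individual scores,
// that the probability is the sigmoid of the combined score, and that the predicted label is
// simply "score > 0".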
private void CombineAndTestTreeEnsembles(IDataView idv, PredictorModel[] fastTrees)
{
IModelCombiner combiner = new TreeEnsembleCombiner(Env, PredictionKind.BinaryClassification);
var fastTree = combiner.CombineModels(fastTrees.Select(pm => (IPredictorProducing<float>)pm.Predictor));
var data = new RoleMappedData(idv, label: null, feature: "Features");
var scored = ScoreModel.Score(Env, new ScoreModel.Input() { Data = idv, PredictorModel = new PredictorModelImpl(Env, data, idv, fastTree) }).ScoredData;
var scoreColumn = scored.Schema.GetColumnOrNull("Score");
Assert.True(scoreColumn.HasValue);
var probabilityColumn = scored.Schema.GetColumnOrNull("Probability");
Assert.True(probabilityColumn.HasValue);
var predictedLabelColumn = scored.Schema.GetColumnOrNull("PredictedLabel");
Assert.True(predictedLabelColumn.HasValue);
int predCount = Utils.Size(fastTrees);
var scoredArray = new IDataView[predCount];
var scoreColArray = new DataViewSchema.Column?[predCount];
var probColArray = new DataViewSchema.Column?[predCount];
var predColArray = new DataViewSchema.Column?[predCount];
for (int i = 0; i < predCount; i++)
{
scoredArray[i] = ScoreModel.Score(Env, new ScoreModel.Input() { Data = idv, PredictorModel = fastTrees[i] }).ScoredData;
scoreColArray[i] = scoredArray[i].Schema.GetColumnOrNull("Score");
Assert.True(scoreColArray[i].HasValue);
probColArray[i] = scoredArray[i].Schema.GetColumnOrNull("Probability");
Assert.True(probColArray[i].HasValue);
predColArray[i] = scoredArray[i].Schema.GetColumnOrNull("PredictedLabel");
Assert.True(predColArray[i].HasValue);
}
var cursors = new DataViewRowCursor[predCount];
var cols = scored.Schema.Where(c => c.Name.Equals("Score") || c.Name.Equals("Probability") || c.Name.Equals("PredictedLabel"));
for (int i = 0; i < predCount; i++)
cursors[i] = scoredArray[i].GetRowCursor(cols);
try
{
using (var curs = scored.GetRowCursor(cols))
{
var scoreGetter = curs.GetGetter<float>(scoreColumn.Value);
var probGetter = curs.GetGetter<float>(probabilityColumn.Value);
var predGetter = curs.GetGetter<bool>(predictedLabelColumn.Value);
var scoreGetters = new ValueGetter<float>[predCount];
var probGetters = new ValueGetter<float>[predCount];
var predGetters = new ValueGetter<bool>[predCount];
for (int i = 0; i < predCount; i++)
{
scoreGetters[i] = cursors[i].GetGetter<float>(scoreColArray[i].Value);
probGetters[i] = cursors[i].GetGetter<float>(probColArray[i].Value);
predGetters[i] = cursors[i].GetGetter<bool>(predColArray[i].Value);
}
float score = 0;
float prob = 0;
bool pred = default;
var scores = new float[predCount];
var probs = new float[predCount];
var preds = new bool[predCount];
while (curs.MoveNext())
{
scoreGetter(ref score);
probGetter(ref prob);
predGetter(ref pred);
for (int i = 0; i < predCount; i++)
{
Assert.True(cursors[i].MoveNext());
scoreGetters[i](ref scores[i]);
probGetters[i](ref probs[i]);
predGetters[i](ref preds[i]);
}
Assert.Equal(score, 0.4 * scores.Sum() / predCount, 0.00001);
Assert.Equal(prob, 1 / (1 + Math.Exp(-score)), 0.000001);
Assert.True(pred == score > 0);
}
}
}
finally
{
for (int i = 0; i < predCount; i++)
cursors[i].Dispose();
}
}
[Fact]
public void TestEnsembleCombiner()
{
var dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
var dataView = ML.Data.LoadFromTextFile(dataPath);
var predictors = new PredictorModel[]
{
FastTree.TrainBinary(ML, new FastTreeBinaryTrainer.Options
{
FeatureColumnName = "Features",
NumberOfTrees = 5,
NumberOfLeaves = 4,
LabelColumnName = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
AveragedPerceptronTrainer.TrainBinary(ML, new AveragedPerceptronTrainer.Options()
{
FeatureColumnName = "Features",
LabelColumnName = DefaultColumnNames.Label,
NumberOfIterations = 2,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LbfgsLogisticRegressionBinaryTrainer.TrainBinary(ML, new LbfgsLogisticRegressionBinaryTrainer.Options()
{
FeatureColumnName = "Features",
LabelColumnName = DefaultColumnNames.Label,
OptimizationTolerance = 10e-4F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LbfgsLogisticRegressionBinaryTrainer.TrainBinary(ML, new LbfgsLogisticRegressionBinaryTrainer.Options()
{
FeatureColumnName = "Features",
LabelColumnName = DefaultColumnNames.Label,
OptimizationTolerance = 10e-3F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel
};
CombineAndTestEnsembles(dataView, "pe", "oc=average", PredictionKind.BinaryClassification, predictors);
Done();
}
[LightGBMFact]
public void TestMulticlassEnsembleCombiner()
{
var dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
var dataView = ML.Data.LoadFromTextFile(dataPath);
var predictors = new PredictorModel[]
{
LightGbm.TrainMulticlass(Env, new LightGbmMulticlassTrainer.Options
{
FeatureColumnName = "Features",
NumberOfIterations = 5,
NumberOfLeaves = 4,
LabelColumnName = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
LbfgsMaximumEntropyMulticlassTrainer.TrainMulticlass(Env, new LbfgsMaximumEntropyMulticlassTrainer.Options()
{
FeatureColumnName = "Features",
LabelColumnName = DefaultColumnNames.Label,
OptimizationTolerance = 10e-4F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
LbfgsMaximumEntropyMulticlassTrainer.TrainMulticlass(Env, new LbfgsMaximumEntropyMulticlassTrainer.Options()
{
FeatureColumnName = "Features",
LabelColumnName = DefaultColumnNames.Label,
OptimizationTolerance = 10e-3F,
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel
};
CombineAndTestEnsembles(dataView, "weightedensemblemulticlass", "oc=multiaverage", PredictionKind.MulticlassClassification, predictors);
}
private void CombineAndTestEnsembles(IDataView idv, string name, string options, PredictionKind predictionKind,
PredictorModel[] predictors)
{
var combiner = ComponentCatalog.CreateInstance<IModelCombiner>(
Env, typeof(SignatureModelCombiner), name, options, predictionKind);
var predictor = combiner.CombineModels(predictors.Select(pm => pm.Predictor));
var data = new RoleMappedData(idv, label: null, feature: "Features");
var scored = ScoreModel.Score(Env, new ScoreModel.Input() { Data = idv, PredictorModel = new PredictorModelImpl(Env, data, idv, predictor) }).ScoredData;
var predCount = Utils.Size(predictors);
var scoreCol = scored.Schema["Score"];
DataViewSchema.Column? probCol = null;
DataViewSchema.Column? predCol = null;
if (predictionKind == PredictionKind.BinaryClassification)
{
probCol = scored.Schema["Probability"];
predCol = scored.Schema["PredictedLabel"];
}
var scoredArray = new IDataView[predCount];
var scoreColArray = new DataViewSchema.Column?[predCount];
var probColArray = new DataViewSchema.Column?[predCount];
var predColArray = new DataViewSchema.Column?[predCount];
for (int i = 0; i < predCount; i++)
{
scoredArray[i] = ScoreModel.Score(Env, new ScoreModel.Input() { Data = idv, PredictorModel = predictors[i] }).ScoredData;
scoreColArray[i] = scoredArray[i].Schema["Score"];
if (predictionKind == PredictionKind.BinaryClassification)
{
probColArray[i] = scoredArray[i].Schema["Probability"];
predColArray[i] = scoredArray[i].Schema["PredictedLabel"];
}
else
{
probColArray[i] = null;
predColArray[i] = null;
}
}
var cursors = new DataViewRowCursor[predCount];
var cols = scored.Schema.Where(c => c.Name.Equals("Score") || c.Name.Equals("Probability") || c.Name.Equals("PredictedLabel"));
for (int i = 0; i < predCount; i++)
cursors[i] = scoredArray[i].GetRowCursor(cols);
try
{
using (var curs = scored.GetRowCursor(cols))
{
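// For columns that do not apply to the current prediction kind, substitute no-op getters that
// return default values, so the read loop below stays uniform across prediction kinds.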
ValueGetter<float> scoreGetter = predictionKind == PredictionKind.MulticlassClassification ?
(ref float dst) => dst = 0 :
curs.GetGetter<float>(scoreCol);
ValueGetter<VBuffer<float>> vectorScoreGetter = predictionKind == PredictionKind.MulticlassClassification ?
curs.GetGetter<VBuffer<float>>(scoreCol) :
(ref VBuffer<float> dst) => dst = default;
ValueGetter<float> probGetter = predictionKind == PredictionKind.BinaryClassification ?
curs.GetGetter<float>(probCol.Value) :
(ref float dst) => dst = 0;
ValueGetter<bool> predGetter = predictionKind == PredictionKind.BinaryClassification ?
curs.GetGetter<bool>(predCol.Value) :
(ref bool dst) => dst = false;
var scoreGetters = new ValueGetter<float>[predCount];
var vectorScoreGetters = new ValueGetter<VBuffer<float>>[predCount];
var probGetters = new ValueGetter<float>[predCount];
var predGetters = new ValueGetter<bool>[predCount];
for (int i = 0; i < predCount; i++)
{
scoreGetters[i] = predictionKind == PredictionKind.MulticlassClassification ?
(ref float dst) => dst = 0 :
cursors[i].GetGetter<float>(scoreColArray[i].Value);
vectorScoreGetters[i] = predictionKind == PredictionKind.MulticlassClassification ?
cursors[i].GetGetter<VBuffer<float>>(scoreColArray[i].Value) :
(ref VBuffer<float> dst) => dst = default;
probGetters[i] = predictionKind == PredictionKind.BinaryClassification ?
cursors[i].GetGetter<float>(probColArray[i].Value) :
(ref float dst) => dst = 0;
predGetters[i] = predictionKind == PredictionKind.BinaryClassification ?
cursors[i].GetGetter<bool>(predColArray[i].Value) :
(ref bool dst) => dst = false;
}
float score = 0;
VBuffer<float> vectorScore = default;
float prob = 0;
bool pred = false;
var scores = new float[predCount];
var vectorScores = new VBuffer<float>[predCount];
var probs = new float[predCount];
var preds = new bool[predCount];
while (curs.MoveNext())
{
scoreGetter(ref score);
vectorScoreGetter(ref vectorScore);
probGetter(ref prob);
predGetter(ref pred);
for (int i = 0; i < predCount; i++)
{
Assert.True(cursors[i].MoveNext());
scoreGetters[i](ref scores[i]);
vectorScoreGetters[i](ref vectorScores[i]);
probGetters[i](ref probs[i]);
predGetters[i](ref preds[i]);
}
if (scores.All(s => !float.IsNaN(s)))
CompareNumbersWithTolerance(score, scores.Sum() / predCount, digitsOfPrecision: 5);
for (int i = 0; i < predCount; i++)
Assert.Equal(vectorScore.Length, vectorScores[i].Length);
for (int i = 0; i < vectorScore.Length; i++)
{
float sum = 0;
for (int j = 0; j < predCount; j++)
sum += vectorScores[j].GetItemOrDefault(i);
if (!float.IsNaN(sum))
Assert.Equal((double)vectorScore.GetItemOrDefault(i), (double)sum / predCount, 0.001);
}
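// The combined probability should behave like a median of the individual probabilities:
// as many of them sit at or above it as sit at or below it.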
Assert.Equal(probs.Count(p => p >= prob), probs.Count(p => p <= prob));
}
}
}
finally
{
for (int i = 0; i < predCount; i++)
cursors[i].Dispose();
}
}
[Fact]
[TestCategory("Binary")]
[TestCategory("FastTree")]
public void FastTreeBinaryClassificationCategoricalSplitTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeClassfier, TestLearners.FastTreeWithCategoricalClassfier,
TestLearners.FastTreeClassfierDisk, TestLearners.FastTreeWithCategoricalClassfierDisk };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.adultOnlyCat, TestDatasets.adult };
foreach (var learner in learners)
{
foreach (TestDataset dataset in binaryClassificationDatasets)
Run_TrainTest(learner, dataset, extraTag: "Cat", summary: true, saveAsIni: true, digitsOfPrecision: 3);
}
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regression")]
[TestCategory("FastTree")]
public void FastTreeRegressionCategoricalSplitTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeRegressor, TestLearners.FastTreeRegressorCategorical };
var regressionDatasets = new List<TestDataset> { TestDatasets.autosSample };
foreach (var learner in learners)
{
foreach (TestDataset dataset in regressionDatasets)
Run_TrainTest(learner, dataset, extraTag: "Cat", summary: true, saveAsIni: true);
}
});
Done();
}
[Fact]
[TestCategory("Binary")]
[TestCategory("FastTree")]
public void FastTreeBinaryClassificationNoOpGroupIdTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeClassfier };
// In principle the training with this group ID should be the same as the training without
// this group ID, since the trainer should not be paying attention to the group ID.
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerGroupId };
foreach (var learner in learners)
{
foreach (TestDataset dataset in binaryClassificationDatasets)
Run_TrainTest(learner, dataset);
}
});
Done();
}
[Fact]
[TestCategory("Binary")]
[TestCategory("FastTree")]
public void FastTreeHighMinDocsTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeClassfierHighMinDocs };
var binaryClassificationDatasets = new List<TestDataset> { TestDatasets.breastCancerPipe };
foreach (var learner in learners)
{
foreach (TestDataset dataset in binaryClassificationDatasets)
Run_TrainTest(learner, dataset);
}
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastTree")]
public void FastTreeRankingTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.FastTreeRanker, TestLearners.FastTreeRankerCustomGains };
var rankingDatasets = GetDatasetsForRankingTest();
foreach (var learner in learners)
{
foreach (TestDataset dataset in rankingDatasets)
Run_TrainTest(learner, dataset);
}
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastTree")]
public void FastTreeRegressionTest()
{
RunMTAThread(() =>
{
var learners = new[] {
TestLearners.FastTreeRegressor,
TestLearners.FastTreeDropoutRegressor,
TestLearners.FastTreeTweedieRegressor
};
var datasets = GetDatasetsForRegressorTest();
foreach (var learner in learners)
{
foreach (TestDataset dataset in datasets)
Run_TrainTest(learner, dataset);
}
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastTree")]
public void GamRegressionTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.RegressionGamTrainer };
var datasets = GetDatasetsForRegressorTest();
foreach (var learner in learners)
{
foreach (TestDataset dataset in datasets)
{
Run_TrainTest(learner, dataset);
}
}
});
Done();
}
[Fact]
[TestCategory("FastTree")]
public void GamBinaryClassificationTest()
{
RunMTAThread(() =>
{
var learners = new[] { TestLearners.BinaryClassificationGamTrainer, TestLearners.BinaryClassificationGamTrainerDiskTranspose };
var datasets = GetDatasetsForBinaryClassifierBaseTest();
foreach (var learner in learners)
{
foreach (TestDataset dataset in datasets)
{
Run_TrainTest(learner, dataset);
}
}
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastTree")]
public void FastTreeUnderbuiltRegressionTest()
{
// In this test, we request, per tree, 30 splits with a minimum of 30 docs per leaf,
// on a training set with only about 500 examples. This is to test the somewhat unusual
// case where the number of actual leaves is less than the maximum number of leaves per tree.
RunMTAThread(() =>
{
Run_TrainTest(TestLearners.FastTreeUnderbuiltRegressor, TestDatasets.housing, null, "Underbuilt");
});
Done();
}
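// A minimal sketch, through the modern options type, of an "underbuilt" configuration roughly
// like the one TestLearners.FastTreeUnderbuiltRegressor is assumed to use: many leaves are
// requested, but the per-leaf example requirement prevents trees from growing to that size.
private static FastTreeRegressionTrainer.Options UnderbuiltFastTreeOptionsSketch()
{
return new FastTreeRegressionTrainer.Options
{
// Request up to 30 leaves per tree...
NumberOfLeaves = 30,
// ...but require at least 30 examples per leaf, which a ~500-row training set cannot satisfy
// for a fully built tree.
MinimumExampleCountPerLeaf = 30,
LabelColumnName = DefaultColumnNames.Label,
FeatureColumnName = "Features",
};
}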
/// <summary>
/// A test for binary classifiers.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Binary")]
public void BinaryClassifierLinearSvmTest()
{
var binaryPredictors = new[] { TestLearners.linearSVM };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierMoreTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets);
Done();
}
/// <summary>
/// A test for regressors
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
[TestCategory("FastTree")]
public void RegressorFastRankTest()
{
RunMTAThread(() =>
{
var regressionPredictors = new[] { TestLearners.fastRankRegression };
var regressionDatasets = GetDatasetsForRegressorTest();
RunAllTests(regressionPredictors, regressionDatasets);
});
Done();
}
/// <summary>
/// A test for regressors.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void RegressorOgdTest()
{
var regressionPredictors = new[] { TestLearners.OGD };
var regressionDatasets = GetDatasetsForRegressorTest();
RunAllTests(regressionPredictors, regressionDatasets);
Done();
}
/// <summary>
/// A test for ordinary least squares regression.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void RegressorOlsTest()
{
var regressionPredictors = new[] { TestLearners.Ols, TestLearners.OlsNorm, TestLearners.OlsReg };
var regressionDatasets = GetDatasetsForRegressorTest();
RunAllTests(regressionPredictors, regressionDatasets);
Done();
}
/// <summary>
/// A test for ordinary least squares regression.
/// </summary>
[NativeDependencyFact("MklImports")]
[TestCategory("Regressor")]
public void RegressorOlsTestOne()
{
Run_TrainTest(TestLearners.Ols, TestDatasets.generatedRegressionDataset, digitsOfPrecision: 4);
Done();
}
/// <summary>
/// Test method for SDCA regression.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
[TestCategory("SDCAR")]
public void RegressorSdcaTest()
{
var regressionPredictors = new[] { TestLearners.Sdcar, TestLearners.SdcarNorm, TestLearners.SdcarReg };
RunAllTests(regressionPredictors, new[] { TestDatasets.generatedRegressionDataset });
Done();
}
#region "Regressor"
#if OLD_TESTS // REVIEW: Port these tests?
/// <summary>
/// A test for ordinary least squares regression using synthetic data, under various
/// conditions. Unlike many other learners, OLS is an attempt to solve a problem exactly,
/// so we can more precisely judge the quality of the solution.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void RegressorSyntheticOlsTest()
{
const int featureCount = 15;
const float scale = 2;
float[] model = new float[featureCount + 1];
Random rgen = new Random(0);
for (int i = 0; i < model.Length; ++i)
model[i] = scale * (2 * rgen.NextFloat() - 1);
ListInstances instances = new ListInstances();
for (int id = 0; id < 10 * model.Length; ++id)
{
float label = model[featureCount];
WritableVector vec;
if (rgen.Next(2) == 1)
{
// Dense
float[] features = new float[featureCount];
for (int i = 0; i < features.Length; ++i)
label += model[i] * (features[i] = scale * (2 * rgen.NextFloat() - 1));
vec = WritableVector.CreateDense(features, false);
}
else
{
// Sparse
int entryCount = rgen.Next(featureCount);
int[] indices = Utils.GetRandomPermutation(rgen, featureCount).Take(entryCount).OrderBy(x => x).ToArray();
float[] features = new float[indices.Length];
for (int ii = 0; ii < indices.Length; ++ii)
label += model[indices[ii]] * (features[ii] = scale * (2 * rgen.NextFloat() - 1));
vec = WritableVector.CreateSparse(featureCount, indices, features, false);
}
instances.Add(new Instance(vec, label, "", false) { Id = id });
}
const Double tol = 1e-4;
TrainHost host = new TrainHost(new Random(0));
var args = new OlsLinearRegressionTrainer.OldArguments();
{
// Exactly determined case.
Log("Train using exactly model.Length examples, so we have an exact solution, but no statistics.");
ListInstances subinstances = new ListInstances();
subinstances.AddRange(instances.Take(model.Length));
var trainer = new OlsLinearRegressionTrainer(args, host);
trainer.Train(subinstances);
var pred = trainer.CreatePredictor();
pred = WriteReloadOlsPredictor(pred);
Assert.Equal(featureCount, pred.InputType.VectorSize, "Unexpected input size");
Assert.False(pred.HasStatistics, "Should not have statistics with exact specified model");
Assert.Null(pred.PValues, "Should not have p-values with no-stats model");
Assert.Null(pred.TValues, "Should not have t-values with no-stats model");
Assert.Null(pred.StandardErrors, "Should not have standard errors with no-stats model");
Assert.True(Double.IsNaN(pred.RSquaredAdjusted), "R-squared adjusted should be NaN with no-stats model");
foreach (Instance inst in subinstances)
Assert.Equal(inst.Label, pred.Predict(inst), tol, "Mismatch on example id {0}", inst.Id);
}
float finalNorm;
{
// Overdetermined but still exact case.
Log("Train using more examples with non-noised label, so we have an exact solution, and statistics.");
var trainer = new OlsLinearRegressionTrainer(args, host);
trainer.Train(instances);
var pred = trainer.CreatePredictor();
pred = WriteReloadOlsPredictor(pred);
Assert.Equal(featureCount, pred.InputType.VectorSize, "Unexpected input size");
Assert.True(pred.HasStatistics, "Should have statistics");
Assert.Equal(1.0, pred.RSquared, 1e-6, "Coefficient of determination should be 1 for exact specified model");
Assert.True(FloatUtils.IsFinite(pred.RSquaredAdjusted), "R-squared adjusted should be finite with exact specified model");
Assert.Equal(featureCount, pred.Weights.Count, "Wrong number of weights");
Assert.Equal(featureCount + 1, pred.PValues.Count, "Wrong number of pvalues");
Assert.Equal(featureCount + 1, pred.TValues.Count, "Wrong number of t-values");
Assert.Equal(featureCount + 1, pred.StandardErrors.Count, "Wrong number of standard errors");
foreach (Instance inst in instances)
Assert.Equal(inst.Label, pred.Predict(inst), tol, "Mismatch on example id {0}", inst.Id);
finalNorm = pred.Weights.Sum(x => x * x);
// Suppress statistics and retrain.
args.perParameterSignificance = false;
var trainer2 = new OlsLinearRegressionTrainer(args, host);
trainer2.Train(instances);
args.perParameterSignificance = true;
var pred2 = trainer2.CreatePredictor();
pred2 = WriteReloadOlsPredictor(pred2);
Assert.Null(pred2.PValues, "P-values present but should be absent");
Assert.Null(pred2.TValues, "T-values present but should be absent");
Assert.Null(pred2.StandardErrors, "Standard errors present but should be absent");
Assert.Equal(pred.RSquared, pred2.RSquared);
Assert.Equal(pred.RSquaredAdjusted, pred2.RSquaredAdjusted);
Assert.Equal(pred.Bias, pred2.Bias);
var w1 = pred.Weights.ToArray();
var w2 = pred2.Weights.ToArray();
Assert.Equal(w1.Length, w2.Length);
for (int i = 0; i < w1.Length; ++i)
Assert.Equal(w1[i], w2[i]);
}
float[] regularizationParams = new float[] { 0, (float)0.01, (float)0.1 };
foreach (float regParam in regularizationParams)
{
foreach (bool subdefined in new bool[] { true, false })
{
// Overdetermined and inexact case, for which OLS solution is feasible but inexact.
Log("");
Log("Train using noised label, reg param {0}, so solution is no longer exact", regParam);
ListInstances noisyInstances = new ListInstances();
float boundCost = 0;
foreach (Instance inst in instances)
{
// When we noise the label, we do it on an appreciable but still relatively small scale,
// compared to the regular distribution of the labels.
float diff = scale * (2 * rgen.NextFloat() - 1) / 3;
boundCost += diff * diff;
noisyInstances.Add(new Instance(inst.Features, inst.Label + diff, inst.Name, false) { Id = inst.Id });
// Make sure this solver also works when the system is underdetermined (fewer examples than parameters).
if (subdefined && 2 * noisyInstances.Count >= model.Length)
break;
}
args.l2Weight = regParam;
// Transform the friendlier user-facing parameter into the actual value injected into the solver.
var regParam2 = regParam * regParam * noisyInstances.Count;
boundCost += regParam2 * finalNorm;
var trainer = new OlsLinearRegressionTrainer(args, host);
if (subdefined && regParam == 0)
{
// In the non-ridge regression case, ordinary least squares should fail on a deficient system.
bool caught = false;
try
{
trainer.Train(noisyInstances);
}
catch (InvalidOperationException)
{
caught = true;
}
Assert.True(caught, "Failed to encounter an error, when running OLS on a deficient system");
continue;
}
else
{
trainer.Train(noisyInstances);
}
var pred = trainer.CreatePredictor();
pred = WriteReloadOlsPredictor(pred);
Assert.Equal(featureCount, pred.InputType.VectorSize, "Unexpected input size");
Assert.True(0 <= pred.RSquared && pred.RSquared < 1, "R-squared not in expected range");
Func<Func<Instance, float>, float> getError = p =>
noisyInstances.Select(inst => inst.Label - p(inst)).Sum(e => e * e);
// In principle there should be no "better" solution, i.e., one with a lower L2-regularized cost.
// Wiggle the parameters with a finite difference, and evaluate the change in cost.
var referenceNorm = pred.Weights.Sum(x => x * x);
float referenceError = getError(pred.Predict);
float referenceCost = referenceError + regParam2 * referenceNorm;
float smoothing = (float)(referenceCost * 5e-6);
Log("Reference cost is {0} + {1} * {2} = {3}, upper bound was {4}", referenceError, regParam2, referenceNorm, referenceCost, boundCost);
Assert.True(boundCost > referenceCost, "Reference cost {0} was above theoretical upper bound {1}", referenceCost, boundCost);
float lastCost = 0;
var weights = pred.Weights.Sum(x => x * x);
for (int trial = 0; trial < model.Length * 2; ++trial)
{
int param = trial / 2;
bool up = (trial & 1) == 1;
float[] w = pred.Weights.ToArray();
Assert.Equal(featureCount, w.Length);
float b = pred.Bias;
bool isBias = param == featureCount;
float normDelta;
float origValue;
float newValue;
if (isBias)
{
origValue = OlsWiggle(ref b, out normDelta, up);
newValue = b;
// Bias not included in regularization
normDelta = 0;
}
else
{
origValue = OlsWiggle(ref w[param], out normDelta, up);
newValue = w[param];
}
Func<Instance, float> del = inst => b + inst.Features.AllValues.Select((v, i) => w[i] * v).Sum();
float wiggledCost = getError(del) + regParam2 * (referenceNorm + normDelta);
string desc = string.Format("after wiggling {0} {1} from {2} to {3}",
isBias ? "bias" : string.Format("weight[{0}]", param), up ? "up" : "down", origValue, newValue);
Log("Finite difference cost is {0} ({1}), {2}", wiggledCost, wiggledCost - referenceCost, desc);
Assert.True(wiggledCost > referenceCost * (float)(1 - 5e-7), "Finite difference cost {0} not higher than reference cost {1}, {2}",
wiggledCost, referenceCost, desc);
if (up)
{
// If the solution to the problem really does lie at the base of the quadratic, then wiggling
// equal amounts up and down should lead to *roughly* the same error.
float ratio = 1 - (lastCost - referenceCost + smoothing) / (wiggledCost - referenceCost + smoothing);
Log("Wiggled up had a relative difference of {0:0.0%} vs. wiggled down", ratio);
Assert.True(0.1 > Math.Abs(ratio), "Ratio {0} of up/down too high, {1}", ratio, desc);
}
lastCost = wiggledCost;
}
}
}
Done();
}
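// Nudges the given value up or down by roughly 0.1% (at least 1e-7) and returns the original
// value; deltaNorm reports the resulting change in this parameter's squared-norm contribution.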
private float OlsWiggle(ref float value, out float deltaNorm, bool up)
{
float origValue = value;
float wiggle = (float)Math.Max(1e-7, Math.Abs(1e-3 * value));
value += up ? wiggle : -wiggle;
deltaNorm = value * value - origValue * origValue;
return origValue;
}
private OlsLinearRegressionPredictor WriteReloadOlsPredictor(OlsLinearRegressionPredictor pred)
{
using (MemoryStream mem = new MemoryStream())
{
PredictorUtils.Save(mem, pred, null, null, null, useFileSystem: true);
mem.Seek(0, SeekOrigin.Begin);
Microsoft.ML.Model.IDataModel model;
Microsoft.ML.Model.IDataStats stats;
return (OlsLinearRegressionPredictor)PredictorUtils.LoadPredictor(out model, out stats, mem, false);
}
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void RegressorSyntheticDuplicatedOlsTest()
{
// OLS should result in the same predictor if we simply duplicate the data.
// Make certain that ridge regression works.
const int featureCount = 10;
const float scale = 2;
float[] model = new float[featureCount + 1];
Random rgen = new Random(1);
for (int i = 0; i < model.Length; ++i)
model[i] = scale * (2 * rgen.NextFloat() - 1);
ListInstances instances = new ListInstances();
for (int id = 0; id < 2 * model.Length; ++id)
{
float label = model[featureCount];
WritableVector vec;
if (rgen.Next(2) == 1)
{
// Dense
float[] features = new float[featureCount];
for (int i = 0; i < features.Length; ++i)
label += model[i] * (features[i] = scale * (2 * rgen.NextFloat() - 1));
vec = WritableVector.CreateDense(features, false);
}
else
{
// Sparse
int entryCount = rgen.Next(featureCount);
int[] indices = Utils.GetRandomPermutation(rgen, featureCount).Take(entryCount).OrderBy(x => x).ToArray();
float[] features = new float[indices.Length];
for (int ii = 0; ii < indices.Length; ++ii)
label += model[indices[ii]] * (features[ii] = scale * (2 * rgen.NextFloat() - 1));
vec = WritableVector.CreateSparse(featureCount, indices, features, false);
}
float diff = scale * (2 * rgen.NextFloat() - 1) / 5;
instances.Add(new Instance(vec, label + diff, "", false) { Id = id });
}
ListInstances instances2 = new ListInstances();
foreach (Instance inst in instances)
{
instances2.Add(new Instance(inst.Features, inst.Label, inst.Name, false) { Id = 2 * inst.Id });
instances2.Add(new Instance(inst.Features, inst.Label, inst.Name, false) { Id = 2 * inst.Id + 1 });
}
OlsLinearRegressionTrainer.OldArguments args = new OlsLinearRegressionTrainer.OldArguments();
args.l2Weight = (float)1;
TrainHost host = new TrainHost(new Random(0));
var trainer = new OlsLinearRegressionTrainer(args, host);
trainer.Train(instances);
var pred = trainer.CreatePredictor();
var trainer2 = new OlsLinearRegressionTrainer(args, host);
trainer2.Train(instances2);
var pred2 = trainer2.CreatePredictor();
var tol = 1e-5;
Assert.Equal(pred.RSquared, pred2.RSquared, tol);
Assert.Equal(pred.Bias, pred2.Bias, tol);
var w1 = pred.Weights.ToArray();
var w2 = pred2.Weights.ToArray();
Assert.Equal(w1.Length, w2.Length);
for (int i = 0; i < w1.Length; ++i)
Assert.Equal(w1[i], w2[i], tol);
Done();
}
#endif
#endregion
/// <summary>
/// A test for the FastRank ranker.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("FastRank")]
public void RankingTest()
{
RunMTAThread(() =>
{
var rankingPredictors = new[] { TestLearners.fastRankRanking };
var rankingDatasets = GetDatasetsForRankingTest();
RunAllTests(rankingPredictors, rankingDatasets);
});
Done();
}
/// <summary>
/// A test for Poisson regression.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void PoissonRegressorTest()
{
var regressionPredictors = new[] { TestLearners.poissonRegression };
//AP: TestDatasets.displayPoisson is broken: it declares header+, but the training set lacks a proper header.
// Discovered when adding strict schema checks between Train/Test.
// It is not clear how to fix the train set. Adding a proper header might be sufficient, but the column
// counts between train and test differ, so the dataset is dropped here until whoever added the data
// fixes it and possibly re-enables the unit test.
var datasets = new[] { TestDatasets.childrenPoisson, TestDatasets.autosSample };
RunAllTests(regressionPredictors, datasets);
Done();
}
/// <summary>
/// A test for Poisson regression with non-negative coefficients.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Regressor")]
public void PoissonRegressorNonNegativeTest()
{
var regressionPredictors = new[] { TestLearners.poissonRegressionNonNegative };
//AP: TestDatasets.displayPoisson is broken: it declares header+, but the training set lacks a proper header.
// Discovered when adding strict schema checks between Train/Test.
// It is not clear how to fix the train set. Adding a proper header might be sufficient, but the column
// counts between train and test differ, so the dataset is dropped here until whoever added the data
// fixes it and possibly re-enables the unit test.
var datasets = new[] { TestDatasets.childrenPoisson, TestDatasets.autosSample };
RunAllTests(regressionPredictors, datasets);
Done();
}
/// <summary>
/// Multiclass Logistic Regression test.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Multiclass")]
[TestCategory("Logistic Regression Sparse")]
public void MulticlassLRSparseTest()
{
RunAllTests(
new List<PredictorAndArgs>() { TestLearners.multiclassLogisticRegressionRegularized },
new List<TestDataset>() { TestDatasets.reutersMaxDim });
Done();
}
/// <summary>
/// Get a list of datasets for Calibrator test.
/// </summary>
public IList<TestDataset> GetDatasetsForCalibratorTest()
{
return new[] { TestDatasets.breastCancer };
}
/// <summary>
/// A test for no calibrators.
/// </summary>
[Fact]
[TestCategory("Calibrator")]
public void DefaultCalibratorPerceptronTest()
{
var datasets = GetDatasetsForCalibratorTest();
RunAllTests(new[] { TestLearners.perceptronDefault }, datasets, new string[] { "cali={}" }, "nocalibration", digitsOfPrecision: 5);
Done();
}
/// <summary>
/// A test for PAV calibrators.
/// </summary>
[Fact]
[TestCategory("Calibrator")]
public void PAVCalibratorPerceptronTest()
{
var datasets = GetDatasetsForCalibratorTest();
RunAllTests(new[] { TestLearners.perceptronDefault }, datasets, new[] { "cali=PAV" }, "PAVcalibration", digitsOfPrecision: 3);
Done();
}
/// <summary>
/// A test for random calibrators.
/// </summary>
[Fact]
[TestCategory("Calibrator")]
public void RandomCalibratorPerceptronTest()
{
var datasets = GetDatasetsForCalibratorTest();
RunAllTests(new[] { TestLearners.perceptronDefault }, datasets, new string[] { "numcali=200" }, "calibrateRandom");
Done();
}
/// <summary>
/// A test for no calibrators.
/// </summary>
[Fact]
[TestCategory("Calibrator")]
public void NoCalibratorLinearSvmTest()
{
var datasets = GetDatasetsForCalibratorTest();
RunAllTests(new[] { TestLearners.linearSVM }, datasets, new string[] { "cali={}" }, "nocalibration", digitsOfPrecision: 6);
Done();
}
/// <summary>
/// A test for PAV calibrators.
/// </summary>
[Fact]
[TestCategory("Calibrator")]
public void PAVCalibratorLinearSvmTest()
{
var datasets = GetDatasetsForCalibratorTest();
RunAllTests(new[] { TestLearners.linearSVM }, datasets, new string[] { "cali=PAV" }, "PAVcalibration", digitsOfPrecision: 5);
Done();
}
/// <summary>
/// A test for FastRank weighting predictors.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("FastRank")]
public void WeightingClassificationFastRankPredictorsTest()
{
RunMTAThread(() =>
{
var learner = TestLearners.fastRankClassificationWeighted;
var data = TestDatasets.breastCancerWeighted;
string dir = learner.Trainer.Kind;
string prName = "prcurve-breast-cancer-weighted-prcurve.txt";
string prPath = DeleteOutputPath(dir, prName);
string eval = string.Format("eval=Binary{{pr={{{0}}}}}", prPath);
Run_TrainTest(learner, data, new[] { eval });
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // PR curves are only generated on Windows.
CheckEqualityNormalized(dir, prName);
Run_CV(learner, data);
});
Done();
}
/// <summary>
/// Test weighted logistic regression.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("Logistic Regression")]
public void WeightingClassificationLRPredictorsTest()
{
RunAllTests(
new[] { TestLearners.logisticRegression },
GetDatasetsForClassificationWeightingPredictorsTest());
Done();
}
/// <summary>
/// Test weighted neural nets.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("Neural Nets")]
public void WeightingClassificationNNPredictorsTest()
{
RunAllTests(
new[] { TestLearners.NnBinDefault },
GetDatasetsForClassificationWeightingPredictorsTest());
Done();
}
/// <summary>
/// A test for FastRank weighting predictors.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("FastRank")]
public void WeightingRegressionPredictorsTest()
{
RunMTAThread(() =>
{
RunOneAllTests(TestLearners.fastRankRegressionWeighted, TestDatasets.housingWeightedRep);
});
Done();
}
/// <summary>
/// A test for FastRank weighting predictors.
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Weighting Predictors")]
[TestCategory("FastRank")]
public void WeightingRankingPredictorsTest()
{
RunMTAThread(() =>
{
RunOneAllTests(TestLearners.fastRankRankingWeighted, TestDatasets.rankingWeighted);
});
Done();
}
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Neural Nets")]
public void NnConfigTests()
{
string path;
// The baseline should show an input mismatch message.
path = DeleteOutputPath(TestLearners.NnBinDefault.Trainer.Kind, "BcInputMismatch.nn");
File.WriteAllText(path,
@"
input Data [8];
hidden H [20] from Data all;
output Out [2] from H all;
");
RunOneTrain(TestLearners.NnBinCustom(path), TestDatasets.breastCancer, null, "InputMismatch");
// The baseline should show an output mismatch message.
path = DeleteOutputPath(TestLearners.NnBinDefault.Trainer.Kind, "BcOutputMismatch.nn");
File.WriteAllText(path,
@"
input Data [9];
hidden H [20] from Data all;
output Out [5] from H all;
");
RunOneTrain(TestLearners.NnBinCustom(path), TestDatasets.breastCancer, null, "OutputMismatch");
// The data matches the .nn, but the .nn is multi-class, not binary,
// so BinaryNeuralNetwork.Validate should throw.
path = DeleteOutputPath(TestLearners.NnBinDefault.Trainer.Kind, "BcNonBinData.nn");
File.WriteAllText(path,
@"
input Data [4];
hidden H [20] from Data all;
output Out [3] from H all;
");
RunOneTrain(TestLearners.NnBinCustom(path), TestDatasets.iris, null, "NonBinData");
Done();
}
[Fact(Skip = "Test flaky. Disabling until resolved.")]
[TestCategory("Anomaly")]
public void PcaAnomalyTest()
{
Run_TrainTest(TestLearners.PCAAnomalyDefault, TestDatasets.mnistOneClass, extraSettings: new[] { "loader=text{sparse+}" }, digitsOfPrecision: 4);
Run_TrainTest(TestLearners.PCAAnomalyNoNorm, TestDatasets.mnistOneClass, extraSettings: new[] { "loader=text{sparse+}" }, digitsOfPrecision: 4);
// REVIEW: This next test was misbehaving in a strange way that seems to have gone away
// mysteriously (bad build?).
// REVIEW: enable this test after the Expr transform is available. Currently maml breaks on the xf=Expr setting.
// Run_TrainTest(TestLearners.PCAAnomalyDefault, TestDatasets.azureCounterUnlabeled, summary: true);
Done();
}
/// <summary>
///A test for one-class svm (libsvm wrapper)
///</summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Anomaly")]
public void OneClassSvmLibsvmWrapperTest()
{
// We don't use the predictor that uses the MKL library, because results can be slightly different depending on the number of threads.
Run_TrainTest(TestLearners.OneClassSvmLinear, TestDatasets.mnistOneClass, extraTag: "LinearKernel");
Run_TrainTest(TestLearners.OneClassSvmPoly, TestDatasets.mnistOneClass, extraTag: "PolynomialKernel");
Run_TrainTest(TestLearners.OneClassSvmRbf, TestDatasets.mnistOneClass, extraTag: "RbfKernel");
Run_TrainTest(TestLearners.OneClassSvmSigmoid, TestDatasets.mnistOneClass, extraTag: "SigmoidKernel");
Done();
}
/// <summary>
///A test for one-class svm (libsvm wrapper) on dense data
///</summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Anomaly")]
public void OneClassSvmLibsvmWrapperDenseTest()
{
// We don't use the predictor that uses the MKL library, because results can be slightly different depending on the number of threads.
Run_TrainTest(TestLearners.OneClassSvmLinear, TestDatasets.breastCancerOneClass, extraTag: "LinearKernel");
Run_TrainTest(TestLearners.OneClassSvmPoly, TestDatasets.breastCancerOneClass, extraTag: "PolynomialKernel");
Run_TrainTest(TestLearners.OneClassSvmRbf, TestDatasets.breastCancerOneClass, extraTag: "RbfKernel");
Run_TrainTest(TestLearners.OneClassSvmSigmoid, TestDatasets.breastCancerOneClass, extraTag: "SigmoidKernel");
Done();
}
#if !CORECLR
/// <summary>
///A test comparing our one-class svm (libsvm wrapper) predictions against native libsvm
///</summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Anomaly")]
public void CompareSvmPredictorResultsToLibSvm()
{
var env = new LocalEnvironment(1, conc: 1);
IDataView trainView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.trainFilename)));
trainView =
NormalizeTransform.Create(env,
new NormalizeTransform.MinMaxArguments()
{
Column = new[] { new NormalizeTransform.AffineColumn() { Name = "Features", Source = "Features" } }
},
trainView);
var trainData = new RoleMappedData(trainView, "Label", "Features");
IDataView testView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.testFilename)));
// Apply the transforms fitted on the training data to the test data.
testView = ApplyTransformUtils.ApplyAllTransformsToData(env, trainView, testView);
var testData = new RoleMappedData(testView, "Label", "Features");
CompareSvmToLibSvmCore("linear kernel", "LinearKernel", env, trainData, testData);
CompareSvmToLibSvmCore("polynomial kernel", "PolynomialKernel{d=2}", env, trainData, testData);
CompareSvmToLibSvmCore("RBF kernel", "RbfKernel", env, trainData, testData);
CompareSvmToLibSvmCore("sigmoid kernel", "SigmoidKernel", env, trainData, testData);
Done();
}
#endif
#if !CORECLR
private const float Epsilon = 0.0004f; // Do not use Single.Epsilon: it is the smallest positive float, not a commonly accepted machine epsilon.
private const float MaxRelError = 0.000005f;
#endif
public TestPredictors(ITestOutputHelper helper) : base(helper)
{
}
#if !CORECLR
private void CompareSvmToLibSvmCore(string kernelType, string kernel, IHostEnvironment env, RoleMappedData trainData, RoleMappedData testData)
{
Contracts.Assert(testData.Schema.Feature != null);
var args = new OneClassSvmTrainer.Arguments();
CmdParser.ParseArguments(env, "ker=" + kernel, args);
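// Train two identically configured trainers: trainer1 yields our managed predictor,
// while trainer2 exposes the underlying libsvm model handle for direct comparison.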
var trainer1 = new OneClassSvmTrainer(env, args);
var trainer2 = new OneClassSvmTrainer(env, args);
trainer1.Train(trainData);
var predictor1 = (IValueMapper)trainer1.CreatePredictor();
LibSvmInterface.ModelHandle predictor2;
trainer2.TrainCore(trainData, out predictor2);
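// Change the model's svm_type (4 is NU_SVR in libsvm's enumeration) so that
// SvmPredict* returns the raw decision value rather than a thresholded +/-1
// one-class label (our understanding of why this call is needed).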
LibSvmInterface.ChangeSvmType(predictor2, 4);
var predictions1 = new List<float>();
var predictions2 = new List<float>();
int instanceNum = 0;
int colFeat = testData.Schema.Feature.Index;
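// Cursor over the test data with only the feature column active.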
using (var cursor = testData.Data.GetRowCursor(col => col == colFeat))
{
float res1 = 0;
var buf = default(VBuffer<float>);
var getter = cursor.GetGetter<VBuffer<float>>(colFeat);
var map1 = predictor1.GetMapper<VBuffer<float>, float>();
while (cursor.MoveNext())
{
getter(ref buf);
map1(ref buf, ref res1);
float res2;
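// Score the same instance through the native libsvm entry points; the sign is
// flipped to match our predictor's score convention (assumption based on the
// equality assertion below).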
unsafe
{
if (buf.IsDense)
{
fixed (float* pValues = buf.Values)
res2 = -LibSvmInterface.SvmPredictDense(predictor2, pValues, buf.Length);
}
else
{
fixed (float* pValues = buf.Values)
fixed (int* pIndices = buf.Indices)
res2 = -LibSvmInterface.SvmPredictSparse(predictor2, pValues, pIndices, buf.Count);
}
}
predictions1.Add(res1);
predictions2.Add(res2);
Assert.True(AreEqual(res1, res2, MaxRelError, Epsilon),
string.Format("Found prediction that does not match the libsvm prediction in line {0}, using {1}",
instanceNum, kernelType));
instanceNum++;
}
}
LibSvmInterface.FreeSvmModel(ref predictor2);
var predArray1 = predictions1.ToArray();
var predArray2 = predictions2.ToArray();
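// Co-sort: order our predictions by libsvm's. If both systems rank the instances
// identically, our predictions must now be non-decreasing, which the loop below
// verifies within tolerance.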
Array.Sort(predArray2, predArray1);
for (int i = 0; i < predictions1.Count - 1; i++)
{
Assert.True(IsLessThanOrEqual(predArray1[i], predArray1[i + 1], MaxRelError, Epsilon),
"Different ordering of our results and libsvm results");
}
}
#endif
#if !CORECLR
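// Returns true if a <= b up to a mixed tolerance: an absolute slack of
// maxAbsError, or a relative slack of maxRelError scaled by a.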
private bool IsLessThanOrEqual(float a, float b, float maxRelError, float maxAbsError)
{
if (a <= b)
return true;
float diff = a - b;
if (diff <= maxAbsError)
return true;
return diff <= maxRelError * a;
}
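// Returns true if a and b differ by at most maxAbsError absolutely, or by at
// most maxRelError relative to the larger magnitude.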
private bool AreEqual(float a, float b, float maxRelError, float maxAbsError)
{
float diff = Math.Abs(a - b);
if (diff <= maxAbsError)
return true;
float largest = Math.Max(Math.Abs(a), Math.Abs(b));
return diff < largest * maxRelError;
}
#endif
}
#if OLD_TESTS // REVIEW: Some of this should be ported to the new world.
public sealed partial class TestPredictorsOld
{
#if OLD_TESTS // REVIEW: Need to port this old time series functionality to the new world.
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Anomaly")]
[TestCategory("Time Series")]
public void TimeSeriesAnomalyDetectorTest1()
{
const string dir = "Anomaly";
const string windowDataFile = "AppFailure-unlabeled.windowed.txt";
const string consName = "LeastSquares.AppFailure-test-out.txt";
var dataset = TestDatasets.AppFailure;
var windowDataPath = DeleteOutputPath(dir, windowDataFile);
// Test window feature creation.
var windowsGenerationArgs = "/c CreateInstances " + GetDataPath(dataset.trainFilename) +
" /inst=Text{sep=, name=0 attr=2 nolabel=+} /writer=WindowWriter{size=45 stride=25} /cifile="
+ windowDataPath + " /rs=1 /disableTracking=+";
TestPredictorMain.MainWithArgs(windowsGenerationArgs);
CheckEquality(dir, windowDataFile);
// Test the least-squares predictor.
ConsoleGrabber consoleGrabber;
using (consoleGrabber = new ConsoleGrabber())
{
var testArgs = "/c Test " + windowDataPath + " /inst=Text{name=0 nolabel=+} /pred=LeastSquaresAnom /rs=1 /disableTracking=+";
int res = TestPredictorMain.MainWithArgs(testArgs);
if (res != 0)
Log("*** Predictor returned {0}", res);
}
string consOutPath = DeleteOutputPath(dir, consName);
consoleGrabber.Save(consOutPath);
CheckEqualityNormalized(dir, consName);
Done();
}
#endif
#if OLD_TESTS // REVIEW: Figure out what to do with this in the IDV world.
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Anomaly")]
[TestCategory("Time Series")]
public void StreamingTimeSeriesAnomalyDetectorTest()
{
const string dir = "Anomaly";
var instArgs = new TlcTextInstances.Arguments();
CmdParser.ParseArguments("sep=, name=0 nolabel=+", instArgs);
var dataset = TestDatasets.AppFailure;
var instances = new TlcTextInstances(instArgs, GetDataPath(dataset.trainFilename));
var predictor = new OLSAnomalyDetector(45, 0.1f);
var sb = new StringBuilder().AppendLine("Instance\tAnomaly Score\tBad anomaly?");
foreach (var instance in instances)
{
float score, trend;
if (predictor.Classify(instance.Features[0], out score, out trend))
sb.AppendFormat("{0}\t{1:G4}\t{2}", instance.Name, score, trend > 0).AppendLine(); // trigger alert
}
const string outFile = "StreamingLeastSquares-out.txt";
File.WriteAllText(DeleteOutputPath(dir, outFile), sb.ToString());
CheckEquality(dir, outFile);
Done();
}
#endif
#if OLD_TESTS // REVIEW: Need to port Tremble to the new world.
/// <summary>
/// A test for the tremble binary classifier using logistic regression
/// in leaf and interior nodes
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Binary")]
[TestCategory("TrembleDecisionTree")]
public void BinaryClassifierTrembleTest()
{
var binaryPredictors = new[] { TestLearners.BinaryTrembleDecisionTreeLR };
var datasets = new[] {
TestDatasets.breastCancer,
TestDatasets.adultCatAsAtt,
TestDatasets.adultSparseWithCatAsAtt,
};
RunAllTests(binaryPredictors, datasets);
Done();
}
/// <summary>
/// A test for the tremble multi-class classifier using logistic regression
/// in leaf and interior nodes
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("MultiClass")]
[TestCategory("TrembleDecisionTree")]
public void MulticlassClassificationTrembleTest()
{
var multiClassPredictors = new[] { TestLearners.MulticlassTrembleDecisionTreeLR };
var multiClassClassificationDatasets = new List<TestDataset>
{
TestDatasets.iris,
TestDatasets.adultCatAsAtt,
TestDatasets.adultSparseWithCatAsAtt,
};
RunAllTests(multiClassPredictors, multiClassClassificationDatasets);
Done();
}
/// <summary>
/// A test for the tremble default decision tree binary classifier
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Binary")]
[TestCategory("TrembleDecisionTree"), Priority(2)]
public void BinaryClassifierDecisionTreeTest()
{
var binaryPredictors = new[] { TestLearners.BinaryDecisionTreeDefault, TestLearners.BinaryDecisionTreeGini,
TestLearners.BinaryDecisionTreePruning, TestLearners.BinaryDecisionTreeModified };
var binaryClassificationDatasets = new List<TestDataset>
{
TestDatasets.breastCancer,
TestDatasets.adultCatAsAtt,
TestDatasets.adultSparseWithCatAsAtt,
};
RunAllTests(binaryPredictors, binaryClassificationDatasets);
Done();
}
/// <summary>
/// A test for the tremble default decision tree binary classifier on weighted data sets
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("Binary")]
[TestCategory("Weighting Predictors")]
[TestCategory("TrembleDecisionTree"), Priority(2)]
public void BinaryClassifierDecisionTreeWeightingTest()
{
var binaryPredictors = new[] { TestLearners.BinaryDecisionTreeDefault, TestLearners.BinaryDecisionTreeGini,
TestLearners.BinaryDecisionTreePruning, TestLearners.BinaryDecisionTreeModified, TestLearners.BinaryDecisionTreeRewt };
var binaryClassificationDatasets = GetDatasetsForClassificationWeightingPredictorsTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets);
Done();
}
/// <summary>
/// A test for the tremble default decision tree multi-class classifier
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("MultiClass")]
[TestCategory("TrembleDecisionTree"), Priority(2)]
public void MulticlassClassificationDecisionTreeTest()
{
var multiClassPredictors = new[] { TestLearners.MulticlassDecisionTreeDefault, TestLearners.MulticlassDecisionTreeGini,
TestLearners.MulticlassDecisionTreePruning, TestLearners.MulticlassDecisionTreeModified };
var multiClassClassificationDatasets = new List<TestDataset>
{
TestDatasets.iris,
TestDatasets.adultCatAsAtt,
TestDatasets.adultSparseWithCatAsAtt,
};
RunAllTests(multiClassPredictors, multiClassClassificationDatasets);
Done();
}
/// <summary>
/// A test for the tremble default decision tree multi-class classifier on weighted data sets
/// </summary>
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("MultiClass")]
[TestCategory("Weighting Predictors")]
[TestCategory("TrembleDecisionTree"), Priority(2)]
public void MulticlassClassifierDecisionTreeWeightingTest()
{
var multiClassPredictors = new[] { TestLearners.MulticlassDecisionTreeDefault, TestLearners.MulticlassDecisionTreeGini,
TestLearners.MulticlassDecisionTreePruning, TestLearners.MulticlassDecisionTreeModified };
var multiClassClassificationDatasets = new List<TestDataset>(GetDatasetsForClassificationWeightingPredictorsTest());
RunAllTests(multiClassPredictors, multiClassClassificationDatasets);
Done();
}
#endif
}
#endif
public sealed partial class TestPredictors
{
/// <summary>
///A test for binary classifiers
///</summary>
[Fact]
[TestCategory("Binary")]
[TestCategory("LDSVM")]
public void BinaryClassifierLDSvmTest()
{
var binaryPredictors = new[] { TestLearners.LDSVMDefault };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 2);
Done();
}
/// <summary>
///A test for binary classifiers
///</summary>
[Fact]
[TestCategory("Binary")]
[TestCategory("LDSVM")]
public void BinaryClassifierLDSvmNoBiasTest()
{
var binaryPredictors = new[] { TestLearners.LDSVMNoBias };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 2);
Done();
}
/// <summary>
/// A test for field-aware factorization machine.
/// </summary>
[FieldAwareFactorizationMachineFact]
[TestCategory("Binary")]
[TestCategory("FieldAwareFactorizationMachine")]
public void BinaryClassifierFieldAwareFactorizationMachineTest()
{
var binaryPredictors = new[] { TestLearners.FieldAwareFactorizationMachine };
var binaryClassificationDatasets = GetDatasetsForBinaryClassifierBaseTest();
RunAllTests(binaryPredictors, binaryClassificationDatasets, digitsOfPrecision: 4);
Done();
}
/// <summary>
/// Multiclass Naive Bayes test.
/// </summary>
[Fact]
[TestCategory("Multiclass")]
[TestCategory("Multi Class Naive Bayes Classifier")]
public void MulticlassNaiveBayes()
{
RunOneAllTests(TestLearners.MulticlassNaiveBayesClassifier, TestDatasets.breastCancerPipe);
Done();
}
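// The ensemble tests below configure WeightedEnsemble via maml's terse
// sub-component settings syntax. As best we can tell from the component catalog:
// bp = base predictor, nm = number of models, st = sub-model (instance) selector,
// fs = feature selector, oc = output combiner, pt = sub-model performance
// selector, and tp=- disables parallel training.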
[Fact]
public void EnsemblesMultiClassBootstrapSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsembleMulticlass", "bp=mlr{t-} nm=20 st=BootstrapSelector{} tp=-"), "WE-Bootstrap");
Run_TrainTest(pa, TestDatasets.iris, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesDefaultTest()
{
// This one does CV as well as TrainTest.
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 tp=-"), "WE-Default");
RunOneAllTests(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 3, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesBaseLearnerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "bp=AvgPer nm=3 tp=-"), "WE-AvgPer");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 5, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesHeterogeneousTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "bp=svm bp=ap nm=20 tp=-"), "WE-Hetero");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 5, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesVotingCombinerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 oc=Voting tp=-"), "WE-Voting");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesStackingCombinerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=5 oc=Stacking{bp=ap} tp=-"), "WE-StackingAP");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 4, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesAveragerCombinerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 oc=Average tp=-"), "WE-Average");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 3, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesBestPerformanceSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 pt=BestPerformanceSelector tp=-"), "WE-BestPerf");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 4, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesBestDiverseSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 pt=BestDiverseSelector tp=-"), "WE-Diverse");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesRandomPartitionInstanceSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=5 st=RandomPartitionSelector tp=-"), "WE-RandomPartition");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 4, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesAllDataSetSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 st=AllInstanceSelector tp=-"), "WE-All");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesRandomSubSpaceSelectorTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsemble", "nm=20 st=AllInstanceSelector{fs=RandomFeatureSelector} tp=-"), "WE-RandomFeature");
Run_TrainTest(pa, TestDatasets.breastCancer, new[] { "loader=Text{col=Label:BL:0 col=Features:R4:1-9}" }, digitsOfPrecision: 3, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesMultiAveragerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsembleMulticlass", "bp=mlr{t-} nm=5 oc=MultiAverage tp=-"), "WE-Average");
Run_TrainTest(pa, TestDatasets.iris, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesMultiVotingCombinerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsembleMulticlass", "bp=mlr{t-} nm=5 oc=MultiVoting tp=-"), "WE-Voting");
Run_TrainTest(pa, TestDatasets.iris, digitsOfPrecision: 6, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesMultiStackCombinerTest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsembleMulticlass", "bp=mlr{t-} nm=5 oc=MultiStacking{bp=mlr{t-}} tp=-"), "WE-Stacking");
Run_TrainTest(pa, TestDatasets.iris, digitsOfPrecision: 3, parseOption: NumberParseOption.UseSingle);
Done();
}
[Fact]
public void EnsemblesMultiAveragerSDCATest()
{
var pa = new PredictorAndArgs(new SubComponent("WeightedEnsembleMulticlass", "bp=SDCAMC{nt=1} nm=5 oc=MultiAverage tp=-"), "WE-SDCA-Average");
Run_TrainTest(pa, TestDatasets.iris, digitsOfPrecision: 4, parseOption: NumberParseOption.UseSingle);
Done();
}
}
#if OLD_TESTS // REVIEW: We should have some tests that verify we can't deserialize old models.
public sealed partial class TestPredictorsOld
{
[Fact(Skip = "Need CoreTLC specific baseline update")]
[TestCategory("CreateInstances")]
[TestCategory("FeatureHandler")]
public void TestFeatureHandlerModelReuse()
{
string trainData = GetDataPath(TestDatasets.breastCancer.trainFilename);
string dataModelFile = DeleteOutputPath(TestContext.TestName + "-data-model.zip");
string ciFile = DeleteOutputPath(TestContext.TestName + "-ci.tsv");
string argsString = string.Format(
"/c CreateInstances {0} /inst Text{{text=1,2,3}} /m {1} /cifile {2}",
trainData,
dataModelFile,
ciFile);
var args = new TLCArguments();
Assert.True(CmdParser.ParseArguments(argsString, args));
RunExperiments.Run(args);
// REVIEW: think of a test that would distinguish more dramatically the case when /im works and when it doesn't
// Right now the only difference is in the output of the feature handler training.
RunAllTests(
new[] { TestLearners.logisticRegression_tlOld },
new[] { TestDatasets.breastCancer },
new[] { string.Format("/inst Text{{text=1,2,3}} /im {0}", dataModelFile) },
"feature-handler-reuse"
);
Done();
}
}
#endif
}
|