// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Data;
using Microsoft.ML.IntegrationTests.Datasets;
using Microsoft.ML.TestFrameworkCommon;
using Microsoft.ML.TestFrameworkCommon.Attributes;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.FastTree;
using Xunit;
using Xunit.Abstractions;
namespace Microsoft.ML.IntegrationTests
{
/// <summary>
/// Integration tests that train a model for each ML task (anomaly detection, binary and
/// multiclass classification, clustering, ranking, recommendation, regression) and verify
/// that the corresponding evaluator returns valid metrics.
/// </summary>
public class Evaluation : IntegrationTestBaseClass
{
    /// <summary>
    /// Initializes the test class, forwarding the xUnit output helper to the base class.
    /// </summary>
    public Evaluation(ITestOutputHelper output) : base(output)
    {
    }

    /// <summary>
    /// Train and Evaluate: Anomaly Detection.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateAnomalyDetection()
    {
        var mlContext = new MLContext(seed: 1);

        var trainData = MnistOneClass.GetTextLoader(mlContext,
                TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
            .Load(TestCommon.GetDataPath(DataDir, TestDatasets.mnistOneClass.trainFilename));
        var testData = MnistOneClass.GetTextLoader(mlContext,
                TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
            .Load(TestCommon.GetDataPath(DataDir, TestDatasets.mnistOneClass.testFilename));

        // Create a training pipeline.
        var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca();

        // Train the model.
        var model = pipeline.Fit(trainData);

        // Evaluate the model.
        // TODO #2464: Using the train dataset will cause NaN metrics to be returned.
        var scoredTest = model.Transform(testData);
        var metrics = mlContext.AnomalyDetection.Evaluate(scoredTest);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Binary Classification with no calibration.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateBinaryClassification()
    {
        var mlContext = new MLContext(seed: 1);

        var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
            hasHeader: TestDatasets.Sentiment.fileHasHeader,
            separatorChar: TestDatasets.Sentiment.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
            .AppendCacheCheckpoint(mlContext)
            .Append(mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(
                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Binary Classification with a calibrated predictor.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateBinaryClassificationWithCalibration()
    {
        var mlContext = new MLContext(seed: 1);

        var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
            hasHeader: TestDatasets.Sentiment.fileHasHeader,
            separatorChar: TestDatasets.Sentiment.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
            .AppendCacheCheckpoint(mlContext)
            .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
                new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.BinaryClassification.Evaluate(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Clustering.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateClustering()
    {
        var mlContext = new MLContext(seed: 1);

        var data = mlContext.Data.LoadFromTextFile<Iris>(TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename),
            hasHeader: TestDatasets.iris.fileHasHeader,
            separatorChar: TestDatasets.iris.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
            .AppendCacheCheckpoint(mlContext)
            .Append(mlContext.Clustering.Trainers.KMeans(new KMeansTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.Clustering.Evaluate(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Multiclass Classification.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateMulticlassClassification()
    {
        var mlContext = new MLContext(seed: 1);

        var data = mlContext.Data.LoadFromTextFile<Iris>(TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename),
            hasHeader: TestDatasets.iris.fileHasHeader,
            separatorChar: TestDatasets.iris.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
            .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
            .AppendCacheCheckpoint(mlContext)
            .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                new SdcaMaximumEntropyMulticlassTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.MulticlassClassification.Evaluate(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Trains a FastTree ranking model on the Iris dataset (loaded as a ranking problem)
    /// and returns the training data scored by that model, ready to be evaluated.
    /// </summary>
    /// <param name="mlContext">The <see cref="MLContext"/> to train and score with.</param>
    /// <returns>The scored training data.</returns>
    private IDataView GetScoredDataForRankingEvaluation(MLContext mlContext)
    {
        var data = Iris.LoadAsRankingProblem(mlContext,
            TestCommon.GetDataPath(DataDir, TestDatasets.iris.trainFilename),
            hasHeader: TestDatasets.iris.fileHasHeader,
            separatorChar: TestDatasets.iris.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
            .Append(mlContext.Ranking.Trainers.FastTree(new FastTreeRankingTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        return scoredData;
    }

    /// <summary>
    /// Train and Evaluate: Ranking.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateRanking()
    {
        var mlContext = new MLContext(seed: 1);

        var scoredData = GetScoredDataForRankingEvaluation(mlContext);
        var metrics = mlContext.Ranking.Evaluate(scoredData, labelColumnName: "Label", rowGroupColumnName: "GroupId");

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Ranking with options.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateRankingWithOptions()
    {
        var mlContext = new MLContext(seed: 1);
        int[] tlevels = { 50, 150, 100 };
        var options = new RankingEvaluatorOptions();

        // Train and score once: the scored data does not depend on the evaluator's
        // truncation level, so the (expensive) training is hoisted out of the loop.
        var scoredData = GetScoredDataForRankingEvaluation(mlContext);

        foreach (int i in tlevels)
        {
            options.DcgTruncationLevel = i;
            var metrics = mlContext.Ranking.Evaluate(scoredData, options, labelColumnName: "Label", rowGroupColumnName: "GroupId");

            // Check that the metrics returned are valid at each truncation level.
            Common.AssertMetrics(metrics);
        }
    }

    /// <summary>
    /// Train and Evaluate: Recommendation.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateRecommendation()
    {
        var mlContext = new MLContext(seed: 1);

        // Get the dataset.
        var data = TrivialMatrixFactorization.LoadAndFeaturizeFromTextFile(
            mlContext,
            TestCommon.GetDataPath(DataDir, TestDatasets.trivialMatrixFactorization.trainFilename),
            TestDatasets.trivialMatrixFactorization.fileHasHeader,
            TestDatasets.trivialMatrixFactorization.fileSeparator);

        // Create a pipeline to train on the sentiment data.
        var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(
            new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = "MatrixColumnIndex",
                MatrixRowIndexColumnName = "MatrixRowIndex",
                LabelColumnName = "Label",
                NumberOfIterations = 3,
                NumberOfThreads = 1,
                ApproximationRank = 4,
            });

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.Recommendation().Evaluate(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Train and Evaluate: Regression.
    /// </summary>
    [Fact]
    public void TrainAndEvaluateRegression()
    {
        var mlContext = new MLContext(seed: 1);

        // Get the dataset
        var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);

        // Create a pipeline to train on the housing data.
        var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
            .Append(mlContext.Regression.Trainers.FastForest(new FastForestRegressionTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.Regression.Evaluate(scoredData);

        // Check that the metrics returned are valid.
        Common.AssertMetrics(metrics);
    }

    /// <summary>
    /// Evaluate With Precision-Recall Curves.
    /// </summary>
    /// <remarks>
    /// This is currently not possible using the APIs.
    /// </remarks>
    [Fact]
    public void TrainAndEvaluateWithPrecisionRecallCurves()
    {
        var mlContext = new MLContext(seed: 1);

        var data = mlContext.Data.LoadFromTextFile<TweetSentiment>(TestCommon.GetDataPath(DataDir, TestDatasets.Sentiment.trainFilename),
            hasHeader: TestDatasets.Sentiment.fileHasHeader,
            separatorChar: TestDatasets.Sentiment.fileSeparator);

        // Create a training pipeline.
        var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
            .AppendCacheCheckpoint(mlContext)
            .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
                new LbfgsLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 }));

        // Train the model.
        var model = pipeline.Fit(data);

        // Evaluate the model.
        var scoredData = model.Transform(data);
        var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
        Common.AssertMetrics(metrics);

        // This scenario is not possible with the current set of APIs.
        // There could be two ways imaginable:
        // 1. Getting a list of (P,R) from the Evaluator (as it calculates most of the information already).
        //    Not currently possible.
        // 2. Manually setting the classifier threshold and calling evaluate many times:
        //    Not currently possible: Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
        // Technically, this scenario is possible using custom mappers like so:
        // 1. Get a list of all unique probability scores.
        //    e.g. By reading the IDataView as an IEnumerable, and keeping a hash of known probabilities up to some precision.
        // 2. For each value of probability:
        //    a. Write a custom mapper to produce PredictedLabel at that probability threshold.
        //    b. Calculate Precision and Recall with these labels.
        //    c. Append the Precision and Recall to an IList.
    }
}
}