File: Transformers\NAIndicatorTests.cs
Project: src\test\Microsoft.ML.Tests\Microsoft.ML.Tests.csproj (Microsoft.ML.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.IO;
using Microsoft.ML.Data;
using Microsoft.ML.Data.IO;
using Microsoft.ML.Model;
using Microsoft.ML.RunTests;
using Microsoft.ML.Runtime;
using Microsoft.ML.TestFrameworkCommon;
using Microsoft.ML.Tools;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.Tests.Transformers
{
    public class NAIndicatorTests : TestDataPipeBase
    {
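        // Simple in-memory input type: scalar and fixed-size vector columns of float and double,
        // covering the types the missing-value indicator operates on.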
        private class TestClass
        {
            public float A;
            public double B;
            [VectorType(2)]
            public float[] C;
            [VectorType(2)]
            public double[] D;
        }
 
        public NAIndicatorTests(ITestOutputHelper output) : base(output)
        {
        }
 
        [Fact]
        public void NAIndicatorWorkout()
        {
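            // Rows cover ordinary values, NaN, and +/- infinity for both scalar and vector
            // float/double columns; only NaN is treated as missing by the indicator.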
            var data = new[] {
                new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
                new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
                new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
                new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
                new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
            };
 
            var dataView = ML.Data.LoadFromEnumerable(data);
            var pipe = ML.Transforms.IndicateMissingValues(new[] {
                new InputOutputColumnPair("NAA", "A"),
                new InputOutputColumnPair("NAB", "B"),
                new InputOutputColumnPair("NAC", "C"),
                new InputOutputColumnPair("NAD", "D")
            });
            TestEstimatorCore(pipe, dataView);
            Done();
        }
 
        [Fact]
        public void TestCommandLine()
        {
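            // Smoke test of the MAML command-line surface for NAIndicator; 'showschema' only
            // prints the output schema, so the placeholder input path is never read.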
            Assert.Equal(0, Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0}  xf=NAIndicator{col=B:A} in=f:\2.txt" }));
        }
 
        [Fact]
        public void TestOldSavingAndLoading()
        {
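            // Round-trips the fitted transform through the legacy model stream
            // (TrainUtils.SaveModel / ModelFileUtils.LoadTransforms) to verify it can still be loaded.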
            var data = new[] {
                new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
                new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
                new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
                new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
                new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
            };
 
            var dataView = ML.Data.LoadFromEnumerable(data);
            var pipe = ML.Transforms.IndicateMissingValues(new[] {
                new InputOutputColumnPair("NAA", "A"),
                new InputOutputColumnPair("NAB", "B"),
                new InputOutputColumnPair("NAC", "C"),
                new InputOutputColumnPair("NAD", "D")
            });
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);
            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
 
        [Fact]
        public void NAIndicatorFileOutput()
        {
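            // Loads the breast-cancer dataset as scalar and vector float/double columns, checks that
            // the estimator rejects the mismatched in-memory schema (invalidInput), then saves the
            // first four featurized rows to TSV and compares them against the checked-in baseline.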
            string dataPath = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var data = ML.Data.LoadFromTextFile(dataPath, new[] {
                new TextLoader.Column("ScalarFloat", DataKind.Single, 1),
                new TextLoader.Column("ScalarDouble", DataKind.Double, 1),
                new TextLoader.Column("VectorFloat", DataKind.Single, 1, 4),
                new TextLoader.Column("VectorDoulbe", DataKind.Double, 1, 4)
            });
 
            var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } };
            var invalidData = ML.Data.LoadFromEnumerable(wrongCollection);
            var est = ML.Transforms.IndicateMissingValues(new[]
            {
                new InputOutputColumnPair("A", "ScalarFloat"),
                new InputOutputColumnPair("B", "ScalarDouble"),
                new InputOutputColumnPair("C", "VectorFloat"),
                new InputOutputColumnPair("D", "VectorDoulbe")
            });
 
            TestEstimatorCore(est, data, invalidInput: invalidData);
            var outputPath = GetOutputPath("NAIndicator", "featurized.tsv");
            using (var ch = ((IHostEnvironment)ML).Start("save"))
            {
                var saver = new TextSaver(ML, new TextSaver.Arguments { Silent = true });
                var savedData = ML.Data.TakeRows(est.Fit(data).Transform(data), 4);
                using (var fs = File.Create(outputPath))
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }
 
            CheckEquality("NAIndicator", "featurized.tsv");
            Done();
        }
 
        [Fact]
        public void NAIndicatorMetadataTest()
        {
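            // Verifies that the indicator output is marked as normalized and that slot names
            // are propagated from the one-hot encoded source column.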
            var data = new[] {
                new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
                new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
                new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
                new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
                new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
            };
 
            var dataView = ML.Data.LoadFromEnumerable(data);
            var pipe = ML.Transforms.Categorical.OneHotEncoding("CatA", "A");
            var newpipe = pipe.Append(ML.Transforms.IndicateMissingValues("NAA", "CatA"));
            var result = newpipe.Fit(dataView).Transform(dataView);
            Assert.True(result.Schema.TryGetColumnIndex("NAA", out var col));
            // Check that the column is normalized.
            Assert.True(result.Schema[col].IsNormalized());
            // Check that slot names metadata was correctly created.
            var value = new VBuffer<ReadOnlyMemory<char>>();
            result.Schema[col].GetSlotNames(ref value);
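            // Four slots are expected: the one-hot key values of A in order of appearance
            // (1, -Infinity, Infinity, 2); NaN maps to the missing key and gets no slot.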
            Assert.True(value.Length == 4);
            var mem = new ReadOnlyMemory<char>();
            value.GetItemOrDefault(0, ref mem);
            Assert.True(mem.ToString() == "1");
            value.GetItemOrDefault(1, ref mem);
            Assert.True(mem.ToString() == "-Infinity");
            value.GetItemOrDefault(2, ref mem);
            Assert.True(mem.ToString() == "Infinity");
            value.GetItemOrDefault(3, ref mem);
            Assert.True(mem.ToString() == "2");
        }
    }
}