File: DatasetDimensionsTests.cs
Web Access
Project: src\test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj (Microsoft.ML.AutoML.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.AutoML.Test
{
 
    public class DatasetDimensionsTests : BaseTestClass
    {
        public DatasetDimensionsTests(ITestOutputHelper output) : base(output)
        {
        }
 
        public object DatasetDimensionUtil { get; private set; }
 
        [Fact]
        public void TextColumnDimensionsTest()
        {
            var context = new MLContext(1);
            var dataBuilder = new ArrayDataViewBuilder(context);
            dataBuilder.AddColumn("categorical", new string[] { "0", "1", "0", "1", "0", "1", "2", "2", "0", "1" });
            dataBuilder.AddColumn("text", new string[] { "0", "1", "0", "1", "0", "1", "2", "2", "0", "1" });
            var data = dataBuilder.GetDataView();
            var dimensions = DatasetDimensionsApi.CalcColumnDimensions(context, data, new[] {
                new PurposeInference.Column(0, ColumnPurpose.CategoricalFeature),
                new PurposeInference.Column(0, ColumnPurpose.TextFeature),
            });
            Assert.NotNull(dimensions);
            Assert.Equal(2, dimensions.Length);
            Assert.Equal(3, dimensions[0].Cardinality);
            Assert.Null(dimensions[1].Cardinality);
            Assert.Null(dimensions[0].HasMissing);
            Assert.Null(dimensions[1].HasMissing);
        }
 
        [Fact]
        public void FloatColumnDimensionsTest()
        {
            var context = new MLContext(1);
            var dataBuilder = new ArrayDataViewBuilder(context);
            dataBuilder.AddColumn("NoNan", NumberDataViewType.Single, new float[] { 0, 1, 0, 1, 0 });
            dataBuilder.AddColumn("Nan", NumberDataViewType.Single, new float[] { 0, 1, 0, 1, float.NaN });
            var data = dataBuilder.GetDataView();
            var dimensions = DatasetDimensionsApi.CalcColumnDimensions(context, data, new[] {
                new PurposeInference.Column(0, ColumnPurpose.NumericFeature),
                new PurposeInference.Column(1, ColumnPurpose.NumericFeature),
            });
            Assert.NotNull(dimensions);
            Assert.Equal(2, dimensions.Length);
            Assert.Null(dimensions[0].Cardinality);
            Assert.Null(dimensions[1].Cardinality);
            Assert.False(dimensions[0].HasMissing);
            Assert.True(dimensions[1].HasMissing);
        }
 
        [Fact]
        public void FloatVectorColumnHasNanTest()
        {
            var context = new MLContext(1);
            var dataBuilder = new ArrayDataViewBuilder(context);
            var slotNames = new[] { "Col1", "Col2" };
            var colValues = new float[][]
            {
                new float[] { 0, 0 },
                new float[] { 1, 1 },
            };
            dataBuilder.AddColumn("NoNan", Util.GetKeyValueGetter(slotNames), NumberDataViewType.Single, colValues);
            colValues = new float[][]
            {
                new float[] { 0, 0 },
                new float[] { 1, float.NaN },
            };
            dataBuilder.AddColumn("Nan", Util.GetKeyValueGetter(slotNames), NumberDataViewType.Single, colValues);
            var data = dataBuilder.GetDataView();
            var dimensions = DatasetDimensionsApi.CalcColumnDimensions(context, data, new[] {
                new PurposeInference.Column(0, ColumnPurpose.NumericFeature),
                new PurposeInference.Column(1, ColumnPurpose.NumericFeature),
            });
            Assert.NotNull(dimensions);
            Assert.Equal(2, dimensions.Length);
            Assert.Null(dimensions[0].Cardinality);
            Assert.Null(dimensions[1].Cardinality);
            Assert.False(dimensions[0].HasMissing);
            Assert.True(dimensions[1].HasMissing);
        }
    }
}