File: UtilTest.cs
Web Access
Project: src\test\Microsoft.ML.CodeGenerator.Tests\Microsoft.ML.CodeGenerator.Tests.csproj (Microsoft.ML.CodeGenerator.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.CodeGenerator.Utilities;
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.CodeGenerator.Tests
{
    class TestClass
    {
        [LoadColumn(0)]
        public string Label { get; set; }
 
        [LoadColumn(1)]
        public string STR { get; set; }
 
        [LoadColumn(2)]
        public string PATH { get; set; }
 
        [LoadColumn(3)]
        public int INT { get; set; }
 
        [LoadColumn(4)]
        public Double DOUBLE { get; set; }
 
        [LoadColumn(5)]
        public float FLOAT { get; set; }
 
        [LoadColumn(6)]
        public string TrickySTR { get; set; }
 
        [LoadColumn(7)]
        public float SingleNan { get; set; }
 
        [LoadColumn(8)]
        public float SinglePositiveInfinity { get; set; }
 
        [LoadColumn(9)]
        public float SingleNegativeInfinity { get; set; }
 
        [LoadColumn(10)]
        public string EmptyString { get; set; }
 
        [LoadColumn(11)]
        public bool One { get; set; }
 
        [LoadColumn(12)]
        public bool T { get; set; }
    }
 
    class TestClassContainsDuplicates
    {
        [LoadColumn(0)]
        public string Label_col_0 { get; set; }
 
        [LoadColumn(1)]
        public string STR_col_1 { get; set; }
 
        [LoadColumn(2)]
        public string STR_col_2 { get; set; }
 
        [LoadColumn(3)]
        public string PATH_col_3 { get; set; }
 
        [LoadColumn(4)]
        public int INT_col_4 { get; set; }
 
        [LoadColumn(5)]
        public Double DOUBLE_col_5 { get; set; }
 
        [LoadColumn(6)]
        public float FLOAT_col_6 { get; set; }
 
        [LoadColumn(7)]
        public float FLOAT_col_7 { get; set; }
 
        [LoadColumn(8)]
        public string TrickySTR_col_8 { get; set; }
 
        [LoadColumn(9)]
        public float SingleNan_col_9 { get; set; }
 
        [LoadColumn(10)]
        public float SinglePositiveInfinity_col_10 { get; set; }
 
        [LoadColumn(11)]
        public float SingleNegativeInfinity_col_11 { get; set; }
 
        [LoadColumn(12)]
        public float SingleNegativeInfinity_col_12 { get; set; }
 
        [LoadColumn(13)]
        public string EmptyString_col_13 { get; set; }
 
        [LoadColumn(14)]
        public bool One_col_14 { get; set; }
 
        [LoadColumn(15)]
        public bool T_col_15 { get; set; }
    }
 
    public class UtilTest : BaseTestClass
    {
        public UtilTest(ITestOutputHelper output) : base(output)
        {
        }
 
        [Fact]
        public async Task TestGenerateSampleDataAsync()
        {
            var filePath = "sample.txt";
            using (var file = new StreamWriter(filePath))
            {
                await file.WriteLineAsync("Label,STR,PATH,INT,DOUBLE,FLOAT,TrickySTR,SingleNan,SinglePositiveInfinity,SingleNegativeInfinity,EmptyString,One,T");
                await file.WriteLineAsync("label1,feature1,/path/to/file,2,1.2,1.223E+10,ab\"\';@#$%^&-++==,NaN,Infinity,-Infinity,,1,T");
                await file.FlushAsync();
                file.Close();
                var context = new MLContext();
                var dataView = context.Data.LoadFromTextFile<TestClass>(filePath, separatorChar: ',', hasHeader: true);
                var columnInference = new ColumnInferenceResults()
                {
                    ColumnInformation = new ColumnInformation()
                    {
                        LabelColumnName = "Label"
                    }
                };
                var sampleData = Utils.GenerateSampleData(dataView, columnInference);
                Assert.Equal("@\"feature1\"", sampleData["STR"]);
                Assert.Equal("@\"/path/to/file\"", sampleData["PATH"]);
                Assert.Equal("2", sampleData["INT"]);
                Assert.Equal("1.2", sampleData["DOUBLE"]);
                Assert.Equal("1.223E+10F", sampleData["FLOAT"]);
                Assert.Equal("@\"ab\\\"\';@#$%^&-++==\"", sampleData["TrickySTR"]);
                Assert.Equal($"Single.NaN", sampleData["SingleNan"]);
                Assert.Equal($"Single.PositiveInfinity", sampleData["SinglePositiveInfinity"]);
                Assert.Equal($"Single.NegativeInfinity", sampleData["SingleNegativeInfinity"]);
                Assert.Equal("@\"\"", sampleData["EmptyString"]);
                Assert.Equal($"true", sampleData["One"]);
                Assert.Equal($"true", sampleData["T"]);
            }
        }
 
        [Fact]
        public async Task TestGenerateSampleDataAsyncDuplicateColumnNames()
        {
            var filePath = "sample2.txt";
            using (var file = new StreamWriter(filePath))
            {
                await file.WriteLineAsync("Label,STR,STR,PATH,INT,DOUBLE,FLOAT,FLOAT,TrickySTR,SingleNan,SinglePositiveInfinity,SingleNegativeInfinity,SingleNegativeInfinity,EmptyString,One,T");
                await file.WriteLineAsync("label1,feature1,feature2,/path/to/file,2,1.2,1.223E+10,1.223E+11,ab\"\';@#$%^&-++==,NaN,Infinity,-Infinity,-Infinity,,1,T");
                await file.FlushAsync();
                file.Close();
                var context = new MLContext();
                var dataView = context.Data.LoadFromTextFile<TestClassContainsDuplicates>(filePath, separatorChar: ',', hasHeader: true);
                var columnInference = new ColumnInferenceResults()
                {
                    ColumnInformation = new ColumnInformation()
                    {
                        LabelColumnName = "Label_col_0"
                    }
                };
                var sampleData = Utils.GenerateSampleData(dataView, columnInference);
                Assert.Equal("@\"feature1\"", sampleData["STR_col_1"]);
                Assert.Equal("@\"feature2\"", sampleData["STR_col_2"]);
                Assert.Equal("@\"/path/to/file\"", sampleData["PATH_col_3"]);
                Assert.Equal("2", sampleData["INT_col_4"]);
                Assert.Equal("1.2", sampleData["DOUBLE_col_5"]);
                Assert.Equal("1.223E+10F", sampleData["FLOAT_col_6"]);
                Assert.Equal("1.223E+11F", sampleData["FLOAT_col_7"]);
                Assert.Equal("@\"ab\\\"\';@#$%^&-++==\"", sampleData["TrickySTR_col_8"]);
                Assert.Equal($"Single.NaN", sampleData["SingleNan_col_9"]);
                Assert.Equal($"Single.PositiveInfinity", sampleData["SinglePositiveInfinity_col_10"]);
                Assert.Equal($"Single.NegativeInfinity", sampleData["SingleNegativeInfinity_col_11"]);
                Assert.Equal($"Single.NegativeInfinity", sampleData["SingleNegativeInfinity_col_12"]);
                Assert.Equal("@\"\"", sampleData["EmptyString_col_13"]);
                Assert.Equal($"true", sampleData["One_col_14"]);
                Assert.Equal($"true", sampleData["T_col_15"]);
            }
        }
 
        [Fact]
        public void NormalizeTest()
        {
            var testStrArray = new string[] { "Abc Abc", "abc ABC", "12", "12.3", "1AB .C" };
            var expectedStrArray = new string[] { "Abc_Abc", "Abc_ABC", "_12", "_12_3", "_1AB__C" };
            for (int i = 0; i != expectedStrArray.Count(); ++i)
            {
                var actualStr = Microsoft.ML.CodeGenerator.Utilities.Utils.Normalize(testStrArray[i]);
                Assert.Equal(expectedStrArray[i], actualStr);
            }
        }
    }
}