File: Transformers\ValueMappingTests.cs
Web Access
Project: src\test\Microsoft.ML.Tests\Microsoft.ML.Tests.csproj (Microsoft.ML.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Model;
using Microsoft.ML.RunTests;
using Microsoft.ML.Tools;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Text;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.Tests.Transformers
{
    public class ValueMappingTests : TestDataPipeBase
    {
        // Forwards the xUnit output helper to the TestDataPipeBase constructor.
        public ValueMappingTests(ITestOutputHelper output)
            : base(output)
        {
        }
 
        // Input row used by the value-mapping tests: three string columns (A, B, C)
        // whose values are looked up in the mapping under test.
        class TestClass
        {
            public string A;
            public string B;
            public string C;
        }
 
        // Row whose column B is a float rather than a string. The workout tests load it
        // as the invalid input passed to TestEstimatorCore.
        class TestWrong
        {
            public string A;
            public float B;
        }
 
        // Input row for the back-compat term-lookup model tests: a string Label,
        // an int GroupId and a float[] Features column annotated with VectorType(2107).
        public class TestTermLookup
        {
            public string Label;
            public int GroupId;

            [VectorType(2107)]
            public float[] Features;
        };
 
 
        // Maps three scalar string columns through a string -> int lookup data view and
        // checks that each output column holds the value associated with its input key.
        [Fact]
        public void ValueMapOneValueTest()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keys = new List<string>() { "foo", "bar", "test", "wahoo" };
            var values = new List<int>() { 1, 2, 3, 4 };

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName, false);

            var estimator = new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("D", "A"), ("E", "B"), ("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<int>(result.Schema["D"]);
                var getterE = cursor.GetGetter<int>(result.Schema["E"]);
                var getterF = cursor.GetGetter<int>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                int dValue = 0;
                getterD(ref dValue);
                Assert.Equal(2, dValue);
                int eValue = 0;
                getterE(ref eValue);
                Assert.Equal(3, eValue);
                int fValue = 0;
                getterF(ref fValue);
                Assert.Equal(1, fValue);
            }
        }
 
        // Tokenizes column A into a vector of words, maps that vector (plus the scalar
        // columns B and C) through a text -> int lookup, and checks both the predicted
        // schema (variable-length vector) and the mapped values.
        [Fact]
        public void ValueMapInputIsVectorTest()
        {
            var data = new[] { new TestClass() { A = "bar test foo", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keys = new List<ReadOnlyMemory<char>>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() };
            var values = new List<int>() { 1, 2, 3, 4 };

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName, false);

            var valueMappingEstimator = new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("VecD", "TokenizeA"), ("E", "B"), ("F", "C") });

            var estimator = new WordTokenizingEstimator(Env, new[]{
                    new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A")
                }).Append(valueMappingEstimator);

            var schema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema));
            Assert.True(schema.TryFindColumn("VecD", out var originalColumn));
            Assert.Equal(SchemaShape.Column.VectorKind.VariableVector, originalColumn.Kind);
            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterVecD = cursor.GetGetter<VBuffer<int>>(result.Schema["VecD"]);
                var getterE = cursor.GetGetter<int>(result.Schema["E"]);
                var getterF = cursor.GetGetter<int>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // "bar test foo" -> tokens bar, test, foo -> 2, 3, 1.
                VBuffer<int> dValue = default;
                getterVecD(ref dValue);
                Assert.True(dValue.GetValues().SequenceEqual(new int[] { 2, 3, 1 }));

                int eValue = 0;
                getterE(ref eValue);
                Assert.Equal(3, eValue);
                int fValue = 0;
                getterF(ref fValue);
                Assert.Equal(1, fValue);
            }
        }
 
        // Same as the vector-input test, but the string values are treated as key types,
        // so the outputs are uint keys generated from the order of the mapped values.
        [Fact]
        public void ValueMapInputIsVectorAndValueAsStringKeyTypeTest()
        {
            var data = new[] { new TestClass() { A = "bar test foo", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keyValuePairs = new List<KeyValuePair<string, string>>() {
                new KeyValuePair<string, string>("foo", "a"),
                new KeyValuePair<string, string>("bar", "b"),
                new KeyValuePair<string, string>("test", "c"),
                new KeyValuePair<string, string>("wahoo", "d")};

            var estimator = new WordTokenizingEstimator(Env, new[] { new WordTokenizingEstimator.ColumnOptions("TokenizeA", "A") }).
                Append(ML.Transforms.Conversion.MapValue(keyValuePairs, true, new[] { new InputOutputColumnPair("VecD", "TokenizeA"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") }));
            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterVecD = cursor.GetGetter<VBuffer<uint>>(result.Schema["VecD"]);
                var getterE = cursor.GetGetter<uint>(result.Schema["E"]);
                var getterF = cursor.GetGetter<uint>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                VBuffer<uint> dValue = default;
                getterVecD(ref dValue);
                Assert.True(dValue.GetValues().SequenceEqual(new uint[] { 2, 3, 1 }));

                uint eValue = 0;
                getterE(ref eValue);
                Assert.Equal(3u, eValue);
                uint fValue = 0;
                getterF(ref fValue);
                Assert.Equal(1u, fValue);
            }
        }
 
        // Maps scalar string keys to int vectors of differing lengths and checks that the
        // predicted output columns are variable-length vectors and that each output has
        // the length of the vector registered for its key.
        [Fact]
        public void ValueMapVectorValueTest()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            IEnumerable<string> keys = new List<string>() { "foo", "bar", "test" };
            List<int[]> values = new List<int[]>() {
                new int[] {2, 3, 4 },
                new int[] {100, 200 },
                new int[] {400, 500, 600, 700 }};

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName);

            var estimator = new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("D", "A"), ("E", "B"), ("F", "C") });

            var schema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema));
            foreach (var name in new[] { "D", "E", "F" })
            {
                Assert.True(schema.TryFindColumn(name, out var originalColumn));
                Assert.Equal(SchemaShape.Column.VectorKind.VariableVector, originalColumn.Kind);
            }

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<VBuffer<int>>(result.Schema["D"]);
                var getterE = cursor.GetGetter<VBuffer<int>>(result.Schema["E"]);
                var getterF = cursor.GetGetter<VBuffer<int>>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // A = "bar" -> values[1], B = "test" -> values[2], C = "foo" -> values[0].
                VBuffer<int> dValue = default;
                getterD(ref dValue);
                Assert.Equal(values[1].Length, dValue.Length);
                VBuffer<int> eValue = default;
                getterE(ref eValue);
                Assert.Equal(values[2].Length, eValue.Length);
                VBuffer<int> fValue = default;
                getterF(ref fValue);
                Assert.Equal(values[0].Length, fValue.Length);
            }
        }
 
        // One key/value row of the lookup data view built in ValueMapDataViewAsMapTest.
        class Map
        {
            public string Key;
            public int Value;
        }
 
        // Uses a data view loaded from Map rows (columns "Key" and "Value") as the lookup
        // table and checks the mapped values of the three scalar input columns.
        [Fact]
        public void ValueMapDataViewAsMapTest()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var map = new[] { new Map() { Key = "foo", Value = 1 },
                              new Map() { Key = "bar", Value = 2 },
                              new Map() { Key = "test", Value = 3 },
                              new Map() { Key = "wahoo", Value = 4 }
                            };
            var mapView = ML.Data.LoadFromEnumerable(map);

            var estimator = new ValueMappingEstimator(Env, mapView, mapView.Schema["Key"], mapView.Schema["Value"], new[] { ("D", "A"), ("E", "B"), ("F", "C") });
            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<int>(result.Schema["D"]);
                var getterE = cursor.GetGetter<int>(result.Schema["E"]);
                var getterF = cursor.GetGetter<int>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                int dValue = 0;
                getterD(ref dValue);
                Assert.Equal(2, dValue);
                int eValue = 0;
                getterE(ref eValue);
                Assert.Equal(3, eValue);
                int fValue = 0;
                getterF(ref fValue);
                Assert.Equal(1, fValue);
            }
        }
 
        // Maps scalar string keys to string vectors of differing lengths and checks that
        // each output column has the length of the vector registered for its key.
        [Fact]
        public void ValueMapVectorStringValueTest()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            IEnumerable<string> keys = new List<string>() { "foo", "bar", "test" };
            List<string[]> values = new List<string[]>() {
                new string[] {"foo", "bar" },
                new string[] {"forest", "city", "town" },
                new string[] {"winter", "summer", "autumn", "spring" }};

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName);

            var estimator = new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("D", "A"), ("E", "B"), ("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                // Look the output columns up by name, like the other tests, instead of
                // relying on their positions (3, 4, 5) after the inputs A, B and C.
                var getterD = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(result.Schema["D"]);
                var getterE = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(result.Schema["E"]);
                var getterF = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // A = "bar" -> 3 strings, B = "test" -> 4 strings, C = "foo" -> 2 strings.
                VBuffer<ReadOnlyMemory<char>> dValue = default;
                getterD(ref dValue);
                Assert.Equal(3, dValue.Length);

                VBuffer<ReadOnlyMemory<char>> eValue = default;
                getterE(ref eValue);
                Assert.Equal(4, eValue.Length);

                VBuffer<ReadOnlyMemory<char>> fValue = default;
                getterF(ref fValue);
                Assert.Equal(2, fValue.Length);
            }
        }
 
        // Checks that an input value absent from the lookup ("barTest") produces 0 in the
        // output, while the keys that are present still map to their values.
        [Fact]
        public void ValueMappingMissingKey()
        {
            var data = new[] { new TestClass() { A = "barTest", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keys = new List<string>() { "foo", "bar", "test", "wahoo" };
            var values = new List<int>() { 1, 2, 3, 4 };

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName, false);

            var estimator = new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("D", "A"), ("E", "B"), ("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<int>(result.Schema["D"]);
                var getterE = cursor.GetGetter<int>(result.Schema["E"]);
                var getterF = cursor.GetGetter<int>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // Start from a non-zero value so the assertion proves the getter wrote 0.
                int dValue = 1;
                getterD(ref dValue);
                Assert.Equal(0, dValue);
                int eValue = 0;
                getterE(ref eValue);
                Assert.Equal(3, eValue);
                int fValue = 0;
                getterF(ref fValue);
                Assert.Equal(1, fValue);
            }
        }
 
        // Checks that constructing the estimator from a lookup that contains the same key
        // twice throws InvalidOperationException. No input data is needed because the
        // exception is expected from the constructor itself.
        [Fact]
        public void TestDuplicateKeys()
        {
            var keys = new List<string>() { "foo", "foo" };
            var values = new List<int>() { 1, 2 };

            var lookupMap = DataViewHelper.CreateDataView(Env, keys, values,
                ValueMappingTransformer.DefaultKeyColumnName,
                ValueMappingTransformer.DefaultValueColumnName, false);

            Assert.Throws<InvalidOperationException>(() => new ValueMappingEstimator<string, int>(Env, lookupMap,
                lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
                lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
                new[] { ("D", "A"), ("E", "B"), ("F", "C") }));
        }
 
        // Checks the schema predicted by MapValue for a string -> int mapping whose values
        // are not treated as keys: six columns in total, with D, E and F being int, non-key.
        [Fact]
        public void ValueMappingOutputSchema()
        {
            var rows = new[] { new TestClass() { A = "barTest", B = "test", C = "foo" } };
            var input = ML.Data.LoadFromEnumerable(rows);

            var mapping = new List<KeyValuePair<string, int>>()
            {
                new KeyValuePair<string, int>("foo", 1),
                new KeyValuePair<string, int>("bar", 2),
                new KeyValuePair<string, int>("test", 3),
                new KeyValuePair<string, int>("wahoo", 4)
            };
            var columnPairs = new[]
            {
                new InputOutputColumnPair("D", "A"),
                new InputOutputColumnPair("E", "B"),
                new InputOutputColumnPair("F", "C")
            };

            var est = ML.Transforms.Conversion.MapValue(mapping, columnPairs);
            var inputShape = SchemaShape.Create(input.Schema);
            var outputSchema = est.GetOutputSchema(inputShape);

            Assert.Equal(6, outputSchema.Count());

            // Every mapped output column must be a plain int column.
            foreach (var outputName in new[] { "D", "E", "F" })
            {
                Assert.True(outputSchema.TryFindColumn(outputName, out SchemaShape.Column column));
                Assert.Equal(typeof(int), column.ItemType.RawType);
                Assert.False(column.IsKey);
            }
        }
 
        // Checks the schema predicted by MapValue when string values are treated as key
        // types: D, E and F must be uint key columns. Also checks that fitting succeeds.
        [Fact]
        public void ValueMappingWithValuesAsKeyTypesOutputSchema()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keyValuePairs = new List<KeyValuePair<string, string>>() {
                new KeyValuePair<string, string>("foo", "t"),
                new KeyValuePair<string, string>("bar", "s"),
                new KeyValuePair<string, string>("test", "u"),
                new KeyValuePair<string, string>("wahoo", "v") };

            var estimator = ML.Transforms.Conversion.MapValue(keyValuePairs, true,
                new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });

            var outputSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema));
            Assert.Equal(6, outputSchema.Count());
            Assert.True(outputSchema.TryFindColumn("D", out SchemaShape.Column dColumn));
            Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn));
            Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn));

            Assert.Equal(typeof(uint), dColumn.ItemType.RawType);
            Assert.True(dColumn.IsKey);

            Assert.Equal(typeof(uint), eColumn.ItemType.RawType);
            Assert.True(eColumn.IsKey);

            Assert.Equal(typeof(uint), fColumn.ItemType.RawType);
            Assert.True(fColumn.IsKey);

            // Fitting must complete and yield a transformer; the result was previously
            // stored in an unused local and never checked.
            Assert.NotNull(estimator.Fit(dataView));
        }
 
        // Maps strings to uint values treated as key types and checks that the outputs
        // are the supplied uints themselves, with 0 for a value missing from the mapping.
        [Fact]
        public void ValueMappingValuesAsUintKeyTypes()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test2", C = "wahoo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            // These are the expected key type values
            var keyValuePairs = new List<KeyValuePair<string, uint>>() {
                new KeyValuePair<string, uint>("foo", 51),
                new KeyValuePair<string, uint>("bar", 25),
                new KeyValuePair<string, uint>("test", 42),
                new KeyValuePair<string, uint>("wahoo", 61)};

            // Workout on value mapping
            var estimator = ML.Transforms.Conversion.MapValue(keyValuePairs, true, new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<uint>(result.Schema["D"]);
                var getterE = cursor.GetGetter<uint>(result.Schema["E"]);
                var getterF = cursor.GetGetter<uint>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // The expected values will contain the actual uints and are not generated.
                uint dValue = 1;
                getterD(ref dValue);
                Assert.Equal<uint>(25, dValue);

                // Should be 0 as test2 is a missing key
                uint eValue = 0;
                getterE(ref eValue);
                Assert.Equal<uint>(0, eValue);

                // Testing the last key
                uint fValue = 0;
                getterF(ref fValue);
                Assert.Equal<uint>(61, fValue);
            }
        }
 
 
        // Maps strings to ulong values treated as key types, including one value above
        // Int32.MaxValue, and checks that the outputs are the supplied ulongs themselves,
        // with 0 for a value missing from the mapping.
        [Fact]
        public void ValueMappingValuesAsUlongKeyTypes()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test2", C = "wahoo" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keyValuePairs = new List<KeyValuePair<string, ulong>>() {
                new KeyValuePair<string, ulong>("foo", 51),
                new KeyValuePair<string, ulong>("bar", Int32.MaxValue + 1L),
                new KeyValuePair<string, ulong>("test", 42),
                new KeyValuePair<string, ulong>("wahoo", 61)};

            // Workout on value mapping
            var estimator = ML.Transforms.Conversion.MapValue(keyValuePairs, true, new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<ulong>(result.Schema["D"]);
                var getterE = cursor.GetGetter<ulong>(result.Schema["E"]);
                var getterF = cursor.GetGetter<ulong>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // The expected values will contain the actual ulongs and are not generated.
                ulong dValue = 1;
                getterD(ref dValue);
                Assert.Equal<ulong>(Int32.MaxValue + 1L, dValue);

                // Should be 0 as test2 is a missing key
                ulong eValue = 0;
                getterE(ref eValue);
                Assert.Equal<ulong>(0, eValue);

                // Testing the last key
                ulong fValue = 0;
                getterF(ref fValue);
                Assert.Equal<ulong>(61, fValue);
            }
        }
 
        // Maps strings to string values treated as key types and checks the generated
        // keys: duplicated values share one key and a missing input maps to 0.
        [Fact]
        public void ValueMappingValuesAsStringKeyTypes()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "notfound" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            // Generating the list of strings for the key type values, note that foo1 is duplicated as intended to test that the same index value is returned
            var keyValuePairs = new List<KeyValuePair<string, string>>() {
                new KeyValuePair<string, string>("foo", "foo1"),
                new KeyValuePair<string, string>("bar", "foo2"),
                new KeyValuePair<string, string>("test", "foo1"),
                new KeyValuePair<string, string>("wahoo", "foo3")};

            // Workout on value mapping
            var estimator = ML.Transforms.Conversion.MapValue(keyValuePairs, true, new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<uint>(result.Schema["D"]);
                var getterE = cursor.GetGetter<uint>(result.Schema["E"]);
                var getterF = cursor.GetGetter<uint>(result.Schema["F"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // The expected values will contain the generated key type values starting from 1.
                uint dValue = 1;
                getterD(ref dValue);
                Assert.Equal<uint>(2, dValue);

                // eValue will equal 1 since foo1 occurs first.
                uint eValue = 0;
                getterE(ref eValue);
                Assert.Equal<uint>(1, eValue);

                // fValue will be 0 since it is missing
                uint fValue = 0;
                getterF(ref fValue);
                Assert.Equal<uint>(0, fValue);
            }
        }
 
        // Maps column A to a key-typed column D, converts D back to text with
        // MapKeyToValue, and checks both the round-tripped value and the KeyValues
        // annotation attached to D.
        [Fact]
        public void ValueMappingValuesAsKeyTypesReverseLookup()
        {
            var data = new[] { new TestClass() { A = "bar", B = "test", C = "notfound" } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keyValuePairs = new List<KeyValuePair<string, string>>() {
                new KeyValuePair<string, string>("foo", "foo1"),
                new KeyValuePair<string, string>("bar", "foo2"),
                new KeyValuePair<string, string>("test", "foo1"),
                new KeyValuePair<string, string>("wahoo", "foo3")};

            var estimator = ML.Transforms.Conversion.MapValue("D", keyValuePairs, "A", true).
                Append(ML.Transforms.Conversion.MapKeyToValue("DOutput", "D"));

            var t = estimator.Fit(dataView);

            var result = t.Transform(dataView);

            // The cursor is disposable: release it even when an assertion fails.
            using (var cursor = result.GetRowCursorForAllColumns())
            {
                var getterD = cursor.GetGetter<ReadOnlyMemory<char>>(result.Schema["DOutput"]);

                // Fail clearly if the transformed view has no row to read.
                Assert.True(cursor.MoveNext());

                // "bar" maps to the key for "foo2"; MapKeyToValue turns that key back into text.
                // Compare the text itself: ReadOnlyMemory<char> equality only holds when both
                // sides wrap the same string instance, which is an implementation detail.
                ReadOnlyMemory<char> dValue = default;
                getterD(ref dValue);
                Assert.Equal("foo2", dValue.ToString());
            }

            var annotations = result.Schema["D"].Annotations;
            var allowedKeyValueGetter = annotations.GetGetter<VBuffer<ReadOnlyMemory<char>>>(annotations.Schema["KeyValues"]);
            VBuffer<ReadOnlyMemory<char>> allowedKeys = default;
            allowedKeyValueGetter(ref allowedKeys);

            // There should be 3 keys, "foo1", "foo2", and "foo3".
            Assert.Equal(3, allowedKeys.Length);
            var allowedKeyPool = new HashSet<string>(allowedKeys.DenseValues().Select(key => key.ToString()));
            Assert.Contains("foo1", allowedKeyPool);
            Assert.Contains("foo2", allowedKeyPool);
            Assert.Contains("foo3", allowedKeyPool);
        }
 
        // Runs the shared estimator workout on a scalar string -> int mapping, using
        // TestClass rows as the valid input and TestWrong rows as the invalid input.
        [Fact]
        public void ValueMappingWorkout()
        {
            var goodRows = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var goodView = ML.Data.LoadFromEnumerable(goodRows);
            var badRows = new[] { new TestWrong() { A = "bar", B = 1.2f } };
            var badView = ML.Data.LoadFromEnumerable(badRows);

            var mapping = new List<KeyValuePair<string, int>>()
            {
                new KeyValuePair<string, int>("foo", 1),
                new KeyValuePair<string, int>("bar", 2),
                new KeyValuePair<string, int>("test", 3),
                new KeyValuePair<string, int>("wahoo", 4)
            };
            var columnPairs = new[]
            {
                new InputOutputColumnPair("D", "A"),
                new InputOutputColumnPair("E", "B"),
                new InputOutputColumnPair("F", "C")
            };

            // Workout on value mapping
            var est = ML.Transforms.Conversion.MapValue(mapping, columnPairs);
            TestEstimatorCore(est, validFitInput: goodView, invalidInput: badView);
        }
 
        // Runs the shared estimator workout on a string -> int[] mapping, using
        // TestClass rows as the valid input and TestWrong rows as the invalid input.
        [Fact]
        public void ValueMappingValueTypeIsVectorWorkout()
        {
            var goodRows = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } };
            var goodView = ML.Data.LoadFromEnumerable(goodRows);
            var badRows = new[] { new TestWrong() { A = "bar", B = 1.2f } };
            var badView = ML.Data.LoadFromEnumerable(badRows);

            var mapping = new List<KeyValuePair<string, int[]>>()
            {
                new KeyValuePair<string, int[]>("foo", new int[] { 2, 3, 4 }),
                new KeyValuePair<string, int[]>("bar", new int[] { 100, 200 }),
                new KeyValuePair<string, int[]>("test", new int[] { 400, 500, 600, 700 })
            };
            var columnPairs = new[]
            {
                new InputOutputColumnPair("D", "A"),
                new InputOutputColumnPair("E", "B"),
                new InputOutputColumnPair("F", "C")
            };

            // Workout on value mapping
            var est = ML.Transforms.Conversion.MapValue(mapping, columnPairs);
            TestEstimatorCore(est, validFitInput: goodView, invalidInput: badView);
        }
 
        // Runs the shared estimator workout on a pipeline that tokenizes column B into
        // words and then maps the resulting vector through a text -> int mapping.
        [Fact]
        public void ValueMappingInputIsVectorWorkout()
        {
            var goodRows = new[] { new TestClass() { B = "bar test foo" } };
            var goodView = ML.Data.LoadFromEnumerable(goodRows);

            // Column B is a float here, so it is not valid input for the tokenizer pipeline.
            var badRows = new[] { new TestWrong() { B = 1.2f } };
            var badView = ML.Data.LoadFromEnumerable(badRows);

            var mapping = new List<KeyValuePair<ReadOnlyMemory<char>, int>>()
            {
                new KeyValuePair<ReadOnlyMemory<char>, int>("foo".AsMemory(), 1),
                new KeyValuePair<ReadOnlyMemory<char>, int>("bar".AsMemory(), 2),
                new KeyValuePair<ReadOnlyMemory<char>, int>("test".AsMemory(), 3),
                new KeyValuePair<ReadOnlyMemory<char>, int>("wahoo".AsMemory(), 4)
            };

            var tokenizer = ML.Transforms.Text.TokenizeIntoWords("TokenizeB", "B");
            var est = tokenizer.Append(ML.Transforms.Conversion.MapValue("VecB", mapping, "TokenizeB"));
            TestEstimatorCore(est, validFitInput: goodView, invalidInput: badView);
        }
 
        // Runs the "showschema" command with a valuemap transform that names its key and
        // value columns and supplies its own text loader; expects exit code 0.
        [Fact]
        public void TestCommandLine()
        {
            var lookupFile = GetDataPath("QuotingData.csv");
            var command =
                @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{keyCol=ID valueCol=Text data="
                + lookupFile
                + @" col=A:B loader=Text{col=ID:U8:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt";

            var exitCode = Maml.Main(new[] { command });

            Assert.Equal(0, exitCode);
        }
 
        // Runs the "showschema" command with a valuemap transform that specifies only the
        // data file and the column mapping; expects exit code 0.
        [Fact]
        public void TestCommandLineNoLoader()
        {
            var lookupFile = GetDataPath("lm.labels.txt");
            var command =
                @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data="
                + lookupFile
                + @" col=A:B } in=f:\1.txt";

            var exitCode = Maml.Main(new[] { command });

            Assert.Equal(0, exitCode);
        }
 
        // Runs the "showschema" command with a valuemap transform that has no loader but
        // sets keyCol and valueCol explicitly; expects exit code 0.
        [Fact]
        public void TestCommandLineNoLoaderWithColumnNames()
        {
            var lookupFile = GetDataPath("lm.labels.txt");
            var command =
                @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data="
                + lookupFile
                + @" col=A:B keyCol=foo valueCol=bar} in=f:\1.txt";

            var exitCode = Maml.Main(new[] { command });

            Assert.Equal(0, exitCode);
        }
 
        // Runs the "showschema" command with a valuemap transform that has no loader and
        // passes valuesAsKeyType=-; expects exit code 0.
        [Fact]
        public void TestCommandLineNoLoaderWithoutTreatValuesAsKeys()
        {
            var lookupFile = GetDataPath("lm.labels.txt");
            var command =
                @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data="
                + lookupFile
                + @" col=A:B valuesAsKeyType=-} in=f:\1.txt";

            var exitCode = Maml.Main(new[] { command });

            Assert.Equal(0, exitCode);
        }
 
        // Fits a string -> int mapping for two columns, saves the transformer to an
        // in-memory stream, loads it back and checks that the reloaded transformer
        // produces the three inputs plus the two mapped columns D and E.
        [Fact]
        public void TestSavingAndLoading()
        {
            var data = new[] { new TestClass() { A = "bar", B = "foo", C = "test", } };
            var dataView = ML.Data.LoadFromEnumerable(data);

            var keyValuePairs = new List<KeyValuePair<string, int>>() {
                new KeyValuePair<string,int>("foo", 2),
                new KeyValuePair<string,int>("bar", 43),
                new KeyValuePair<string,int>("test", 56)};

            var est = ML.Transforms.Conversion.MapValue(keyValuePairs,
                new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B") });

            var transformer = est.Fit(dataView);
            using (var ms = new MemoryStream())
            {
                ML.Model.Save(transformer, null, ms);

                // Rewind so the load reads from the start of what was just written.
                ms.Position = 0;

                // The schema returned by Load and the column indices are not needed here.
                var loadedTransformer = ML.Model.Load(ms, out _);
                var result = loadedTransformer.Transform(dataView);
                Assert.Equal(5, result.Schema.Count);
                Assert.True(result.Schema.TryGetColumnIndex("D", out _));
                Assert.True(result.Schema.TryGetColumnIndex("E", out _));
            }
        }
 
 
        // Loads the saved back-compat model "backcompat/termlookup.zip" over a single
        // TestTermLookup row and checks that the resulting schema still exposes the
        // Features, Label and GroupId columns.
        // NOTE(review): the previous comment here described a model generated with
        // "xf=drop{col=A}" and an expected output of "Features Label B C", which does not
        // match the assertions below - confirm how termlookup.zip was generated.
        [Fact]
        public void TestValueMapBackCompatTermLookup()
        {
            var data = new[] { new TestTermLookup() { Label = "good", GroupId = 1 } };
            var dataView = ML.Data.LoadFromEnumerable(data);
            string termLookupModelPath = GetDataPath("backcompat/termlookup.zip");
            using (FileStream fs = File.OpenRead(termLookupModelPath))
            {
                var result = ModelFileUtils.LoadTransforms(Env, dataView, fs);

                // Only the presence of the columns matters; their indices are not used.
                Assert.True(result.Schema.TryGetColumnIndex("Features", out _));
                Assert.True(result.Schema.TryGetColumnIndex("Label", out _));
                Assert.True(result.Schema.TryGetColumnIndex("GroupId", out _));
            }
        }
 
        /// <summary>
        /// Loads the transforms stored in the <c>backcompat/termlookup_with_key.zip</c> model on top of a
        /// single-row <see cref="TestTermLookup"/> data view and checks that the Label column comes out
        /// as a key type with the expected key count and first-row value.
        /// </summary>
        [Fact]
        public void TestValueMapBackCompatTermLookupKeyTypeValue()
        {
            // NOTE(review): the previous comment here said the model was generated with
            // "xf=drop{col=A}" and expected "Features Label B C", which does not match the
            // columns asserted below - confirm how termlookup_with_key.zip was actually produced.
            var rows = new[] { new TestTermLookup() { Label = "Good", GroupId = 1 } };
            var input = ML.Data.LoadFromEnumerable(rows);
            string modelPath = GetDataPath("backcompat/termlookup_with_key.zip");

            using (FileStream modelStream = File.OpenRead(modelPath))
            {
                var transformed = ModelFileUtils.LoadTransforms(Env, input, modelStream);
                Assert.True(transformed.Schema.TryGetColumnIndex("Features", out int featuresIndex));
                Assert.True(transformed.Schema.TryGetColumnIndex("Label", out int labelIndex));
                Assert.True(transformed.Schema.TryGetColumnIndex("GroupId", out int groupIndex));

                // Label must be exposed as a key type with a key count of 5.
                var labelType = transformed.Schema[labelIndex].Type;
                Assert.True(labelType is KeyDataViewType);
                Assert.Equal(5UL, labelType.GetItemType().GetKeyCount());

                // The single input row ("Good") is expected to map to key value 3.
                uint firstLabel = transformed.GetColumn<uint>(transformed.Schema["Label"]).First();
                Assert.Equal(3U, firstLabel);
            }
        }
 
        /// <summary>
        /// Maps a float Price column to a string PriceCategory column using a lookup data view whose
        /// key ("Value") and value ("Category") columns are selected explicitly by schema column,
        /// then verifies every input row received the expected category.
        /// </summary>
        [Fact]
        public void TestValueMapWithNonDefaultColumnOrder()
        {
            // Get a small dataset as an IEnumerable.
            var rawData = new[] {
                new DataPoint() { Price = 3.14f },
                new DataPoint() { Price = 2000f },
                new DataPoint() { Price = 1.19f },
                new DataPoint() { Price = 2.17f },
                new DataPoint() { Price = 33.784f },
            };

            // Convert to IDataView
            var data = ML.Data.LoadFromEnumerable(rawData);

            // Create the lookup map data IEnumerable.
            var lookupData = new[] {
                new LookupMap { Value = 3.14f, Category = "Low" },
                new LookupMap { Value = 1.19f , Category = "Low" },
                new LookupMap { Value = 2.17f , Category = "Low" },
                new LookupMap { Value = 33.784f, Category = "Medium" },
                new LookupMap { Value = 2000f, Category = "High"}
            };

            // Convert to IDataView
            var lookupIdvMap = ML.Data.LoadFromEnumerable(lookupData);

            // Constructs the ValueMappingEstimator making the ML.NET pipeline
            var pipeline = ML.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema["Category"], "Price");

            // Fits the ValueMappingEstimator and transforms the data converting the Price to PriceCategory.
            IDataView transformedData = pipeline.Fit(data).Transform(data);

            // Getting the resulting data as an IEnumerable.
            var features = ML.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false).ToList();

            // Expected category for each row of rawData, in the same order.
            var expectedCategories = new string[] { "Low", "High", "Low", "Low", "Medium" };

            // Guard against a vacuous pass: the loop below is bounded by features.Count, so without
            // this check an empty or truncated result would not fail the test.
            Assert.Equal(rawData.Length, features.Count);

            for (int i = 0; i < features.Count; ++i)
            {
                var feature = features[i];
                Assert.Equal(rawData[i].Price, feature.Price);
                Assert.Equal(expectedCategories[i], feature.PriceCategory);
            }
        }
 
        /// <summary>
        /// Row type of the lookup table built in <see cref="TestValueMapWithNonDefaultColumnOrder"/>.
        /// That test passes the "Value" column as the key column and the "Category" column as the
        /// value column of the mapping.
        /// </summary>
        // NOTE(review): Category is declared before Value, presumably so the value column precedes
        // the key column in the loaded schema (the "non-default column order" the test is named for)
        // - confirm before reordering these properties.
        private class LookupMap
        {
            // Category label that a price is mapped to (e.g. "Low", "Medium", "High" in the test data).
            public string Category { get; set; }
            // Price value used as the lookup key.
            public float Value { get; set; }
        }
 
        /// <summary>
        /// Input row type for <see cref="TestValueMapWithNonDefaultColumnOrder"/>; its single
        /// "Price" column is the input column of the value mapping.
        /// </summary>
        private class DataPoint
        {
            // Price that is looked up in the mapping.
            public float Price { get; set; }
        }
 
        /// <summary>
        /// Output row type for <see cref="TestValueMapWithNonDefaultColumnOrder"/>: the original
        /// <see cref="DataPoint"/> columns plus the "PriceCategory" column written by the mapping.
        /// </summary>
        private class TransformedData : DataPoint
        {
            // Category produced by the value mapping for this row's Price.
            public string PriceCategory { get; set; }
        }
    }
}