File: Dynamic\Transforms\Conversion\MapValueToKeyMultiColumn.cs
Web Access
Project: src\docs\samples\Microsoft.ML.Samples\Microsoft.ML.Samples.csproj (Microsoft.ML.Samples)
using System;
using System.Collections.Generic;
using Microsoft.ML;
 
namespace Samples.Dynamic
{
    public static class MapValueToKeyMultiColumn
    {
        /// This example demonstrates the use of the ValueToKeyMappingEstimator, by
        /// mapping strings to KeyType values. For more on ML.NET KeyTypes see:
        /// https://github.com/dotnet/machinelearning/blob/main/docs/code/IDataViewTypeSystem.md#key-types
        /// It is possible to have multiple values map to the same category.
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();
 
            // Get a small dataset as an IEnumerable.
            var rawData = new[] {
                new DataPoint() { StudyTime = "0-4yrs" , Course = "CS" },
                new DataPoint() { StudyTime = "6-11yrs" , Course = "CS" },
                new DataPoint() { StudyTime = "12-25yrs" , Course = "LA" },
                new DataPoint() { StudyTime = "0-5yrs" , Course = "DS" }
            };
 
            var data = mlContext.Data.LoadFromEnumerable(rawData);
 
            // Constructs the ML.net pipeline
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey(new[] {
                new  InputOutputColumnPair("StudyTimeCategory", "StudyTime"),
                new  InputOutputColumnPair("CourseCategory", "Course")
                },
                keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator
                    .KeyOrdinality.ByValue, addKeyValueAnnotationsAsText: true);
 
            // Fits the pipeline to the data.
            IDataView transformedData = pipeline.Fit(data).Transform(data);
 
            // Getting the resulting data as an IEnumerable.
            // This will contain the newly created columns.
            IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<
                TransformedData>(transformedData, reuseRowObject: false);
 
            Console.WriteLine($" StudyTime   StudyTimeCategory   Course    " +
                $"CourseCategory");
 
            foreach (var featureRow in features)
                Console.WriteLine($"{featureRow.StudyTime}\t\t" +
                    $"{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t" +
                    $"{featureRow.CourseCategory}");
 
            // TransformedData obtained post-transformation.
            //
            // StudyTime     StudyTimeCategory   Course    CourseCategory
            // 0-4yrs          1                   CS          1
            // 6-11yrs         4                   CS          1
            // 12-25yrs        3                   LA          3
            // 0-5yrs          2                   DS          2
 
            // If we wanted to provide the mapping, rather than letting the
            // transform create it, we could do so by creating an IDataView one
            // column containing the values to map to. If the values in the dataset
            // are not found in the lookup IDataView they will get mapped to the
            // missing value, 0. The keyData are shared among the columns, therefore
            // the keys are not contiguous for the column. Create the lookup map
            // data IEnumerable.
            var lookupData = new[] {
                new LookupMap { Key = "0-4yrs" },
                new LookupMap { Key = "6-11yrs" },
                new LookupMap { Key = "25+yrs"  },
                new LookupMap { Key = "CS" },
                new LookupMap { Key = "DS" },
                new LookupMap { Key = "LA"  }
            };
 
            // Convert to IDataView
            var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData);
 
            // Constructs the ML.net pipeline
            var pipelineWithLookupMap = mlContext.Transforms.Conversion
                .MapValueToKey(new[] {
                    new  InputOutputColumnPair("StudyTimeCategory", "StudyTime"),
                    new  InputOutputColumnPair("CourseCategory", "Course")
                    },
                    keyData: lookupIdvMap);
 
            // Fits the pipeline to the data.
            transformedData = pipelineWithLookupMap.Fit(data).Transform(data);
 
            // Getting the resulting data as an IEnumerable.
            // This will contain the newly created columns.
            features = mlContext.Data.CreateEnumerable<TransformedData>(
                transformedData, reuseRowObject: false);
 
            Console.WriteLine($" StudyTime   StudyTimeCategory  " +
                $"Course CourseCategory");
 
            foreach (var featureRow in features)
                Console.WriteLine($"{featureRow.StudyTime}\t\t" +
                    $"{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t" +
                    $"{featureRow.CourseCategory}");
 
            // StudyTime    StudyTimeCategory  Course     CourseCategory
            // 0 - 4yrs          1              CS              4
            // 6 - 11yrs         2              CS              4
            // 12 - 25yrs        0              LA              6
            // 0 - 5yrs          0              DS              5
 
        }
 
        private class DataPoint
        {
            public string StudyTime { get; set; }
            public string Course { get; set; }
        }
 
        private class TransformedData : DataPoint
        {
            public uint StudyTimeCategory { get; set; }
            public uint CourseCategory { get; set; }
        }
 
        // Type for the IDataView that will be serving as the map
        private class LookupMap
        {
            public string Key { get; set; }
        }
    }
}