File: API\AutoCatalog.cs
Web Access
Project: src\src\Microsoft.ML.AutoML\Microsoft.ML.AutoML.csproj (Microsoft.ML.AutoML)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using Microsoft.ML.AutoML.CodeGen;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.SearchSpace;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.FastTree;
 
namespace Microsoft.ML.AutoML
{
    /// <summary>
    /// A catalog of all available AutoML tasks.
    /// </summary>
    public sealed class AutoCatalog
    {
        private readonly MLContext _context;
 
        internal AutoCatalog(MLContext context)
        {
            _context = context;
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a regression dataset.
        /// </summary>
        /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
        /// <returns>A new AutoML regression experiment.</returns>
        /// <remarks>
        /// <para>See <see cref="RegressionExperiment"/> for a more detailed code example of an AutoML regression experiment.</para>
        /// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
        /// This is because once AutoML starts training an ML.NET model, AutoML lets the
        /// model train to completion. For instance, if the first model
        /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
        /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
        /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
        /// </remarks>
        public RegressionExperiment CreateRegressionExperiment(uint maxExperimentTimeInSeconds)
        {
            return new RegressionExperiment(_context, new RegressionExperimentSettings()
            {
                MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
            });
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a regression dataset.
        /// </summary>
        /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
        /// <returns>A new AutoML regression experiment.</returns>
        /// <remarks>
        /// See <see cref="RegressionExperiment"/> for a more detailed code example of an AutoML regression experiment.
        /// </remarks>
        public RegressionExperiment CreateRegressionExperiment(RegressionExperimentSettings experimentSettings)
        {
            return new RegressionExperiment(_context, experimentSettings);
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a binary classification dataset.
        /// </summary>
        /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
        /// <returns>A new AutoML binary classification experiment.</returns>
        /// <remarks>
        /// <para>See <see cref="BinaryClassificationExperiment"/> for a more detailed code example of an AutoML binary classification experiment.</para>
        /// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
        /// This is because once AutoML starts training an ML.NET model, AutoML lets the
        /// model train to completion. For instance, if the first model
        /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
        /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
        /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
        /// </remarks>
        public BinaryClassificationExperiment CreateBinaryClassificationExperiment(uint maxExperimentTimeInSeconds)
        {
            return new BinaryClassificationExperiment(_context, new BinaryExperimentSettings()
            {
                MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
            });
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a binary classification dataset.
        /// </summary>
        /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
        /// <returns>A new AutoML binary classification experiment.</returns>
        /// <remarks>
        /// See <see cref="BinaryClassificationExperiment"/> for a more detailed code example of an AutoML binary classification experiment.
        /// </remarks>
        public BinaryClassificationExperiment CreateBinaryClassificationExperiment(BinaryExperimentSettings experimentSettings)
        {
            return new BinaryClassificationExperiment(_context, experimentSettings);
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a multiclass classification dataset.
        /// </summary>
        /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
        /// <returns>A new AutoML multiclass classification experiment.</returns>
        /// <remarks>
        /// <para>See <see cref="MulticlassClassificationExperiment"/> for a more detailed code example of an AutoML multiclass classification experiment.</para>
        /// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
        /// This is because once AutoML starts training an ML.NET model, AutoML lets the
        /// model train to completion. For instance, if the first model
        /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
        /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
        /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
        /// </remarks>
        public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(uint maxExperimentTimeInSeconds)
        {
            return new MulticlassClassificationExperiment(_context, new MulticlassExperimentSettings()
            {
                MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
            });
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a multiclass classification dataset.
        /// </summary>
        /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
        /// <returns>A new AutoML multiclass classification experiment.</returns>
        /// <remarks>
        /// See <see cref="MulticlassClassificationExperiment"/> for a more detailed code example of an AutoML multiclass classification experiment.
        /// </remarks>
        public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(MulticlassExperimentSettings experimentSettings)
        {
            return new MulticlassClassificationExperiment(_context, experimentSettings);
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a recommendation classification dataset.
        /// </summary>
        /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
        /// <returns>A new AutoML recommendation classification experiment.</returns>
        /// <remarks>
        /// <para>See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML multiclass classification experiment.</para>
        /// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
        /// This is because once AutoML starts training an ML.NET model, AutoML lets the
        /// model train to completion. For instance, if the first model
        /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
        /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
        /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
        /// </remarks>
        public RecommendationExperiment CreateRecommendationExperiment(uint maxExperimentTimeInSeconds)
        {
            return new RecommendationExperiment(_context, new RecommendationExperimentSettings()
            {
                MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
            });
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a recommendation dataset.
        /// </summary>
        /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
        /// <returns>A new AutoML recommendation experiment.</returns>
        /// <remarks>
        /// See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML recommendation experiment.
        /// </remarks>
        public RecommendationExperiment CreateRecommendationExperiment(RecommendationExperimentSettings experimentSettings)
        {
            return new RecommendationExperiment(_context, experimentSettings);
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a ranking dataset.
        /// </summary>
        /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
        /// <returns>A new AutoML ranking experiment.</returns>
        /// <remarks>
        /// <para>See <see cref="RankingExperiment"/> for a more detailed code example of an AutoML ranking experiment.</para>
        /// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
        /// This is because once AutoML starts training an ML.NET model, AutoML lets the
        /// model train to completion. For instance, if the first model
        /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
        /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
        /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
        /// </remarks>
        public RankingExperiment CreateRankingExperiment(uint maxExperimentTimeInSeconds)
        {
            return new RankingExperiment(_context, new RankingExperimentSettings()
            {
                MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
            });
        }
 
        /// <summary>
        /// Creates a new AutoML experiment to run on a ranking dataset.
        /// </summary>
        /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
        /// <returns>A new AutoML ranking experiment.</returns>
        /// <remarks>
        /// See <see cref="RankingExperiment"/> for a more detailed code example of an AutoML ranking experiment.
        /// </remarks>
        public RankingExperiment CreateRankingExperiment(RankingExperimentSettings experimentSettings)
        {
            return new RankingExperiment(_context, experimentSettings);
        }
 
        /// <summary>
        /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path to a dataset file.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
        /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
        /// <returns>Information inferred about the columns in the provided dataset.</returns>
        /// <remarks>
        /// Infers information about the name, data type, and purpose of each column.
        /// The returned <see cref="ColumnInferenceResults.TextLoaderOptions" /> can be used to
        /// instantiate a <see cref="TextLoader" />. The <see cref="TextLoader" /> can be used to
        /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
        /// or used elsewhere in the ML.NET ecosystem (ie in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>.
        /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
        /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
        /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
        /// </remarks>
        public ColumnInferenceResults InferColumns(string path, string labelColumnName = DefaultColumnNames.Label, char? separatorChar = null, bool? allowQuoting = null,
            bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
        {
            UserInputValidationUtil.ValidateInferColumnsArgs(path, labelColumnName);
            return ColumnInferenceApi.InferColumns(_context, path, labelColumnName, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
        }
 
        /// <summary>
        /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path to a dataset file.</param>
        /// <param name="columnInformation">Column information for the dataset.</param>
        /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
        /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
        /// <returns>Information inferred about the columns in the provided dataset.</returns>
        /// <remarks>
        /// Infers information about the name, data type, and purpose of each column.
        /// The returned <see cref="ColumnInferenceResults.TextLoaderOptions" /> can be used to
        /// instantiate a <see cref="TextLoader" />. The <see cref="TextLoader" /> can be used to
        /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
        /// or used elsewhere in the ML.NET ecosystem (ie in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>.
        /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
        /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
        /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
        /// </remarks>
        public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuoting = null,
            bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
        {
            columnInformation = columnInformation ?? new ColumnInformation();
            UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation);
            return ColumnInferenceApi.InferColumns(_context, path, columnInformation, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
        }
 
        /// <summary>
        /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path to a dataset file.</param>
        /// <param name="labelColumnIndex">Column index of the label column in the dataset.</param>
        /// <param name="hasHeader">Whether or not the dataset file has a header row.</param>
        /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
        /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
        /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
        /// <returns>Information inferred about the columns in the provided dataset.</returns>
        /// <remarks>
        /// Infers information about the name, data type, and purpose of each column.
        /// The returned <see cref="ColumnInferenceResults.TextLoaderOptions" /> can be used to
        /// instantiate a <see cref="TextLoader" />. The <see cref="TextLoader" /> can be used to
        /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
        /// or used elsewhere in the ML.NET ecosystem (ie in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>.
        /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
        /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
        /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
        /// </remarks>
        public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, bool hasHeader = false, char? separatorChar = null,
            bool? allowQuoting = null, bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
        {
            UserInputValidationUtil.ValidateInferColumnsArgs(path);
            return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
        }
 
        /// <summary>
        /// Create a sweepable estimator with a custom factory and search space.
        /// </summary>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[AutoMLExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SweepableLightGBMBinaryExperiment.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public SweepableEstimator CreateSweepableEstimator<T>(Func<MLContext, T, IEstimator<ITransformer>> factory, SearchSpace<T> ss = null)
            where T : class, new()
        {
            return new SweepableEstimator((MLContext context, Parameter param) => factory(context, param.AsType<T>()), ss);
        }
 
        /// <summary>
        /// Create an <see cref="AutoMLExperiment"/>.
        /// </summary>
        public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettings settings = null)
        {
            return new AutoMLExperiment(_context, settings ?? new AutoMLExperiment.AutoMLExperimentSettings());
        }
 
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for binary classification.
        /// </summary>
        /// <param name="labelColumnName">label column name.</param>
        /// <param name="featureColumnName">feature column name.</param>
        /// <param name="exampleWeightColumnName">example weight column name.</param>
        /// <param name="useFastForest">true if use fast forest as available trainer.</param>
        /// <param name="useLgbm">true if use lgbm as available trainer.</param>
        /// <param name="useFastTree">true if use fast tree as available trainer.</param>
        /// <param name="useLbfgsLogisticRegression">true if use <see cref="LbfgsLogisticRegressionBinaryTrainer"/> as available trainer.</param>
        /// <param name="useSdcaLogisticRegression">true if use <see cref="SdcaLogisticRegressionBinaryTrainer"/> as available trainer.</param>
        /// <param name="fastTreeOption">if provided, use it as initial option for fast tree, otherwise the default option will be used.</param>
        /// <param name="lgbmOption">if provided, use it as initial option for lgbm, otherwise the default option will be used.</param>
        /// <param name="fastForestOption">if provided, use it as initial option for fast forest, otherwise the default option will be used.</param>
        /// <param name="lbfgsLogisticRegressionOption">if provided, use it as initial option for <paramref name="lbfgsLogisticRegressionSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="sdcaLogisticRegressionOption">if provided, use it as initial option for <paramref name="sdcaLogisticRegressionSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="fastTreeSearchSpace">if provided, use it as search space for fast tree, otherwise the default search space will be used.</param>
        /// <param name="lgbmSearchSpace">if provided, use it as search space for lgbm, otherwise the default search space will be used.</param>
        /// <param name="fastForestSearchSpace">if provided, use it as search space for fast forest, otherwise the default search space will be used.</param>
        /// <param name="lbfgsLogisticRegressionSearchSpace">if provided, use it as search space for <see cref="LbfgsLogisticRegressionBinaryTrainer"/>, otherwise the default search space will be used.</param>
        /// <param name="sdcaLogisticRegressionSearchSpace">if provided, use it as search space for <see cref="SdcaLogisticRegressionBinaryTrainer"/>, otherwise the default search space will be used.</param>
        /// <returns></returns>
        public SweepablePipeline BinaryClassification(string labelColumnName = DefaultColumnNames.Label,
            string featureColumnName = DefaultColumnNames.Features,
            string exampleWeightColumnName = null,
            bool useFastForest = true,
            bool useLgbm = true,
            bool useFastTree = true,
            bool useLbfgsLogisticRegression = true,
            bool useSdcaLogisticRegression = true,
            FastTreeOption fastTreeOption = null,
            LgbmOption lgbmOption = null,
            FastForestOption fastForestOption = null,
            LbfgsOption lbfgsLogisticRegressionOption = null,
            SdcaOption sdcaLogisticRegressionOption = null,
            SearchSpace<FastTreeOption> fastTreeSearchSpace = null,
            SearchSpace<LgbmOption> lgbmSearchSpace = null,
            SearchSpace<FastForestOption> fastForestSearchSpace = null,
            SearchSpace<LbfgsOption> lbfgsLogisticRegressionSearchSpace = null,
            SearchSpace<SdcaOption> sdcaLogisticRegressionSearchSpace = null)
        {
            var res = new List<SweepableEstimator>();
 
            if (useFastTree)
            {
                fastTreeOption = fastTreeOption ?? new FastTreeOption();
                fastTreeOption.LabelColumnName = labelColumnName;
                fastTreeOption.FeatureColumnName = featureColumnName;
                fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastTreeBinary(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
            }
 
            if (useFastForest)
            {
                fastForestOption = fastForestOption ?? new FastForestOption();
                fastForestOption.LabelColumnName = labelColumnName;
                fastForestOption.FeatureColumnName = featureColumnName;
                fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastForestBinary(fastForestOption, fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption)));
            }
 
            if (useLgbm)
            {
                lgbmOption = lgbmOption ?? new LgbmOption();
                lgbmOption.LabelColumnName = labelColumnName;
                lgbmOption.FeatureColumnName = featureColumnName;
                lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLightGbmBinary(lgbmOption, lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption)));
            }
 
            if (useLbfgsLogisticRegression)
            {
                lbfgsLogisticRegressionOption = lbfgsLogisticRegressionOption ?? new LbfgsOption();
                lbfgsLogisticRegressionOption.LabelColumnName = labelColumnName;
                lbfgsLogisticRegressionOption.FeatureColumnName = featureColumnName;
                lbfgsLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionBinary(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsLogisticRegressionOption)));
            }
 
            if (useSdcaLogisticRegression)
            {
                sdcaLogisticRegressionOption = sdcaLogisticRegressionOption ?? new SdcaOption();
                sdcaLogisticRegressionOption.LabelColumnName = labelColumnName;
                sdcaLogisticRegressionOption.FeatureColumnName = featureColumnName;
                sdcaLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(sdcaLogisticRegressionOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace<SdcaOption>(sdcaLogisticRegressionOption)));
            }
 
            return new SweepablePipeline().Append(res.ToArray());
        }
 
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for multiclass classification.
        /// </summary>
        /// <param name="labelColumnName">label column name.</param>
        /// <param name="featureColumnName">feature column name.</param>
        /// <param name="exampleWeightColumnName">example weight column name.</param>
        /// <param name="useFastForest">true if use fast forest as available trainer.</param>
        /// <param name="useLgbm">true if use lgbm as available trainer.</param>
        /// <param name="useFastTree">true if use fast tree as available trainer.</param>
        /// <param name="useLbfgsMaximumEntrophy">true if use <see cref="LbfgsMaximumEntropyMulticlassTrainer"/> as available trainer.</param>
        /// <param name="useLbfgsLogisticRegression">true if use <see cref="LbfgsLogisticRegressionBinaryTrainer"/> as available trainer.</param>
        /// <param name="useSdcaMaximumEntrophy">true if use <see cref="SdcaMaximumEntropyMulticlassTrainer"/> as available trainer.</param>
        /// <param name="useSdcaLogisticRegression">true if use <see cref="SdcaLogisticRegressionBinaryTrainer"/> as available trainer.</param>
        /// <param name="fastTreeOption">if provided, use it as initial option for fast tree, otherwise the default option will be used.</param>
        /// <param name="lgbmOption">if provided, use it as initial option for lgbm, otherwise the default option will be used.</param>
        /// <param name="fastForestOption">if provided, use it as initial option for fast forest, otherwise the default option will be used.</param>
        /// <param name="lbfgsMaximumEntrophyOption">if provided, use it as initial option for <paramref name="lbfgsMaximumEntrophySearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="lbfgsLogisticRegressionOption">if provided, use it as initial option for <paramref name="lbfgsLogisticRegressionSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="sdcaMaximumEntrophyOption">if provided, use it as initial option for <paramref name="sdcaMaximumEntorphySearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="sdcaLogisticRegressionOption">if provided, use it as initial option for <paramref name="sdcaLogisticRegressionSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="fastTreeSearchSpace">if provided, use it as search space for fast tree, otherwise the default search space will be used.</param>
        /// <param name="lgbmSearchSpace">if provided, use it as search space for lgbm, otherwise the default search space will be used.</param>
        /// <param name="fastForestSearchSpace">if provided, use it as search space for fast forest, otherwise the default search space will be used.</param>
        /// <param name="lbfgsMaximumEntrophySearchSpace">if provided, use it as search space for <see cref="LbfgsMaximumEntropyMulticlassTrainer"/>, otherwise the default search space will be used.</param>
        /// <param name="lbfgsLogisticRegressionSearchSpace">if provided, use it as search space for <see cref="LbfgsMaximumEntropyMulticlassTrainer"/>, otherwise the default search space will be used.</param>
        /// <param name="sdcaMaximumEntorphySearchSpace">if provided, use it as search space for <see cref="SdcaMaximumEntropyMulti"/>, otherwise the default search space will be used.</param>
        /// <param name="sdcaLogisticRegressionSearchSpace">if provided, use it as search space for <see cref="SdcaLogisticRegressionBinaryTrainer"/>, otherwise the default search space will be used.</param>
        /// <returns></returns>
        public SweepablePipeline MultiClassification(
            string labelColumnName = DefaultColumnNames.Label,
            string featureColumnName = DefaultColumnNames.Features,
            string exampleWeightColumnName = null,
            bool useFastForest = true,
            bool useLgbm = true,
            bool useFastTree = true,
            bool useLbfgsMaximumEntrophy = true,
            bool useLbfgsLogisticRegression = true,
            bool useSdcaMaximumEntrophy = true,
            bool useSdcaLogisticRegression = true,
            FastTreeOption fastTreeOption = null,
            LgbmOption lgbmOption = null,
            FastForestOption fastForestOption = null,
            LbfgsOption lbfgsMaximumEntrophyOption = null,
            LbfgsOption lbfgsLogisticRegressionOption = null,
            SdcaOption sdcaMaximumEntrophyOption = null,
            SdcaOption sdcaLogisticRegressionOption = null,
            SearchSpace<FastTreeOption> fastTreeSearchSpace = null,
            SearchSpace<LgbmOption> lgbmSearchSpace = null,
            SearchSpace<FastForestOption> fastForestSearchSpace = null,
            SearchSpace<LbfgsOption> lbfgsMaximumEntrophySearchSpace = null,
            SearchSpace<LbfgsOption> lbfgsLogisticRegressionSearchSpace = null,
            SearchSpace<SdcaOption> sdcaMaximumEntorphySearchSpace = null,
            SearchSpace<SdcaOption> sdcaLogisticRegressionSearchSpace = null)
        {
            var res = new List<SweepableEstimator>();
 
            if (useFastTree)
            {
                fastTreeOption = fastTreeOption ?? new FastTreeOption();
                fastTreeOption.LabelColumnName = labelColumnName;
                fastTreeOption.FeatureColumnName = featureColumnName;
                fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastTreeOva(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
            }
 
            if (useFastForest)
            {
                fastForestOption = fastForestOption ?? new FastForestOption();
                fastForestOption.LabelColumnName = labelColumnName;
                fastForestOption.FeatureColumnName = featureColumnName;
                fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastForestOva(fastForestOption, fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption)));
            }
 
            if (useLgbm)
            {
                lgbmOption = lgbmOption ?? new LgbmOption();
                lgbmOption.LabelColumnName = labelColumnName;
                lgbmOption.FeatureColumnName = featureColumnName;
                lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLightGbmMulti(lgbmOption, lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption)));
            }
 
            if (useLbfgsLogisticRegression)
            {
                lbfgsLogisticRegressionOption = lbfgsLogisticRegressionOption ?? new LbfgsOption();
                lbfgsLogisticRegressionOption.LabelColumnName = labelColumnName;
                lbfgsLogisticRegressionOption.FeatureColumnName = featureColumnName;
                lbfgsLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsLogisticRegressionOption)));
            }
 
 
            if (useLbfgsMaximumEntrophy)
            {
                lbfgsMaximumEntrophyOption = lbfgsMaximumEntrophyOption ?? new LbfgsOption();
                lbfgsMaximumEntrophyOption.LabelColumnName = labelColumnName;
                lbfgsMaximumEntrophyOption.FeatureColumnName = featureColumnName;
                lbfgsMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsMaximumEntrophyOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsMaximumEntrophyOption)));
            }
 
            if (useSdcaMaximumEntrophy)
            {
                sdcaMaximumEntrophyOption = sdcaMaximumEntrophyOption ?? new SdcaOption();
                sdcaMaximumEntrophyOption.LabelColumnName = labelColumnName;
                sdcaMaximumEntrophyOption.FeatureColumnName = featureColumnName;
                sdcaMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaMaximumEntrophyOption, sdcaMaximumEntorphySearchSpace ?? new SearchSpace<SdcaOption>(sdcaMaximumEntrophyOption)));
            }
 
            if (useSdcaLogisticRegression)
            {
                sdcaLogisticRegressionOption = sdcaLogisticRegressionOption ?? new SdcaOption();
                sdcaLogisticRegressionOption.LabelColumnName = labelColumnName;
                sdcaLogisticRegressionOption.FeatureColumnName = featureColumnName;
                sdcaLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaLogisticRegressionOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace<SdcaOption>(sdcaLogisticRegressionOption)));
            }
 
            return new SweepablePipeline().Append(res.ToArray());
        }
 
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for regression.
        /// </summary>
        /// <param name="labelColumnName">label column name.</param>
        /// <param name="featureColumnName">feature column name.</param>
        /// <param name="exampleWeightColumnName">example weight column name.</param>
        /// <param name="useFastForest">true if use fast forest as available trainer.</param>
        /// <param name="useLgbm">true if use lgbm as available trainer.</param>
        /// <param name="useFastTree">true if use fast tree as available trainer.</param>
        /// <param name="useLbfgsPoissonRegression">true if use <see cref="LbfgsPoissonRegressionTrainer"/> as available trainer.</param>
        /// <param name="useSdca">true if use <see cref="SdcaRegressionTrainer"/> as available trainer.</param>
        /// <param name="fastTreeOption">if provided, use it as initial option for fast tree, otherwise the default option will be used.</param>
        /// <param name="lgbmOption">if provided, use it as initial option for lgbm, otherwise the default option will be used.</param>
        /// <param name="fastForestOption">if provided, use it as initial option for fast forest, otherwise the default option will be used.</param>
        /// <param name="lbfgsPoissonRegressionOption">if provided, use it as initial option for <paramref name="lbfgsPoissonRegressionSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="sdcaOption">if provided, use it as initial option for <paramref name="sdcaSearchSpace"/>, otherwise the default option will be used.</param>
        /// <param name="fastTreeSearchSpace">if provided, use it as search space for fast tree, otherwise the default search space will be used.</param>
        /// <param name="lgbmSearchSpace">if provided, use it as search space for lgbm, otherwise the default search space will be used.</param>
        /// <param name="fastForestSearchSpace">if provided, use it as search space for fast forest, otherwise the default search space will be used.</param>
        /// <param name="lbfgsPoissonRegressionSearchSpace">if provided, use it as search space for <see cref="LbfgsPoissonRegressionTrainer"/>, otherwise the default search space will be used.</param>
        /// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
        /// <returns></returns>
        public SweepablePipeline Regression(
            string labelColumnName = DefaultColumnNames.Label,
            string featureColumnName = DefaultColumnNames.Features,
            string exampleWeightColumnName = null,
            bool useFastForest = true,
            bool useLgbm = true,
            bool useFastTree = true,
            bool useLbfgsPoissonRegression = true,
            bool useSdca = true,
            FastTreeOption fastTreeOption = null,
            LgbmOption lgbmOption = null,
            FastForestOption fastForestOption = null,
            LbfgsOption lbfgsPoissonRegressionOption = null,
            SdcaOption sdcaOption = null,
            SearchSpace<FastTreeOption> fastTreeSearchSpace = null,
            SearchSpace<LgbmOption> lgbmSearchSpace = null,
            SearchSpace<FastForestOption> fastForestSearchSpace = null,
            SearchSpace<LbfgsOption> lbfgsPoissonRegressionSearchSpace = null,
            SearchSpace<SdcaOption> sdcaSearchSpace = null)
        {
            var res = new List<SweepableEstimator>();
 
            if (useFastTree)
            {
                fastTreeOption = fastTreeOption ?? new FastTreeOption();
                fastTreeOption.LabelColumnName = labelColumnName;
                fastTreeOption.FeatureColumnName = featureColumnName;
                fastTreeOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastTreeRegression(fastTreeOption, fastTreeSearchSpace ?? new SearchSpace<FastTreeOption>(fastTreeOption)));
            }
 
            if (useFastForest)
            {
                fastForestOption = fastForestOption ?? new FastForestOption();
                fastForestOption.LabelColumnName = labelColumnName;
                fastForestOption.FeatureColumnName = featureColumnName;
                fastForestOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateFastForestRegression(fastForestOption, fastForestSearchSpace ?? new SearchSpace<FastForestOption>(fastForestOption)));
            }
 
            if (useLgbm)
            {
                lgbmOption = lgbmOption ?? new LgbmOption();
                lgbmOption.LabelColumnName = labelColumnName;
                lgbmOption.FeatureColumnName = featureColumnName;
                lgbmOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLightGbmRegression(lgbmOption, lgbmSearchSpace ?? new SearchSpace<LgbmOption>(lgbmOption)));
            }
 
            if (useLbfgsPoissonRegression)
            {
                lbfgsPoissonRegressionOption = lbfgsPoissonRegressionOption ?? new LbfgsOption();
                lbfgsPoissonRegressionOption.LabelColumnName = labelColumnName;
                lbfgsPoissonRegressionOption.FeatureColumnName = featureColumnName;
                lbfgsPoissonRegressionOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsPoissonRegressionOption, lbfgsPoissonRegressionSearchSpace ?? new SearchSpace<LbfgsOption>(lbfgsPoissonRegressionOption)));
            }
 
            if (useSdca)
            {
                sdcaOption = sdcaOption ?? new SdcaOption();
                sdcaOption.LabelColumnName = labelColumnName;
                sdcaOption.FeatureColumnName = featureColumnName;
                sdcaOption.ExampleWeightColumnName = exampleWeightColumnName;
                res.Add(SweepableEstimatorFactory.CreateSdcaRegression(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
            }
 
            return new SweepablePipeline().Append(res.ToArray());
        }
 
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for featurizing text.
        /// </summary>
        /// <param name="outputColumnName">output column name.</param>
        /// <param name="inputColumnName">input column name.</param>
        internal SweepablePipeline TextFeaturizer(string outputColumnName, string inputColumnName)
        {
            var option = new FeaturizeTextOption
            {
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            };
 
            return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateFeaturizeText(option) });
        }
 
        /// <summary>
        /// Create a <see cref="SweepablePipeline"/> for featurizing numeric columns.
        /// </summary>
        /// <param name="outputColumnNames">output column names.</param>
        /// <param name="inputColumnNames">input column names.</param>
        internal SweepablePipeline NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
        {
            Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
            Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
            Contracts.Check(outputColumnNames.Count() == inputColumnNames.Count() && outputColumnNames.Count() > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");
            var replaceMissingValueOption = new ReplaceMissingValueOption
            {
                InputColumnNames = inputColumnNames,
                OutputColumnNames = outputColumnNames,
            };
 
            return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateReplaceMissingValues(replaceMissingValueOption) });
        }
 
        /// <summary>
        /// Create a <see cref="SweepablePipeline"/> for featurizing boolean columns. This pipeline convert all boolean column
        /// to numeric type.
        /// </summary>
        /// <param name="outputColumnNames">output column names.</param>
        /// <param name="inputColumnNames">input column names.</param>
        /// <returns>a list of <see cref="SweepableEstimator"/></returns>
        internal SweepableEstimator[] BooleanFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
        {
            Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
            Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
            Contracts.Check(outputColumnNames.Count() == inputColumnNames.Count() && outputColumnNames.Count() > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");
 
            // by default, convertType's output kind is single
            var convertTypeOption = new ConvertTypeOption
            {
                InputColumnNames = inputColumnNames,
                OutputColumnNames = outputColumnNames,
            };
 
            return new[] { SweepableEstimatorFactory.CreateConvertType(convertTypeOption) };
        }
 
        /// <summary>
        /// Create a list of <see cref="SweepableEstimator"/> for featurizing catalog columns.
        /// </summary>
        /// <param name="outputColumnNames">output column names.</param>
        /// <param name="inputColumnNames">input column names.</param>
        internal SweepablePipeline CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
        {
            Contracts.Check(outputColumnNames.Count() == inputColumnNames.Count() && outputColumnNames.Count() > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");
 
            var option = new OneHotOption
            {
                InputColumnNames = inputColumnNames,
                OutputColumnNames = outputColumnNames,
            };
 
            return new SweepablePipeline().Append(new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) });
        }
 
        internal SweepablePipeline ImagePathFeaturizer(string outputColumnName, string inputColumnName)
        {
            // load image => resize image (224, 224) => extract pixels => dnn featurizer
            var loadImageOption = new LoadImageOption
            {
                ImageFolder = null,
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            };
 
            var resizeImageOption = new ResizeImageOption
            {
                ImageHeight = 224,
                ImageWidth = 224,
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            };
 
            var extractPixelOption = new ExtractPixelsOption
            {
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            };
 
            var dnnFeaturizerOption = new DnnFeaturizerImageOption
            {
                InputColumnName = inputColumnName,
                OutputColumnName = outputColumnName,
            };
 
            var pipeline = new SweepablePipeline();
 
            return pipeline.Append(SweepableEstimatorFactory.CreateLoadImages(loadImageOption))
                        .Append(SweepableEstimatorFactory.CreateResizeImages(resizeImageOption))
                        .Append(SweepableEstimatorFactory.CreateExtractPixels(extractPixelOption))
                        .Append(SweepableEstimatorFactory.CreateDnnFeaturizerImage(dnnFeaturizerOption));
        }
 
        /// <summary>
        /// Create a single featurize pipeline according to <paramref name="data"/>. This function will collect all columns in <paramref name="data"/> and not in <paramref name="excludeColumns"/>,
        /// featurizing them using <see cref="CatalogFeaturizer(string[], string[])"/>, <see cref="NumericFeaturizer(string[], string[])"/> or <see cref="TextFeaturizer(string, string)"/>. And combine
        /// them into a single feature column as output.
        /// </summary>
        /// <param name="data">input data.</param>
        /// <param name="catelogicalColumns">columns that should be treated as catalog. If not specified, it will automatically infer if a column is catalog or not.</param>
        /// <param name="numericColumns">columns that should be treated as numeric. If not specified, it will automatically infer if a column is catalog or not.</param>
        /// <param name="textColumns">columns that should be treated as text. If not specified, it will automatically infer if a column is catalog or not.</param>
        /// <param name="imagePathColumns">columns that should be treated as image path. If not specified, it will automatically infer if a column is catalog or not.</param>
        /// <param name="outputColumnName">output feature column.</param>
        /// <param name="excludeColumns">columns that won't be included when featurizing, like label</param>
        public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catelogicalColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
        {
            Contracts.CheckValue(data, nameof(data));
 
            // validate if there's overlapping among catalogColumns, numericColumns, textColumns and excludeColumns
            var overallColumns = new string[][] { catelogicalColumns, numericColumns, textColumns, excludeColumns }
                                    .Where(c => c != null)
                                    .SelectMany(c => c);
 
            if (overallColumns != null)
            {
                Contracts.Assert(overallColumns.Count() == overallColumns.Distinct().Count(), "detect overlapping among catalogColumns, numericColumns, textColumns and excludedColumns");
            }
 
            var columnInfo = new ColumnInformation();
 
            if (excludeColumns != null)
            {
                foreach (var ignoreColumn in excludeColumns)
                {
                    columnInfo.IgnoredColumnNames.Add(ignoreColumn);
                }
            }
 
            if (catelogicalColumns != null)
            {
                foreach (var catalogColumn in catelogicalColumns)
                {
                    columnInfo.CategoricalColumnNames.Add(catalogColumn);
                }
            }
 
            if (numericColumns != null)
            {
                foreach (var column in numericColumns)
                {
                    columnInfo.NumericColumnNames.Add(column);
                }
            }
 
            if (textColumns != null)
            {
                foreach (var column in textColumns)
                {
                    columnInfo.TextColumnNames.Add(column);
                }
            }
 
            if (imagePathColumns != null)
            {
                foreach (var column in imagePathColumns)
                {
                    columnInfo.ImagePathColumnNames.Add(column);
                }
            }
 
            return this.Featurizer(data, columnInfo, outputColumnName);
        }
 
        /// <summary>
        /// Create a single featurize pipeline according to <paramref name="columnInformation"/>. This function will collect all columns in <paramref name="columnInformation"/>,
        /// featurizing them using <see cref="CatalogFeaturizer(string[], string[])"/>, <see cref="NumericFeaturizer(string[], string[])"/> or <see cref="TextFeaturizer(string, string)"/>. And combine
        /// them into a single feature column as output.
        /// </summary>
        /// <param name="data">input data.</param>
        /// <param name="columnInformation">column information.</param>
        /// <param name="outputColumnName">output feature column.</param>
        /// <returns>A <see cref="SweepablePipeline"/> for featurization.</returns>
        public SweepablePipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
        {
            Contracts.CheckValue(data, nameof(data));
            Contracts.CheckValue(columnInformation, nameof(columnInformation));
 
            var columnPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);
            var textFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.TextFeature);
            var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature
                                                            && data.Schema[c.ColumnIndex].Type != BooleanDataViewType.Instance
                                                            && !(data.Schema[c.ColumnIndex].Type is VectorDataViewType vt && vt.ItemType == BooleanDataViewType.Instance)).ToArray();
            var booleanFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && !numericFeatures.Contains(c));
            var catalogFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.CategoricalFeature);
            var imagePathFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.ImagePath);
            var textFeatureColumnNames = textFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
            var numericFeatureColumnNames = numericFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
            var catalogFeatureColumnNames = catalogFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
            var imagePathColumnNames = imagePathFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
            var booleanFeatureColumnNames = booleanFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
 
            var pipeline = new SweepablePipeline();
            if (numericFeatureColumnNames.Length > 0)
            {
                pipeline = pipeline.Append(this.NumericFeaturizer(numericFeatureColumnNames, numericFeatureColumnNames));
            }
 
            if (booleanFeatureColumnNames.Length > 0)
            {
                pipeline = pipeline.Append(this.BooleanFeaturizer(booleanFeatureColumnNames, booleanFeatureColumnNames));
            }
 
            if (catalogFeatureColumnNames.Length > 0)
            {
                pipeline = pipeline.Append(this.CatalogFeaturizer(catalogFeatureColumnNames, catalogFeatureColumnNames));
            }
 
            foreach (var imagePathColumn in imagePathColumnNames)
            {
                pipeline = pipeline.Append(this.ImagePathFeaturizer(imagePathColumn, imagePathColumn));
            }
 
            foreach (var textColumn in textFeatureColumnNames)
            {
                pipeline = pipeline.Append(this.TextFeaturizer(textColumn, textColumn));
            }
 
            var option = new ConcatOption
            {
                InputColumnNames = textFeatureColumnNames.Concat(numericFeatureColumnNames).Concat(catalogFeatureColumnNames).Concat(imagePathColumnNames).Concat(booleanFeatureColumnNames).ToArray(),
                OutputColumnName = outputColumnName,
            };
 
            if (option.InputColumnNames.Length > 0)
            {
                pipeline = pipeline.Append(SweepableEstimatorFactory.CreateConcatenate(option));
            }
 
            return pipeline;
        }
    }
}