File: DatasetDimensions\DatasetDimensionsApi.cs
Web Access
Project: src\src\Microsoft.ML.AutoML\Microsoft.ML.AutoML.csproj (Microsoft.ML.AutoML)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using Microsoft.ML.Data;
 
namespace Microsoft.ML.AutoML
{
    /// <summary>
    /// Derives per-column dimension information (text cardinality and presence of
    /// missing numeric values) from a row-limited view of a dataset.
    /// </summary>
    internal class DatasetDimensionsApi
    {
        // Row limit handed to TakeRows before any column is inspected.
        private const long MaxRowsToRead = 1000;

        /// <summary>
        /// Produces one <see cref="ColumnDimensions"/> entry for every column in the schema of
        /// <paramref name="data"/>, after restricting the data with TakeRows to
        /// <see cref="MaxRowsToRead"/> rows.
        /// </summary>
        /// <param name="context">ML context whose data catalog is used to limit the rows.</param>
        /// <param name="data">Dataset whose columns are measured.</param>
        /// <param name="purposes">
        /// Column purposes, read by schema index: entry <c>i</c> is paired with schema column <c>i</c>.
        /// </param>
        /// <returns>
        /// An array with the same length as the schema. For each entry the cardinality is set only
        /// when the column's item type is text and its purpose is
        /// <see cref="ColumnPurpose.CategoricalFeature"/>; the has-missing flag is set only when the
        /// item type is <see cref="NumberDataViewType.Single"/>. Otherwise the value stays null.
        /// </returns>
        public static ColumnDimensions[] CalcColumnDimensions(MLContext context, IDataView data, PurposeInference.Column[] purposes)
        {
            var sample = context.Data.TakeRows(data, MaxRowsToRead);
            var schema = sample.Schema;

            var result = new ColumnDimensions[schema.Count];

            for (var index = 0; index < schema.Count; index++)
            {
                var col = schema[index];
                var inferred = purposes[index];
                var elementType = col.Type.GetItemType();

                // Cardinality is only measured for text columns used as categorical features.
                int? distinctCount = null;
                if (elementType.IsText() && inferred.Purpose == ColumnPurpose.CategoricalFeature)
                {
                    distinctCount = DatasetDimensionsUtil.GetTextColumnCardinality(sample, col);
                }

                // Missing-value detection is only run for Single-typed items; the helper
                // used depends on whether the column is a vector or a scalar.
                bool? anyMissing = null;
                if (elementType == NumberDataViewType.Single)
                {
                    if (col.Type.IsVector())
                    {
                        anyMissing = DatasetDimensionsUtil.HasMissingNumericVector(sample, col);
                    }
                    else
                    {
                        anyMissing = DatasetDimensionsUtil.HasMissingNumericSingleValue(sample, col);
                    }
                }

                result[index] = new ColumnDimensions(distinctCount, anyMissing);
            }

            return result;
        }
    }
}