File: Commands\ShowSchemaCommand.cs
Web Access
Project: src\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using Microsoft.ML;
using Microsoft.ML.Command;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
using Microsoft.ML.Data.Conversion;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
 
[assembly: LoadableClass(ShowSchemaCommand.Summary, typeof(ShowSchemaCommand), typeof(ShowSchemaCommand.Arguments), typeof(SignatureCommand),
    "Show Schema", ShowSchemaCommand.LoadName, "schema")]
 
namespace Microsoft.ML.Data
{
    internal sealed class ShowSchemaCommand : DataCommand.ImplBase<ShowSchemaCommand.Arguments>
    {
        public sealed class Arguments : DataCommand.ArgumentsBase
        {
            [Argument(ArgumentType.AtMostOnce, HelpText = "Show all steps in transform chain", ShortName = "steps")]
            public bool ShowSteps;
 
            [Argument(ArgumentType.AtMostOnce, HelpText = "Show the metadata types", ShortName = "metaTypes")]
            public bool ShowMetadataTypes;
 
            // REVIEW: Support abbreviating long vector-valued metadata?
            [Argument(ArgumentType.AtMostOnce, HelpText = "Show the metadata types and values", ShortName = "meta,metaVals,metaValues")]
            public bool ShowMetadataValues;
 
            // Note that showing metadata overrides this.
            // REVIEW: Should we just remove this or possibly support only showing metadata of specified kinds?
            [Argument(ArgumentType.AtMostOnce, HelpText = "Show slot names", ShortName = "slots", Hide = true)]
            public bool ShowSlots;
 
#if !CORECLR
            // Note that this overrides other options.
            [Argument(ArgumentType.AtMostOnce, HelpText = "Show JSON version of the schema", ShortName = "json", Hide = true)]
            public bool ShowJson;
#endif
        }
 
        internal const string LoadName = "ShowSchema";
        internal const string Summary = "Given input data, a loader, and possibly transforms, display the schema.";
 
        public ShowSchemaCommand(IHostEnvironment env, Arguments args)
                : base(env, args, nameof(ShowSchemaCommand))
        {
        }
 
        public override void Run()
        {
            using (var ch = Host.Start(LoadName))
            {
                RunCore(ch);
            }
        }
 
        private void RunCore(IChannel ch)
        {
            ILegacyDataLoader loader = CreateAndSaveLoader();
            using (var schemaWriter = new StringWriter())
            {
                RunOnData(schemaWriter, ImplOptions, loader);
                var str = schemaWriter.ToString();
                ch.AssertNonEmpty(str);
                ch.Info(str);
            }
        }
 
        /// <summary>
        /// This shows the schema of the given <paramref name="data"/>, ignoring the data specification
        /// in the <paramref name="args"/> parameter. Test code invokes this, hence it is internal.
        /// </summary>
        internal static void RunOnData(TextWriter writer, Arguments args, IDataView data)
        {
            Contracts.AssertValue(writer);
            Contracts.AssertValue(args);
            Contracts.AssertValue(data);
 
            if (args.ShowSteps)
            {
                IEnumerable<IDataView> viewChainReversed = GetViewChainReversed(data);
                foreach (var view in viewChainReversed.Reverse())
                {
                    writer.WriteLine("---- {0} ----", view.GetType().Name);
                    PrintSchema(writer, args, view.Schema, view as ITransposeDataView);
                }
            }
            else
                PrintSchema(writer, args, data.Schema, data as ITransposeDataView);
        }
 
        /// <summary>
        /// Returns the sequence of views passed through the transform chain, last to first.
        /// </summary>
        private static IEnumerable<IDataView> GetViewChainReversed(IDataView data)
        {
            Contracts.AssertValue(data);
            IDataView view = (data as LegacyCompositeDataLoader)?.View ?? data;
            while (view != null)
            {
                yield return view;
                var transform = view as IDataTransform;
                view = transform?.Source;
            }
        }
 
        private static void PrintSchema(TextWriter writer, Arguments args, DataViewSchema schema, ITransposeDataView transposeDataView)
        {
            Contracts.AssertValue(writer);
            Contracts.AssertValue(args);
            Contracts.AssertValue(schema);
            Contracts.AssertValueOrNull(transposeDataView);
#if !CORECLR
            if (args.ShowJson)
            {
                writer.WriteLine("Json Schema not supported.");
                return;
            }
#endif
            int colLim = schema.Count;
 
            var itw = new IndentedTextWriter(writer, "  ");
            itw.WriteLine("{0} columns:", colLim);
            using (itw.Nest())
            {
                var names = default(VBuffer<ReadOnlyMemory<char>>);
                for (int col = 0; col < colLim; col++)
                {
                    var name = schema[col].Name;
                    var type = schema[col].Type;
                    var slotType = transposeDataView?.GetSlotType(col);
                    itw.WriteLine("{0}: {1}{2}", name, type, slotType == null ? "" : " (T)");
 
                    bool metaVals = args.ShowMetadataValues;
                    if (metaVals || args.ShowMetadataTypes)
                    {
                        ShowMetadata(itw, schema, col, metaVals);
                        continue;
                    }
 
                    if (!args.ShowSlots)
                        continue;
                    if (!type.IsKnownSizeVector())
                        continue;
                    DataViewType typeNames;
                    if ((typeNames = schema[col].Annotations.Schema.GetColumnOrNull(AnnotationUtils.Kinds.SlotNames)?.Type) == null)
                        continue;
                    if (typeNames.GetVectorSize() != type.GetVectorSize() || !(typeNames.GetItemType() is TextDataViewType))
                    {
                        Contracts.Assert(false, "Unexpected slot names type");
                        continue;
                    }
                    schema[col].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref names);
                    if (names.Length != type.GetVectorSize())
                    {
                        Contracts.Assert(false, "Unexpected length of slot names vector");
                        continue;
                    }
 
                    using (itw.Nest())
                    {
                        bool verbose = args.Verbose ?? false;
                        foreach (var kvp in names.Items(all: verbose))
                        {
                            if (verbose || !kvp.Value.IsEmpty)
                                itw.WriteLine("{0}:{1}", kvp.Key, kvp.Value);
                        }
                    }
                }
            }
        }
 
        private static void ShowMetadata(IndentedTextWriter itw, DataViewSchema schema, int col, bool showVals)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.Count);
 
            using (itw.Nest())
            {
                foreach (var metaColumn in schema[col].Annotations.Schema.OrderBy(mcol => mcol.Name))
                {
                    var type = metaColumn.Type;
                    itw.Write("Metadata '{0}': {1}", metaColumn.Name, type);
                    if (showVals)
                    {
                        if (!(type is VectorDataViewType vectorType))
                            ShowMetadataValue(itw, schema, col, metaColumn.Name, type);
                        else
                            ShowMetadataValueVec(itw, schema, col, metaColumn.Name, vectorType);
                    }
                    itw.WriteLine();
                }
            }
        }
 
        private static void ShowMetadataValue(IndentedTextWriter itw, DataViewSchema schema, int col, string kind, DataViewType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.Count);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(!(type is VectorDataViewType));
 
            if (!type.IsStandardScalar() && !(type is KeyDataViewType))
            {
                itw.Write(": Can't display value of this type");
                return;
            }
 
            Action<IndentedTextWriter, DataViewSchema, int, string, DataViewType> del = ShowMetadataValue<int>;
            var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.RawType);
            meth.Invoke(null, new object[] { itw, schema, col, kind, type });
        }
 
        private static void ShowMetadataValue<T>(IndentedTextWriter itw, DataViewSchema schema, int col, string kind, DataViewType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.Count);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(!(type is VectorDataViewType));
            Contracts.Assert(type.RawType == typeof(T));
 
            var conv = Conversions.DefaultInstance.GetStringConversion<T>(type);
 
            var value = default(T);
            var sb = default(StringBuilder);
            schema[col].Annotations.GetValue(kind, ref value);
            conv(in value, ref sb);
 
            itw.Write(": '{0}'", sb);
        }
 
        private static void ShowMetadataValueVec(IndentedTextWriter itw, DataViewSchema schema, int col, string kind, VectorDataViewType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.Count);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
 
            if (!type.ItemType.IsStandardScalar() && !(type.ItemType is KeyDataViewType))
            {
                itw.Write(": Can't display value of this type");
                return;
            }
 
            Action<IndentedTextWriter, DataViewSchema, int, string, VectorDataViewType> del = ShowMetadataValueVec<int>;
            var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.ItemType.RawType);
            meth.Invoke(null, new object[] { itw, schema, col, kind, type });
        }
 
        private static void ShowMetadataValueVec<T>(IndentedTextWriter itw, DataViewSchema schema, int col, string kind, VectorDataViewType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.Count);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(type.ItemType.RawType == typeof(T));
 
            var conv = Conversions.DefaultInstance.GetStringConversion<T>(type.ItemType);
 
            var value = default(VBuffer<T>);
            schema[col].Annotations.GetValue(kind, ref value);
 
            itw.Write(": Length={0}, Count={0}", value.Length, value.GetValues().Length);
 
            using (itw.Nest())
            {
                var sb = default(StringBuilder);
                int count = 0;
                foreach (var item in value.Items())
                {
                    if ((count % 10) == 0)
                        itw.WriteLine();
                    else
                        itw.Write(", ");
                    var val = item.Value;
                    conv(in val, ref sb);
                    itw.Write("[{0}] '{1}'", item.Key, sb);
                    count++;
                }
            }
        }
    }
}