File: Utilities\TypeParsingUtils.cs
Web Access
Project: src\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Text;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
 
namespace Microsoft.ML.Data
{
    /// <summary>
    /// Helpers that turn the command-line text form of an <see cref="IDataView"/> type
    /// (for example <c>U4[10]</c>) into a data kind and an optional key count, and that
    /// build a <see cref="KeyDataViewType"/> from those two pieces.
    /// </summary>
    [BestFriend]
    internal static class TypeParsingUtils
    {
        /// <summary>
        /// Tries to split <paramref name="str"/> into a data kind and an optional bracketed key specification.
        /// No check is made that the resulting <see cref="InternalDataKind"/> is actually usable as the raw type
        /// of a key with the parsed <paramref name="keyCount"/>.
        /// </summary>
        /// <param name="str">The text to parse; must not be null.</param>
        /// <param name="dataKind">Receives the parsed data kind, or the default value when the text consists of a key specification only.</param>
        /// <param name="keyCount">Receives the parsed key count, or null when the text contains no <c>[</c>.</param>
        /// <returns>True when the text was understood, false otherwise.</returns>
        public static bool TryParseDataKind(string str, out InternalDataKind dataKind, out KeyCount keyCount)
        {
            Contracts.CheckValue(str, nameof(str));
            dataKind = default;
            keyCount = null;

            string kindText = str;
            int open = str.IndexOf('[');
            if (open >= 0)
            {
                // A key specification has to run to the very end of the string.
                int last = str.Length - 1;
                if (str[last] != ']')
                    return false;

                // Everything strictly between '[' and the closing ']' is the key specification.
                string keySpec = str.Substring(open + 1, last - open - 1);
                keyCount = KeyCount.Parse(keySpec);
                if (keyCount == null)
                    return false;

                // Nothing precedes the bracket: only a key specification was given.
                if (open == 0)
                    return true;

                kindText = str.Substring(0, open);
            }

            // Case-insensitive match against the data kind names.
            return Enum.TryParse(kindText, true, out dataKind);
        }

        /// <summary>
        /// Builds a <see cref="KeyDataViewType"/> from a data kind and a key count.
        /// </summary>
        /// <param name="type">The raw data kind of the key; <see cref="InternalDataKind.U8"/> is used when null.</param>
        /// <param name="keyCount">The key count; must not be null. When it carries no explicit count,
        /// the maximum value reported for the raw type is used instead.</param>
        /// <returns>The constructed key type.</returns>
        public static KeyDataViewType ConstructKeyType(InternalDataKind? type, KeyCount keyCount)
        {
            Contracts.CheckValue(keyCount, nameof(keyCount));

            InternalDataKind kind = type ?? InternalDataKind.U8;
            Type rawType = kind.ToType();
            Contracts.CheckUserArg(KeyDataViewType.IsValidDataType(rawType), nameof(TextLoader.Column.Type), "Bad item type for Key");

            ulong? explicitCount = keyCount.Count;
            return explicitCount.HasValue
                ? new KeyDataViewType(rawType, explicitCount.Value)
                : new KeyDataViewType(rawType, rawType.ToMaxInt());
        }
    }
 
    /// <summary>
    /// Defines the cardinality, or count, of valid values of a <see cref="KeyDataViewType"/> column. This needs to be strictly positive.
    /// It is used by <see cref="TextLoader"/> and <see cref="TypeConvertingEstimator"/>.
    /// </summary>
    public sealed class KeyCount
    {
        /// <summary>
        /// Initializes the cardinality, or count, of valid values of a <see cref="KeyDataViewType"/> column to the
        /// largest integer that can be expressed by the underlying datatype of the <see cref="KeyDataViewType"/>.
        /// </summary>
        public KeyCount() { }

        /// <summary>
        /// Initializes the cardinality, or count, of valid values of a <see cref="KeyDataViewType"/> column to <paramref name="count"/>.
        /// </summary>
        /// <param name="count">The number of valid key values; must be strictly positive.</param>
        public KeyCount(ulong count)
        {
            if (count == 0)
                throw Contracts.ExceptParam(nameof(count), "The cardinality of valid values of a "
                    + nameof(KeyDataViewType) + " column has to be strictly positive.");
            Count = count;
        }

        /// <summary>
        /// The number of valid key values, or null when no explicit count was specified.
        /// </summary>
        [Argument(ArgumentType.AtMostOnce, HelpText = "Count of valid key values")]
        public ulong? Count;

        /// <summary>
        /// Parses the string format for a KeyCount, also supports the old KeyRange format for backwards compatibility.
        /// </summary>
        /// <param name="str">The text found between the brackets of a key specification; must not be null.</param>
        /// <returns>The parsed <see cref="KeyCount"/>, or null when <paramref name="str"/> is not in a recognized format.</returns>
        internal static KeyCount Parse(string str)
        {
            Contracts.AssertValue(str);

            var res = new KeyCount();
            return res.TryParse(str) ? res : null;
        }

        /// <summary>
        /// Parses an unsigned integer using the invariant culture, so that the textual key format
        /// is interpreted the same way regardless of the current thread culture.
        /// </summary>
        private static bool TryParseInvariant(string text, out ulong value)
        {
            return ulong.TryParse(text, System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture, out value);
        }

        /// <summary>
        /// Fills <see cref="Count"/> from <paramref name="str"/>. Accepts the new formats (empty, or <c>Max</c> as a count)
        /// and the old range formats (<c>Min-Max</c>, <c>Min-</c>, <c>Min-*</c>) where Min has to be zero.
        /// </summary>
        /// <returns>False when the text is not in a recognized format. Throws a decode exception when the text is
        /// well formed but describes an invalid key (non-zero Min, zero count, or a Max too large to convert to a count).</returns>
        private bool TryParse(string str)
        {
            Contracts.AssertValue(str);

            // This corresponds to the new format `[]`, with no specified Max.
            if (str.Length == 0)
                return true;

            // For backward compatibility we check for the old format that included a Min and looked like: `[Min-Max]`.
            int ich = str.IndexOf('-');
            if (0 <= ich)
            {
                // Parse Min and the dash, throw if Min is not zero.
                if (!TryParseInvariant(str.Substring(0, ich), out ulong min))
                    return false;
                if (min != 0)
                    throw Contracts.ExceptDecode("The minimum logical value of a " + nameof(KeyDataViewType) + " is required to be zero.");

                // The Max could be non defined or it could be an `*`.
                str = str.Substring(ich + 1);
                if (string.IsNullOrEmpty(str) || str == "*")
                    return true;
            }

            // This is the new format `[Max]`, or the Max part of the old format.
            if (!TryParseInvariant(str, out ulong tmp))
                return false;

            // The new string format for a key reflects KeyType.Count and expresses the cardinality/count of valid values.
            // The old format was a range with the max of the range equal to keyCount - 1.
            if (ich == -1)
                Count = tmp;
            else
            {
                // Max + 1 would wrap around to zero (or raise an OverflowException in a checked build),
                // so reject the one value for which the count is not representable.
                if (tmp == ulong.MaxValue)
                    throw Contracts.ExceptDecode("The maximum logical value of a " + nameof(KeyDataViewType)
                        + " is too large to be converted to a count of valid values.");
                Count = tmp + 1;
            }

            Contracts.CheckDecode(Count == null || Count > 0);
            return true;
        }

        /// <summary>
        /// Appends the textual form of this key count to <paramref name="sb"/>: the count itself when one is
        /// specified, nothing otherwise.
        /// </summary>
        /// <param name="sb">The builder to append to; must not be null.</param>
        /// <returns>Always true.</returns>
        internal bool TryUnparse(StringBuilder sb)
        {
            Contracts.AssertValue(sb);
            Contracts.Assert(Count == null || Count > 0);

            // Written with the invariant culture to mirror how the value is parsed.
            if (Count != null)
                sb.Append(Count.Value.ToString(System.Globalization.CultureInfo.InvariantCulture));
            return true;
        }
    }
}