// File: DataLoadSave\Binary\Header.cs
// Web Access
// Project: src\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj (Microsoft.ML.Data)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Runtime.InteropServices;
 
namespace Microsoft.ML.Data.IO
{
    [StructLayout(LayoutKind.Explicit, Size = HeaderSize)]
    internal struct Header
    {
        /// <summary>
        /// Size of the header in bytes. This is fixed, and must not be changed
        /// even by future versions of the format.
        /// </summary>
        public const int HeaderSize = 256;

        /// <summary>
        /// Signature that the header must begin with. When encoded little-endian
        /// this number appears as the eight-byte sequence "CML\0DVB\0".
        /// (CML DVB is meant to suggest CloudML DataView binary.)
        /// </summary>
        public const ulong SignatureValue = 0x00425644004C4D43;

        /// <summary>
        /// Value that the file must end with. It is simply the head signature
        /// with its byte order reversed.
        /// </summary>
        public const ulong TailSignatureValue = 0x434D4C0044564200;

        // History of writer versions:
        //   0x0001000100010001 - The first version of the format, not publicly released.
        //   0x0001000100010002 - Codec changes.
        //   0x0001000100010003 - Slot names.
        //   0x0001000100010004 - Column metadata.
        //   0x0001000100010005 - "NA" DvText support.
        //   0x0001000100010006 - Replace DvTypes with .NET Standard data types.

        /// <summary>
        /// The current version of the format that this software can write.
        /// </summary>
        public const ulong WriterVersion = 0x0001000100010006;

        // NOTE(review): undocumented in the original. Presumably this is the value
        // stored into CompatibleVersion when writing - confirm against the writer.
        public const ulong CanBeReadByVersion = 0x0001000100010006;

        /// <summary>
        /// Renders a packed version number as four dot-separated numbers, one per
        /// 16-bit group of <paramref name="v"/>, most significant group first.
        /// </summary>
        /// <param name="v">The packed 64-bit version value.</param>
        /// <returns>A string of the form "a.b.c.d".</returns>
        internal static string VersionToString(ulong v)
        {
            const ulong GroupMask = 0xffff;

            // Peel off the four 16-bit groups, from the highest bits down.
            ulong first = (v >> 48) & GroupMask;
            ulong second = (v >> 32) & GroupMask;
            ulong third = (v >> 16) & GroupMask;
            ulong fourth = v & GroupMask;

            return string.Format("{0}.{1}.{2}.{3}", first, second, third, fourth);
        }

        /// <summary>
        /// Magic number identifying this file.
        /// </summary>
        [FieldOffset(0)]
        public ulong Signature;

        /// <summary>
        /// Version of the data file.
        /// </summary>
        [FieldOffset(8)]
        public ulong Version;

        /// <summary>
        /// Minimum reader version able to interpret this file, possibly with
        /// some loss of data.
        /// </summary>
        [FieldOffset(16)]
        public ulong CompatibleVersion;

        /// <summary>
        /// Offset of the table of contents structure, which is where the column
        /// type definitions are stored.
        /// </summary>
        [FieldOffset(24)]
        public long TableOfContentsOffset;

        /// <summary>
        /// Offset at which the eight-byte tail signature begins. The entire dataset
        /// stream should therefore be considered to hold this value plus eight bytes.
        /// </summary>
        [FieldOffset(32)]
        public long TailOffset;

        /// <summary>
        /// Number of rows in this data file.
        /// </summary>
        [FieldOffset(40)]
        public long RowCount;

        /// <summary>
        /// Number of columns in this data file.
        /// </summary>
        [FieldOffset(48)]
        public int ColumnCount;

        // The declared fields end at byte 52; the rest of the structure, up to
        // HeaderSize (256) bytes, is padding.
    }
}