// File: DataIO.cs
// Web Access
// Project: src\test\Microsoft.ML.IntegrationTests\Microsoft.ML.IntegrationTests.csproj (Microsoft.ML.IntegrationTests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.IO;
using Microsoft.ML.IntegrationTests.Datasets;
using Microsoft.ML.TestFrameworkCommon;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.IntegrationTests
{
    /// <summary>
    /// Integration tests for data input and output formats: in-memory enumerables,
    /// delimited text files, and binary files.
    /// </summary>
    public class DataIO : IntegrationTestBaseClass
    {
        // Separators exercised by the delimited-file round-trip tests.
        // SaveAsText expects a "space, tab, comma, semicolon, or bar".
        private readonly char[] _separators = { ' ', '\t', ',', ';', '|' };

        public DataIO(ITestOutputHelper output)
            : base(output)
        {
        }

        /// <summary>
        /// Read from Enumerable: In-memory objects can be read as enumerables into an IDataView.
        /// </summary>
        [Fact]
        public void ReadFromIEnumerable()
        {
            var context = new MLContext(seed: 1);

            // Wrap the generated in-memory rows in an IDataView.
            var rows = TypeTestData.GenerateDataset();
            var view = context.Data.LoadFromEnumerable(rows);

            Common.AssertTypeTestDataset(view);
        }

        /// <summary>
        /// Export to Enumerable: IDataViews can be exported as enumerables of a class.
        /// </summary>
        [Fact]
        public void ExportToIEnumerable()
        {
            var context = new MLContext(seed: 1);

            // Load the generated rows into an IDataView.
            var expectedRows = TypeTestData.GenerateDataset();
            var view = context.Data.LoadFromEnumerable(expectedRows);

            // Convert the IDataView back into an enumerable of TypeTestData.
            var actualRows = context.Data.CreateEnumerable<TypeTestData>(view, reuseRowObject: true);

            Common.AssertEqual(expectedRows, actualRows);
        }

        /// <summary>
        /// Write to and read from a delimited file: Any DataKind can be written to and read from a delimited file.
        /// </summary>
        /// <remarks>
        /// Round-trips the dataset through a text file once per separator, reading it
        /// back with the loader supplied by <c>TypeTestData.GetTextLoader</c>.
        /// </remarks>
        [Fact]
        public void WriteToAndReadFromADelimetedFile()
        {
            var context = new MLContext(seed: 1);
            var original = context.Data.LoadFromEnumerable(TypeTestData.GenerateDataset());

            for (var i = 0; i < _separators.Length; i++)
            {
                var separator = _separators[i];

                // Save with this separator, then load the file back with a matching loader.
                var path = SerializeDatasetToFile(context, original, separator);
                var loader = TypeTestData.GetTextLoader(context, separator);
                var roundTripped = loader.Load(path);

                Common.AssertTestTypeDatasetsAreEqual(context, original, roundTripped);
            }
        }

        /// <summary>
        /// Write to and read from a delimited file: Schematized data of any DataKind can be read from a delimited file.
        /// </summary>
        /// <remarks>
        /// Round-trips the dataset through a text file once per separator, reading it
        /// back with <c>LoadFromTextFile</c> typed on <c>TypeTestData</c>.
        /// </remarks>
        [Fact]
        public void WriteToAndReadASchemaFromADelimitedFile()
        {
            var context = new MLContext(seed: 1);
            var original = context.Data.LoadFromEnumerable(TypeTestData.GenerateDataset());

            for (var i = 0; i < _separators.Length; i++)
            {
                var separator = _separators[i];

                // Save with this separator, then load the file back using the TypeTestData class.
                var path = SerializeDatasetToFile(context, original, separator);
                var roundTripped = context.Data.LoadFromTextFile<TypeTestData>(
                    path,
                    separatorChar: separator,
                    hasHeader: true,
                    allowQuoting: true);

                Common.AssertTestTypeDatasetsAreEqual(context, original, roundTripped);
            }
        }

        /// <summary>
        /// Write to and read from a binary file: Schematized data of any DataKind can be written to and read from a binary file.
        /// </summary>
        [Fact]
        public void WriteAndReadAFromABinaryFile()
        {
            var context = new MLContext(seed: 1);
            var original = context.Data.LoadFromEnumerable(TypeTestData.GenerateDataset());

            // Save in binary format, then load the file back.
            var path = SerializeDatasetToBinaryFile(context, original);
            var roundTripped = context.Data.LoadFromBinary(path);

            Common.AssertTestTypeDatasetsAreEqual(context, original, roundTripped);
        }

        /// <summary>
        /// Saves <paramref name="data"/> as delimited text, with a header row, to a
        /// randomly named file under <c>OutDir</c>.
        /// </summary>
        /// <param name="mlContext">Context whose data catalog performs the save.</param>
        /// <param name="data">The data view to write.</param>
        /// <param name="separator">Column separator character to use in the file.</param>
        /// <returns>The path of the file that was written.</returns>
        private string SerializeDatasetToFile(MLContext mlContext, IDataView data, char separator)
        {
            var path = CreateOutputFilePath();

            using (var stream = File.Create(path))
            {
                mlContext.Data.SaveAsText(data, stream, separatorChar: separator, headerRow: true);
            }

            return path;
        }

        /// <summary>
        /// Saves <paramref name="data"/> in binary format to a randomly named file
        /// under <c>OutDir</c>.
        /// </summary>
        /// <param name="mlContext">Context whose data catalog performs the save.</param>
        /// <param name="data">The data view to write.</param>
        /// <returns>The path of the file that was written.</returns>
        private string SerializeDatasetToBinaryFile(MLContext mlContext, IDataView data)
        {
            var path = CreateOutputFilePath();

            using (var stream = File.Create(path))
            {
                mlContext.Data.SaveAsBinary(data, stream);
            }

            return path;
        }

        /// <summary>
        /// Builds an output path under <c>OutDir</c> using a random file name.
        /// </summary>
        private string CreateOutputFilePath()
        {
            return TestCommon.GetOutputPath(OutDir, Path.GetRandomFileName());
        }
    }
}