File: TextFileSampleTests.cs
Web Access
Project: src\test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj (Microsoft.ML.AutoML.Tests)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.IO;
using System.Text;
using Microsoft.ML.TestFramework;
using Xunit;
using Xunit.Abstractions;
 
namespace Microsoft.ML.AutoML.Test
{
 
    public class TextFileSampleTests : BaseTestClass
    {
        public TextFileSampleTests(ITestOutputHelper output) : base(output)
        {
        }
 
        [Fact]
        public void CanParseLargeRandomStream()
        {
            using (var stream = new MemoryStream())
            {
                const int numRows = 100000;
                const int rowSize = 100;
 
                var eol = Encoding.UTF8.GetBytes("\r\n");
 
                for (var i = 0; i < numRows; i++)
                {
                    var row = new byte[rowSize];
                    AutoMlUtils.Random.Value.NextBytes(row);
 
                    // ensure byte array has no 0s, so text file sampler doesn't
                    // think file is encoded with UTF-16 or UTF-32 without a BOM
                    for (var k = 0; k < row.Length; k++)
                    {
                        if (row[k] == 0)
                        {
                            row[k] = 1;
                        }
                    }
                    stream.Write(row, 0, rowSize);
                    stream.Write(eol, 0, eol.Length);
                }
 
                stream.Seek(0, SeekOrigin.Begin);
 
                var sample = TextFileSample.CreateFromFullStream(stream);
                Assert.NotNull(sample);
                Assert.True(sample.FullFileSize > 0);
            }
        }
    }
}