|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Xunit;
using Xunit.Abstractions;
namespace Microsoft.Data.Analysis.Tests
{
public partial class DataFrameTests : BaseTestClass
{
/// <summary>
/// Exercises the cell, row and column indexers of a two-column frame, together with
/// <c>Head</c> and <c>Tail</c>.
/// </summary>
[Fact]
public void TestIndexer()
{
    DataFrame dataFrame = MakeDataFrameWithTwoColumns(length: 10);

    // Cell access by (row, column) and basic column metadata.
    Assert.Equal(0, dataFrame[0, 0]);
    Assert.Equal(11, dataFrame[1, 1]);
    Assert.Equal(2, dataFrame.Columns.Count);
    Assert.Equal("Int1", dataFrame.Columns[0].Name);

    var headList = dataFrame.Head(5);
    Assert.Equal(14, (int)headList.Rows[4][1]);

    var tailList = dataFrame.Tail(5);
    Assert.Equal(19, (int)tailList.Rows[4][1]);

    // A write through the cell indexer must be visible through the cell, row and column accessors.
    dataFrame[2, 1] = 1000;
    Assert.Equal(1000, dataFrame[2, 1]);

    var row = dataFrame.Rows[4];
    Assert.Equal(14, (int)row[1]);

    // Direct cast: a wrong column type fails right here with an InvalidCastException
    // instead of surfacing later as a NullReferenceException.
    var column = (Int32DataFrameColumn)dataFrame["Int2"];
    Assert.Equal(1000, (int)column[2]);

    // Unknown and null column names are rejected.
    Assert.Throws<ArgumentException>(() => dataFrame["Int5"]);
    Assert.Throws<ArgumentException>(() => dataFrame[(string)null]);
}
/// <summary>
/// After a column is inserted at the front, a column that was already present must
/// still be retrievable by its name.
/// </summary>
[Fact]
public void ColumnInsertTest()
{
    DataFrame frame = DataFrame.LoadCsvFromString("a1,a2\n1,2\n3,4");

    // Put a new column ahead of the two that came from the CSV text.
    var leadingColumn = DataFrameColumn.Create("a0", new int[] { 0, 0 });
    frame.Columns.Insert(0, leadingColumn);

    DataFrameColumn lookedUp = frame.Columns["a1"];
    Assert.Equal("a1", lookedUp.Name);
}
/// <summary>
/// Builds a frame column by column and checks the column collection's validation:
/// mismatched lengths, duplicate names and out-of-range indexes are rejected, while
/// replacing a column, removing a column and the params constructor all work.
/// </summary>
[Fact]
public void ColumnAndTableCreationTest()
{
    const int rowCount = 10;
    DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
    DataFrameColumn floatColumn = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
    DataFrame dataFrame = new DataFrame();
    dataFrame.Columns.Insert(0, intColumn);
    dataFrame.Columns.Insert(1, floatColumn);
    Assert.Equal(rowCount, dataFrame.Rows.Count);
    Assert.Equal(2, dataFrame.Columns.Count);
    Assert.Equal(2, dataFrame.Columns.LongCount());
    Assert.Equal(rowCount, dataFrame.Columns[0].Length);
    Assert.Equal("IntColumn", dataFrame.Columns[0].Name);
    Assert.Equal(rowCount, dataFrame.Columns[1].Length);
    Assert.Equal("FloatColumn", dataFrame.Columns[1].Name);

    // Add column with bigger length than other columns in the dataframe
    DataFrameColumn bigColumn = new SingleDataFrameColumn("BigColumn", Enumerable.Range(0, rowCount + 1).Select(x => (float)x));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, bigColumn));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(bigColumn));

    // Add column smaller than other columns in the dataframe
    DataFrameColumn smallColumn = new SingleDataFrameColumn("SmallColumn", Enumerable.Range(0, rowCount - 1).Select(x => (float)x));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, smallColumn));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(smallColumn));

    // Add column with duplicate name
    DataFrameColumn repeatedName = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, repeatedName));

    // Insert column at index out of range. The column used here has a matching length
    // and a unique name, so the index is the only thing that can be rejected.
    DataFrameColumn extraColumn = new SingleDataFrameColumn("OtherFloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
    var columnCount = dataFrame.Columns.Count;
    Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(columnCount + 1, extraColumn));
    Assert.Equal(2, dataFrame.Columns.Count);

    // Replacing a column through the indexer: a name already used by another column is
    // rejected, a fresh name is accepted.
    DataFrameColumn intColumnCopy = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
    Assert.Throws<ArgumentException>(() => dataFrame.Columns[1] = intColumnCopy);
    DataFrameColumn differentIntColumn = new Int32DataFrameColumn("IntColumn1", Enumerable.Range(0, rowCount).Select(x => x));
    dataFrame.Columns[1] = differentIntColumn;
    Assert.Same(differentIntColumn, dataFrame.Columns[1]);

    dataFrame.Columns.RemoveAt(1);
    Assert.Single(dataFrame.Columns);
    Assert.Same(intColumn, dataFrame.Columns[0]);

    // Test the params constructor
    DataFrame dataFrame1 = new DataFrame(intColumn, floatColumn);
    Assert.Equal(2, dataFrame1.Columns.Count);
    Assert.Equal(intColumn, dataFrame1.Columns[0]);
    Assert.Equal(floatColumn, dataFrame1.Columns[1]);
}
/// <summary>
/// Appends a column at the end of a populated frame, removes the first column, and
/// checks that both the new column and the former last column keep their identity
/// and end up at the expected shifted positions.
/// </summary>
[Fact]
public void InsertAndRemoveColumnToTheEndOfNotEmptyDataFrameTests()
{
    DataFrame frame = MakeDataFrameWithAllMutableColumnTypes(10);
    DataFrameColumn appended = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));
    int initialCount = frame.Columns.Count;
    DataFrameColumn formerLast = frame.Columns[initialCount - 1];

    // Insert at index == Count, i.e. after the current last column.
    frame.Columns.Insert(initialCount, appended);
    Assert.Equal(initialCount + 1, frame.Columns.Count);

    // Dropping the first column shifts every remaining column one slot to the left.
    frame.Columns.RemoveAt(0);
    Assert.Equal(initialCount, frame.Columns.Count);

    // Name lookup must return the very same instances that were put in.
    DataFrameColumn appendedByName = frame.Columns["NewIntColumn"];
    Assert.True(ReferenceEquals(appended, appendedByName));
    DataFrameColumn formerLastByName = frame.Columns[formerLast.Name];
    Assert.True(ReferenceEquals(formerLast, formerLastByName));

    // The new column is now last; the former last column sits just before it.
    int appendedIndex = frame.Columns.IndexOf("NewIntColumn");
    Assert.Equal(initialCount - 1, appendedIndex);
    int formerLastIndex = frame.Columns.IndexOf(formerLast.Name);
    Assert.Equal(initialCount - 2, formerLastIndex);
}
/// <summary>
/// Adding the first column to an empty frame sets its row count; removing that column
/// brings both the column count and the row count back to zero.
/// </summary>
[Fact]
public void AddAndRemoveColumnToTheEmptyDataFrameTests()
{
    var frame = new DataFrame();
    DataFrameColumn onlyColumn = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));

    frame.Columns.Add(onlyColumn);
    Assert.Single(frame.Columns);
    Assert.Equal(10, frame.Rows.Count);

    frame.Columns.Remove(onlyColumn);
    Assert.Empty(frame.Columns);
    Assert.Equal(0, frame.Rows.Count);
}
/// <summary>
/// Clearing the column collection of a populated frame leaves no columns and no rows.
/// </summary>
[Fact]
public void ClearColumnsTests()
{
    // Arrange
    DataFrame frame = MakeDataFrameWithAllMutableColumnTypes(10);

    // Act
    frame.Columns.Clear();

    // Assert
    Assert.Empty(frame.Columns);
    Assert.Equal(0, frame.Rows.Count);
    Assert.Equal(0, frame.Columns.LongCount());
}
/// <summary>
/// Renaming a column through <c>SetName</c> makes it reachable under the new name only,
/// and the frame still holds the same column instance.
/// </summary>
[Fact]
public void RenameColumnWithSetNameTests()
{
    var cityColumn = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
    var temperatureColumn = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });
    var frame = new DataFrame(cityColumn, temperatureColumn);

    // Rename via the column itself.
    frame["City"].SetName("Town");

    DataFrameColumn renamed = frame["Town"];

    // The old name no longer resolves.
    Assert.Throws<ArgumentException>(() => frame["City"]);
    Assert.NotNull(renamed);
    Assert.Equal("Town", renamed.Name);
    Assert.True(ReferenceEquals(cityColumn, renamed));
}
/// <summary>
/// Renaming a column through the collection's <c>RenameColumn</c> makes it reachable
/// under the new name only, and the frame still holds the same column instance.
/// </summary>
[Fact]
public void RenameColumnWithRenameColumnTests()
{
    var cityColumn = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
    var temperatureColumn = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });
    var frame = new DataFrame(cityColumn, temperatureColumn);

    // Rename via the column collection.
    frame.Columns.RenameColumn("City", "Town");

    DataFrameColumn renamed = frame["Town"];

    // The old name no longer resolves.
    Assert.Throws<ArgumentException>(() => frame["City"]);
    Assert.NotNull(renamed);
    Assert.Equal("Town", renamed.Name);
    Assert.True(ReferenceEquals(cityColumn, renamed));
}
/// <summary>
/// Applies the scalar-minus-column operator twice; the result must match the starting
/// column element by element.
/// </summary>
[Fact]
public void TestColumnReverseOrderState()
{
    var original = new Int32DataFrameColumn("Int", Enumerable.Range(0, 10));

    var flipped = 1 - original;
    var restored = 1 - flipped;

    Assert.True(restored.ElementwiseEquals(original).All());
}
/// <summary>
/// Computes a new column from two existing ones and stores it in the frame under a new name.
/// </summary>
[Fact]
public void TestProjectionAndAppend()
{
    DataFrame frame = MakeDataFrameWithTwoColumns(10);

    // Int3 = Int1 * 2 + Int2, built step by step.
    var doubledFirst = frame.Columns["Int1"] * 2;
    var projected = doubledFirst + frame.Columns["Int2"];
    frame["Int3"] = projected;

    Assert.Equal(16, frame.Columns["Int3"][2]);
}
/// <summary>
/// Sorts a 20-row frame by its "Int" and "String" columns, ascending and descending,
/// with null values placed last (the default) and first, and spot-checks the "Int"
/// column of each sorted result.
/// </summary>
[Fact]
public void TestOrderBy()
{
    DataFrame source = MakeDataFrameWithAllMutableColumnTypes(20);

    // Plant three easily recognisable values in the "Int" column.
    source.Columns["Int"][0] = 100;
    source.Columns["Int"][19] = -1;
    source.Columns["Int"][5] = 2000;

    // Sort by "Int" in ascending order and nulls last
    var intAscending = source.OrderBy("Int");
    DataFrameColumn ints = intAscending.Columns["Int"];
    Assert.Null(ints[19]);
    Assert.Equal(-1, ints[0]);
    Assert.Equal(100, ints[17]);
    Assert.Equal(2000, ints[18]);

    // Sort by "Int" in descending order and nulls last
    var intDescending = source.OrderByDescending("Int");
    ints = intDescending.Columns["Int"];
    Assert.Null(ints[19]);
    Assert.Equal(-1, ints[18]);
    Assert.Equal(100, ints[1]);
    Assert.Equal(2000, ints[0]);

    // Sort by "Int" in ascending order and nulls first
    var intAscendingNullsFirst = source.OrderBy("Int", putNullValuesLast: false);
    ints = intAscendingNullsFirst.Columns["Int"];
    Assert.Null(ints[0]);
    Assert.Equal(-1, ints[1]);
    Assert.Equal(100, ints[18]);
    Assert.Equal(2000, ints[19]);

    // Sort by "Int" in descending order and nulls first
    var intDescendingNullsFirst = source.OrderByDescending("Int", putNullValuesLast: false);
    ints = intDescendingNullsFirst.Columns["Int"];
    Assert.Null(ints[0]);
    Assert.Equal(-1, ints[19]);
    Assert.Equal(100, ints[2]);
    Assert.Equal(2000, ints[1]);

    // Sort by "String" in ascending order and nulls last
    var stringAscending = source.OrderBy("String");
    ints = stringAscending.Columns["Int"];
    Assert.Null(ints[19]);
    Assert.Equal(1, ints[1]);
    Assert.Equal(8, ints[17]);
    Assert.Equal(9, ints[18]);

    // Sort by "String" in descending order and nulls last
    var stringDescending = source.OrderByDescending("String");
    ints = stringDescending.Columns["Int"];
    Assert.Null(ints[19]);
    Assert.Equal(8, ints[1]);
    Assert.Equal(9, ints[0]);

    // Sort by "String" in ascending order and nulls first
    var stringAscendingNullsFirst = source.OrderBy("String", putNullValuesLast: false);
    ints = stringAscendingNullsFirst.Columns["Int"];
    Assert.Null(ints[0]);
    Assert.Equal(1, ints[2]);
    Assert.Equal(8, ints[18]);
    Assert.Equal(9, ints[19]);

    // Sort by "String" in descending order and nulls first
    var stringDescendingNullsFirst = source.OrderByDescending("String", putNullValuesLast: false);
    ints = stringDescendingNullsFirst.Columns["Int"];
    Assert.Null(ints[0]);
    Assert.Equal(8, ints[2]);
    Assert.Equal(9, ints[1]);
}
/// <summary>
/// Groups a numeric-and-bool frame by its "Bool" column and checks the grouped
/// aggregates: Count, First, Head, Tail, Max, Min, Product, Sum and Mean over the whole
/// frame, followed by the single-column overloads of several of them.
/// </summary>
[Fact]
public void TestGroupBy()
{
    DataFrame source = MakeDataFrameWithNumericAndBoolColumns(10);

    // Count: one row per group, the same count in every aggregated column.
    DataFrame counts = source.GroupBy("Bool").Count();
    Assert.Equal(2, counts.Rows.Count);
    Assert.Equal((long)5, counts.Columns["Int"][0]);
    Assert.Equal((long)4, counts.Columns["Decimal"][1]);
    for (int row = 0; row < counts.Rows.Count; row++)
    {
        long expectedCount = row == 0 ? 5 : 4;
        for (int col = 1; col < counts.Columns.Count; col++)
        {
            Assert.Equal(expectedCount, counts.Columns[col][row]);
        }
    }

    // First: the two result rows are compared against source rows 0 and 1.
    DataFrame firsts = source.GroupBy("Bool").First();
    Assert.Equal(2, firsts.Rows.Count);
    for (int row = 0; row < 2; row++)
    {
        for (int col = 0; col < counts.Columns.Count; col++)
        {
            DataFrameColumn sourceColumn = source.Columns[col];
            DataFrameColumn firstColumn = firsts.Columns[sourceColumn.Name];
            Assert.Equal(sourceColumn[row], firstColumn[row]);
        }
    }

    // Head(3): source rows 0..4 are located in the result through the lookup list below.
    DataFrame heads = source.GroupBy("Bool").Head(3);
    List<int> headRowForSourceRow = new List<int>() { 0, 3, 1, 4, 2, 5 };
    for (int row = 0; row < 5; row++)
    {
        for (int col = 0; col < counts.Columns.Count; col++)
        {
            DataFrameColumn sourceColumn = source.Columns[col];
            DataFrameColumn headColumn = heads.Columns[sourceColumn.Name];
            Assert.Equal(sourceColumn[row].ToString(), headColumn[headRowForSourceRow[row]].ToString());
        }
    }

    // The last lookup entry is compared against source row 7; the "Bool" key column is skipped.
    for (int col = 0; col < counts.Columns.Count; col++)
    {
        DataFrameColumn sourceColumn = source.Columns[col];
        if (sourceColumn.Name == "Bool")
        {
            continue;
        }

        DataFrameColumn headColumn = heads.Columns[sourceColumn.Name];
        Assert.Equal(sourceColumn[7], headColumn[headRowForSourceRow[5]]);
    }
    Assert.Equal(6, heads.Rows.Count);

    // Tail(3): pairs of (source row, result row) that must hold equal values.
    DataFrame tails = source.GroupBy("Bool").Tail(3);
    Assert.Equal(6, tails.Rows.Count);
    List<int> sourceRows = new List<int>() { 6, 8, 7, 9 };
    List<int> tailRows = new List<int>() { 1, 2, 4, 5 };
    for (int pair = 0; pair < 4; pair++)
    {
        for (int col = 0; col < counts.Columns.Count; col++)
        {
            DataFrameColumn sourceColumn = source.Columns[col];
            DataFrameColumn tailColumn = tails.Columns[sourceColumn.Name];
            Assert.Equal(sourceColumn[sourceRows[pair]].ToString(), tailColumn[tailRows[pair]].ToString());
        }
    }

    // Max: 8 for the first group and 9 for the second, compared as text; "Bool" and "Char" are skipped.
    DataFrame maxima = source.GroupBy("Bool").Max();
    Assert.Equal(2, maxima.Rows.Count);
    for (int row = 0; row < 2; row++)
    {
        string expectedMax = ((long)(row == 0 ? 8 : 9)).ToString();
        for (int col = 0; col < counts.Columns.Count; col++)
        {
            DataFrameColumn sourceColumn = source.Columns[col];
            if (sourceColumn.Name == "Bool" || sourceColumn.Name == "Char")
            {
                continue;
            }

            DataFrameColumn maxColumn = maxima.Columns[sourceColumn.Name];
            Assert.Equal(expectedMax, maxColumn[row].ToString());
        }
    }

    // Min, Product, Sum and Mean over the whole frame.
    DataFrame minima = source.GroupBy("Bool").Min();
    Assert.Equal(2, minima.Rows.Count);
    DataFrame products = source.GroupBy("Bool").Product();
    Assert.Equal(2, products.Rows.Count);
    DataFrame sums = source.GroupBy("Bool").Sum();
    Assert.Equal(2, sums.Rows.Count);
    DataFrame means = source.GroupBy("Bool").Mean();
    Assert.Equal(2, means.Rows.Count);
    for (int row = 0; row < 2; row++)
    {
        for (int col = 0; col < counts.Columns.Count; col++)
        {
            DataFrameColumn sourceColumn = source.Columns[col];
            if (sourceColumn.Name == "Bool" || sourceColumn.Name == "Char")
            {
                continue;
            }

            DataFrameColumn minColumn = minima.Columns[sourceColumn.Name];
            Assert.Equal(row == 0 ? "0" : "1", minColumn[row].ToString());
            DataFrameColumn productColumn = products.Columns[sourceColumn.Name];
            Assert.Equal("0", productColumn[row].ToString());
            DataFrameColumn sumColumn = sums.Columns[sourceColumn.Name];
            Assert.Equal("20", sumColumn[row].ToString());
        }
    }

    // Single-column overloads: each result has two columns.
    DataFrame intSum = source.GroupBy("Bool").Sum("Int");
    Assert.Equal(2, intSum.Columns.Count);
    Assert.Equal(20, intSum.Columns["Int"][0]);
    Assert.Equal(20, intSum.Columns["Int"][1]);

    DataFrame intMax = source.GroupBy("Bool").Max("Int");
    Assert.Equal(2, intMax.Columns.Count);
    Assert.Equal(8, intMax.Columns["Int"][0]);
    Assert.Equal(9, intMax.Columns["Int"][1]);

    DataFrame intProduct = source.GroupBy("Bool").Product("Int");
    Assert.Equal(2, intProduct.Columns.Count);
    Assert.Equal(0, intProduct.Columns["Int"][0]);
    Assert.Equal(0, intProduct.Columns["Int"][1]);

    DataFrame intMin = source.GroupBy("Bool").Min("Int");
    Assert.Equal(2, intMin.Columns.Count);
    Assert.Equal(0, intMin.Columns["Int"][0]);
    Assert.Equal(1, intMin.Columns["Int"][1]);

    DataFrame intCount = source.GroupBy("Bool").Count("Int");
    Assert.Equal(2, intCount.Columns.Count);
    Assert.Equal(2, intCount.Rows.Count);
    Assert.Equal((long)5, intCount.Columns["Int"][0]);
    Assert.Equal((long)4, intCount.Columns["Int"][1]);

    DataFrame decimalFirst = source.GroupBy("Bool").First("Decimal");
    Assert.Equal(2, decimalFirst.Columns.Count);
    Assert.Equal(2, decimalFirst.Rows.Count);
    Assert.Equal((decimal)0, decimalFirst.Columns["Decimal"][0]);
    Assert.Equal((decimal)1, decimalFirst.Columns["Decimal"][1]);
}
/// <summary>
/// Groups by "Column1" for each supported primitive element type of that column and
/// expects two groups every time.
/// </summary>
[Fact]
public void TestGroupByDifferentColumnTypes()
{
    // Groups the frame by "Column1", counts, and expects exactly two result rows.
    void GroupCountAndAssert(DataFrame frame)
    {
        DataFrame grouped = frame.GroupBy("Column1").Count();
        Assert.Equal(2, grouped.Rows.Count);
    }

    GroupCountAndAssert(MakeDataFrame<byte, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<char, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<decimal, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<double, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<float, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<int, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<long, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<sbyte, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<short, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<uint, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<ulong, bool>(10, false));
    GroupCountAndAssert(MakeDataFrame<ushort, bool>(10, false));
}
/// <summary>
/// Enumerates every column of a 10-row frame and then spot-checks the typed enumerators
/// of the string, Arrow string, float and int columns, rendering null as "&lt;null&gt;".
/// </summary>
[Fact]
public void TestIEnumerable()
{
    DataFrame frame = MakeDataFrameWithAllColumnTypes(10);

    // Every column must yield exactly one value per row when enumerated untyped.
    int enumeratedValues = 0;
    foreach (DataFrameColumn anyColumn in frame.Columns)
    {
        foreach (object ignored in anyColumn)
        {
            enumeratedValues++;
        }
    }
    Assert.Equal(10 * frame.Columns.Count, enumeratedValues);

    // spot check a few column types:
    var stringColumn = (StringDataFrameColumn)frame.Columns["String"];
    var stringText = new StringBuilder();
    foreach (string item in stringColumn)
    {
        stringText.Append(item ?? "<null>");
    }
    Assert.Equal("01234<null>6789", stringText.ToString());

    var arrowStringColumn = (ArrowStringDataFrameColumn)frame.Columns["ArrowString"];
    var arrowText = new StringBuilder();
    foreach (string item in arrowStringColumn)
    {
        arrowText.Append(item ?? "<null>");
    }
    Assert.Equal("foofoofoofoofoo<null>foofoofoofoo", arrowText.ToString());

    var floatColumn = (SingleDataFrameColumn)frame.Columns["Float"];
    var floatText = new StringBuilder();
    foreach (float? item in floatColumn)
    {
        if (item != null)
        {
            floatText.Append(item);
        }
        else
        {
            floatText.Append("<null>");
        }
    }
    Assert.Equal("01234<null>6789", floatText.ToString());

    var intColumn = (Int32DataFrameColumn)frame.Columns["Int"];
    var intText = new StringBuilder();
    foreach (int? item in intColumn)
    {
        if (item != null)
        {
            intText.Append(item);
        }
        else
        {
            intText.Append("<null>");
        }
    }
    Assert.Equal("01234<null>6789", intText.ToString());
}
/// <summary>
/// Clamps the "Int" column to [3, 7], first into a new column (the source must stay
/// untouched) and then in place.
/// </summary>
[Fact]
public void TestColumnClamp()
{
    DataFrame frame = MakeDataFrameWithNumericColumns(10);

    // Expected clamped value per row; null marks the row that is asserted to be null.
    object[] expectedClamped = { 3, 3, 3, 3, 4, null, 6, 7, 7, 7 };

    // Out of place
    DataFrameColumn clamped = frame.Columns["Int"].Clamp(3, 7);
    for (int row = 0; row < expectedClamped.Length; row++)
    {
        if (expectedClamped[row] is null)
        {
            Assert.Null(clamped[row]);
            Assert.Null(frame.Columns["Int"][row]);
        }
        else
        {
            Assert.Equal(expectedClamped[row], clamped[row]);

            // The source column still holds its original value, which equals the row index.
            Assert.Equal(row, frame.Columns["Int"][row]);
        }
    }

    // In place
    frame.Columns["Int"].Clamp(3, 7, true);
    for (int row = 0; row < expectedClamped.Length; row++)
    {
        if (expectedClamped[row] is null)
        {
            Assert.Null(frame.Columns["Int"][row]);
        }
        else
        {
            Assert.Equal(expectedClamped[row], frame.Columns["Int"][row]);
        }
    }
}
/// <summary>
/// Clamps a whole frame to [3, 7], out of place and in place. Numeric columns must be
/// clamped, every other column must be left unchanged, and the schema must be preserved.
/// </summary>
[Fact]
public void TestDataFrameClamp()
{
    DataFrame df = MakeDataFrameWithAllColumnTypes(10);
    IEnumerable<DataViewSchema.Column> dfColumns = ((IDataView)df).Schema;

    // Checks a clamped frame against the schema captured above and the expected values.
    void VerifyDataFrameClamp(DataFrame clampedFrame)
    {
        IEnumerable<DataViewSchema.Column> clampedColumns = ((IDataView)clampedFrame).Schema;
        Assert.Equal(df.Columns.Count, clampedFrame.Columns.Count);
        Assert.Equal(dfColumns, clampedColumns);
        for (int c = 0; c < df.Columns.Count; c++)
        {
            DataFrameColumn column = clampedFrame.Columns[c];
            if (column.IsNumericColumn())
            {
                // Values are compared as text so one expectation covers every numeric type.
                for (int i = 0; i < 4; i++)
                {
                    Assert.Equal("3", column[i].ToString());
                }
                Assert.Equal(4.ToString(), column[4].ToString());
                Assert.Null(column[5]);
                Assert.Equal(6.ToString(), column[6].ToString());
                for (int i = 7; i < 10; i++)
                {
                    Assert.Equal("7", column[i].ToString());
                }
            }
            else
            {
                // Non-numeric columns must equal the corresponding column of df.
                for (int i = 0; i < column.Length; i++)
                {
                    Assert.Equal(df.Columns[c][i], column[i]);
                }
            }
        }
    }

    // Out of place
    DataFrame clamped = df.Clamp(3, 7);
    VerifyDataFrameClamp(clamped);

    // The source frame must be untouched by the out-of-place clamp.
    for (int i = 0; i < 10; i++)
    {
        if (i != 5)
        {
            Assert.Equal(i, df.Columns["Int"][i]);
        }
        else
        {
            Assert.Null(df.Columns["Int"][5]);
        }
    }

    // Inplace
    df.Clamp(3, 7, true);
    VerifyDataFrameClamp(df);
}
/// <summary>
/// Checks <c>AddPrefix</c> and <c>AddSuffix</c> in both modes: the out-of-place call
/// returns a renamed frame and leaves the source alone, the in-place call renames the
/// source itself. Column names are read through the frame's <c>IDataView</c> schema.
/// </summary>
[Fact]
public void TestPrefixAndSuffix()
{
    DataFrame df = MakeDataFrameWithAllColumnTypes(10);

    // Schema captured before any renaming; used below as the source of the original names.
    IEnumerable<DataViewSchema.Column> columnNames = ((IDataView)df).Schema;

    DataFrame prefix = df.AddPrefix("Prefix_");
    IEnumerable<DataViewSchema.Column> prefixNames = ((IDataView)prefix).Schema;

    // Out of place: the source frame keeps its names...
    foreach ((DataViewSchema.Column original, DataViewSchema.Column current) in columnNames.Zip(((IDataView)df).Schema, (e1, e2) => (e1, e2)))
    {
        Assert.Equal(original.Name, current.Name);
    }

    // ...and the returned frame carries the prefix.
    foreach ((DataViewSchema.Column prefixed, DataViewSchema.Column original) in prefixNames.Zip(columnNames, (e1, e2) => (e1, e2)))
    {
        Assert.Equal("Prefix_" + original.Name, prefixed.Name);
    }

    // Inplace
    df.AddPrefix("Prefix_", true);
    prefixNames = ((IDataView)df).Schema;
    foreach ((DataViewSchema.Column original, DataViewSchema.Column prefixed) in columnNames.Zip(prefixNames, (e1, e2) => (e1, e2)))
    {
        Assert.Equal("Prefix_" + original.Name, prefixed.Name);
    }

    DataFrame suffix = df.AddSuffix("_Suffix");
    IEnumerable<DataViewSchema.Column> suffixNames = ((IDataView)suffix).Schema;

    // Out of place: the source frame still has only the prefix...
    foreach ((DataViewSchema.Column current, DataViewSchema.Column original) in ((IDataView)df).Schema.Zip(columnNames, (e1, e2) => (e1, e2)))
    {
        Assert.Equal("Prefix_" + original.Name, current.Name);
    }

    // ...and the returned frame carries both prefix and suffix.
    foreach ((DataViewSchema.Column original, DataViewSchema.Column suffixed) in columnNames.Zip(suffixNames, (e1, e2) => (e1, e2)))
    {
        Assert.Equal("Prefix_" + original.Name + "_Suffix", suffixed.Name);
    }

    // InPlace
    df.AddSuffix("_Suffix", true);
    suffixNames = ((IDataView)df).Schema;
    foreach ((DataViewSchema.Column original, DataViewSchema.Column suffixed) in columnNames.Zip(suffixNames, (e1, e2) => (e1, e2)))
    {
        Assert.Equal("Prefix_" + original.Name + "_Suffix", suffixed.Name);
    }
}
/// <summary>
/// Sampling returns the requested number of distinct rows with all columns, and
/// rejects a sample size larger than the frame.
/// </summary>
[Fact]
public void TestSample()
{
    DataFrame source = MakeDataFrameWithAllColumnTypes(10);

    DataFrame sample = source.Sample(7);
    Assert.Equal(7, sample.Rows.Count);
    Assert.Equal(source.Columns.Count, sample.Columns.Count);

    // all sampled rows should be unique.
    var distinctInts = new HashSet<int?>();
    foreach (int? sampledInt in sample.Columns["Int"])
    {
        distinctInts.Add(sampledInt);
    }
    Assert.Equal(distinctInts.Count, sample.Rows.Count);

    // should throw exception as sample size is greater than dataframe rows
    Assert.Throws<ArgumentException>(() => source.Sample(13));
}
/// <summary>
/// Checks the summary frame produced by <c>Description()</c>: a leading label column,
/// one four-row column per source column, and a trailing "DateTime" column that only
/// reports a length.
/// </summary>
[Fact]
public void TestDescription()
{
    DataFrame source = MakeDataFrameWithAllMutableColumnTypes(10);
    DataFrame summary = source.Description();

    // Column 0 labels the four statistics.
    DataFrameColumn labels = summary.Columns[0];
    Assert.Equal("Description", labels.Name);
    Assert.Equal("Length (excluding null values)", labels[0]);
    Assert.Equal("Max", labels[1]);
    Assert.Equal("Min", labels[2]);
    Assert.Equal("Mean", labels[3]);

    // Summary column i describes source column i - 1; the last column is checked separately below.
    int lastIndex = summary.Columns.Count - 1;
    for (int i = 1; i < lastIndex; i++)
    {
        DataFrameColumn stats = summary.Columns[i];
        Assert.Equal(source.Columns[i - 1].Name, stats.Name);
        Assert.Equal(4, stats.Length);
        Assert.Equal(9f, stats[0]);
        Assert.Equal(9f, stats[1]);
        Assert.Equal(0f, stats[2]);
        Assert.Equal(4f, stats[3]);
    }

    // Explicitly check the dateTimes column
    DataFrameColumn dateTimeStats = summary.Columns[lastIndex];
    Assert.Equal("DateTime", dateTimeStats.Name);
    Assert.Equal(4, dateTimeStats.Length);
    Assert.Equal(9f, dateTimeStats[0]);
    Assert.Null(dateTimeStats[1]);
    Assert.Null(dateTimeStats[2]);
    Assert.Null(dateTimeStats[3]);
}
/// <summary>
/// Checks the frame produced by <c>Info()</c>: a leading label column followed by one
/// two-row column per source column whose first row is the source column's data type.
/// </summary>
[Fact]
public void TestInfo()
{
    DataFrame df = MakeDataFrameWithAllMutableColumnTypes(10);

    // Add a column manually here until we fix https://github.com/dotnet/corefxlab/issues/2784
    PrimitiveDataFrameColumn<DateTime> dateTimes = new PrimitiveDataFrameColumn<DateTime>("DateTimes");

    // Built with the constructor rather than DateTime.Parse so the value does not depend
    // on the culture the test runs under; created once because it is the same for every row.
    DateTime firstDayOf2019 = new DateTime(2019, 1, 1);
    for (int i = 0; i < 10; i++)
    {
        dateTimes.Append(firstDayOf2019);
    }
    df.Columns.Add(dateTimes);

    DataFrame info = df.Info();
    DataFrameColumn infoColumn = info.Columns[0];
    Assert.Equal("Info", infoColumn.Name);
    Assert.Equal("Length (excluding null values)", infoColumn[1]);
    Assert.Equal("DataType", infoColumn[0]);

    // Info column i describes source column i - 1.
    for (int i = 1; i < info.Columns.Count; i++)
    {
        DataFrameColumn column = info.Columns[i];
        Assert.Equal(df.Columns[i - 1].DataType.ToString(), column[0].ToString());
        Assert.Equal(2, column.Length);
    }
}
/// <summary>
/// <c>DropNulls()</c> with default options removes two of the 20 rows, while
/// <c>DropNullOptions.All</c> removes only one.
/// </summary>
[Fact]
public void TestDropNulls()
{
    // Create dataframe with 20 rows, where 1 row has only 1 null value and 1 row has all null values
    DataFrame source = MakeDataFrameWithAllMutableColumnTypes(20);
    source[0, 0] = null;

    DataFrame withoutAnyNullRows = source.DropNulls();
    Assert.Equal(18, withoutAnyNullRows.Rows.Count);

    DataFrame withoutAllNullRows = source.DropNulls(DropNullOptions.All);
    Assert.Equal(19, withoutAllNullRows.Rows.Count);
}
/// <summary>
/// Once a frame holds a column created without values, inserting a column that has
/// five values must be rejected.
/// </summary>
[Fact]
public void TestInsertMismatchedColumnToEmptyDataFrame()
{
    var frame = new DataFrame();
    DataFrameColumn valuelessColumn = new Int32DataFrameColumn("Int1");
    frame.Columns.Insert(0, valuelessColumn);

    // should throw exception as column sizes are mismatched.
    Assert.Throws<ArgumentException>(
        () => frame.Columns.Insert(1, new Int32DataFrameColumn("Int2", Enumerable.Range(0, 5).Select(x => x))));
}
/// <summary>
/// Checks <c>FillNulls</c> on a whole frame, on a string column and on an Arrow string
/// column. The out-of-place form must leave its source untouched; the in-place form
/// must modify it.
/// </summary>
[Fact]
public void TestFillNulls()
{
    // Whole frame.
    DataFrame frame = MakeDataFrameWithTwoColumns(20);
    Assert.Null(frame[10, 0]);

    DataFrame filledFrame = frame.FillNulls(1000);
    Assert.Equal(1000, (int)filledFrame[10, 1]);
    Assert.Null(frame[10, 0]);

    frame.FillNulls(1000, true);
    Assert.Equal(1000, frame[10, 1]);

    // String column holding only nulls.
    var strings = new StringDataFrameColumn("String", 0);
    strings.Append(null);
    strings.Append(null);
    Assert.Equal(2, strings.Length);
    Assert.Equal(2, strings.NullCount);

    DataFrameColumn filledStrings = strings.FillNulls("foo");
    Assert.Equal(2, strings.Length);
    Assert.Equal(2, strings.NullCount);
    Assert.Equal(2, filledStrings.Length);
    Assert.Equal(0, filledStrings.NullCount);
    Assert.Equal("foo", filledStrings[0]);
    Assert.Equal("foo", filledStrings[1]);
    Assert.Null(strings[0]);
    Assert.Null(strings[1]);

    // In place
    strings.FillNulls("foo", true);
    Assert.Equal(2, strings.Length);
    Assert.Equal(0, strings.NullCount);
    Assert.Equal("foo", strings[0]);
    Assert.Equal("foo", strings[1]);

    // ArrowStringColumn (not inplace)
    ArrowStringDataFrameColumn arrowStrings = CreateArrowStringColumn(3);
    Assert.Equal(3, arrowStrings.Length);
    Assert.Equal(1, arrowStrings.NullCount);
    Assert.Null(arrowStrings[1]);

    ArrowStringDataFrameColumn filledArrowStrings = arrowStrings.FillNulls("foo");
    Assert.Equal(3, arrowStrings.Length);
    Assert.Equal(1, arrowStrings.NullCount);
    Assert.Equal(3, filledArrowStrings.Length);
    Assert.Equal(0, filledArrowStrings.NullCount);
    Assert.Equal("foo", filledArrowStrings[1]);
    Assert.Equal(arrowStrings[0], filledArrowStrings[0]);
    Assert.Equal(arrowStrings[2], filledArrowStrings[2]);
}
/// <summary>
/// <c>ValueCounts</c> on the "Bool" column of a null-free 10-row frame reports two
/// distinct values with five occurrences each.
/// </summary>
[Fact]
public void TestValueCounts()
{
    DataFrame source = MakeDataFrameWithAllColumnTypes(10, withNulls: false);

    DataFrame tally = source.Columns["Bool"].ValueCounts();

    Assert.Equal(2, tally.Rows.Count);
    DataFrameColumn occurrences = tally.Columns["Counts"];
    Assert.Equal((long)5, occurrences[0]);
    Assert.Equal((long)5, occurrences[1]);
}
#pragma warning disable CS0612, CS0618 // Type or member is obsolete
/// <summary>
/// <c>ApplyElementwise</c> must keep the column's <c>NullCount</c> in step with the
/// values its callback returns, in all four value/null transitions.
/// </summary>
[Fact]
public void TestApplyElementwiseNullCount()
{
    DataFrame frame = MakeDataFrameWithTwoColumns(10);
    Int32DataFrameColumn ints = frame.Columns["Int1"] as Int32DataFrameColumn;
    Assert.Equal(1, ints.NullCount);

    // Change all existing values to null
    ints.ApplyElementwise((int? value, long rowIndex) => value is null ? value : null);
    Assert.Equal(ints.Length, ints.NullCount);

    // Don't change null values
    ints.ApplyElementwise((int? value, long rowIndex) => value);
    Assert.Equal(ints.Length, ints.NullCount);

    // Change all null values to real values
    ints.ApplyElementwise((int? value, long rowIndex) => 5);
    Assert.Equal(0, ints.NullCount);

    // Don't change real values
    ints.ApplyElementwise((int? value, long rowIndex) => value);
    Assert.Equal(0, ints.NullCount);
}
#pragma warning restore CS0612, CS0618 // Type or member is obsolete
/// <summary>
/// Indexes a frame with an int column and checks that the result has one row per
/// index entry, all source columns, and that result row r equals source row r.
/// </summary>
[Theory]
[InlineData(10, 5)]
[InlineData(20, 20)]
public void TestClone(int dfLength, int intDfLength)
{
    DataFrame source = MakeDataFrameWithAllColumnTypes(dfLength, withNulls: true);

    // The "Int1" column of a second, null-free frame is used as the indexer argument.
    DataFrame indexFrame = MakeDataFrameWithTwoColumns(intDfLength, false);
    Int32DataFrameColumn rowIndices = indexFrame.Columns["Int1"] as Int32DataFrameColumn;

    DataFrame selected = source[rowIndices];

    Assert.Equal(intDfLength, selected.Rows.Count);
    Assert.Equal(source.Columns.Count, selected.Columns.Count);
    for (int col = 0; col < source.Columns.Count; col++)
    {
        DataFrameColumn sourceColumn = source.Columns[col];
        DataFrameColumn selectedColumn = selected.Columns[col];
        for (long row = 0; row < selected.Rows.Count; row++)
        {
            Assert.Equal(sourceColumn[row], selectedColumn[row]);
        }
    }
}
/// <summary>
/// Builds a boolean column from the result of an element-wise "greater than 5"
/// comparison on the "Int" column and checks every row.
/// </summary>
[Fact]
public void TestColumnCreationFromExisitingColumn()
{
    DataFrame source = MakeDataFrameWithAllColumnTypes(10);
    var aboveFive = new BooleanDataFrameColumn("BigInts", source.Columns["Int"].ElementwiseGreaterThan(5));

    for (int row = 0; row < 10; row++)
    {
        // Only rows 6..9 are expected to be flagged.
        if (row > 5)
        {
            Assert.True(aboveFive[row]);
        }
        else
        {
            Assert.False(aboveFive[row]);
        }
    }
}
/// <summary>
/// Viewing the column collection as a read-only list must expose the same count and
/// enumerate the columns in indexer order.
/// </summary>
[Fact]
public void TestColumns()
{
    DataFrame frame = MakeDataFrameWithAllColumnTypes(10);
    IReadOnlyList<DataFrameColumn> readOnlyView = frame.Columns;

    Assert.Equal(readOnlyView.Count, frame.Columns.Count);

    int position = 0;
    foreach (DataFrameColumn enumerated in readOnlyView)
    {
        Assert.Equal(enumerated, frame.Columns[position]);
        position++;
    }
}
/// <summary>
/// Reads the frame through its row collection, checks every cell against the column
/// indexer, and checks that writing through a row updates the column's null count.
/// </summary>
[Fact]
public void TestRows()
{
    DataFrame frame = MakeDataFrameWithAllColumnTypes(10);
    DataFrameRowCollection allRows = frame.Rows;
    Assert.Equal(10, allRows.Count);

    DataFrameRow topRow = allRows[0];
    object topLeft = topRow[0];
    Assert.Equal(frame[0, 0], topLeft);

    // Each row must enumerate its cells in column order.
    long visitedRows = 0;
    foreach (DataFrameRow current in allRows)
    {
        int col = 0;
        foreach (var cell in current)
        {
            Assert.Equal(frame.Columns[col][visitedRows], cell);
            col++;
        }
        visitedRows++;
    }
    Assert.Equal(frame.Rows.Count, visitedRows);

    // Row 5 holds the single null of the "Int" column: fill it, then null it again.
    DataFrameRow rowWithNull = allRows[5];
    int intIndex = frame.Columns.IndexOf("Int");
    Assert.Equal(1, frame.Columns[intIndex].NullCount);
    rowWithNull[intIndex] = 5;
    Assert.Equal(0, frame.Columns[intIndex].NullCount);
    rowWithNull[intIndex] = null;
    Assert.Equal(1, frame.Columns[intIndex].NullCount);
}
/// <summary>
/// Writes the value 12, converted to each column's data type, into every cell through
/// the row collection, then reads it back through the columns.
/// </summary>
[Fact]
public void TestMutationOnRows()
{
    DataFrame frame = MakeDataFrameWithNumericColumns(10);

    foreach (DataFrameRow current in frame.Rows)
    {
        for (int col = 0; col < frame.Columns.Count; col++)
        {
            // Convert to the column's own element type before assigning.
            Type cellType = frame.Columns[col].DataType;
            current[col] = Convert.ChangeType(12, cellType);
        }
    }

    foreach (var column in frame.Columns)
    {
        foreach (var cell in column)
        {
            Assert.Equal("12", cell.ToString());
        }
    }
}
/// <summary>
/// Appends the rows of a 5-row frame to a 10-row frame, first out of place (the target
/// must stay unchanged) and then in place.
/// </summary>
[Fact]
public void TestAppendRows()
{
    // Checks that `combined` consists of the rows of `head` followed by the rows of `tail`.
    void Verify(DataFrame combined, DataFrame head, DataFrame tail)
    {
        Assert.Equal(15, combined.Rows.Count);
        Assert.Equal(2, combined.Columns[0].NullCount);
        Assert.Equal(2, combined.Columns[1].NullCount);
        for (long i = 0; i < combined.Rows.Count; i++)
        {
            DataFrameRow row = combined.Rows[i];
            for (int j = 0; j < head.Columns.Count; j++)
            {
                if (i < head.Rows.Count)
                {
                    Assert.Equal(head.Rows[i][j], row[j]);
                }
                else
                {
                    // Compared as text: the appended frame is built with int where the
                    // target frame uses float.
                    Assert.Equal((tail.Rows[i - head.Rows.Count][j])?.ToString(), row[j]?.ToString());
                }
            }
        }
    }

    DataFrame df = MakeDataFrame<float, bool>(10);
    DataFrame df2 = MakeDataFrame<int, bool>(5);
    Assert.Equal(10, df.Rows.Count);
    Assert.Equal(1, df.Columns[0].NullCount);
    Assert.Equal(1, df.Columns[1].NullCount);

    // Out of place: df keeps its rows and null counts, the returned frame holds both sets.
    DataFrame ret = df.Append(df2.Rows, inPlace: false);
    Assert.Equal(10, df.Rows.Count);
    Assert.Equal(1, df.Columns[0].NullCount);
    Assert.Equal(1, df.Columns[1].NullCount);
    Verify(ret, df, df2);

    // In place: clone first so the pre-append rows are still available for comparison.
    DataFrame dfClone = df.Clone();
    df.Append(df2.Rows, inPlace: true);
    Verify(df, dfClone, df2);
}
/// <summary>
/// Appends rows from a frame whose "ColumnB" and "ColumnC" are declared in the opposite
/// order and checks that each value ends up under the column with the matching name.
/// </summary>
[Fact]
public void TestAppendRowsIfColumnAreOutOfOrder()
{
    var target = new DataFrame(
        new StringDataFrameColumn("ColumnA", new string[] { "a", "b", "c" }),
        new Int32DataFrameColumn("ColumnB", new int[] { 1, 2, 3 }),
        new Int32DataFrameColumn("ColumnC", new int[] { 10, 20, 30 }));

    // ColumnC and ColumnB are swapped
    var reordered = new DataFrame(
        new StringDataFrameColumn("ColumnA", new string[] { "d", "e", "f" }),
        new Int32DataFrameColumn("ColumnC", new int[] { 40, 50, 60 }),
        new Int32DataFrameColumn("ColumnB", new int[] { 4, 5, 6 }));

    var combined = target.Append(reordered.Rows);

    Assert.Equal(3, combined.Columns.Count);
    Assert.Equal(6, combined.Rows.Count);

    // Row 2 is the last original row, row 3 the first appended one.
    Assert.Equal("c", combined["ColumnA"][2]);
    Assert.Equal("d", combined["ColumnA"][3]);
    Assert.Equal(3, combined["ColumnB"][2]);
    Assert.Equal(4, combined["ColumnB"][3]);
    Assert.Equal(30, combined["ColumnC"][2]);
    Assert.Equal(40, combined["ColumnC"][3]);
}
/// <summary>
/// Exercises single-row Append with list, dictionary, null and omitted arguments, both in place
/// and as a copy, checking row counts, column lengths and null counts after every step.
/// Also checks that the frame is still appendable after Append has thrown.
/// </summary>
[Fact]
public void TestAppendRow()
{
    DataFrame frame = MakeDataFrame<int, bool>(10);

    // Complete row given as a list.
    frame.Append(new List<object> { 5, true }, inPlace: true);
    AssertShape(frame, 11, 1, 1);
    DataFrame copy = frame.Append(new List<object> { 5, true });
    AssertShape(copy, 12, 1, 1);

    // List shorter than the column count: the second column gains a null.
    frame.Append(new List<object> { 100 }, inPlace: true);
    AssertShape(frame, 12, 1, 2);
    copy = frame.Append(new List<object> { 100 }, inPlace: false);
    AssertShape(copy, 13, 1, 3);

    // Explicit nulls for both columns.
    frame.Append(new List<object> { null, null }, inPlace: true);
    AssertShape(frame, 13, 2, 3);
    copy = frame.Append(new List<object> { null, null }, inPlace: false);
    AssertShape(copy, 14, 3, 4);

    // Complete row given as name/value pairs.
    frame.Append(new Dictionary<string, object> { ["Column1"] = 5, ["Column2"] = false }, inPlace: true);
    AssertShape(frame, 14, 2, 3);
    copy = frame.Append(new Dictionary<string, object> { ["Column1"] = 5, ["Column2"] = false }, inPlace: false);
    AssertShape(copy, 15, 2, 3);

    // Only Column1 supplied: Column2 gains a null but both columns grow together.
    frame.Append(new Dictionary<string, object> { ["Column1"] = 5 }, inPlace: true);
    AssertShapeAndLengths(frame, 15, 2, 4);
    copy = frame.Append(new Dictionary<string, object> { ["Column1"] = 5 }, inPlace: false);
    AssertShapeAndLengths(copy, 16, 2, 5);

    // Only Column2 supplied: Column1 gains a null.
    frame.Append(new Dictionary<string, object> { ["Column2"] = false }, inPlace: true);
    AssertShapeAndLengths(frame, 16, 3, 4);
    copy = frame.Append(new Dictionary<string, object> { ["Column2"] = false }, inPlace: false);
    AssertShapeAndLengths(copy, 17, 4, 4);

    // A null row adds one null to each column.
    frame.Append((IEnumerable<object>)null, inPlace: true);
    AssertShapeAndLengths(frame, 17, 4, 5);
    copy = frame.Append((IEnumerable<object>)null, inPlace: false);
    AssertShapeAndLengths(copy, 18, 5, 6);

    // DataFrame must remain usable even if Append throws
    Assert.Throws<FormatException>(() => frame.Append(new List<object> { 5, "str" }, inPlace: true));
    Assert.Throws<FormatException>(() => frame.Append(new Dictionary<string, object> { ["Column2"] = "str" }, inPlace: true));
    Assert.Throws<ArgumentException>(() => frame.Append(new List<object> { 5, true, true }, inPlace: true));

    // Row argument omitted entirely.
    frame.Append(inPlace: true);
    AssertShapeAndLengths(frame, 18, 5, 6);
    copy = frame.Append(inPlace: false);
    AssertShapeAndLengths(frame, 18, 5, 6);
    AssertShapeAndLengths(copy, 19, 6, 7);

    // Checks the row count and the null counts of the first two columns.
    void AssertShape(DataFrame actual, long rows, long firstNulls, long secondNulls)
    {
        Assert.Equal(rows, actual.Rows.Count);
        Assert.Equal(firstNulls, actual.Columns[0].NullCount);
        Assert.Equal(secondNulls, actual.Columns[1].NullCount);
    }

    // Like AssertShape, and also checks that both named columns are as long as the frame.
    void AssertShapeAndLengths(DataFrame actual, long rows, long firstNulls, long secondNulls)
    {
        Assert.Equal(rows, actual.Rows.Count);
        Assert.Equal(rows, actual.Columns["Column1"].Length);
        Assert.Equal(rows, actual.Columns["Column2"].Length);
        Assert.Equal(firstNulls, actual.Columns[0].NullCount);
        Assert.Equal(secondNulls, actual.Columns[1].NullCount);
    }
}
/// <summary>
/// Checks how Append counts empty strings: the first column reports an additional null after
/// receiving "", while the string column reports none for "" and one for an explicit null.
/// </summary>
[Fact]
public void TestAppendEmptyValue()
{
    DataFrame frame = MakeDataFrame<int, bool>(10);

    // Empty string aimed at the first column.
    var rowWithEmptyFirstCell = new List<object> { "", true };
    frame.Append(rowWithEmptyFirstCell, inPlace: true);
    Assert.Equal(11, frame.Rows.Count);
    Assert.Equal(2, frame.Columns[0].NullCount);
    Assert.Equal(1, frame.Columns[1].NullCount);

    // Add a string column matching the current row count, then append "" to it.
    IEnumerable<string> labels = Enumerable.Range(0, 11).Select(n => n.ToString());
    frame.Columns.Add(new StringDataFrameColumn("Strings", labels));

    var rowWithEmptyString = new List<object> { 1, true, "" };
    frame.Append(rowWithEmptyString, inPlace: true);
    Assert.Equal(12, frame.Rows.Count);
    Assert.Equal(2, frame.Columns[0].NullCount);
    Assert.Equal(1, frame.Columns[1].NullCount);
    Assert.Equal(0, frame.Columns[2].NullCount);

    // An explicit null in the string column is counted.
    var rowWithNullString = new List<object> { 1, true, null };
    frame.Append(rowWithNullString, inPlace: true);
    Assert.Equal(13, frame.Rows.Count);
    Assert.Equal(1, frame.Columns[2].NullCount);
}
/// <summary>
/// Checks that Apply on an Int32 column produces a new double column holding each source value
/// plus 0.5, and that the source column still holds its original values afterwards.
/// </summary>
[Fact]
// Obsolete-member warnings are suppressed for this test only; the matching restore after the
// method keeps the suppression from leaking into the rest of the file.
#pragma warning disable CS0612, CS0618 // Type or member is obsolete
public void TestApply()
{
    int[] values = { 1, 2, 3, 4, 5 };
    var col = new Int32DataFrameColumn("Ints", values);
    PrimitiveDataFrameColumn<double> newCol = col.Apply(i => i + 0.5d);
    Assert.Equal(values.Length, newCol.Length);
    for (int i = 0; i < newCol.Length; i++)
    {
        // Expected value goes first so a failure message labels the two sides correctly.
        Assert.Equal(values[i], col[i]); // Make sure values didn't change
        Assert.Equal(values[i] + 0.5d, newCol[i]);
    }
}
#pragma warning restore CS0612, CS0618 // Type or member is obsolete
/// <summary>
/// Builds a column through DataFrameColumn.Create for string and each numeric element type used
/// below, and checks the reported data type, the length, and that element i renders as i.
/// </summary>
[Fact]
public void TestDataFrameCreate()
{
    const int expectedLength = 10;

    // Source sequence 0..expectedLength-1 that every column is projected from.
    IEnumerable<int> Indices() => Enumerable.Range(0, expectedLength);

    // Verifies data type, length and the string form of every element.
    void CheckColumn(DataFrameColumn created, Type expectedType)
    {
        Assert.Equal(created.DataType, expectedType);
        Assert.Equal(expectedLength, created.Length);

        long position = 0;
        while (position < created.Length)
        {
            Assert.Equal(position.ToString(), created[position].ToString());
            position++;
        }
    }

    CheckColumn(DataFrameColumn.Create("String", Indices().Select(n => n.ToString())), typeof(string));
    CheckColumn(DataFrameColumn.Create("Byte", Indices().Select(n => (byte)n)), typeof(byte));
    CheckColumn(DataFrameColumn.Create("Decimal", Indices().Select(n => (decimal)n)), typeof(decimal));
    CheckColumn(DataFrameColumn.Create("Double", Indices().Select(n => (double)n)), typeof(double));
    CheckColumn(DataFrameColumn.Create("Float", Indices().Select(n => (float)n)), typeof(float));
    CheckColumn(DataFrameColumn.Create("Int", Indices().Select(n => n)), typeof(int));
    CheckColumn(DataFrameColumn.Create("Long", Indices().Select(n => (long)n)), typeof(long));
    CheckColumn(DataFrameColumn.Create("Sbyte", Indices().Select(n => (sbyte)n)), typeof(sbyte));
    CheckColumn(DataFrameColumn.Create("Short", Indices().Select(n => (short)n)), typeof(short));
    CheckColumn(DataFrameColumn.Create("Uint", Indices().Select(n => (uint)n)), typeof(uint));
    CheckColumn(DataFrameColumn.Create("Ulong", Indices().Select(n => (ulong)n)), typeof(ulong));
    CheckColumn(DataFrameColumn.Create("Ushort", Indices().Select(n => (ushort)n)), typeof(ushort));
}
/// <summary>
/// For each typed getter on the column collection, checks that asking for a column by its own
/// type returns a non-null column and that asking for it through a mismatched getter throws
/// <see cref="ArgumentException"/>.
/// </summary>
[Fact]
public void GetColumnTests()
{
    DataFrame frame = MakeDataFrameWithAllColumnTypes(10);
    var columns = frame.Columns;

    // Generic primitive getter.
    PrimitiveDataFrameColumn<int> genericIntColumn = columns.GetPrimitiveColumn<int>("Int");
    Assert.NotNull(genericIntColumn);
    Assert.Throws<ArgumentException>(() => columns.GetPrimitiveColumn<float>("Int"));

    // The two string column kinds are not interchangeable.
    StringDataFrameColumn stringColumn = columns.GetStringColumn("String");
    Assert.NotNull(stringColumn);
    Assert.Throws<ArgumentException>(() => columns.GetStringColumn("ArrowString"));

    ArrowStringDataFrameColumn arrowStringColumn = columns.GetArrowStringColumn("ArrowString");
    Assert.NotNull(arrowStringColumn);
    Assert.Throws<ArgumentException>(() => columns.GetArrowStringColumn("String"));

    // Typed getters; except for "Float", the mismatch probe is GetSingleColumn.
    ByteDataFrameColumn byteColumn = columns.GetByteColumn("Byte");
    Assert.NotNull(byteColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Byte"));

    Int32DataFrameColumn int32Column = columns.GetInt32Column("Int");
    Assert.NotNull(int32Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Int"));

    BooleanDataFrameColumn booleanColumn = columns.GetBooleanColumn("Bool");
    Assert.NotNull(booleanColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Bool"));

    CharDataFrameColumn charColumn = columns.GetCharColumn("Char");
    Assert.NotNull(charColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Char"));

    DecimalDataFrameColumn decimalColumn = columns.GetDecimalColumn("Decimal");
    Assert.NotNull(decimalColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Decimal"));

    DoubleDataFrameColumn doubleColumn = columns.GetDoubleColumn("Double");
    Assert.NotNull(doubleColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Double"));

    SingleDataFrameColumn singleColumn = columns.GetSingleColumn("Float");
    Assert.NotNull(singleColumn);
    Assert.Throws<ArgumentException>(() => columns.GetDoubleColumn("Float"));

    Int64DataFrameColumn int64Column = columns.GetInt64Column("Long");
    Assert.NotNull(int64Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Long"));

    SByteDataFrameColumn sbyteColumn = columns.GetSByteColumn("Sbyte");
    Assert.NotNull(sbyteColumn);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Sbyte"));

    Int16DataFrameColumn int16Column = columns.GetInt16Column("Short");
    Assert.NotNull(int16Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Short"));

    UInt32DataFrameColumn uint32Column = columns.GetUInt32Column("Uint");
    Assert.NotNull(uint32Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Uint"));

    UInt64DataFrameColumn uint64Column = columns.GetUInt64Column("Ulong");
    Assert.NotNull(uint64Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Ulong"));

    UInt16DataFrameColumn uint16Column = columns.GetUInt16Column("Ushort");
    Assert.NotNull(uint16Column);
    Assert.Throws<ArgumentException>(() => columns.GetSingleColumn("Ushort"));
}
/// <summary>
/// Checks that Mean on the "Decimal" column of the numeric test frame equals 40/9.
/// </summary>
[Fact]
public void TestMean()
{
    DataFrame frame = MakeDataFrameWithNumericColumns(10, true, 0);

    var actualMean = frame["Decimal"].Mean();

    Assert.Equal(40.0 / 9.0, actualMean);
}
/// <summary>
/// Checks that Median on the "Decimal" column of the numeric test frame equals 4.
/// </summary>
[Fact]
public void TestMedian()
{
    DataFrame frame = MakeDataFrameWithNumericColumns(10, true, 0);

    var actualMedian = frame["Decimal"].Median();

    Assert.Equal(4, actualMedian);
}
/// <summary>
/// Filters a frame with the result of ElementwiseNotEquals(null) on a string column and checks
/// that the three non-null values remain, in their original order.
/// </summary>
[Fact]
public void Test_StringColumnNotEqualsNull()
{
    var words = new StringDataFrameColumn("col", new[] { "One", null, "Two", "Three" });
    var frame = new DataFrame(words);

    var notNullMask = frame["col"].ElementwiseNotEquals(null);
    var withoutNulls = frame.Filter(notNullMask);

    Assert.True(withoutNulls.Columns.IndexOf("col") >= 0);

    var remaining = withoutNulls.Columns["col"];
    Assert.Equal(3, remaining.Length);
    Assert.Equal("One", remaining[0]);
    Assert.Equal("Two", remaining[1]);
    Assert.Equal("Three", remaining[2]);
}
/// <summary>
/// Filters a frame with the result of ElementwiseEquals(null) on a string column and checks
/// that exactly the two rows whose string value is null remain, identified by their "index" values.
/// </summary>
[Fact]
public void Test_StringColumnEqualsNull()
{
    var positions = new Int32DataFrameColumn("index", new[] { 1, 2, 3, 4, 5 });
    var words = new StringDataFrameColumn("col", new[] { "One", null, "Three", "Four", null });
    var frame = new DataFrame(positions, words);

    var nullMask = frame["col"].ElementwiseEquals(null);
    var onlyNulls = frame.Filter(nullMask);

    Assert.True(onlyNulls.Columns.IndexOf("col") >= 0);
    Assert.True(onlyNulls.Columns.IndexOf("index") >= 0);
    Assert.Equal(2, onlyNulls.Rows.Count);

    // The nulls sit in the second and fifth source rows.
    var keptPositions = onlyNulls.Columns["index"];
    Assert.Equal(2, keptPositions[0]);
    Assert.Equal(5, keptPositions[1]);
}
}
}
|