// File: DataFrameColumnCollection.cs
// Project: src\src\Microsoft.Data.Analysis\Microsoft.Data.Analysis.csproj (Microsoft.Data.Analysis)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
 
namespace Microsoft.Data.Analysis
{
    /// <summary>
    /// A DataFrameColumnCollection is just a container that holds a number of DataFrameColumn instances. 
    /// </summary>
    public class DataFrameColumnCollection : Collection<DataFrameColumn>
    {
        // Optional callback (may be null) invoked after columns are inserted, replaced, removed, cleared or renamed.
        private readonly Action ColumnsChanged;
        // Maps each column name to its zero-based position in this collection; keys are compared ordinally (case-sensitive).
        private readonly Dictionary<string, int> _columnNameToIndexDictionary = new Dictionary<string, int>(StringComparer.Ordinal);

        // Row count shared by the columns: taken from the inserted column's Length and reset to 0 once the collection is empty.
        internal long RowCount { get; set; }
 
        /// <summary>
        /// Creates a collection and adds every column from <paramref name="columns"/> in enumeration order.
        /// </summary>
        /// <param name="columns">The initial columns. Each one goes through the same validation as a regular insert.</param>
        /// <param name="columnsChanged">Callback invoked after each change to the collection; may be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="columns"/> is <c>null</c>.</exception>
        internal DataFrameColumnCollection(IEnumerable<DataFrameColumn> columns, Action columnsChanged)
        {
            if (columns == null)
            {
                throw new ArgumentNullException(nameof(columns));
            }

            // The callback must be assigned before the first Add so it fires for the initial columns as well.
            ColumnsChanged = columnsChanged;

            foreach (DataFrameColumn initialColumn in columns)
            {
                Add(initialColumn);
            }
        }
 
        /// <summary>
        /// Builds a new list containing the name of every column, in collection order.
        /// </summary>
        /// <returns>A freshly allocated list of column names.</returns>
        internal IReadOnlyList<string> GetColumnNames()
        {
            var names = new List<string>(Count);
            foreach (DataFrameColumn column in this)
            {
                names.Add(column.Name);
            }

            return names;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="currentName"/> and renames it to <paramref name="newName"/>.
        /// </summary>
        /// <param name="currentName">The present name of the column.</param>
        /// <param name="newName">The name to give the column.</param>
        /// <exception cref="ArgumentException">No column named <paramref name="currentName"/> exists in this collection.</exception>
        public void RenameColumn(string currentName, string newName)
        {
            // The name indexer throws when the column is missing; the rename itself is delegated to the column.
            this[currentName].SetName(newName);
        }
 
        /// <summary>
        /// Renames <paramref name="column"/> to <paramref name="newName"/> by calling <c>SetName</c> on the column.
        /// </summary>
        /// <param name="column">The column to rename.</param>
        /// <param name="newName">The name to give the column.</param>
        [Obsolete]
        public void SetColumnName(DataFrameColumn column, string newName) => column.SetName(newName);
 
        /// <summary>
        /// Re-keys the name-to-index map when <paramref name="column"/> is about to be renamed to <paramref name="newName"/>.
        /// Used as a callback from the column class; <paramref name="column"/> still carries its old name when this runs.
        /// </summary>
        /// <param name="column">A column held by this collection, looked up by its current name.</param>
        /// <param name="newName">The name the column is being given.</param>
        /// <exception cref="ArgumentNullException"><paramref name="newName"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">A different column in this collection already uses <paramref name="newName"/>.</exception>
        /// <exception cref="KeyNotFoundException">The column's current name is not registered in this collection.</exception>
        internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName)
        {
            // Validate everything before touching the map: a failure after the old key
            // has been removed would leave the column unreachable by either name.
            if (newName == null)
            {
                throw new ArgumentNullException(nameof(newName));
            }

            string currentName = column.Name;
            int currentIndex = _columnNameToIndexDictionary[currentName];

            // Renaming a column to its own name is allowed; only a clash with another column is an error.
            if (_columnNameToIndexDictionary.TryGetValue(newName, out int clashingIndex) && clashingIndex != currentIndex)
            {
                throw new ArgumentException(string.Format(Strings.DuplicateColumnName, newName), nameof(newName));
            }

            _columnNameToIndexDictionary.Remove(currentName);
            _columnNameToIndexDictionary[newName] = currentIndex;
            ColumnsChanged?.Invoke();
        }
 
        /// <summary>
        /// Wraps <paramref name="column"/> in a new <see cref="PrimitiveDataFrameColumn{T}"/> named
        /// <paramref name="columnName"/> and inserts it at <paramref name="columnIndex"/>.
        /// </summary>
        /// <typeparam name="T">The unmanaged element type of the new column.</typeparam>
        /// <param name="columnIndex">The zero-based position at which to insert the new column.</param>
        /// <param name="column">The values for the new column.</param>
        /// <param name="columnName">The name of the new column.</param>
        public void Insert<T>(int columnIndex, IEnumerable<T> column, string columnName)
            where T : unmanaged
        {
            // Collection<T>.Insert routes through InsertItem, which performs the length and name checks.
            Insert(columnIndex, new PrimitiveDataFrameColumn<T>(columnName, column));
        }
 
        /// <summary>
        /// Inserts <paramref name="column"/> at <paramref name="columnIndex"/>, keeping the name-to-index map
        /// and <see cref="RowCount"/> in sync, and then raises the columns-changed callback.
        /// </summary>
        /// <param name="columnIndex">The zero-based position at which to insert; must be between 0 and <c>Count</c> inclusive.</param>
        /// <param name="column">The column to insert.</param>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// The collection is not empty and the column's length differs from <see cref="RowCount"/>,
        /// or a column with the same name is already present.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="columnIndex"/> is outside the valid range.</exception>
        protected override void InsertItem(int columnIndex, DataFrameColumn column)
        {
            column = column ?? throw new ArgumentNullException(nameof(column));

            // All validation happens before any state is mutated so that a rejected
            // insert leaves RowCount, the owner link and the name map untouched.

            // The first column defines the row count; every later column must match it.
            if (Count != 0 && column.Length != RowCount)
            {
                throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
            }

            if (_columnNameToIndexDictionary.ContainsKey(column.Name))
            {
                throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
            }

            // base.InsertItem would reject a bad index as well, but only after the map had been changed.
            if (columnIndex < 0 || columnIndex > Count)
            {
                throw new ArgumentOutOfRangeException(nameof(columnIndex));
            }

            // Register this collection on the column (presumably so renames are reported back here).
            column.AddOwner(this);

            // Either this is the first column, or the length already equals RowCount.
            RowCount = column.Length;

            // Columns at or after the insertion point move one slot to the right.
            for (int i = columnIndex; i < Count; i++)
            {
                _columnNameToIndexDictionary[this[i].Name]++;
            }
            _columnNameToIndexDictionary[column.Name] = columnIndex;

            base.InsertItem(columnIndex, column);
            ColumnsChanged?.Invoke();
        }
 
        /// <summary>
        /// Replaces the column at <paramref name="columnIndex"/> with <paramref name="column"/>, updating the
        /// name-to-index map and the owner links, and then raises the columns-changed callback.
        /// </summary>
        /// <param name="columnIndex">The zero-based position of the column to replace.</param>
        /// <param name="column">The replacement column.</param>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// <see cref="RowCount"/> is positive and differs from the column's length,
        /// or the column's name is already used at a different position.
        /// </exception>
        protected override void SetItem(int columnIndex, DataFrameColumn column)
        {
            column = column ?? throw new ArgumentNullException(nameof(column));
            if (RowCount > 0 && column.Length != RowCount)
            {
                throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
            }

            // Reusing the name of the column being replaced is fine; clashing with any other position is not.
            bool existingColumn = _columnNameToIndexDictionary.TryGetValue(column.Name, out int existingColumnIndex);
            if (existingColumn && existingColumnIndex != columnIndex)
            {
                throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
            }

            DataFrameColumn replaced = this[columnIndex];
            _columnNameToIndexDictionary.Remove(replaced.Name);
            _columnNameToIndexDictionary[column.Name] = columnIndex;

            // Detach the outgoing column, then attach the incoming one, mirroring InsertItem.
            // Removing before adding keeps the link intact when both are the same instance.
            replaced.RemoveOwner(this);
            column.AddOwner(this);
            base.SetItem(columnIndex, column);

            // A sole column defines the row count. This only changes anything when RowCount was 0,
            // because otherwise the length check above already guarantees equality.
            // NOTE(review): with several zero-length columns a longer replacement is still accepted
            // (original behaviour, kept for compatibility) - confirm whether that should be rejected.
            if (Count == 1)
            {
                RowCount = column.Length;
            }

            ColumnsChanged?.Invoke();
        }
 
        /// <summary>
        /// Removes the column at <paramref name="columnIndex"/>, keeping the name-to-index map in sync,
        /// and then raises the columns-changed callback.
        /// </summary>
        /// <param name="columnIndex">The zero-based position of the column to remove.</param>
        protected override void RemoveItem(int columnIndex)
        {
            DataFrameColumn departing = this[columnIndex];
            _columnNameToIndexDictionary.Remove(departing.Name);

            // Every column to the right of the removed one moves one slot to the left.
            int following = columnIndex + 1;
            while (following < Count)
            {
                _columnNameToIndexDictionary[this[following].Name]--;
                following++;
            }

            departing.RemoveOwner(this);
            base.RemoveItem(columnIndex);

            // With no columns left there are no rows either.
            if (Count == 0)
            {
                RowCount = 0;
            }

            ColumnsChanged?.Invoke();
        }
 
        /// <summary>
        /// Removes the column named <paramref name="columnName"/>; does nothing when no such column exists.
        /// </summary>
        /// <param name="columnName">The name of the column to remove.</param>
        public void Remove(string columnName)
        {
            int position = IndexOf(columnName);
            if (position == -1)
            {
                return;
            }

            // RemoveAt routes through RemoveItem, which updates the name map.
            RemoveAt(position);
        }
 
        /// <summary>
        /// Finds the zero-based position of the <see cref="DataFrameColumn"/> named <paramref name="columnName"/>.
        /// </summary>
        /// <param name="columnName">The column name to look up (case-sensitive, ordinal comparison). May be <c>null</c>.</param>
        /// <returns>The column's index, or -1 when <paramref name="columnName"/> is <c>null</c> or not present.</returns>
        public int IndexOf(string columnName)
        {
            return columnName != null && _columnNameToIndexDictionary.TryGetValue(columnName, out int position)
                ? position
                : -1;
        }
 
        /// <summary>
        /// Removes every column, empties the name-to-index map, resets <see cref="RowCount"/> to 0,
        /// and then raises the columns-changed callback.
        /// </summary>
        protected override void ClearItems()
        {
            // Detach each column from this collection, as RemoveItem does for a single column.
            // Otherwise a later rename of a cleared column would presumably still be reported
            // here and fail on the missing map entry.
            for (int i = 0; i < Count; i++)
            {
                this[i].RemoveOwner(this);
            }

            base.ClearItems();
            _columnNameToIndexDictionary.Clear();

            // Reset RowCount as the DataFrame is now empty.
            RowCount = 0;

            // Notify last so the callback observes a fully reset collection.
            ColumnsChanged?.Invoke();
        }
 
        /// <summary>
        /// An indexer based on <see cref="DataFrameColumn.Name"/>.
        /// The getter returns the column with the given name. The setter renames the supplied column to
        /// <paramref name="columnName"/> and then either replaces the column that currently has that name
        /// or appends the column when the name is not present.
        /// </summary>
        /// <param name="columnName">The name of a <see cref="DataFrameColumn"/></param>
        /// <returns>A <see cref="DataFrameColumn"/> if it exists.</returns>
        /// <exception cref="ArgumentException">Thrown by the getter if <paramref name="columnName"/> is not present in this <see cref="DataFrame"/></exception>
        /// <exception cref="ArgumentNullException">Thrown by the setter if <paramref name="columnName"/> or the assigned column is <c>null</c>.</exception>
        public DataFrameColumn this[string columnName]
        {
            get
            {
                int columnIndex = IndexOf(columnName);
                if (columnIndex == -1)
                {
                    throw new ArgumentException(String.Format(Strings.InvalidColumnName, columnName), nameof(columnName));
                }
                return this[columnIndex];
            }
            set
            {
                // Reject null arguments before the column is renamed. Previously a null name was
                // written to the column before the insert failed, and a null column surfaced as a
                // NullReferenceException.
                if (columnName == null)
                {
                    throw new ArgumentNullException(nameof(columnName));
                }
                DataFrameColumn newColumn = value ?? throw new ArgumentNullException(nameof(value));

                // Resolve the target slot before renaming: the rename may re-key this collection's
                // map when the column is already one of its members.
                int columnIndex = IndexOf(columnName);
                newColumn.SetName(columnName);
                if (columnIndex == -1)
                {
                    Insert(Count, newColumn);
                }
                else
                {
                    this[columnIndex] = newColumn;
                }
            }
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="PrimitiveDataFrameColumn{T}"/>.
        /// </summary>
        /// <typeparam name="T">The unmanaged element type the column is expected to hold.</typeparam>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="PrimitiveDataFrameColumn{T}"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="PrimitiveDataFrameColumn{T}"/>.</exception>
        public PrimitiveDataFrameColumn<T> GetPrimitiveColumn<T>(string name)
            where T : unmanaged
        {
            DataFrameColumn found = this[name];
            if (!(found is PrimitiveDataFrameColumn<T> typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(T)), nameof(T));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="DateTimeDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="DateTimeDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="DateTimeDataFrameColumn"/>.</exception>
        public DateTimeDataFrameColumn GetDateTimeColumn(string name)
        {
            DataFrameColumn found = this[name];
            return (found as DateTimeDataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(DateTime)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as an <see cref="ArrowStringDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="ArrowStringDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not an <see cref="ArrowStringDataFrameColumn"/>.</exception>
        public ArrowStringDataFrameColumn GetArrowStringColumn(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is ArrowStringDataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(string)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="StringDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="StringDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="StringDataFrameColumn"/>.</exception>
        public StringDataFrameColumn GetStringColumn(string name)
        {
            DataFrameColumn found = this[name];
            return (found as StringDataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(string)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="BooleanDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="BooleanDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="BooleanDataFrameColumn"/>.</exception>
        public BooleanDataFrameColumn GetBooleanColumn(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is BooleanDataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Boolean)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="ByteDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="ByteDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="ByteDataFrameColumn"/>.</exception>
        public ByteDataFrameColumn GetByteColumn(string name)
        {
            DataFrameColumn found = this[name];
            return (found as ByteDataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Byte)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="CharDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="CharDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="CharDataFrameColumn"/>.</exception>
        public CharDataFrameColumn GetCharColumn(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is CharDataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Char)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="DoubleDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="DoubleDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="DoubleDataFrameColumn"/>.</exception>
        public DoubleDataFrameColumn GetDoubleColumn(string name)
        {
            DataFrameColumn found = this[name];
            return (found as DoubleDataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Double)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="DecimalDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="DecimalDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="DecimalDataFrameColumn"/>.</exception>
        public DecimalDataFrameColumn GetDecimalColumn(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is DecimalDataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Decimal)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="SingleDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="SingleDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="SingleDataFrameColumn"/>.</exception>
        public SingleDataFrameColumn GetSingleColumn(string name)
        {
            DataFrameColumn found = this[name];
            return (found as SingleDataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Single)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as an <see cref="Int32DataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="Int32DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not an <see cref="Int32DataFrameColumn"/>.</exception>
        public Int32DataFrameColumn GetInt32Column(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is Int32DataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Int32)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as an <see cref="Int64DataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="Int64DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not an <see cref="Int64DataFrameColumn"/>.</exception>
        public Int64DataFrameColumn GetInt64Column(string name)
        {
            DataFrameColumn found = this[name];
            return (found as Int64DataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Int64)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as an <see cref="SByteDataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="SByteDataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not an <see cref="SByteDataFrameColumn"/>.</exception>
        public SByteDataFrameColumn GetSByteColumn(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is SByteDataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(SByte)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as an <see cref="Int16DataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="Int16DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not an <see cref="Int16DataFrameColumn"/>.</exception>
        public Int16DataFrameColumn GetInt16Column(string name)
        {
            DataFrameColumn found = this[name];
            return (found as Int16DataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(Int16)));
        }
 
        /// <summary>
        /// Gets the <see cref="UInt32DataFrameColumn"/> with the specified <paramref name="name"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns><see cref="UInt32DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">A column named <paramref name="name"/> cannot be found, or if the column's type doesn't match.</exception>
        public UInt32DataFrameColumn GetUInt32Column(string name)
        {
            DataFrameColumn column = this[name];
            if (column is UInt32DataFrameColumn ret)
            {
                return ret;
            }

            // Report the type that was actually requested. This previously said typeof(string),
            // which made the error message misleading.
            throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(UInt32)));
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="UInt64DataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="UInt64DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="UInt64DataFrameColumn"/>.</exception>
        public UInt64DataFrameColumn GetUInt64Column(string name)
        {
            DataFrameColumn found = this[name];
            if (!(found is UInt64DataFrameColumn typed))
            {
                throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(UInt64)));
            }

            return typed;
        }
 
        /// <summary>
        /// Looks up the column called <paramref name="name"/> and returns it as a <see cref="UInt16DataFrameColumn"/>.
        /// </summary>
        /// <param name="name">The name of the column</param>
        /// <returns>The column, typed as <see cref="UInt16DataFrameColumn"/>.</returns>
        /// <exception cref="ArgumentException">No column named <paramref name="name"/> exists, or the column is not a <see cref="UInt16DataFrameColumn"/>.</exception>
        public UInt16DataFrameColumn GetUInt16Column(string name)
        {
            DataFrameColumn found = this[name];
            return (found as UInt16DataFrameColumn)
                ?? throw new ArgumentException(string.Format(Strings.BadColumnCast, found.DataType, typeof(UInt16)));
        }
    }
}