// File: Dynamic\Trainers\Recommendation\OneClassMatrixFactorizationWithOptions.cs
// Project: src\docs\samples\Microsoft.ML.Samples\Microsoft.ML.Samples.csproj (Microsoft.ML.Samples)
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
 
namespace Samples.Dynamic.Trainers.Recommendation
{
    public static class OneClassMatrixFactorizationWithOptions
    {
        // This example shows the use of ML.NET's one-class matrix factorization
        // module which implements a coordinate descent method described in
        // Algorithm 1 in the paper found at
        // https://www.csie.ntu.edu.tw/~cjlin/papers/one-class-mf/biased-mf-sdm-with-supp.pdf
        // See page 28 of the slides at
        // https://www.csie.ntu.edu.tw/~cjlin/talks/facebook.pdf for a brief
        // introduction to one-class matrix factorization.
        // In this example we create in-memory data and then use it to train a
        // one-class matrix factorization model. Afterward, prediction values are
        // reported. Running this example requires the additional NuGet package
        // Microsoft.ML.Recommender found at
        // https://www.nuget.org/packages/Microsoft.ML.Recommender/
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness.
            var mlContext = new MLContext(seed: 0);

            // Get a small in-memory dataset: "data" holds only the observed
            // entries, "testData" holds every cell of the matrix.
            GetOneClassMatrix(out List<MatrixElement> data,
                out List<MatrixElement> testData);

            // Convert the in-memory matrix into an IDataView so that ML.NET
            // components can consume it.
            var dataView = mlContext.Data.LoadFromEnumerable(data);

            // Create a matrix factorization trainer which takes "Value" as the
            // training label, "MatrixColumnIndex" as the matrix's column index, and
            // "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used
            // to extract the property names of the MatrixElement class.
            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = nameof(
                    MatrixElement.MatrixColumnIndex),
                MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex),
                LabelColumnName = nameof(MatrixElement.Value),
                NumberOfIterations = 20,
                NumberOfThreads = 8,
                ApproximationRank = 32,
                Alpha = 1,

                // The desired values of matrix elements not specified in the
                // training set. If the training set doesn't tell the value at the
                // u-th row and v-th column, its desired value would be set 0.15.
                // In other words, this parameter determines the value of all
                // missing matrix elements.
                C = 0.15,
                // This argument enables one-class matrix factorization.
                LossFunction = MatrixFactorizationTrainer.LossFunctionType
                    .SquareLossOneClass
            };

            var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(
                options);

            // Train a matrix factorization model.
            var model = pipeline.Fit(dataView);

            // Apply the trained model to the test set. Notice that the training
            // set is only a part of the test set: it contains just the observed
            // entries, whereas the test set covers every cell of the matrix.
            var prediction = model.Transform(mlContext.Data.LoadFromEnumerable(
                testData));

            var results = mlContext.Data.CreateEnumerable<MatrixElement>(prediction,
                false).ToList();

            // Iterate through a few predictions. The key-typed indices are stored
            // one-based (raw value 0 means "missing"), so 1 is subtracted to
            // report zero-based row and column positions.
            foreach (var pred in results.Take(15))
            {
                Console.WriteLine($"Predicted value at row " +
                    $"{pred.MatrixRowIndex - 1} and column " +
                    $"{pred.MatrixColumnIndex - 1} is {pred.Score} and its " +
                    $"expected value is {pred.Value}.");
            }

            // Expected output similar to:
            // Predicted value at row 0 and column 0 is 0.9873335 and its expected value is 1.
            // Predicted value at row 1 and column 0 is 0.1499522 and its expected value is 0.15.
            // Predicted value at row 2 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 3 and column 0 is 0.1499254 and its expected value is 0.15.
            // Predicted value at row 4 and column 0 is 0.1499074 and its expected value is 0.15.
            // Predicted value at row 5 and column 0 is 0.1499968 and its expected value is 0.15.
            // Predicted value at row 6 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 7 and column 0 is 0.1499805 and its expected value is 0.15.
            // Predicted value at row 8 and column 0 is 0.1500055 and its expected value is 0.15.
            // Predicted value at row 9 and column 0 is 0.1499199 and its expected value is 0.15.
            // Predicted value at row 10 and column 0 is 0.9873335 and its expected value is 1.
            // Predicted value at row 11 and column 0 is 0.1499522 and its expected value is 0.15.
            // Predicted value at row 12 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 13 and column 0 is 0.1499254 and its expected value is 0.15.
            // Predicted value at row 14 and column 0 is 0.1499074 and its expected value is 0.15.
            //
            // Note: set NumberOfThreads to 1 in the options above for a
            // deterministic behavior.

            // Assume that row index is user ID and column index game ID, the
            // following list contains the games recommended by the trained model.
            // Note that sometimes you may want to exclude training data from your
            // predicted results because those would represent games that were
            // already purchased. The variable topColumns stores two matrix elements
            // with the highest predicted scores on the 1st row (stored key value 1,
            // reported as zero-based row 0).
            var topColumns = results.Where(element => element.MatrixRowIndex == 1)
                .OrderByDescending(element => element.Score).Take(2);

            Console.WriteLine("Top 2 predictions on the 1st row:");
            foreach (var top in topColumns)
            {
                Console.WriteLine($"Predicted value at row " +
                    $"{top.MatrixRowIndex - 1} and column " +
                    $"{top.MatrixColumnIndex - 1} is {top.Score} and its " +
                    $"expected value is {top.Value}.");
            }

            // Expected output similar to:
            // Top 2 predictions on the 1st row:
            // Predicted value at row 0 and column 0 is 0.9871138 and its expected value is 1.
            // Predicted value at row 0 and column 10 is 0.9871138 and its expected value is 1.
        }

        // The following constants define the shape of the matrix, which is
        // _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount.
        // ML.NET key types reserve the stored value 0 for "missing", so the k-th
        // (zero-based) row or column is stored as k + 1 in the C# data structure
        // (e.g., MatrixColumnIndex=1 and MatrixRowIndex=1 in MatrixElement below
        // specify the value at the upper-left corner of the training matrix).
        // An element whose stored row or column index is 0 is treated as having
        // no position in the matrix.
        private const uint _synthesizedMatrixColumnCount = 60;
        private const uint _synthesizedMatrixRowCount = 100;

        // A data structure used to encode a single value in the matrix.
        private class MatrixElement
        {
            // Matrix column index, stored one-based. Its allowed range is from 1
            // to _synthesizedMatrixColumnCount (0 denotes a missing key).
            [KeyType(_synthesizedMatrixColumnCount)]
            public uint MatrixColumnIndex { get; set; }
            // Matrix row index, stored one-based. Its allowed range is from 1 to
            // _synthesizedMatrixRowCount (0 denotes a missing key).
            [KeyType(_synthesizedMatrixRowCount)]
            public uint MatrixRowIndex { get; set; }
            // The value at the MatrixColumnIndex-th column and the
            // MatrixRowIndex-th row.
            public float Value { get; set; }
            // The predicted value at the MatrixColumnIndex-th column and the
            // MatrixRowIndex-th row.
            public float Score { get; set; }
        }

        // Create an in-memory matrix as a list of tuples (column index, row index,
        // value). Notice that one-class matrix factorization handles scenarios
        // where only positive signals can be observed (e.g., on Facebook, only
        // likes are recorded and no dislikes), so all observed values are set to 1.
        //
        // observedMatrix receives only the observed (value 1) entries and is meant
        // for training; fullMatrix receives every cell of the matrix, with the
        // unobserved cells set to 0.15, and is meant for evaluation.
        private static void GetOneClassMatrix(
            out List<MatrixElement> observedMatrix,
            out List<MatrixElement> fullMatrix)
        {
            // The matrix factorization model will be trained only using
            // observedMatrix but we will see it can learn all information carried
            // in fullMatrix.
            observedMatrix = new List<MatrixElement>();
            fullMatrix = new List<MatrixElement>();
            for (uint i = 0; i < _synthesizedMatrixColumnCount; ++i)
            {
                for (uint j = 0; j < _synthesizedMatrixRowCount; ++j)
                {
                    // i and j are zero-based matrix coordinates. Key-typed columns
                    // reserve the stored value 0 for "missing", so the coordinates
                    // are shifted by one before being stored. Storing them
                    // unshifted would mark the first row and column as missing and
                    // make the "index - 1" reporting in Example() wrap around.
                    uint columnKey = i + 1;
                    uint rowKey = j + 1;

                    if ((i + j) % 10 == 0)
                    {
                        // Set observed elements' values to 1 (means like).
                        observedMatrix.Add(new MatrixElement()
                        {
                            MatrixColumnIndex = columnKey,
                            MatrixRowIndex = rowKey,
                            Value = 1,
                            Score = 0
                        });
                        fullMatrix.Add(new MatrixElement()
                        {
                            MatrixColumnIndex = columnKey,
                            MatrixRowIndex = rowKey,
                            Value = 1,
                            Score = 0
                        });
                    }
                    else
                    {
                        // Set unobserved elements' values to 0.15, a value smaller
                        // than observed values (means dislike).
                        fullMatrix.Add(new MatrixElement()
                        {
                            MatrixColumnIndex = columnKey,
                            MatrixRowIndex = rowKey,
                            Value = 0.15f,
                            Score = 0
                        });
                    }
                }
            }
        }
    }
}