// TextClassification.cs

using System;
using System.IO;
using Microsoft.ML;
using Microsoft.ML.Data;
 
namespace Samples.Dynamic
{
    public static class TextClassification
    {
        /// <summary>
        /// Length that every tokenized review is resized to before scoring.
        /// It is used both by the resize step in <see cref="Example"/> and by
        /// the <c>VectorType</c> annotation on
        /// <see cref="IntermediateFeatures.Features"/>.
        /// </summary>
        public const int MaxSentenceLength = 600;

        /// <summary>
        /// Example use of the TensorFlow sentiment classification model.
        /// Builds a pipeline that tokenizes a review, maps the words to integer
        /// ids, resizes the id vector to <see cref="MaxSentenceLength"/>,
        /// scores it with the TensorFlow model and prints the result to the
        /// console.
        /// </summary>
        public static void Example()
        {
            // Download an unfrozen (SavedModel format) pre-trained sentiment
            // model and return the path to the model directory.
            string modelLocation = Microsoft.ML.SamplesUtils.DatasetUtils
                .DownloadTensorFlowSentimentModel();

            var mlContext = new MLContext();

            // Every fragment except the last must end with a space; otherwise
            // the words on both sides of a '+' are glued into a single token
            // that cannot be found in the word dictionary.
            var data = new[] { new IMDBSentiment() {
                Sentiment_Text = "this film was just brilliant casting location " +
                "scenery story direction everyone's really suited the part they " +
                "played and you could just imagine being there robert is an " +
                "amazing actor and now the same being director  father came from " +
                "the same scottish island as myself so i loved the fact there " +
                "was a real connection with this film the witty remarks " +
                "throughout the film were great it was just brilliant so much " +
                "that i bought the film as soon as it was released for  and " +
                "would recommend it to everyone to watch and the fly fishing was " +
                "amazing really cried at the end it was so sad and you know what " +
                "they say if you cry at a film it must have been good and this " +
                "definitely was also to the two little boy's that played the of " +
                "norman and paul they were just brilliant children are often " +
                "left out of the list i think because the stars that play them " +
                "all grown up are such a big profile for the whole film but " +
                "these children are amazing and should be praised for what " +
                "they have done don't you think the whole story was so lovely " +
                "because it was true and was someone's life after all that was " +
                "shared with us all" } };
            var dataView = mlContext.Data.LoadFromEnumerable(data);

            // This is the dictionary to convert words into the integer indexes.
            var lookupMap = mlContext.Data.LoadFromTextFile(
                Path.Combine(modelLocation, "imdb_word_index.csv"),
                columns: new[]
                {
                    new TextLoader.Column("Words", DataKind.String, 0),
                    new TextLoader.Column("Ids", DataKind.Int32, 1),
                },
                separatorChar: ',');

            // Load the TensorFlow model once.
            //      - Use it for querying the schema for input and output in the
            //            model
            //      - Use it for prediction in the pipeline.
            // Unfrozen (SavedModel format) models are loaded by providing the
            // path to the directory containing the model file and other model
            // artifacts like pre-trained weights.
            using var tensorFlowModel = mlContext.Model.LoadTensorFlowModel(
                modelLocation);
            var schema = tensorFlowModel.GetModelSchema();
            var featuresType = (VectorDataViewType)schema["Features"].Type;
            Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features",
                featuresType.ItemType.RawType, featuresType.Dimensions[0]);

            var predictionType = (VectorDataViewType)schema["Prediction/Softmax"]
                .Type;
            Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})",
                "Prediction/Softmax", predictionType.ItemType.RawType,
                predictionType.Dimensions[0]);

            // The model expects the input feature vector to be a fixed length
            // vector.
            // In this sample, CustomMappingEstimator is used to resize variable
            // length vector to fixed length vector.
            // The following ML.NET pipeline
            //      1. Tokenizes the string into words,
            //      2. Maps each word to an integer which is an index in the
            //         dictionary ('lookupMap'),
            //      3. Resizes the integer vector to a fixed length vector using
            //         CustomMappingEstimator ('resizeFeaturesAction'),
            //      4. Passes the data to TensorFlow for scoring,
            //      5. Retrieves the 'Prediction' from TensorFlow and puts it
            //         into the ML.NET pipeline.

            Action<IMDBSentiment, IntermediateFeatures> resizeFeaturesAction =
                (input, output) =>
                {
                    output.Sentiment_Text = input.Sentiment_Text;

                    // Array.Resize works on the local reference: shorter
                    // vectors are zero-padded and longer ones are truncated to
                    // MaxSentenceLength.
                    var features = input.VariableLengthFeatures;
                    Array.Resize(ref features, MaxSentenceLength);
                    output.Features = features;
                };

            var model =
                mlContext.Transforms.Text.TokenizeIntoWords(
                    "TokenizedWords",
                    "Sentiment_Text")
                .Append(mlContext.Transforms.Conversion.MapValue(
                    "VariableLengthFeatures",
                    lookupMap,
                    lookupMap.Schema["Words"],
                    lookupMap.Schema["Ids"],
                    "TokenizedWords"))
                .Append(mlContext.Transforms.CustomMapping(
                    resizeFeaturesAction,
                    "Resize"))
                .Append(tensorFlowModel.ScoreTensorFlowModel(
                    "Prediction/Softmax",
                    "Features"))
                .Append(mlContext.Transforms.CopyColumns(
                    "Prediction",
                    "Prediction/Softmax"))
                .Fit(dataView);

            // The prediction engine is disposable; release it when the sample
            // finishes, just like the TensorFlow model above.
            using var engine = mlContext.Model.CreatePredictionEngine<
                IMDBSentiment, OutputScores>(model);

            // Predict with TensorFlow pipeline.
            var prediction = engine.Predict(data[0]);

            // This sample reads index 1 of the output as the positive score.
            float positiveScore = prediction.Prediction[1];

            Console.WriteLine("Number of classes: {0}",
                prediction.Prediction.Length);
            Console.WriteLine("Is sentiment/review positive? {0}",
                positiveScore > 0.5 ? "Yes." : "No.");

            // Format with the invariant culture so the decimal separator is
            // always '.', matching the expected output below on any machine.
            Console.WriteLine("Prediction Confidence: {0}",
                positiveScore.ToString("0.00",
                    System.Globalization.CultureInfo.InvariantCulture));

            ///////////////////////////// Expected output //////////////////////////
            //
            // Name: Features, Type: System.Int32, Shape: (-1, 600)
            // Name: Prediction/Softmax, Type: System.Single, Shape: (-1, 2)
            // Number of classes: 2
            // Is sentiment/review positive? Yes.
            // Prediction Confidence: 0.65
            //
            // NOTE(review): the confidence value is approximate; it depends on
            // the downloaded model and the exact tokens of the sample text, so
            // re-run the sample to confirm it.
        }


        /// <summary>
        /// Class to hold original sentiment data.
        /// </summary>
        public class IMDBSentiment
        {
            /// <summary>
            /// Raw review text that the pipeline tokenizes.
            /// </summary>
            public string Sentiment_Text { get; set; }

            /// <summary>
            /// This is a variable length vector designated by VectorType attribute.
            /// Variable length vectors are produced by applying operations such as
            /// 'TokenizeIntoWords' on strings resulting in vectors of tokens of
            /// variable lengths.
            /// </summary>
            [VectorType]
            public int[] VariableLengthFeatures { get; set; }
        }

        /// <summary>
        /// Class to hold intermediate data. Mostly used by CustomMapping Estimator
        /// </summary>
        public class IntermediateFeatures
        {
            /// <summary>
            /// Review text copied unchanged from the input row.
            /// </summary>
            public string Sentiment_Text { get; set; }

            /// <summary>
            /// Word ids resized to exactly <see cref="MaxSentenceLength"/>
            /// elements.
            /// </summary>
            [VectorType(MaxSentenceLength)]
            public int[] Features { get; set; }
        }

        /// <summary>
        /// Class to contain the output values from the transformation.
        /// </summary>
        private class OutputScores
        {
            /// <summary>
            /// Two-element score vector copied from the model's
            /// 'Prediction/Softmax' output.
            /// </summary>
            [VectorType(2)]
            public float[] Prediction { get; set; }
        }

    }
}
// File: docs\samples\Microsoft.ML.Samples\Dynamic\TensorFlow\TextClassification.cs
// Project: src\docs\samples\Microsoft.ML.Samples.GPU\Microsoft.ML.Samples.GPU.csproj (Microsoft.ML.Samples.GPU)