7 types derived from Tokenizer
Microsoft.ML.Tokenizers (6)
Microsoft.ML.Tokenizers.Tests (1)
127 references to Tokenizer
Microsoft.Extensions.AI.Integration.Tests (3)
Microsoft.Extensions.DataIngestion (4)
Microsoft.Extensions.DataIngestion.Tests (3)
Microsoft.ML.GenAI.Core (8)
Microsoft.ML.GenAI.Core.Tests (2)
Microsoft.ML.GenAI.LLaMA (8)
Microsoft.ML.GenAI.Mistral (2)
Microsoft.ML.GenAI.Phi (10)
Microsoft.ML.GenAI.Phi.Tests (3)
Microsoft.ML.GenAI.Samples (2)
Microsoft.ML.Tokenizers (5)
Microsoft.ML.Tokenizers.Tests (60)
TiktokenTests.cs (21)
32public static Tokenizer GPT4 { get; } = TiktokenTokenizer.CreateForModel("gpt-4", _specialTokens);
33public static Tokenizer GPT2 { get; } = TiktokenTokenizer.CreateForModel("gpt2");
34public static Tokenizer P50kBase { get; } = TiktokenTokenizer.CreateForModel("text-davinci-003");
35public static Tokenizer R50kBase { get; } = TiktokenTokenizer.CreateForModel("ada");
36public static Tokenizer P50kEdit { get; } = TiktokenTokenizer.CreateForModel("text-davinci-edit-001");
37public static Tokenizer GPT4o { get; } = TiktokenTokenizer.CreateForModel("gpt-4o");
38public static Tokenizer GPT5 { get; } = TiktokenTokenizer.CreateForModel("gpt-5");
39public static Tokenizer GPT5_1 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.1");
40public static Tokenizer GPT5_2 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.2");
41public static Tokenizer GPT5_3 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.3");
42public static Tokenizer GPT5_4 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.4");
43public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4");
68Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens);
118public async Task TestTokenizerUsingExternalVocab(Tokenizer tokenizer, string url)
144private void TestGPT4TokenizationEncoding(Tokenizer tokenizer)
219private void TestGPT4Tokenizer(Tokenizer gpt4Tokenizer)
489Tokenizer tokenizer = TiktokenTokenizer.CreateForModel(modelName);
503Tokenizer tokenizer = TiktokenTokenizer.CreateForEncoding(encodingName);
518Tokenizer tokenizer1 = TiktokenTokenizer.CreateForModel(modelName);
569Tokenizer tokenizer = TiktokenTokenizer.CreateForModel(name);
628private void TestTokenizerEncodingForTokenizer(Tokenizer tokenizer, string text, string[] expectedTokens, (int Index, int Length)[] expectedOffsets, int[] expectedIds)
Microsoft.ML.TorchSharp (17)
NasBert\NasBertTrainer.cs (3)
178public Tokenizer Tokenizer;
582private IList<int> PrepInputTokens(ref ReadOnlyMemory<char> sentence1, ref ReadOnlyMemory<char> sentence2, ref ValueGetter<ReadOnlyMemory<char>> getSentence1, ref ValueGetter<ReadOnlyMemory<char>> getSentence2, Tokenizer tokenizer)
612private protected void UpdateCacheIfNeeded(long position, TensorCacher outputCache, ref ReadOnlyMemory<char> sentence1, ref ReadOnlyMemory<char> sentence2, ref ValueGetter<ReadOnlyMemory<char>> getSentence1, ref ValueGetter<ReadOnlyMemory<char>> getSentence2, Tokenizer tokenizer)