1 type derived from SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\LlamaTokenizer.cs (1)
17: public sealed class LlamaTokenizer : SentencePieceTokenizer
2 instantiations of SentencePieceTokenizer
Microsoft.ML.Tokenizers (2)
Model\SentencePieceTokenizer.cs (2)
468: return new SentencePieceTokenizer(modelProto, addBeginOfSentence, addEndOfSentence, specialTokens);
482: return new SentencePieceTokenizer(options);
22 references to SentencePieceTokenizer
Microsoft.ML.GenAI.Core (2)
Pipeline\CausalLMPipeline.cs (2)
275: SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
281: SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
Microsoft.ML.Tokenizers (2)
Model\SentencePieceTokenizer.cs (2)
455: public static SentencePieceTokenizer Create(
475: public static SentencePieceTokenizer Create(SentencePieceOptions options)
Microsoft.ML.Tokenizers.Tests (18)
LlamaTests.cs (5)
69: PropertyInfo? propertyInfo = typeof(SentencePieceTokenizer).GetProperty("TreatWhitespaceAsSuffix", BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public);
511: SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
565: SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
TokenizerTests.cs (2)
155: if (tokenizer is SentencePieceTokenizer)
173: if (tokenizer is SentencePieceTokenizer)
UnigramTests.cs (11)
20: private static SentencePieceTokenizer _unigramTokenizer = CreateUnigramTokenizer();
21: private static SentencePieceTokenizer _unigramTokenizerWithSpecialTokens = CreateUnigramTokenizerWithSpecialTokens();
22: private static SentencePieceTokenizer _unigramTokenizerFromJson = CreateUnigramTokenizerFromJson();
24: private static SentencePieceTokenizer CreateUnigramTokenizer()
28: return SentencePieceTokenizer.Create(remoteStream);
31: private static SentencePieceTokenizer CreateUnigramTokenizerFromJson()
69: return SentencePieceTokenizer.Create(options);
97: private static SentencePieceTokenizer CreateUnigramTokenizerWithSpecialTokens()
101: return SentencePieceTokenizer.Create(remoteStream, specialTokens:
325: SentencePieceTokenizer tokenizer,
780: private static void DecodeWithTokenizerTest(SentencePieceTokenizer tokenizer, string decodedString, int[] ids)