1 type derived from SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\LlamaTokenizer.cs (1)
17public sealed class LlamaTokenizer : SentencePieceTokenizer
1 instantiation of SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
458return new SentencePieceTokenizer(modelProto, addBeginOfSentence, addEndOfSentence, specialTokens);
18 references to SentencePieceTokenizer
Microsoft.ML.GenAI.Core (2)
Pipeline\CausalLMPipeline.cs (2)
275SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"), 281SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
445public static SentencePieceTokenizer Create(
Microsoft.ML.Tokenizers.Tests (15)
LlamaTests.cs (5)
69PropertyInfo? propertyInfo = typeof(SentencePieceTokenizer).GetProperty("TreatWhitespaceAsSuffix", BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public); 511SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!; 565SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
TokenizerTests.cs (2)
155if (tokenizer is SentencePieceTokenizer) 173if (tokenizer is SentencePieceTokenizer)
UnigramTests.cs (8)
20private static SentencePieceTokenizer _unigramTokenizer = CreateUnigramTokenizer(); 21private static SentencePieceTokenizer _unigramTokenizerWithSpecialTokens = CreateUnigramTokenizerWithSpecialTokens(); 23private static SentencePieceTokenizer CreateUnigramTokenizer() 27return SentencePieceTokenizer.Create(remoteStream); 55private static SentencePieceTokenizer CreateUnigramTokenizerWithSpecialTokens() 59return SentencePieceTokenizer.Create(remoteStream, specialTokens: 283SentencePieceTokenizer tokenizer, 535private static void DecodeWithTokenizerTest(SentencePieceTokenizer tokenizer, string decodedString, int[] ids)