1 type derived from SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\LlamaTokenizer.cs (1)
17public sealed class LlamaTokenizer : SentencePieceTokenizer
1 instantiation of SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
458return new SentencePieceTokenizer(modelProto, addBeginningOfSentence, addEndOfSentence, specialTokens);
75 references to SentencePieceTokenizer
Microsoft.ML.GenAI.Core (2)
Pipeline\CausalLMPipeline.cs (2)
275SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"), 281SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
445public static SentencePieceTokenizer Create(
Microsoft.ML.Tokenizers.Tests (72)
LlamaTests.cs (5)
69PropertyInfo? propertyInfo = typeof(SentencePieceTokenizer).GetProperty("TreatWhitespaceAsSuffix", BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public); 511SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!; 565SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
SentencePieceTests.cs (57)
20Assert.ThrowsAny<ArgumentException>(() => SentencePieceTokenizer.Create(null!)); 27Assert.ThrowsAny<ArgumentException>(() => SentencePieceTokenizer.Create(empty)); 36Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 45SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream); 59SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(ms); 72SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream); 94SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream); 109SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream, 143SentencePieceTokenizer bpe = SentencePieceTokenizer.Create(bpeStream, 148SentencePieceTokenizer unigram = SentencePieceTokenizer.Create(unigramStream, 175Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 185Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 197SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 231SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 253SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 268SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 286SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 307SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 326SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 362SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 379SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 393SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 411SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 427SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 441SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 453SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 478SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 495SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream); 505Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 518Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 535Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 551Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 563Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 574Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms)); 587SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 602SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 617SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 639SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 656SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 671SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 684SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 707SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 730SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray()); 749SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model); 768Assert.Throws<ArgumentException>(() => SentencePieceTokenizer.Create(ms)); 788SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(ms); 796private static SentencePieceTokenizer CreateFromSyntheticModel( 804return SentencePieceTokenizer.Create(ms, addBos, addEos);
TokenizerTests.cs (2)
155if (tokenizer is SentencePieceTokenizer) 173if (tokenizer is SentencePieceTokenizer)
UnigramTests.cs (8)
20private static SentencePieceTokenizer _unigramTokenizer = CreateUnigramTokenizer(); 21private static SentencePieceTokenizer _unigramTokenizerWithSpecialTokens = CreateUnigramTokenizerWithSpecialTokens(); 23private static SentencePieceTokenizer CreateUnigramTokenizer() 27return SentencePieceTokenizer.Create(remoteStream); 55private static SentencePieceTokenizer CreateUnigramTokenizerWithSpecialTokens() 59return SentencePieceTokenizer.Create(remoteStream, specialTokens: 283SentencePieceTokenizer tokenizer, 535private static void DecodeWithTokenizerTest(SentencePieceTokenizer tokenizer, string decodedString, int[] ids)