1 type derived from SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\LlamaTokenizer.cs (1)
17: public sealed class LlamaTokenizer : SentencePieceTokenizer
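For context, a minimal sketch (the model path is a placeholder) of what this derivation means in practice: a LlamaTokenizer created through its own factory can be used anywhere the base SentencePieceTokenizer is expected, such as the CausalLMPipeline pattern-match cases listed below.

    using System.IO;
    using Microsoft.ML.Tokenizers;

    // Sketch: LlamaTokenizer derives from SentencePieceTokenizer, so an instance
    // created from a Llama model file satisfies the base type. Path is hypothetical.
    using Stream modelStream = File.OpenRead("tokenizer.model");
    SentencePieceTokenizer tokenizer = LlamaTokenizer.Create(modelStream);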
1 instantiation of SentencePieceTokenizer
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
458: return new SentencePieceTokenizer(modelProto, addBeginningOfSentence, addEndOfSentence, specialTokens);
75 references to SentencePieceTokenizer
Microsoft.ML.GenAI.Core (2)
Pipeline\CausalLMPipeline.cs (2)
275: SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
281: SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
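The two cases at lines 275 and 281 above appear to implement an incremental-decoding trick: SentencePiece strips the leading space when decoding a sequence from its start, so decoding the ids twice in a row preserves the space before the second copy, and the tail of the doubled decode recovers the true text. A sketch of the idea, with a hypothetical helper name (this is not the pipeline's actual code):

    using System;
    using System.Linq;
    using Microsoft.ML.Tokenizers;

    // Decode(ids) strips the leading space ("word"); Decode(ids + ids) keeps the
    // space before the second copy ("word word"), so the substring past the first
    // decode's length is the text with its leading space intact (" word").
    static string DecodeKeepingLeadingSpace(SentencePieceTokenizer tokenizer, int[] tokenIds)
    {
        string once = tokenizer.Decode(tokenIds, considerSpecialTokens: true)
            ?? throw new InvalidOperationException("Failed to decode token ids");
        string twice = tokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true)
            ?? throw new InvalidOperationException("Failed to decode token ids");
        return twice.Substring(once.Length);
    }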
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
445: public static SentencePieceTokenizer Create(
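The Create factory at line 445 is the public entry point; the constructor invoked at line 458 above is not public. A minimal usage sketch, assuming a local model file (path hypothetical) and the (stream, addBos, addEos) argument order the tests below use:

    using System.Collections.Generic;
    using System.IO;
    using Microsoft.ML.Tokenizers;

    // Load a serialized SentencePiece model; the file path is a placeholder.
    using Stream stream = File.OpenRead("tokenizer.model");
    // Positional bool arguments, same shape as Create(ms, addBos, addEos) below:
    // add a beginning-of-sentence token, skip the end-of-sentence token.
    SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream, true, false);

    IReadOnlyList<int> ids = tokenizer.EncodeToIds("Hello world");
    string? text = tokenizer.Decode(ids, considerSpecialTokens: false);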
Microsoft.ML.Tokenizers.Tests (72)
LlamaTests.cs (5)
69: PropertyInfo? propertyInfo = typeof(SentencePieceTokenizer).GetProperty("TreatWhitespaceAsSuffix", BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public);
511: SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
565: SentencePieceTokenizer sentencePieceBpe = (tokenizer as SentencePieceTokenizer)!;
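The reference at line 69 reads a non-public property through reflection; a sketch of that pattern, assuming tokenizer holds a SentencePieceTokenizer created elsewhere in the test:

    using System.Reflection;
    using Microsoft.ML.Tokenizers;

    // TreatWhitespaceAsSuffix has no public accessor, so the test reaches it via
    // reflection. 'tokenizer' is assumed to be created earlier in the test.
    PropertyInfo? propertyInfo = typeof(SentencePieceTokenizer).GetProperty(
        "TreatWhitespaceAsSuffix",
        BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public);
    bool treatWhitespaceAsSuffix = (bool)(propertyInfo?.GetValue(tokenizer) ?? false);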
SentencePieceTests.cs (57)
20: Assert.ThrowsAny<ArgumentException>(() => SentencePieceTokenizer.Create(null!));
27: Assert.ThrowsAny<ArgumentException>(() => SentencePieceTokenizer.Create(empty));
36: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
45: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream);
59: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(ms);
72: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream);
94: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream);
109: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream,
143: SentencePieceTokenizer bpe = SentencePieceTokenizer.Create(bpeStream,
148: SentencePieceTokenizer unigram = SentencePieceTokenizer.Create(unigramStream,
175: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
185: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
197: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
231: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
253: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
268: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
286: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
307: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
326: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
362: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
379: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
393: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
411: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
427: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
441: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
453: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
478: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
495: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(stream);
505: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
518: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
535: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
551: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
563: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
574: Assert.ThrowsAny<Exception>(() => SentencePieceTokenizer.Create(ms));
587: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
602: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
617: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
639: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
656: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
671: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
684: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
707: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
730: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model.ToArray());
749: SentencePieceTokenizer tokenizer = CreateFromSyntheticModel(model);
768: Assert.Throws<ArgumentException>(() => SentencePieceTokenizer.Create(ms));
788: SentencePieceTokenizer tokenizer = SentencePieceTokenizer.Create(ms);
796: private static SentencePieceTokenizer CreateFromSyntheticModel(
804: return SentencePieceTokenizer.Create(ms, addBos, addEos);
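Most of the tests above funnel through the CreateFromSyntheticModel helper declared at line 796. A sketch of its likely shape, assuming the parameter holds the serialized model bytes (the model/model.ToArray() call sites above suggest this):

    using System.IO;
    using Microsoft.ML.Tokenizers;

    // Sketch matching lines 796-804: wrap serialized SentencePiece model bytes in a
    // MemoryStream and defer to the public factory. How the synthetic model bytes
    // are built is not shown in this listing; the default flag values are assumed.
    private static SentencePieceTokenizer CreateFromSyntheticModel(
        byte[] model, bool addBos = true, bool addEos = false)
    {
        using var ms = new MemoryStream(model);
        return SentencePieceTokenizer.Create(ms, addBos, addEos);
    }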
TokenizerTests.cs (2)
155: if (tokenizer is SentencePieceTokenizer)
173: if (tokenizer is SentencePieceTokenizer)
UnigramTests.cs (8)
20: private static SentencePieceTokenizer _unigramTokenizer = CreateUnigramTokenizer();
21: private static SentencePieceTokenizer _unigramTokenizerWithSpecialTokens = CreateUnigramTokenizerWithSpecialTokens();
23: private static SentencePieceTokenizer CreateUnigramTokenizer()
27: return SentencePieceTokenizer.Create(remoteStream);
55: private static SentencePieceTokenizer CreateUnigramTokenizerWithSpecialTokens()
59: return SentencePieceTokenizer.Create(remoteStream, specialTokens:
283: SentencePieceTokenizer tokenizer,
535: private static void DecodeWithTokenizerTest(SentencePieceTokenizer tokenizer, string decodedString, int[] ids)
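The DecodeWithTokenizerTest helper at line 535 takes the tokenizer, the expected text, and the ids; its body is not shown in this listing, but a plausible minimal version is:

    using Microsoft.ML.Tokenizers;
    using Xunit;

    // Plausible body (assumption; the listing only shows the signature at line 535):
    // decode the ids and compare against the expected string.
    private static void DecodeWithTokenizerTest(
        SentencePieceTokenizer tokenizer, string decodedString, int[] ids)
    {
        Assert.Equal(decodedString, tokenizer.Decode(ids));
    }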