13 instantiations of SentencePieceNormalizer
Microsoft.ML.Tokenizers (2)
Model\LlamaTokenizer.cs (1)
54
SentencePieceNormalizer normalizer =
new
(
Model\SentencePieceBpeTokenizer.cs (1)
76
_normalizer = new
SentencePieceNormalizer
(modelProto.NormalizerSpec.RemoveExtraWhitespaces, AddDummyPrefix, EscapeWhiteSpaces, modelProto.TrainerSpec.TreatWhitespaceAsSuffix, specialTokens);
Microsoft.ML.Tokenizers.Tests (11)
LlamaTests.cs (11)
397
SentencePieceNormalizer normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: false, escapeWhiteSpaces: false, treatWhitespaceAsSuffix: false, specialTokens: null);
401
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: true, addDummyPrefix: false, escapeWhiteSpaces: false, treatWhitespaceAsSuffix: false, specialTokens: null);
405
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: true, addDummyPrefix: true, escapeWhiteSpaces: false, treatWhitespaceAsSuffix: false, specialTokens: null);
409
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: true, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: false, specialTokens: null);
413
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: false, specialTokens: null);
417
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: true, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: true, specialTokens: null);
421
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: true, addDummyPrefix: false, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: true, specialTokens: null);
425
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: true, specialTokens: null);
429
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: true, escapeWhiteSpaces: false, treatWhitespaceAsSuffix: true, specialTokens: null);
433
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: false, specialTokens: (_llamaPhi3Tokenizer as LlamaTokenizer)!.SpecialTokens);
441
normalizer = new
SentencePieceNormalizer
(removeExtraWhiteSpaces: false, addDummyPrefix: true, escapeWhiteSpaces: true, treatWhitespaceAsSuffix: true, specialTokens: (_llamaPhi3Tokenizer as LlamaTokenizer)!.SpecialTokens);
7 references to SentencePieceNormalizer
Microsoft.ML.Tokenizers (5)
Model\LlamaTokenizer.cs (1)
54
SentencePieceNormalizer
normalizer = new(
Model\SentencePieceBpeTokenizer.cs (4)
1557
char prefixSuffixChar = EscapeWhiteSpaces ?
SentencePieceNormalizer
.DummyPrefix : ' ';
1657
Debug.Assert(sb[suffixIndex] ==
SentencePieceNormalizer
.DummyPrefix);
1673
return EscapeWhiteSpaces ? sb.ToString(
SentencePieceNormalizer
.DummyPrefix, ' ') : sb.ToString();
1806
char prefixSuffixChar = EscapeWhiteSpaces ?
SentencePieceNormalizer
.DummyPrefix : ' ';
Microsoft.ML.Tokenizers.Tests (2)
LlamaTests.cs (2)
75
propertyInfo = typeof(
SentencePieceNormalizer
).GetProperty("TreatWhitespaceAsSuffix", BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public);
397
SentencePieceNormalizer
normalizer = new SentencePieceNormalizer(removeExtraWhiteSpaces: false, addDummyPrefix: false, escapeWhiteSpaces: false, treatWhitespaceAsSuffix: false, specialTokens: null);