12 writes to ConsiderPreTokenization
Microsoft.ML.Tokenizers (12)
Tokenizer.cs (12)
68:  => EncodeToIds(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization }).Tokens;
78:  => EncodeToIds(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization }).Tokens;
95:  ConsiderPreTokenization = considerPreTokenization,
121: ConsiderPreTokenization = considerPreTokenization,
150: EncodeResults<EncodedToken> result = EncodeToTokens(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization });
166: EncodeResults<EncodedToken> result = EncodeToTokens(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization });
194: => CountTokens(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization });
204: => CountTokens(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization });
279: new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization, MaxTokenCount = maxTokenCount },
302: new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization, MaxTokenCount = maxTokenCount },
325: new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization, MaxTokenCount = maxTokenCount },
348: new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization, MaxTokenCount = maxTokenCount },
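All 12 writes sit in the public Tokenizer.cs wrappers, which fold the caller's considerPreTokenization and considerNormalization booleans (plus maxTokenCount for the index-lookup overloads) into an EncodeSettings before dispatching. A minimal caller-side sketch in C#: the TiktokenTokenizer.CreateForModel call and the "gpt-4" model name are illustrative assumptions, and any concrete Tokenizer subclass behaves the same way through these wrappers.

    using System;
    using System.Collections.Generic;
    using Microsoft.ML.Tokenizers;

    // Assumption: a tiktoken-style tokenizer; any Tokenizer works here.
    Tokenizer tokenizer = TiktokenTokenizer.CreateForModel("gpt-4");

    // Each bool parameter is wrapped into EncodeSettings.ConsiderPreTokenization
    // (lines 68 and 194 above) before the core encoder runs.
    IReadOnlyList<int> ids = tokenizer.EncodeToIds("Hello, world!", considerPreTokenization: false);
    int count = tokenizer.CountTokens("Hello, world!", considerPreTokenization: true);
    Console.WriteLine($"{ids.Count} ids when unsplit, {count} tokens when pre-tokenized");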
25 references to ConsiderPreTokenization
Microsoft.ML.Tokenizers (25)
Model\BPETokenizer.cs (5)
323: settings.ConsiderPreTokenization,
372: settings.ConsiderPreTokenization,
428: settings.ConsiderPreTokenization,
481: return LastIndexOf(text, textSpan, settings.MaxTokenCount, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out tokenCount);
484: tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
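Every model override follows the shape of the BPETokenizer hits above, and the same pattern repeats in each file group below: read the flags off EncodeSettings once and forward them to a bool-based worker. A hedged sketch of that unpack-and-forward pattern from the caller's side; EncodeSettingsDemo and CountWithSettings are hypothetical names, not library API.

    using Microsoft.ML.Tokenizers;

    static class EncodeSettingsDemo
    {
        // Hypothetical helper mirroring the forwarding pattern: pull
        // ConsiderPreTokenization/ConsiderNormalization off the settings and
        // hand them to the public bool-based CountTokens overload (line 194 above).
        public static int CountWithSettings(Tokenizer tokenizer, string text, EncodeSettings settings)
            => tokenizer.CountTokens(text, settings.ConsiderPreTokenization, settings.ConsiderNormalization);
    }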
Model\CodeGenTokenizer.cs (5)
286: => EncodeToTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization);
492: Tokens = EncodeToIds(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization,
672: => CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount);
720: return LastIndexOf(text, textSpan, settings.MaxTokenCount, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization,
724: tokenCount = CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\EnglishRobertaTokenizer.cs (5)
324: settings.ConsiderPreTokenization,
408: => EncodeToIds(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, settings.MaxTokenCount);
466: => CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount);
488: return LastIndexOf(text, textSpan, settings.MaxTokenCount, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out tokenCount);
491: tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\SentencePieceTokenizer.cs (3)
200: Tokens = EncodeToTokens(text, textSpan, out string? normalizedText, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization),
839: return CountTokens(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount);
1167: tokenCount = CountTokens(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\TiktokenTokenizer.cs (4)
268: settings.ConsiderPreTokenization,
378: settings.ConsiderPreTokenization,
529: => CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount);
653: tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\WordPieceTokenizer.cs (3)
282: settings.ConsiderPreTokenization,
405: settings.ConsiderPreTokenization,
557: settings.ConsiderPreTokenization,
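Since every model above honors the same flag, a caller can compare encodings with and without pre-tokenization through any of these tokenizers. A usage sketch under the assumption that the public EncodeToTokens overload carries an out normalizedText parameter (discarded here); PreTokenizationCompare and Compare are hypothetical names.

    using System;
    using System.Collections.Generic;
    using Microsoft.ML.Tokenizers;

    static class PreTokenizationCompare
    {
        // Encodes the same text twice: once with the pre-tokenizer splitting
        // the input into chunks first, once feeding the model the whole string.
        public static void Compare(Tokenizer tokenizer, string text)
        {
            IReadOnlyList<EncodedToken> split = tokenizer.EncodeToTokens(text, out _, considerPreTokenization: true);
            IReadOnlyList<EncodedToken> whole = tokenizer.EncodeToTokens(text, out _, considerPreTokenization: false);
            Console.WriteLine($"pre-tokenized: {split.Count} tokens, unsplit: {whole.Count} tokens");
        }
    }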