12 writes to ConsiderNormalization
Microsoft.ML.Tokenizers (12)
Tokenizer.cs (12)
68
=> EncodeToIds(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization }).Tokens;
78
=> EncodeToIds(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization }).Tokens;
96
ConsiderNormalization
= considerNormalization,
122
ConsiderNormalization
= considerNormalization,
150
EncodeResults<EncodedToken> result = EncodeToTokens(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization });
166
EncodeResults<EncodedToken> result = EncodeToTokens(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization });
194
=> CountTokens(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization });
204
=> CountTokens(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization });
279
new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization, MaxTokenCount = maxTokenCount },
302
new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization, MaxTokenCount = maxTokenCount },
325
new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization, MaxTokenCount = maxTokenCount },
348
new EncodeSettings { ConsiderPreTokenization = considerPreTokenization,
ConsiderNormalization
= considerNormalization, MaxTokenCount = maxTokenCount },
38 references to ConsiderNormalization
Microsoft.ML.Tokenizers (38)
Model\BPETokenizer.cs (6)
324
settings.
ConsiderNormalization
,
373
settings.
ConsiderNormalization
,
429
settings.
ConsiderNormalization
,
468
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
481
return LastIndexOf(text, textSpan, settings.MaxTokenCount, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out tokenCount);
484
tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\CodeGenTokenizer.cs (6)
289
=> EncodeToTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
);
495
Tokens = EncodeToIds(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
,
675
=> CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out _, out _, settings.MaxTokenCount);
710
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
724
settings.
ConsiderNormalization
, out normalizedText, out tokenCount);
727
tokenCount = CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\EnglishRobertaTokenizer.cs (6)
325
settings.
ConsiderNormalization
,
408
=> EncodeToIds(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, settings.MaxTokenCount);
466
=> CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out _, out _, settings.MaxTokenCount);
475
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
488
return LastIndexOf(text, textSpan, settings.MaxTokenCount, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out tokenCount);
491
tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\SentencePieceTokenizer.cs (6)
133
Tokens = _model.EncodeToTokens(text, textSpan, out string? normalizedText, AddBeginningOfSentence, AddEndOfSentence, settings.
ConsiderNormalization
),
177
Tokens = _model.EncodeToIds(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.
ConsiderNormalization
, out string? normalizedText, out int charsConsumed, settings.MaxTokenCount),
245
=> _model.CountTokens(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.
ConsiderNormalization
, out _, out _, settings.MaxTokenCount);
308
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
321
return _model.GetIndexByTokenCountFromEnd(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.MaxTokenCount, settings.
ConsiderNormalization
, out normalizedText, out tokenCount);
324
tokenCount = _model.CountTokens(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.
ConsiderNormalization
, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\TiktokenTokenizer.cs (7)
269
settings.
ConsiderNormalization
,
379
settings.
ConsiderNormalization
,
529
=> CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out _, out _, settings.MaxTokenCount);
637
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
650
return LastIndexOf(text, textSpan, settings.MaxTokenCount, settings.
ConsiderNormalization
, settings.
ConsiderNormalization
, out normalizedText, out tokenCount);
653
tokenCount = CountTokens(text, textSpan, settings.ConsiderPreTokenization, settings.
ConsiderNormalization
, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
Model\WordPieceTokenizer.cs (6)
283
settings.
ConsiderNormalization
,
406
settings.
ConsiderNormalization
,
558
settings.
ConsiderNormalization
,
593
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>
619
settings.
ConsiderNormalization
,
620
settings.
ConsiderNormalization
,
Tokenizer.cs (1)
213
/// <param name="normalizedText">If the tokenizer's normalization is enabled or <paramRef name="settings" /> has <see cref="EncodeSettings.
ConsiderNormalization
"/> is <see langword="false"/>, this will be set to <paramRef name="text" /> in its normalized form; otherwise, this value will be set to <see langword="null"/>.</param>