6 types derived from Normalizer
Microsoft.ML.Tokenizers (4)
Normalizer\BertNormalizer.cs (1)
18: internal sealed class BertNormalizer : Normalizer
Normalizer\LowerCaseNormalizer.cs (1)
14: public sealed class LowerCaseNormalizer : Normalizer
Normalizer\SentencePieceNormalizer.cs (1)
15: public sealed class SentencePieceNormalizer : Normalizer
Normalizer\UpperCaseNormalizer.cs (1)
14: public sealed class UpperCaseNormalizer : Normalizer
Microsoft.ML.Tokenizers.Tests (2)
NormalizerTests.cs (2)
69: public class RemoveQuotesNormalizer : Normalizer
133: public class UnicodeNormalizer : Normalizer
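The four library normalizers above (Bert, LowerCase, SentencePiece, UpperCase) and the two test-only ones (RemoveQuotes, Unicode) all derive directly from the abstract Normalizer, so callers can plug in their own normalization. Below is a minimal sketch of such a subclass, modeled on the test-only RemoveQuotesNormalizer; it assumes the abstract surface is a pair of Normalize overloads (string and ReadOnlySpan<char>) returning the normalized text, which may differ between package versions.

using System;
using Microsoft.ML.Tokenizers;

// Hypothetical user-defined normalizer in the spirit of the test-only RemoveQuotesNormalizer
// listed above. Assumption: Normalizer exposes abstract Normalize(string) and
// Normalize(ReadOnlySpan<char>) overloads that return the normalized string; adjust the
// overrides to match the package version you build against.
public sealed class StripQuotesNormalizer : Normalizer
{
    public override string Normalize(string original) =>
        original.Replace("\"", string.Empty);

    public override string Normalize(ReadOnlySpan<char> original) =>
        Normalize(original.ToString());
}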
44 references to Normalizer
Microsoft.ML.Tokenizers (42)
Model\BPETokenizer.cs (6)
31: private readonly Normalizer? _normalizer;
114: Normalizer? normalizer = null,
164: Normalizer? normalizer = null,
200: Normalizer? normalizer = null,
233: Normalizer? normalizer,
305: public override Normalizer? Normalizer => _normalizer;
Model\CodeGenTokenizer.cs (5)
33: private readonly Normalizer? _normalizer;
56: Normalizer? normalizer = null,
88: Normalizer? normalizer = null,
100: private CodeGenTokenizer(Stream vocabularyStream, Stream mergeStream, PreTokenizer? preTokenizer, Normalizer? normalizer, IReadOnlyDictionary<string, int>? specialTokens, bool addPrefixSpace,
254: public override Normalizer? Normalizer => _normalizer;
Model\EnglishRobertaTokenizer.cs (7)
29: private readonly Normalizer? _normalizer;
68: Normalizer? normalizer = null,
105: Normalizer? normalizer = null,
118: internal EnglishRobertaTokenizer(string vocabularyPath, string mergePath, string highestOccurrenceMappingPath, PreTokenizer? preTokenizer = null, Normalizer? normalizer = null, bool filterUnsupportedChars = true) :
135: internal EnglishRobertaTokenizer(Stream vocabularyStream, Stream mergeStream, Stream highestOccurrenceMappingStream, PreTokenizer? preTokenizer = null, Normalizer? normalizer = null, bool filterUnsupportedChars = true) :
140: private EnglishRobertaTokenizer(Stream vocabularyStream, Stream mergeStream, Stream highestOccurrenceMappingStream, PreTokenizer? preTokenizer, Normalizer? normalizer, bool filterUnsupportedChars, bool disposeStream)
260: public override Normalizer? Normalizer => _normalizer;
Model\Phi2Tokenizer.cs (2)
38: Normalizer? normalizer = null,
69: Normalizer? normalizer = null,
Model\SentencePieceTokenizer.cs (2)
34: private readonly Normalizer? _normalizer;
164: public override Normalizer? Normalizer => _normalizer;
Model\TiktokenTokenizer.cs (15)
34: private readonly Normalizer? _normalizer;
46: internal TiktokenTokenizer(string vocabFilePath, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens = null, Normalizer? normalizer = null, int cacheSize = LruCache<int[]>.DefaultCacheSize) :
61: internal TiktokenTokenizer(Stream vocabStream, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens = null, Normalizer? normalizer = null, int cacheSize = LruCache<int[]>.DefaultCacheSize) :
82: Normalizer? normalizer = null,
102: private TiktokenTokenizer(Stream vocabStream, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens, Normalizer? normalizer, int cacheSize, bool disposeStream)
132: public override Normalizer? Normalizer => _normalizer;
1207: Normalizer? normalizer = null)
1261: Normalizer? normalizer,
1281: Normalizer? normalizer,
1302: Normalizer? normalizer,
1334: Normalizer? normalizer,
1362: Normalizer? normalizer = null)
1401: Normalizer? normalizer = null,
1433: public static TiktokenTokenizer CreateForModel(string modelName, IReadOnlyDictionary<string, int>? extraSpecialTokens = null, Normalizer? normalizer = null)
1443: public static TiktokenTokenizer CreateForEncoding(string encodingName, IReadOnlyDictionary<string, int>? extraSpecialTokens = null, Normalizer? normalizer = null)
Model\WordPieceOptions.cs (1)
26: public Normalizer? Normalizer { get; set; }
Model\WordPieceTokenizer.cs (2)
28: private readonly Normalizer? _normalizer;
248: public override Normalizer? Normalizer => _normalizer;
Tokenizer.cs (2)
29: public virtual Normalizer? Normalizer => null;
432: Normalizer? normalizer,
Microsoft.ML.Tokenizers.Tests (2)
BpeTests.cs (1)
550: internal static BpeTokenizer CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
NormalizerTests.cs (1)
59: public void TestNormalizer(Normalizer normalizer, string text, string normalized)
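Taken together, the references show one pattern: each concrete tokenizer stores an optional normalizer in a private _normalizer field, accepts it as an optional constructor or factory parameter, and overrides the virtual Tokenizer.Normalizer property (Tokenizer.cs line 29, default null) to expose it. The sketch below wires a built-in normalizer through the CreateForModel overload listed at TiktokenTokenizer.cs line 1433; the parameterless LowerCaseNormalizer constructor, the "gpt-4" model name, and the EncodeToIds call are assumptions about the public surface rather than facts taken from this listing.

using System;
using System.Collections.Generic;
using Microsoft.ML.Tokenizers;

// Usage sketch only: passes a built-in normalizer through the optional `normalizer`
// parameter of CreateForModel (TiktokenTokenizer.cs, line 1433 above). Assumptions:
// LowerCaseNormalizer has a parameterless constructor, "gpt-4" is a model name the
// factory recognizes, and the base Tokenizer exposes EncodeToIds(string); verify these
// against the package version you target.
internal static class NormalizerUsageSketch
{
    internal static void Run()
    {
        TiktokenTokenizer tokenizer = TiktokenTokenizer.CreateForModel(
            "gpt-4",
            extraSpecialTokens: null,
            normalizer: new LowerCaseNormalizer());

        // The concrete tokenizer surfaces the normalizer it was created with via the
        // virtual Tokenizer.Normalizer property (null when no normalizer was supplied).
        Normalizer? normalizer = tokenizer.Normalizer;

        IReadOnlyList<int> ids = tokenizer.EncodeToIds("Hello, World!");
        Console.WriteLine($"{ids.Count} tokens, normalizer: {normalizer?.GetType().Name ?? "none"}");
    }
}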