6 types derived from Normalizer
Microsoft.ML.Tokenizers (4)
Normalizer\BertNormalizer.cs (1)
18: internal sealed class BertNormalizer : Normalizer
Normalizer\LowerCaseNormalizer.cs (1)
14: public sealed class LowerCaseNormalizer : Normalizer
Normalizer\SentencePieceNormalizer.cs (1)
16: public sealed class SentencePieceNormalizer : Normalizer
Normalizer\UpperCaseNormalizer.cs (1)
14: public sealed class UpperCaseNormalizer : Normalizer
Microsoft.ML.Tokenizers.Tests (2)
NormalizerTests.cs (2)
69: public class RemoveQuotesNormalizer : Normalizer
133: public class UnicodeNormalizer : Normalizer
44 references to Normalizer
Microsoft.ML.Tokenizers (42)
Model\BpeOptions.cs (1)
46: public Normalizer? Normalizer { get; set; }
Model\BPETokenizer.cs (6)
31: private readonly Normalizer? _normalizer;
115: Normalizer? normalizer = null,
225: Normalizer? normalizer = null,
261: Normalizer? normalizer = null,
297: Normalizer? normalizer,
423: public override Normalizer? Normalizer => _normalizer;
Model\CodeGenTokenizer.cs (5)
36: private readonly Normalizer? _normalizer;
59: Normalizer? normalizer = null,
91: Normalizer? normalizer = null,
103: private CodeGenTokenizer(Stream vocabularyStream, Stream mergeStream, PreTokenizer? preTokenizer, Normalizer? normalizer, IReadOnlyDictionary<string, int>? specialTokens, bool addPrefixSpace,
257: public override Normalizer? Normalizer => _normalizer;
Model\EnglishRobertaTokenizer.cs (7)
29: private readonly Normalizer? _normalizer;
68: Normalizer? normalizer = null,
105: Normalizer? normalizer = null,
118: internal EnglishRobertaTokenizer(string vocabularyPath, string mergePath, string highestOccurrenceMappingPath, PreTokenizer? preTokenizer = null, Normalizer? normalizer = null, bool filterUnsupportedChars = true) :
135: internal EnglishRobertaTokenizer(Stream vocabularyStream, Stream mergeStream, Stream highestOccurrenceMappingStream, PreTokenizer? preTokenizer = null, Normalizer? normalizer = null, bool filterUnsupportedChars = true) :
140: private EnglishRobertaTokenizer(Stream vocabularyStream, Stream mergeStream, Stream highestOccurrenceMappingStream, PreTokenizer? preTokenizer, Normalizer? normalizer, bool filterUnsupportedChars, bool disposeStream)
260: public override Normalizer? Normalizer => _normalizer;
Model\Phi2Tokenizer.cs (2)
38: Normalizer? normalizer = null,
69: Normalizer? normalizer = null,
Model\SentencePieceTokenizer.cs (1)
106: public override Normalizer? Normalizer => _model.Normalizer;
Model\TiktokenTokenizer.cs (15)
34: private readonly Normalizer? _normalizer;
46: internal TiktokenTokenizer(string vocabFilePath, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens = null, Normalizer? normalizer = null, int cacheSize = LruCache<int[]>.DefaultCacheSize) :
61: internal TiktokenTokenizer(Stream vocabStream, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens = null, Normalizer? normalizer = null, int cacheSize = LruCache<int[]>.DefaultCacheSize) :
82: Normalizer? normalizer = null,
102: private TiktokenTokenizer(Stream vocabStream, PreTokenizer? preTokenizer, IReadOnlyDictionary<string, int>? specialTokens, Normalizer? normalizer, int cacheSize, bool disposeStream)
132: public override Normalizer? Normalizer => _normalizer;
1222: Normalizer? normalizer = null)
1276: Normalizer? normalizer,
1296: Normalizer? normalizer,
1317: Normalizer? normalizer,
1349: Normalizer? normalizer,
1377: Normalizer? normalizer = null)
1416: Normalizer? normalizer = null,
1448: public static TiktokenTokenizer CreateForModel(string modelName, IReadOnlyDictionary<string, int>? extraSpecialTokens = null, Normalizer? normalizer = null)
1458: public static TiktokenTokenizer CreateForEncoding(string encodingName, IReadOnlyDictionary<string, int>? extraSpecialTokens = null, Normalizer? normalizer = null)
Model\WordPieceOptions.cs (1)
26: public Normalizer? Normalizer { get; set; }
Model\WordPieceTokenizer.cs (2)
28: private readonly Normalizer? _normalizer;
248: public override Normalizer? Normalizer => _normalizer;
Tokenizer.cs (2)
29: public virtual Normalizer? Normalizer => null;
432: Normalizer? normalizer,
Microsoft.ML.Tokenizers.Tests (2)
BpeTests.cs (1)
583: internal static BpeTokenizer CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
NormalizerTests.cs (1)
59: public void TestNormalizer(Normalizer normalizer, string text, string normalized)