6 overrides of PreTokenizer
Microsoft.ML.Tokenizers (6)
Model\BPETokenizer.cs (1)
300: public override PreTokenizer? PreTokenizer => _preTokenizer;
Model\CodeGenTokenizer.cs (1)
249: public override PreTokenizer? PreTokenizer => _preTokenizer;
Model\EnglishRobertaTokenizer.cs (1)
255: public override PreTokenizer? PreTokenizer => _preTokenizer;
Model\SentencePieceTokenizer.cs (1)
159: public override PreTokenizer? PreTokenizer => null;
Model\TiktokenTokenizer.cs (1)
127: public override PreTokenizer? PreTokenizer => _preTokenizer;
Model\WordPieceTokenizer.cs (1)
243: public override PreTokenizer? PreTokenizer => _preTokenizer;
11 references to PreTokenizer
Microsoft.ML.Tokenizers.Tests (10)
EnglishRobertaTests.cs (1)
239: Assert.True(tokenizer.PreTokenizer is RobertaPreTokenizer);
LlamaTests.cs (1)
498: Assert.Null(tokenizer.PreTokenizer);
TiktokenTests.cs (8)
59: Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.PreTokenizer, null, specialTokens);
64: tokenizer = TiktokenTokenizer.Create(stream, GPT4.PreTokenizer, null, specialTokens);
68: tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.PreTokenizer, normalizer: null, specialTokens);
73: tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.PreTokenizer, normalizer: null, specialTokens);
117: TiktokenTokenizer externalTokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, tokenizer.PreTokenizer, null, tiktoken.SpecialTokens);
446: Assert.NotNull(tokenizer.PreTokenizer);
459: Assert.NotNull(tokenizer.PreTokenizer);
513: Assert.NotNull(tokenizer.PreTokenizer);
Microsoft.ML.TorchSharp (1)
NasBert\NerTrainer.cs (1)
379: var pre = tokenizer.PreTokenizer.PreTokenize(sentence);