5 overrides of PreTokenizer
Microsoft.ML.Tokenizers (5)
Model\BPETokenizer.cs (1)
255
public override PreTokenizer?
PreTokenizer
=> _preTokenizer;
Model\CodeGenTokenizer.cs (1)
249
public override PreTokenizer?
PreTokenizer
=> _preTokenizer;
Model\EnglishRobertaTokenizer.cs (1)
244
public override PreTokenizer?
PreTokenizer
=> _preTokenizer;
Model\SentencePieceBpeTokenizer.cs (1)
158
public override PreTokenizer?
PreTokenizer
=> null;
Model\TiktokenTokenizer.cs (1)
126
public override PreTokenizer?
PreTokenizer
=> _preTokenizer;
11 references to PreTokenizer
Microsoft.ML.Tokenizers.Tests (10)
EnglishRobertaTests.cs (1)
239
Assert.True(tokenizer.
PreTokenizer
is RobertaPreTokenizer);
LlamaTests.cs (1)
498
Assert.Null(tokenizer.
PreTokenizer
);
TitokenTests.cs (8)
58
Tokenizer tokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, GPT4.
PreTokenizer
, null, specialTokensEncoder);
63
tokenizer = TiktokenTokenizer.Create(stream, GPT4.
PreTokenizer
, null, specialTokensEncoder);
67
tokenizer = await TiktokenTokenizer.CreateAsync(tokenizerDataFileName, GPT4.
PreTokenizer
, normalizer: null, specialTokensEncoder);
72
tokenizer = await TiktokenTokenizer.CreateAsync(stream, GPT4.
PreTokenizer
, normalizer: null, specialTokensEncoder);
116
TiktokenTokenizer externalTokenizer = TiktokenTokenizer.Create(tokenizerDataFileName, tokenizer.
PreTokenizer
, null, tiktoken.SpecialTokens);
443
Assert.NotNull(tokenizer.
PreTokenizer
);
456
Assert.NotNull(tokenizer.
PreTokenizer
);
509
Assert.NotNull(tokenizer.
PreTokenizer
);
Microsoft.ML.TorchSharp (1)
NasBert\NerTrainer.cs (1)
379
var pre = tokenizer.
PreTokenizer
.PreTokenize(sentence);