10 references to CreateWordOrNonWord
Microsoft.ML.Tokenizers (3)
Model\BPETokenizer.cs (3)
94=> Create(vocabFile, mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: null, continuingSubwordPrefix: null, endOfWordSuffix: null, fuseUnknownTokens: false);
204=> Create(vocabStream, mergesStream, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, specialTokens: null, unknownToken: null, continuingSubwordPrefix: null, endOfWordSuffix: null, fuseUnknownTokens: false);
311_preTokenizer = preTokenizer ?? PreTokenizer.CreateWordOrNonWord(); // Default to WordOrNonWord pre-tokenizer
Microsoft.ML.Tokenizers.Tests (7)
BpeTests.cs (4)
256BpeTokenizer bpe = BpeTokenizer.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken,
273PreTokenizer = PreTokenizer.CreateWordOrNonWord(),
536var bpeTokenizer = BpeTokenizer.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>");
592vocabStream: emptyVocabStream, mergesStream: null, preTokenizer: preTokenizer ?? PreTokenizer.CreateWordOrNonWord(), normalizer: normalizer, unknownToken: "Ukn");