3 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (3)
Model\BPETokenizer.cs (3)
131return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens); 178return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens); 214return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
19 references to BpeTokenizer
Microsoft.ML.Tokenizers (7)
Model\BPETokenizer.cs (5)
92public static BpeTokenizer Create(string vocabFile, string? mergesFile) 110public static BpeTokenizer Create( 142public static BpeTokenizer Create(Stream vocabStream, Stream? mergesStream) 160public static BpeTokenizer Create( 196public static async Task<BpeTokenizer> CreateAsync(
Model\Cache.cs (1)
16internal Cache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81internal StringSpanOrdinalKeyCache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (12)
BpeTests.cs (12)
254BpeTokenizer bpe = BpeTokenizer.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken, 294private void TestDecodingWithSpan(BpeTokenizer bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded) 334_gpt2Tokenizer = BpeTokenizer.Create(vocabStream, mergesStream); 348BpeTokenizer bpe = BpeTokenizer.Create(vocabFile, mergesFile); 354bpe = BpeTokenizer.Create(vocabStream, mergesStream); 361bpe = await BpeTokenizer.CreateAsync(vocabStream, mergesStream); 503var bpeTokenizer = BpeTokenizer.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>"); 550internal static BpeTokenizer CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null) 558return BpeTokenizer.Create(