4 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (4)
Model\BPETokenizer.cs (4)
132: return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
186: return new BpeTokenizer(
244: return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
280: return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
32 references to BpeTokenizer
Microsoft.ML.Tokenizers (8)
Model\BPETokenizer.cs (6)
93: public static BpeTokenizer Create(string vocabFile, string? mergesFile)
111: public static BpeTokenizer Create(
140: public static BpeTokenizer Create(BpeOptions options)
208: public static BpeTokenizer Create(Stream vocabStream, Stream? mergesStream)
226: public static BpeTokenizer Create(
262: public static async Task<BpeTokenizer> CreateAsync(
Model\Cache.cs (1)
16: internal Cache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81: internal StringSpanOrdinalKeyCache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (24)
BpeTests.cs (24)
256: BpeTokenizer bpe = BpeTokenizer.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken,
271: bpe = BpeTokenizer.Create(bpeOptions);
294: BpeTokenizer bpe1 = BpeTokenizer.Create(bpeOptions1);
298: private void SimpleWithUnknownTokenTest(BpeTokenizer bpe, string sentence, (int, int)[] offsets, int[] ids, string[] expectedTokens, string decodedTokens, string decodedTokensWithoutUnknownToken)
329: private void TestDecodingWithSpan(BpeTokenizer bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded)
369: _gpt2Tokenizer = BpeTokenizer.Create(vocabStream, mergesStream);
383: BpeTokenizer bpe = BpeTokenizer.Create(vocabFile, mergesFile);
389: bpe = BpeTokenizer.Create(vocabStream, mergesStream);
396: bpe = await BpeTokenizer.CreateAsync(vocabStream, mergesStream);
402: bpe = BpeTokenizer.Create(
549: var bpeTokenizer = BpeTokenizer.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>");
596: internal static BpeTokenizer CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
604: return BpeTokenizer.Create(
813: private static BpeTokenizer _deepSeekR1Tokenizer = CreateBpeTokenizerFromJson();
820: BpeTokenizer tokenizer = _deepSeekR1Tokenizer;
907: BpeTokenizer bpeTokenizer = BpeTokenizer.Create(options);
933: bpeTokenizer = BpeTokenizer.Create(options1);
948: private static BpeTokenizer CreateBpeTokenizerFromJson()
1001: return BpeTokenizer.Create(bpeOptions);