4 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (4)
Model\BPETokenizer.cs (4)
132return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens); 181return new BpeTokenizer( 239return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens); 275return new BpeTokenizer(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
28 references to BpeTokenizer
Microsoft.ML.Tokenizers (8)
Model\BPETokenizer.cs (6)
93public static BpeTokenizer Create(string vocabFile, string? mergesFile) 111public static BpeTokenizer Create( 135public static BpeTokenizer Create(BpeOptions options) 203public static BpeTokenizer Create(Stream vocabStream, Stream? mergesStream) 221public static BpeTokenizer Create( 257public static async Task<BpeTokenizer> CreateAsync(
Model\Cache.cs (1)
16internal Cache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81internal StringSpanOrdinalKeyCache() : this(BpeTokenizer.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (20)
BpeTests.cs (20)
256BpeTokenizer bpe = BpeTokenizer.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken, 281BpeTokenizer bpe1 = BpeTokenizer.Create(bpeOptions); 285private void SimpleWithUnknownTokenTest(BpeTokenizer bpe, string sentence, (int, int)[] offsets, int[] ids, string[] expectedTokens, string decodedTokens, string decodedTokensWithoutUnknownToken) 316private void TestDecodingWithSpan(BpeTokenizer bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded) 356_gpt2Tokenizer = BpeTokenizer.Create(vocabStream, mergesStream); 370BpeTokenizer bpe = BpeTokenizer.Create(vocabFile, mergesFile); 376bpe = BpeTokenizer.Create(vocabStream, mergesStream); 383bpe = await BpeTokenizer.CreateAsync(vocabStream, mergesStream); 389bpe = BpeTokenizer.Create( 536var bpeTokenizer = BpeTokenizer.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>"); 583internal static BpeTokenizer CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null) 591return BpeTokenizer.Create( 800private static BpeTokenizer _deepSeekR1Tokenizer = CreateBpeTokenizerFromJson(); 807BpeTokenizer tokenizer = _deepSeekR1Tokenizer; 875private static BpeTokenizer CreateBpeTokenizerFromJson() 928return BpeTokenizer.Create(bpeOptions);