4 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (4)
Model\BPETokenizer.cs (4)
132
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
186
return new
BpeTokenizer
(
244
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
280
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
32 references to BpeTokenizer
Microsoft.ML.Tokenizers (8)
Model\BPETokenizer.cs (6)
93
public static
BpeTokenizer
Create(string vocabFile, string? mergesFile)
111
public static
BpeTokenizer
Create(
140
public static
BpeTokenizer
Create(BpeOptions options)
208
public static
BpeTokenizer
Create(Stream vocabStream, Stream? mergesStream)
226
public static
BpeTokenizer
Create(
262
public static async Task<
BpeTokenizer
> CreateAsync(
Model\Cache.cs (1)
16
internal Cache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81
internal StringSpanOrdinalKeyCache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (24)
BpeTests.cs (24)
256
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken,
271
bpe =
BpeTokenizer
.Create(bpeOptions);
294
BpeTokenizer
bpe1 =
BpeTokenizer
.Create(bpeOptions1);
298
private void SimpleWithUnknownTokenTest(
BpeTokenizer
bpe, string sentence, (int, int)[] offsets, int[] ids, string[] expectedTokens, string decodedTokens, string decodedTokensWithoutUnknownToken)
329
private void TestDecodingWithSpan(
BpeTokenizer
bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded)
369
_gpt2Tokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream);
383
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile, mergesFile);
389
bpe =
BpeTokenizer
.Create(vocabStream, mergesStream);
396
bpe = await
BpeTokenizer
.CreateAsync(vocabStream, mergesStream);
402
bpe =
BpeTokenizer
.Create(
549
var
bpeTokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>");
596
internal static
BpeTokenizer
CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
604
return
BpeTokenizer
.Create(
813
private static
BpeTokenizer
_deepSeekR1Tokenizer = CreateBpeTokenizerFromJson();
820
BpeTokenizer
tokenizer = _deepSeekR1Tokenizer;
907
BpeTokenizer
bpeTokenizer =
BpeTokenizer
.Create(options);
933
bpeTokenizer =
BpeTokenizer
.Create(options1);
948
private static
BpeTokenizer
CreateBpeTokenizerFromJson()
1001
return
BpeTokenizer
.Create(bpeOptions);