4 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (4)
Model\BPETokenizer.cs (4)
132
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
181
return new
BpeTokenizer
(
239
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
275
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
28 references to BpeTokenizer
Microsoft.ML.Tokenizers (8)
Model\BPETokenizer.cs (6)
93
public static
BpeTokenizer
Create(string vocabFile, string? mergesFile)
111
public static
BpeTokenizer
Create(
135
public static
BpeTokenizer
Create(BpeOptions options)
203
public static
BpeTokenizer
Create(Stream vocabStream, Stream? mergesStream)
221
public static
BpeTokenizer
Create(
257
public static async Task<
BpeTokenizer
> CreateAsync(
Model\Cache.cs (1)
16
internal Cache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81
internal StringSpanOrdinalKeyCache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (20)
BpeTests.cs (20)
256
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken,
281
BpeTokenizer
bpe1 =
BpeTokenizer
.Create(bpeOptions);
285
private void SimpleWithUnknownTokenTest(
BpeTokenizer
bpe, string sentence, (int, int)[] offsets, int[] ids, string[] expectedTokens, string decodedTokens, string decodedTokensWithoutUnknownToken)
316
private void TestDecodingWithSpan(
BpeTokenizer
bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded)
356
_gpt2Tokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream);
370
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile, mergesFile);
376
bpe =
BpeTokenizer
.Create(vocabStream, mergesStream);
383
bpe = await
BpeTokenizer
.CreateAsync(vocabStream, mergesStream);
389
bpe =
BpeTokenizer
.Create(
536
var
bpeTokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>");
583
internal static
BpeTokenizer
CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
591
return
BpeTokenizer
.Create(
800
private static
BpeTokenizer
_deepSeekR1Tokenizer = CreateBpeTokenizerFromJson();
807
BpeTokenizer
tokenizer = _deepSeekR1Tokenizer;
875
private static
BpeTokenizer
CreateBpeTokenizerFromJson()
928
return
BpeTokenizer
.Create(bpeOptions);