3 instantiations of BpeTokenizer
Microsoft.ML.Tokenizers (3)
Model\BPETokenizer.cs (3)
131
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
178
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
214
return new
BpeTokenizer
(result.vocab, result.merges, preTokenizer, normalizer, specialTokens, unknownToken, continuingSubwordPrefix, endOfWordSuffix, fuseUnknownTokens);
19 references to BpeTokenizer
Microsoft.ML.Tokenizers (7)
Model\BPETokenizer.cs (5)
92
public static
BpeTokenizer
Create(string vocabFile, string? mergesFile)
110
public static
BpeTokenizer
Create(
142
public static
BpeTokenizer
Create(Stream vocabStream, Stream? mergesStream)
160
public static
BpeTokenizer
Create(
196
public static async Task<
BpeTokenizer
> CreateAsync(
Model\Cache.cs (1)
16
internal Cache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Utils\StringSpanOrdinalKey.cs (1)
81
internal StringSpanOrdinalKeyCache() : this(
BpeTokenizer
.DefaultCacheCapacity) { }
Microsoft.ML.Tokenizers.Tests (12)
BpeTests.cs (12)
254
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile: vocabFile, mergesFile: mergesFile, preTokenizer: PreTokenizer.CreateWordOrNonWord(), normalizer: null, unknownToken: unknownToken,
294
private void TestDecodingWithSpan(
BpeTokenizer
bpe, int[] ids, bool considerSpecialTokens, string expectedDecoded)
334
_gpt2Tokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream);
348
BpeTokenizer
bpe =
BpeTokenizer
.Create(vocabFile, mergesFile);
354
bpe =
BpeTokenizer
.Create(vocabStream, mergesStream);
361
bpe = await
BpeTokenizer
.CreateAsync(vocabStream, mergesStream);
503
var
bpeTokenizer =
BpeTokenizer
.Create(vocabStream, mergesStream, PreTokenizer.CreateWordOrNonWord(specialTokens), normalizer: null, specialTokens: specialTokens, unknownToken: "<|endoftext|>");
550
internal static
BpeTokenizer
CreateEmptyBpe(PreTokenizer? preTokenizer = null, Normalizer? normalizer = null)
558
return
BpeTokenizer
.Create(