1 type derived from CodeGenTokenizer
Microsoft.ML.Tokenizers (1)
Model\Phi2Tokenizer.cs (1)
15public sealed class Phi2Tokenizer : CodeGenTokenizer
1 instantiation of CodeGenTokenizer
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
1894return new CodeGenTokenizer(
21 references to CodeGenTokenizer
Microsoft.ML.GenAI.Phi (2)
Phi2\Phi2TokenizerHelper.cs (2)
15public static CodeGenTokenizer Create( 28return CodeGenTokenizer.Create(vocabStream, mergesStream, addPrefixSpace, addBeginOfSentence, addEndOfSentence);
Microsoft.ML.GenAI.Phi.Tests (1)
Phi2Tests.cs (1)
44var tokenizer = Phi2TokenizerHelper.Create(modelWeightFolder, addBeginOfSentence: true);
Microsoft.ML.Tokenizers (5)
Model\CodeGenTokenizer.cs (3)
1877public static CodeGenTokenizer Create( 1897new RegexPreTokenizer(TiktokenTokenizer.P50kBaseRegex(), CodeGenTokenizer.CodeGenSpecialTokens), 1899CodeGenTokenizer.CodeGenSpecialTokens,
Model\Phi2Tokenizer.cs (2)
117vocabStream, mergesStream, new RegexPreTokenizer(TiktokenTokenizer.P50kBaseRegex(), CodeGenTokenizer.CodeGenSpecialTokens), normalizer: null, 118CodeGenTokenizer.CodeGenSpecialTokens, addPrefixSpace: addPrefixSpace, addBeginningOfSentence: addBeginOfSentence, addEndOfSentence: addEndOfSentence);
Microsoft.ML.Tokenizers.Tests (13)
CodeGenTests.cs (13)
31return CodeGenTokenizer.Create(vocabStream, mergesStream, addPrefixSpace, bos, eos); 42return CodeGenTokenizer.Create(vocabStream, mergesStream); 250CodeGenTokenizer codeGenTokenizer = (tokenizer as CodeGenTokenizer)!; 302private void TestDecodingWithSpan(CodeGenTokenizer tokenizer, int[] ids, bool hasPrefixSpace, bool considerSpecialTokens, string expectedDecoded) 341CodeGenTokenizer codeGenTokenizer = (tokenizer as CodeGenTokenizer)!; 547CodeGenTokenizer codeGenTokenizer = (_codegen350MMonoTokenizerWithBeginningOfSentence as CodeGenTokenizer)!; 680codeGenTokenizer = (_codegen350MMonoTokenizerWithEndOfSentence as CodeGenTokenizer)!; 813codeGenTokenizer = (_codegen350MMonoTokenizerWithBeginningAndEndOfSentence as CodeGenTokenizer)!; 969CodeGenTokenizer codeGenTokenizer = (_codegen350MMonoTokenizer as CodeGenTokenizer)!;