42 references to EncodeToIds
Microsoft.ML.GenAI.LLaMA.Tests (1)
LLaMA3_1Tests.cs (1)
82var tokenizeIds = tokenizer.EncodeToIds(message, true, false);
Microsoft.ML.GenAI.Phi.Tests (1)
Phi2Tests.cs (1)
56var tokenized = tokenizer.EncodeToIds(message, true, false);
Microsoft.ML.Tokenizers.Tests (37)
BpeTests.cs (3)
259IReadOnlyList<int> idsList = tokenizer.EncodeToIds(sentence); 377IReadOnlyList<int> ids = tokenizer.EncodeToIds(text); 440Assert.Equal(expectedIds, tokenizer.EncodeToIds(text));
CodeGenTests.cs (7)
371Assert.Equal(ids, tokenizer.EncodeToIds(text)); 609IReadOnlyList<int> ids = codeGenTokenizer.EncodeToIds(text); 742ids = codeGenTokenizer.EncodeToIds(text); 889ids = codeGenTokenizer.EncodeToIds(text); 974Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.BeginningOfSentenceId!.Value); 975Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.EndOfSentenceId!.Value); 976Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.UnknownTokenId!.Value);
EnglishRobertaTests.cs (4)
192Assert.Equal(expectedIds, tokenizer.EncodeToIds(text)); 250ids = tokenizer.EncodeToIds((string)p[0]); 255ids = tokenizer.EncodeToIds((string)p[0]); 262ids = tokenizer.EncodeToIds((string)p[0]);
LlamaTests.cs (6)
250Assert.Equal(ids, llamaTokenizer.EncodeToIds(input)); 341Assert.Equal([], llamaTokenizer.EncodeToIds((string)null!)); 556Assert.Equal(expectedIds, tokenizer.EncodeToIds(text)); 669encodedIds = tokenizer.EncodeToIds(kvp.Key); 835var ids = tokenizer.EncodeToIds(text); 874ids = tokenizerWithSuffix.EncodeToIds(text);
TitokenTests.cs (14)
137IReadOnlyList<int> encoded = tokenizer.EncodeToIds(text); 190IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 212IReadOnlyList<int> encoded = gpt4Tokenizer.EncodeToIds(text); 233IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 254IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 267IReadOnlyList<int> encoded = GPT4.EncodeToIds(text); 284IReadOnlyList<int> encoded = GPT4o.EncodeToIds(text); 301encoded = GPT4o.EncodeToIds(text); 321IReadOnlyList<int> encoded = GPT2.EncodeToIds(text); 340IReadOnlyList<int> encoded = P50kBase.EncodeToIds(text); 359IReadOnlyList<int> encoded = P50kEdit.EncodeToIds(text); 378IReadOnlyList<int> encoded = R50kBase.EncodeToIds(text); 575Assert.Equal(expectedIds, tokenizer.EncodeToIds(text)); 685Assert.Equal(expectedIds, GPT4.EncodeToIds(text));
TokenizerTests.cs (3)
133IReadOnlyList<int> fullIdsList = tokenizer.EncodeToIds(input); 163prefixIds = tokenizer.EncodeToIds(prefixString); 185suffixIds = tokenizer.EncodeToIds(suffixString);
Microsoft.ML.TorchSharp (3)
Extensions\TokenizerExtensions.cs (1)
54return tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(tokenizer.EncodeToIds(sentence));
Roberta\QATrainer.cs (2)
857var contextTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.EncodeToIds(context.ToString())); 859var questionTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.EncodeToIds(question.ToString()));