50 references to EncodeToIds
Microsoft.ML.GenAI.Core (2)
Trainer\CausalLMDataset.cs (2)
Line 60: var inputIds = tokenizer.EncodeToIds(input);
Line 61: var outputIds = tokenizer.EncodeToIds(input + output);
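
The two CausalLMDataset references encode the prompt alone and the prompt plus completion together, which makes it easy to treat everything past the prompt as the training target. A minimal sketch of that pattern, assuming a hypothetical BuildExample helper and a -100 label mask (neither appears in the listing):

    using System.Collections.Generic;
    using System.Linq;
    using Microsoft.ML.Tokenizers;

    internal static class CausalLMExampleSketch
    {
        // Sketch of the pattern on lines 60-61 above: encode the prompt alone and
        // the prompt + completion together, then ignore the prompt portion when
        // building labels. The helper name, tuple shape, and -100 ignore value are
        // illustrative assumptions; only EncodeToIds comes from the listing.
        public static (IReadOnlyList<int> InputIds, int[] Labels) BuildExample(
            Tokenizer tokenizer, string input, string output)
        {
            IReadOnlyList<int> inputIds = tokenizer.EncodeToIds(input);         // prompt only
            IReadOnlyList<int> fullIds = tokenizer.EncodeToIds(input + output); // prompt + completion

            int[] labels = fullIds.ToArray();
            for (int i = 0; i < inputIds.Count && i < labels.Length; i++)
            {
                labels[i] = -100; // mask prompt tokens out of the loss
            }

            return (fullIds, labels);
        }
    }
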
Microsoft.ML.GenAI.LLaMA.Tests (1)
LLaMA3_1Tests.cs (1)
Line 83: var tokenizeIds = tokenizer.EncodeToIds(message, true, false);
Microsoft.ML.GenAI.Phi.Tests (1)
Phi2Tests.cs (1)
Line 56: var tokenized = tokenizer.EncodeToIds(message, true, false);
Microsoft.ML.Tokenizers (1)
Model\BertTokenizer.cs (1)
Line 263: IReadOnlyList<int> ids = text is null ? base.EncodeToIds(textSpan, considerPreTokenization, considerNormalization) : base.EncodeToIds(text, considerPreTokenization, considerNormalization);
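
Across the listing, EncodeToIds is called either with just the text or with the considerPreTokenization/considerNormalization flags (e.g. EncodeToIds(message, true, false) in the GenAI tests, and the flag pass-through in BertTokenizer above). A minimal sketch of both call shapes with tokenizer construction left out; the Demo helper itself is illustrative only:

    using System;
    using System.Collections.Generic;
    using Microsoft.ML.Tokenizers;

    internal static class EncodeToIdsOverloadSketch
    {
        public static void Demo(Tokenizer tokenizer, string text)
        {
            // Default overload: pre-tokenization and normalization both applied.
            IReadOnlyList<int> ids = tokenizer.EncodeToIds(text);

            // Flagged overload, as in the GenAI tests: keep pre-tokenization (true),
            // skip normalization (false).
            IReadOnlyList<int> rawIds = tokenizer.EncodeToIds(text, true, false);

            Console.WriteLine($"{ids.Count} ids with defaults, {rawIds.Count} without normalization");
        }
    }
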
Microsoft.ML.Tokenizers.Tests (42)
BpeTests.cs (4)
Line 259: IReadOnlyList<int> idsList = tokenizer.EncodeToIds(sentence);
Line 377: IReadOnlyList<int> ids = tokenizer.EncodeToIds(text);
Line 440: Assert.Equal(expectedIds, tokenizer.EncodeToIds(text));
Line 527: IReadOnlyList<int> ids = bpeTokenizer.EncodeToIds(input);
CodeGenTests.cs (7)
Line 371: Assert.Equal(ids, tokenizer.EncodeToIds(text));
Line 609: IReadOnlyList<int> ids = codeGenTokenizer.EncodeToIds(text);
Line 742: ids = codeGenTokenizer.EncodeToIds(text);
Line 889: ids = codeGenTokenizer.EncodeToIds(text);
Line 974: Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.BeginningOfSentenceId!.Value);
Line 975: Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.EndOfSentenceId!.Value);
Line 976: Assert.Equal(codeGenTokenizer.EncodeToIds(DefaultSpecialToken)[0], codeGenTokenizer.UnknownTokenId!.Value);
EnglishRobertaTests.cs (4)
Line 192: Assert.Equal(expectedIds, tokenizer.EncodeToIds(text));
Line 250: ids = tokenizer.EncodeToIds((string)p[0]);
Line 255: ids = tokenizer.EncodeToIds((string)p[0]);
Line 262: ids = tokenizer.EncodeToIds((string)p[0]);
LlamaTests.cs (6)
Line 250: Assert.Equal(ids, llamaTokenizer.EncodeToIds(input));
Line 341: Assert.Equal([], llamaTokenizer.EncodeToIds((string)null!));
Line 556: Assert.Equal(expectedIds, tokenizer.EncodeToIds(text));
Line 669: encodedIds = tokenizer.EncodeToIds(kvp.Key);
Line 835: var ids = tokenizer.EncodeToIds(text);
Line 874: ids = tokenizerWithSuffix.EncodeToIds(text);
TiktokenTests.cs (14)
Line 138: IReadOnlyList<int> encoded = tokenizer.EncodeToIds(text);
Line 191: IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
Line 213: IReadOnlyList<int> encoded = gpt4Tokenizer.EncodeToIds(text);
Line 234: IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
Line 255: IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
Line 268: IReadOnlyList<int> encoded = GPT4.EncodeToIds(text);
Line 285: IReadOnlyList<int> encoded = GPT4o.EncodeToIds(text);
Line 302: encoded = GPT4o.EncodeToIds(text);
Line 322: IReadOnlyList<int> encoded = GPT2.EncodeToIds(text);
Line 341: IReadOnlyList<int> encoded = P50kBase.EncodeToIds(text);
Line 360: IReadOnlyList<int> encoded = P50kEdit.EncodeToIds(text);
Line 379: IReadOnlyList<int> encoded = R50kBase.EncodeToIds(text);
Line 579: Assert.Equal(expectedIds, tokenizer.EncodeToIds(text));
Line 689: Assert.Equal(expectedIds, GPT4.EncodeToIds(text));
TokenizerTests.cs (3)
Line 133: IReadOnlyList<int> fullIdsList = tokenizer.EncodeToIds(input);
Line 163: prefixIds = tokenizer.EncodeToIds(prefixString);
Line 185: suffixIds = tokenizer.EncodeToIds(suffixString);
WordPieceTests.cs (4)
Line 65: IReadOnlyList<int> ids = tokenizer.EncodeToIds("");
Line 87: ids = tokenizer.EncodeToIds(text);
Line 166: IReadOnlyList<int> ids = tokenizer.EncodeToIds(text);
Line 210: IReadOnlyList<int> ids = tokenizer.EncodeToIds(text);
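
Most of the test references above share one shape: encode a string and compare the ids against a precomputed sequence. An illustrative xUnit-style helper wrapping that shape (the tests themselves inline the Assert.Equal call directly):

    using System.Collections.Generic;
    using Microsoft.ML.Tokenizers;
    using Xunit;

    internal static class EncodeToIdsAssertions
    {
        // Encode a string and compare the result against a precomputed id sequence.
        // The helper name is illustrative; it is not part of the test projects.
        public static void AssertEncodesTo(Tokenizer tokenizer, string text, IReadOnlyList<int> expectedIds)
        {
            IReadOnlyList<int> actualIds = tokenizer.EncodeToIds(text);
            Assert.Equal(expectedIds, actualIds);
        }
    }
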
Microsoft.ML.TorchSharp (3)
Extensions\TokenizerExtensions.cs (1)
Line 54: return tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(tokenizer.EncodeToIds(sentence));
Roberta\QATrainer.cs (2)
Line 857: var contextTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.EncodeToIds(context.ToString()));
Line 859: var questionTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.EncodeToIds(question.ToString()));
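
The Microsoft.ML.TorchSharp references feed the ids from EncodeToIds straight into ConvertIdsToOccurrenceRanks on the Roberta model. A minimal sketch of that data flow; RobertaModel() and ConvertIdsToOccurrenceRanks are copied verbatim from the lines above, and since their accessibility and exact signatures are not shown here, treat this as an illustration rather than a public API:

    using System;
    using System.Collections.Generic;
    using Microsoft.ML.Tokenizers;

    internal static class OccurrenceRankSketch
    {
        // Two-step mapping used above: EncodeToIds produces vocabulary ids, and the
        // Roberta model remaps them to occurrence ranks. RobertaModel() lives in
        // Extensions\TokenizerExtensions.cs inside Microsoft.ML.TorchSharp and may
        // not be reachable from outside that assembly.
        public static void PrintOccurrenceRanks(Tokenizer tokenizer, string sentence)
        {
            IReadOnlyList<int> ids = tokenizer.EncodeToIds(sentence);
            var ranks = tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(ids);
            Console.WriteLine(string.Join(", ", ranks));
        }
    }
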