50 references to EncodeToIds
Microsoft.ML.GenAI.Core (2)
Trainer\CausalLMDataset.cs (2)
60
var inputIds = tokenizer.
EncodeToIds
(input);
61
var outputIds = tokenizer.
EncodeToIds
(input + output);
Microsoft.ML.GenAI.LLaMA.Tests (1)
LLaMA3_1Tests.cs (1)
83
var tokenizeIds = tokenizer.
EncodeToIds
(message, true, false);
Microsoft.ML.GenAI.Phi.Tests (1)
Phi2Tests.cs (1)
56
var tokenized = tokenizer.
EncodeToIds
(message, true, false);
Microsoft.ML.Tokenizers (1)
Model\BertTokenizer.cs (1)
263
IReadOnlyList<int> ids = text is null ? base.EncodeToIds(textSpan, considerPreTokenization, considerNormalization) : base.
EncodeToIds
(text, considerPreTokenization, considerNormalization);
Microsoft.ML.Tokenizers.Tests (42)
BpeTests.cs (4)
259
IReadOnlyList<int> idsList = tokenizer.
EncodeToIds
(sentence);
377
IReadOnlyList<int> ids = tokenizer.
EncodeToIds
(text);
440
Assert.Equal(expectedIds, tokenizer.
EncodeToIds
(text));
527
IReadOnlyList<int> ids = bpeTokenizer.
EncodeToIds
(input);
CodeGenTests.cs (7)
371
Assert.Equal(ids, tokenizer.
EncodeToIds
(text));
609
IReadOnlyList<int> ids = codeGenTokenizer.
EncodeToIds
(text);
742
ids = codeGenTokenizer.
EncodeToIds
(text);
889
ids = codeGenTokenizer.
EncodeToIds
(text);
974
Assert.Equal(codeGenTokenizer.
EncodeToIds
(DefaultSpecialToken)[0], codeGenTokenizer.BeginningOfSentenceId!.Value);
975
Assert.Equal(codeGenTokenizer.
EncodeToIds
(DefaultSpecialToken)[0], codeGenTokenizer.EndOfSentenceId!.Value);
976
Assert.Equal(codeGenTokenizer.
EncodeToIds
(DefaultSpecialToken)[0], codeGenTokenizer.UnknownTokenId!.Value);
EnglishRobertaTests.cs (4)
192
Assert.Equal(expectedIds, tokenizer.
EncodeToIds
(text));
250
ids = tokenizer.
EncodeToIds
((string)p[0]);
255
ids = tokenizer.
EncodeToIds
((string)p[0]);
262
ids = tokenizer.
EncodeToIds
((string)p[0]);
LlamaTests.cs (6)
250
Assert.Equal(ids, llamaTokenizer.
EncodeToIds
(input));
341
Assert.Equal([], llamaTokenizer.
EncodeToIds
((string)null!));
556
Assert.Equal(expectedIds, tokenizer.
EncodeToIds
(text));
669
encodedIds = tokenizer.
EncodeToIds
(kvp.Key);
835
var ids = tokenizer.
EncodeToIds
(text);
874
ids = tokenizerWithSuffix.
EncodeToIds
(text);
TiktokenTests.cs (14)
138
IReadOnlyList<int> encoded = tokenizer.
EncodeToIds
(text);
191
IReadOnlyList<int> encoded = GPT4.
EncodeToIds
(text);
213
IReadOnlyList<int> encoded = gpt4Tokenizer.
EncodeToIds
(text);
234
IReadOnlyList<int> encoded = GPT4.
EncodeToIds
(text);
255
IReadOnlyList<int> encoded = GPT4.
EncodeToIds
(text);
268
IReadOnlyList<int> encoded = GPT4.
EncodeToIds
(text);
285
IReadOnlyList<int> encoded = GPT4o.
EncodeToIds
(text);
302
encoded = GPT4o.
EncodeToIds
(text);
322
IReadOnlyList<int> encoded = GPT2.
EncodeToIds
(text);
341
IReadOnlyList<int> encoded = P50kBase.
EncodeToIds
(text);
360
IReadOnlyList<int> encoded = P50kEdit.
EncodeToIds
(text);
379
IReadOnlyList<int> encoded = R50kBase.
EncodeToIds
(text);
579
Assert.Equal(expectedIds, tokenizer.
EncodeToIds
(text));
689
Assert.Equal(expectedIds, GPT4.
EncodeToIds
(text));
TokenizerTests.cs (3)
133
IReadOnlyList<int> fullIdsList = tokenizer.
EncodeToIds
(input);
163
prefixIds = tokenizer.
EncodeToIds
(prefixString);
185
suffixIds = tokenizer.
EncodeToIds
(suffixString);
WordPieceTests.cs (4)
65
IReadOnlyList<int> ids = tokenizer.
EncodeToIds
("");
87
ids = tokenizer.
EncodeToIds
(text);
166
IReadOnlyList<int> ids = tokenizer.
EncodeToIds
(text);
210
IReadOnlyList<int> ids = tokenizer.
EncodeToIds
(text);
Microsoft.ML.TorchSharp (3)
Extensions\TokenizerExtensions.cs (1)
54
return tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(tokenizer.
EncodeToIds
(sentence));
Roberta\QATrainer.cs (2)
857
var contextTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.
EncodeToIds
(context.ToString()));
859
var questionTokenId = _parent.Tokenizer.RobertaModel().ConvertIdsToOccurrenceRanks(_parent.Tokenizer.
EncodeToIds
(question.ToString()));