16 references to EncodeToIds
Microsoft.ML.Tokenizers (2)
Model\BertTokenizer.cs (2)
243base.EncodeToIds(text, maxTokenCount - 2, out normalizedText, out charsConsumed, considerPreTokenization, considerNormalization);
258base.EncodeToIds(text, maxTokenCount, out normalizedText, out charsConsumed, considerPreTokenization, considerNormalization);
Microsoft.ML.Tokenizers.Tests (14)
BpeTests.cs (3)
475Assert.Equal(expectedIds, tokenizer.EncodeToIds(text, expectedIds.Length, out string? normalizedText, out int length));
482Assert.Equal(expectedIds.Take(expectedIds.Length - 2), tokenizer.EncodeToIds(text, expectedIds.Length - 2, out normalizedText, out length));
818IReadOnlyList<int> subIds = tokenizer.EncodeToIds(text, maxTokenCount: i, out _, out int charConsumed);
CodeGenTests.cs (3)
379Assert.Equal(ids, codeGenTokenizer.EncodeToIds(text, ids.Length, out string? normalizedText, out int length));
401Assert.Equal(ids.Take(ids.Length - expectedTokensToExclude), codeGenTokenizer.EncodeToIds(text, ids.Length - 1, out normalizedText, out length));
997IReadOnlyList<int> ids = _codegen350MMonoTokenizer.EncodeToIds(input, maxTokenCount, out _, out int charsConsumed);