18 references to GetIndexByTokenCount
Microsoft.ML.Tokenizers.Tests (18)
BpeTests.cs (1)
460
Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.
GetIndexByTokenCount
(text, expectedIds.Length - 3, out normalizedText, out int tokenCount));
CodeGenTests.cs (5)
443
Assert.Equal(offsets[offsets.Length - 1].Index + offsets[offsets.Length - 1].Length, codeGenTokenizer.
GetIndexByTokenCount
(text, ids.Length, out normalizedText, out int tokenCount));
638
int length = codeGenTokenizer.
GetIndexByTokenCount
(text, maxTokenCount: 500, out normalizedText, out count);
771
length = codeGenTokenizer.
GetIndexByTokenCount
(text, maxTokenCount: 500, out normalizedText, out count);
925
length = codeGenTokenizer.
GetIndexByTokenCount
(text, maxTokenCount: 500, out normalizedText, out count);
1008
charsConsumed = _codegen350MMonoTokenizer.
GetIndexByTokenCount
(input, maxTokenCount, out _, out int tokenCount);
EnglishRobertaTests.cs (1)
212
Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.
GetIndexByTokenCount
(text, expectedIds.Length - 3, out normalizedText, out int tokenCount));
LlamaTests.cs (1)
635
Assert.Equal(expectedOffsets[expectedOffsets.Length - 7].Index + expectedOffsets[expectedOffsets.Length - 7].Length, tokenizer.
GetIndexByTokenCount
(text, expectedIds.Length - 6, out string? normalizedString, out int tokenCount));
TiktokenTests.cs (2)
599
Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.
GetIndexByTokenCount
(text, expectedIds.Length - 3, out normalizedText, out int tokenCount));
694
int length = GPT4.
GetIndexByTokenCount
(text, tokenCount, out _, out int count);
TokenizerTests.cs (6)
56
Assert.Equal(2, tokenizer.
GetIndexByTokenCount
("hello", 2, out string? normalizedText, out int tokenCount));
60
Assert.Equal(5, tokenizer.
GetIndexByTokenCount
("hello", 8, out normalizedText, out tokenCount));
137
int index1 = tokenizer.
GetIndexByTokenCount
(input, maxTokenCount: i, out string? processedText1, out int tokenCount1);
216
Assert.Equal(0, tokenizer.
GetIndexByTokenCount
((string)null!, maxTokenCount: 10, out _, out _));
221
Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.
GetIndexByTokenCount
(input, maxTokenCount: 0, out _, out _));
222
Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.
GetIndexByTokenCount
(input, maxTokenCount: -1, out _, out _));
WordPieceTests.cs (2)
67
int index = tokenizer.
GetIndexByTokenCount
("", maxTokenCount: 10, normalizedText: out _, tokenCount: out int tokenCount);
124
index = tokenizer.
GetIndexByTokenCount
(text, maxTokenCount: i, normalizedText: out _, out tokenCount);