18 references to GetIndexByTokenCountFromEnd
Microsoft.ML.Tokenizers.Tests (18)
BpeTests.cs (1)
467
Assert.Equal(expectedOffsets[expectedOffsets.Length - 3].Index, tokenizer.
GetIndexByTokenCountFromEnd
(text, 3, out normalizedText, out tokenCount));
CodeGenTests.cs (5)
470
Assert.Equal(expectedIndex, codeGenTokenizer.
GetIndexByTokenCountFromEnd
(text, 1, out normalizedText, out tokenCount));
657
int index = codeGenTokenizer.
GetIndexByTokenCountFromEnd
(text, maxTokenCount: 500, out normalizedText, out count);
790
index = codeGenTokenizer.
GetIndexByTokenCountFromEnd
(text, maxTokenCount: 500, out normalizedText, out count);
944
index = codeGenTokenizer.
GetIndexByTokenCountFromEnd
(text, maxTokenCount: 500, out normalizedText, out count);
1018
charsConsumed = _codegen350MMonoTokenizer.
GetIndexByTokenCountFromEnd
(input, maxTokenCount, out _, out tokenCount);
EnglishRobertaTests.cs (1)
219
Assert.Equal(expectedOffsets[expectedOffsets.Length - 3].Index, tokenizer.
GetIndexByTokenCountFromEnd
(text, 3, out normalizedText, out tokenCount));
LlamaTests.cs (1)
642
Assert.Equal(expectedOffsets[expectedOffsets.Length - 7].Index, tokenizer.
GetIndexByTokenCountFromEnd
(text, 7, out normalizedString, out tokenCount));
TiktokenTests.cs (2)
606
Assert.Equal(expectedOffsets[expectedOffsets.Length - 3].Index, tokenizer.
GetIndexByTokenCountFromEnd
(text, 3, out normalizedText, out tokenCount));
712
int index = GPT4.
GetIndexByTokenCountFromEnd
(text, tokenCount, out _, out count);
TokenizerTests.cs (6)
70
Assert.Equal(3, tokenizer.
GetIndexByTokenCountFromEnd
("hello", 2, out string? normalizedText, out int tokenCount));
74
Assert.Equal(0, tokenizer.
GetIndexByTokenCountFromEnd
("hello", 8, out normalizedText, out tokenCount));
138
int index2 = tokenizer.
GetIndexByTokenCountFromEnd
(input, maxTokenCount: i, out string? processedText2, out int tokenCount2);
217
Assert.Equal(0, tokenizer.
GetIndexByTokenCountFromEnd
((string)null!, maxTokenCount: 10, out _, out _));
223
Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.
GetIndexByTokenCountFromEnd
(input, maxTokenCount: 0, out _, out _));
224
Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.
GetIndexByTokenCountFromEnd
(input, maxTokenCount: -1, out _, out _));
WordPieceTests.cs (2)
70
index = tokenizer.
GetIndexByTokenCountFromEnd
("", maxTokenCount: 10, normalizedText: out _, tokenCount: out tokenCount);
134
index = tokenizer.
GetIndexByTokenCountFromEnd
(text, maxTokenCount: i, normalizedText: out _, out tokenCount);