18 references to GetIndexByTokenCount
Microsoft.ML.Tokenizers.Tests (18)
BpeTests.cs (1)
460Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount));
CodeGenTests.cs (5)
443Assert.Equal(offsets[offsets.Length - 1].Index + offsets[offsets.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text, ids.Length, out normalizedText, out int tokenCount)); 638int length = codeGenTokenizer.GetIndexByTokenCount(text, maxTokenCount: 500, out normalizedText, out count); 771length = codeGenTokenizer.GetIndexByTokenCount(text, maxTokenCount: 500, out normalizedText, out count); 925length = codeGenTokenizer.GetIndexByTokenCount(text, maxTokenCount: 500, out normalizedText, out count); 1008charsConsumed = _codegen350MMonoTokenizer.GetIndexByTokenCount(input, maxTokenCount, out _, out int tokenCount);
EnglishRobertaTests.cs (1)
212Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount));
LlamaTests.cs (1)
635Assert.Equal(expectedOffsets[expectedOffsets.Length - 7].Index + expectedOffsets[expectedOffsets.Length - 7].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 6, out string? normalizedString, out int tokenCount));
TiktokenTests.cs (2)
599Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount)); 694int length = GPT4.GetIndexByTokenCount(text, tokenCount, out _, out int count);
TokenizerTests.cs (6)
56Assert.Equal(2, tokenizer.GetIndexByTokenCount("hello", 2, out string? normalizedText, out int tokenCount)); 60Assert.Equal(5, tokenizer.GetIndexByTokenCount("hello", 8, out normalizedText, out tokenCount)); 137int index1 = tokenizer.GetIndexByTokenCount(input, maxTokenCount: i, out string? processedText1, out int tokenCount1); 216Assert.Equal(0, tokenizer.GetIndexByTokenCount((string)null!, maxTokenCount: 10, out _, out _)); 221Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.GetIndexByTokenCount(input, maxTokenCount: 0, out _, out _)); 222Assert.Throws<ArgumentOutOfRangeException>(() => tokenizer.GetIndexByTokenCount(input, maxTokenCount: -1, out _, out _));
WordPieceTests.cs (2)
67int index = tokenizer.GetIndexByTokenCount("", maxTokenCount: 10, normalizedText: out _, tokenCount: out int tokenCount); 124index = tokenizer.GetIndexByTokenCount(text, maxTokenCount: i, normalizedText: out _, out tokenCount);