34 references to CountTokens
Microsoft.ML.Tokenizers.Tests (34)
BpeTests.cs (3)
265Assert.Equal(ids.Length, tokenizer.CountTokens(sentence)); 381Assert.Equal(12, tokenizer.CountTokens(text)); 457Assert.Equal(expectedIds.Length, tokenizer.CountTokens(text));
CodeGenTests.cs (4)
428Assert.Equal(ids.Length, codeGenTokenizer.CountTokens(text)); 629count = codeGenTokenizer.CountTokens(text); 762count = codeGenTokenizer.CountTokens(text); 917count = codeGenTokenizer.CountTokens(text);
EnglishRobertaTests.cs (4)
209Assert.Equal(expectedIds.Length, tokenizer.CountTokens(text)); 251idsCount = tokenizer.CountTokens((string)p[0]); 257idsCount = tokenizer.CountTokens((string)p[0]); 261idsCount = tokenizer.CountTokens((string)p[0]);
LlamaTests.cs (4)
251Assert.Equal(ids.Length, llamaTokenizer.CountTokens(input)); 344Assert.Equal(0, llamaTokenizer.CountTokens((string)null!)); 632Assert.Equal(expectedIds.Length, tokenizer.CountTokens(text)); 672tokenCount = tokenizer.CountTokens(kvp.Key);
TiktokenTests.cs (14)
144int idsCount = tokenizer.CountTokens(text); 197int idsCount = GPT4.CountTokens(text); 215int idsCount = gpt4Tokenizer.CountTokens(text); 244int idsCount = GPT4.CountTokens(text); 259int idsCount = GPT4.CountTokens(text); 269int idsCount = GPT4.CountTokens(text); 286int idsCount = GPT4o.CountTokens(text); 303idsCount = GPT4o.CountTokens(text); 323int idsCount = GPT2.CountTokens(text); 342int idsCount = P50kBase.CountTokens(text); 361int idsCount = P50kEdit.CountTokens(text); 380int idsCount = R50kBase.CountTokens(text); 596Assert.Equal(expectedIds.Length, tokenizer.CountTokens(text)); 690Assert.Equal(expectedIds.Length, GPT4.CountTokens(text));
TokenizerTests.cs (3)
48Assert.Equal(5, tokenizer.CountTokens("hello")); 200Assert.Equal(0, tokenizer.CountTokens(s.Substring(index1))); 208Assert.Equal(0, tokenizer.CountTokens(s.Substring(0, index2)));
WordPieceTests.cs (2)
64Assert.Equal(0, tokenizer.CountTokens("")); 117Assert.Equal(5, tokenizer.CountTokens(text));