47 references to EncodeToTokens
Microsoft.ML.Tokenizers.Tests (44)
BertTokenizerTests.cs (11)
55
var tokens = tokenizer.
EncodeToTokens
(text, out string? normalizedText);
76
tokens = tokenizer.
EncodeToTokens
(tokenizer.Decode(ids), out normalizedText);
128
var tokens = tokenizer.
EncodeToTokens
(text, out string? normalizedText);
148
tokens = tokenizer.
EncodeToTokens
(tokenizer.Decode(ids), out normalizedText);
196
var tokens = tokenizer.
EncodeToTokens
(text, out string? normalizedText);
238
var tokens = bertTokenizer.
EncodeToTokens
(text, out string? normalizedText);
253
tokens = bertTokenizer.
EncodeToTokens
(text, out normalizedText);
268
tokens = bertTokenizer.
EncodeToTokens
(text, out normalizedText);
282
tokens = bertTokenizer.
EncodeToTokens
(text, out normalizedText);
313
var tokens = bertTokenizer.
EncodeToTokens
(text, out string? normalizedText);
330
tokens = bertTokenizer.
EncodeToTokens
(text, out normalizedText);
BpeTests.cs (4)
257
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(sentence, out _);
376
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
429
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
507
IReadOnlyList<EncodedToken> tokens = bpeTokenizer.
EncodeToTokens
(input, out _);
CodeGenTests.cs (5)
252
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
347
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
549
IReadOnlyList<EncodedToken> encoding = codeGenTokenizer.
EncodeToTokens
(text, out _);
682
encoding = codeGenTokenizer.
EncodeToTokens
(text, out _);
815
encoding = codeGenTokenizer.
EncodeToTokens
(text, out _);
EnglishRobertaTests.cs (4)
181
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
249
encoding = tokenizer.
EncodeToTokens
((string)p[0], out _);
256
encoding = tokenizer.
EncodeToTokens
((string)p[0], out _);
263
encoding = tokenizer.
EncodeToTokens
((string)p[0], out _);
LlamaTests.cs (4)
244
IReadOnlyList<EncodedToken> result = llamaTokenizer.
EncodeToTokens
(input, out _);
338
Assert.Equal([], llamaTokenizer.
EncodeToTokens
((string)null!, out _));
500
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
664
encodedTokens = tokenizer.
EncodeToTokens
(kvp.Key, out normalizedText);
NormalizerTests.cs (1)
65
IReadOnlyList<EncodedToken> tokens = tokenizer.
EncodeToTokens
(text, out normalizedText);
PreTokenizerTests.cs (1)
66
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
TiktokenTests.cs (8)
146
IReadOnlyList<EncodedToken> result = tokenizer.
EncodeToTokens
(text, out string? normalizedText);
199
IReadOnlyList<EncodedToken> result = GPT4.
EncodeToTokens
(text, out string? normalizedText);
242
IReadOnlyList<EncodedToken> result = GPT4.
EncodeToTokens
(text, out string? normalizedText);
261
IReadOnlyList<EncodedToken> result = GPT4.
EncodeToTokens
(text, out string? normalizedText);
277
IReadOnlyList<EncodedToken> result = GPT4.
EncodeToTokens
(text, out string? normalizedText);
311
IReadOnlyList<EncodedToken> result = GPT4o.
EncodeToTokens
(text, out string? normalizedText);
582
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(text, out _);
697
IReadOnlyList<EncodedToken> result = GPT4.
EncodeToTokens
(text, out _);
UnigramTests.cs (2)
404
IReadOnlyList<EncodedToken> result = _unigramTokenizer.
EncodeToTokens
(inputText, out string? normalized);
408
result = _unigramTokenizerFromJson.
EncodeToTokens
(inputText, out normalized);
WordPieceTests.cs (4)
62
IReadOnlyList<EncodedToken> tokens = tokenizer.
EncodeToTokens
("", out _);
75
tokens = tokenizer.
EncodeToTokens
(text, out _);
156
IReadOnlyList<EncodedToken> tokens = tokenizer.
EncodeToTokens
(text, out _);
194
IReadOnlyList<EncodedToken> tokens = tokenizer.
EncodeToTokens
(text, out _);
Microsoft.ML.TorchSharp (3)
NasBert\NerTrainer.cs (2)
170
IReadOnlyList<EncodedToken> encoding = Tokenizer.
EncodeToTokens
(sentence, out string normalizedText);
380
IReadOnlyList<EncodedToken> encoding = tokenizer.
EncodeToTokens
(sentence, out string normalizedText);
Roberta\QATrainer.cs (1)
404
var contextTokens = Tokenizer.
EncodeToTokens
(contextString, out string normalized);