24 writes to Tokens
Microsoft.ML.Tokenizers (23)
Model\BPETokenizer.cs (4)
317: return new EncodeResults&lt;EncodedToken&gt; { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
346: return new EncodeResults&lt;EncodedToken&gt; { Tokens = tokens, NormalizedText = normalizedText, CharsConsumed = charsConsumed };
366: return new EncodeResults&lt;int&gt; { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
402: return new EncodeResults&lt;int&gt; { Tokens = ids, NormalizedText = normalizedText, CharsConsumed = charsConsumed };
Model\CodeGenTokenizer.cs (3)
328: return new EncodeResults&lt;EncodedToken&gt; { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
402: return new EncodeResults&lt;EncodedToken&gt; { Tokens = tokens, NormalizedText = normalizedText, CharsConsumed = textSpanToEncode.Length };
492: Tokens = EncodeToIds(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization,
Model\EnglishRobertaTokenizer.cs (5)
318: return new EncodeResults&lt;EncodedToken&gt; { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
343: return new EncodeResults&lt;EncodedToken&gt; { Tokens = tokens, NormalizedText = normalizedText, CharsConsumed = charsConsumed };
347: return new EncodeResults&lt;EncodedToken&gt; { Tokens = EncodeInternal(textSpanToEncode), NormalizedText = normalizedText, CharsConsumed = charsConsumed };
419: return new EncodeResults&lt;int&gt; { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
455: return new EncodeResults&lt;int&gt; { Tokens = ids, NormalizedText = normalizedText, CharsConsumed = textLength };
Model\SentencePieceTokenizer.cs (2)
200: Tokens = EncodeToTokens(text, textSpan, out string? normalizedText, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization),
457: Tokens = EncodeToIds(text, textSpan, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderNormalization, out string? normalizedText, out int charsConsumed, settings.MaxTokenCount),
Model\TiktokenTokenizer.cs (4)
262: return new EncodeResults&lt;EncodedToken&gt; { NormalizedText = null, Tokens = [], CharsConsumed = 0 };
290: return new EncodeResults&lt;EncodedToken&gt; { NormalizedText = normalizedText, Tokens = tokens, CharsConsumed = charsConsumed };
372: return new EncodeResults&lt;int&gt; { NormalizedText = null, Tokens = [], CharsConsumed = 0 };
407: return new EncodeResults&lt;int&gt; { NormalizedText = normalizedText, Tokens = ids, CharsConsumed = charsConsumed };
Model\WordPieceTokenizer.cs (4)
276: return new EncodeResults&lt;EncodedToken&gt; { NormalizedText = null, Tokens = [], CharsConsumed = 0 };
304: return new EncodeResults&lt;EncodedToken&gt; { NormalizedText = normalizedText, Tokens = tokens, CharsConsumed = charsConsumed };
399: return new EncodeResults&lt;int&gt; { NormalizedText = null, Tokens = [], CharsConsumed = 0 };
435: return new EncodeResults&lt;int&gt; { NormalizedText = normalizedText, Tokens = ids, CharsConsumed = charsConsumed };
Tokenizer.cs (1)
54: Tokens = ids,
Microsoft.ML.Tokenizers.Tests (1)
TokenizerTests.cs (1)
119: return new EncodeResults&lt;EncodedToken&gt; { Tokens = tokens, CharsConsumed = count };
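
Every write site above follows the same construction pattern: an empty-input fast path that returns Tokens = [] with CharsConsumed = 0, and a populated EncodeResults&lt;T&gt; once encoding succeeds. The following self-contained sketch illustrates that pattern using simplified stand-ins for the library types (only the members visible in the matches above are modeled; the Encode method and its body are hypothetical, not the library's implementation):

    using System;
    using System.Collections.Generic;

    // Simplified stand-ins for the Microsoft.ML.Tokenizers types; only the
    // members that appear in the matches above are modeled here.
    public readonly record struct EncodedToken(int Id, string Value);

    public struct EncodeResults<T>
    {
        public IReadOnlyList<T> Tokens { get; set; }
        public string? NormalizedText { get; set; }
        public int CharsConsumed { get; set; }
    }

    public static class EncodeSketch
    {
        // Hypothetical encode method showing the shared write pattern.
        public static EncodeResults<EncodedToken> Encode(ReadOnlySpan<char> textSpan)
        {
            if (textSpan.IsEmpty)
            {
                // Empty-input fast path, as in e.g. BPETokenizer.cs:317.
                return new EncodeResults<EncodedToken> { Tokens = [], NormalizedText = null, CharsConsumed = 0 };
            }

            var tokens = new List<EncodedToken>();
            // ... model-specific encoding would populate `tokens` here; a single
            // placeholder token stands in for that work.
            tokens.Add(new EncodedToken(0, textSpan.ToString()));

            return new EncodeResults<EncodedToken>
            {
                Tokens = tokens,
                NormalizedText = null,           // set when a normalizer rewrote the input
                CharsConsumed = textSpan.Length, // how much of the input was encoded
            };
        }
    }
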
15 references to Tokens
Microsoft.ML.Tokenizers (15)
Model\CodeGenTokenizer.cs (2)
303: return result.Tokens;
321: return result.Tokens;
Tokenizer.cs (13)
46: var ids = new int[results.Tokens.Count];
49: ids[i] = results.Tokens[i].Id;
68: =&gt; EncodeToIds(text, text.AsSpan(), new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization }).Tokens;
78: =&gt; EncodeToIds(null, text, new EncodeSettings { ConsiderPreTokenization = considerPreTokenization, ConsiderNormalization = considerNormalization }).Tokens;
103: return result.Tokens;
129: return result.Tokens;
153: return result.Tokens;
169: return result.Tokens;
184: =&gt; EncodeToTokens(text, textSpan, settings).Tokens.Count;
237: tokenCount = Math.Min(maxTokenCount, tokens.Tokens.Count);
243: var token = tokens.Tokens[tokenCount - 1];
253: var token = tokens.Tokens[tokens.Tokens.Count - tokenCount];
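
On the read side, the Tokenizer.cs call sites resolve to two recurring shapes: projecting each EncodedToken to its Id (lines 46-49) and clamping to a maximum token count before indexing from the front or back of Tokens (lines 237-253). A minimal sketch of both, reusing the stand-in types from the sketch above (method names are hypothetical; the truncation helper assumes tokenCount ends up greater than zero, as the real call sites guard elsewhere):

    using System;

    public static class ConsumeSketch
    {
        // Id projection, mirroring Tokenizer.cs:46-49.
        public static int[] ToIds(EncodeResults<EncodedToken> results)
        {
            var ids = new int[results.Tokens.Count];
            for (int i = 0; i < ids.Length; i++)
            {
                ids[i] = results.Tokens[i].Id;
            }
            return ids;
        }

        // Truncation, mirroring Tokenizer.cs:237-253: keep at most maxTokenCount
        // tokens and pick the boundary token from either end of Tokens.
        public static EncodedToken BoundaryToken(EncodeResults<EncodedToken> tokens, int maxTokenCount, bool fromEnd)
        {
            int tokenCount = Math.Min(maxTokenCount, tokens.Tokens.Count);
            return fromEnd
                ? tokens.Tokens[tokens.Tokens.Count - tokenCount] // first kept token, counting from the end
                : tokens.Tokens[tokenCount - 1];                  // last kept token, counting from the start
        }
    }
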