24 references to EncodedToken
Microsoft.ML.Tokenizers (23)
Model\CodeGenTokenizer.cs (6)
379
tokens.Add(new
EncodedToken
(BeginningOfSentenceId.Value, BeginningOfSentenceToken!, (0, 0)));
1593
tokens.Add(new
EncodedToken
(tokensToAdd[0].Id, tokensToAdd[0].Value, (offset == 0 ? tokensToAdd[0].Offset.Index : tokensToAdd[0].Offset.Index + offset - 1, offset == 0 ? tokensToAdd[0].Offset.Length - 1 : tokensToAdd[0].Offset.Length)));
1597
tokens.Add(new
EncodedToken
(tokensToAdd[i].Id, tokensToAdd[i].Value, (tokensToAdd[i].Offset.Index + offset - 1, tokensToAdd[i].Offset.Length)));
1605
tokens.Add(new
EncodedToken
(t.Id, t.Value, (t.Offset.Index + offset, t.Offset.Length)));
1625
return new List<EncodedToken> { new
EncodedToken
(_vocab[new StringSpanOrdinalKey(tokenValue)].Id, tokenValue, (mapping[0], 1)) };
1699
return new
EncodedToken
(id, token, (tokenStartIndex, tokenLength));
Model\EnglishRobertaTokenizer.cs (4)
328
tokens.Add(new
EncodedToken
(t.Id, t.Value, (split.Offset + t.Offset.Index, t.Offset.Length)));
918
list.Add(new
EncodedToken
(tokens[j].Id, tokens[j].Value, (indexMapping[index], tokens[j].Value.Length)));
950
return new List<EncodedToken> { new
EncodedToken
(_vocab[new StringSpanOrdinalKey(tokenValue)], tokenValue, (indexMapping[0], 1)) };
1039
tokens.Add(new
EncodedToken
(_vocab[new StringSpanOrdinalKey(w)], w, (indexMapping[index], w.Length)));
Model\SentencePieceBpeTokenizer.cs (9)
275
tokens.Add(new
EncodedToken
(BeginningOfSentenceId, BeginningOfSentenceToken, (0, 0)));
289
tokens.Add(new
EncodedToken
(id, _specialTokensReverse![id], (Offset, Length)));
302
tokens.Add(new
EncodedToken
(EndOfSentenceId, EndOfSentenceToken, (text.Length, 0)));
322
tokens.Add(new
EncodedToken
(BeginningOfSentenceId, BeginningOfSentenceToken, (0, 0)));
352
tokens.Add(new
EncodedToken
(
367
tokens.Add(new
EncodedToken
(EndOfSentenceId, EndOfSentenceToken, (text.Length, 0)));
384
tokens.Add(new
EncodedToken
(id, token, (index + i, 1)));
408
tokens.Add(new
EncodedToken
(id, token, (index + i, length)));
436
tokens.Add(new
EncodedToken
(id.Id, text.Slice(pieceSpan.Index, pieceSpan.Length).ToString(), (pieceSpan.Index, pieceSpan.Length)));
Model\TiktokenTokenizer.cs (3)
306
tokens.Add(new
EncodedToken
(
318
tokens.Add(new
EncodedToken
(mappedId.Id, mappedId.Token, (offset, mappedId.Token.Length)));
347
tokens.Add(new
EncodedToken
(
Model\Word.cs (1)
299
tokens.Add(new
EncodedToken
(_symbols[i].C, vocabReverse[_symbols[i].C], (index + offset, _symbols[i].Len)));
Microsoft.ML.Tokenizers.Tests (1)
TokenizerTests.cs (1)
115
tokens.Add(new
EncodedToken
(c - 'a', c.ToString(), (count, 1)));