5 references to SplitText
Microsoft.ML.Tokenizers (5)
Model\SentencePieceTokenizer.cs (4)
281
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, _specialTokensRegex!))
611
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, _specialTokensRegex!))
944
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, _specialTokensRegex!))
1302
(int Offset, int Length)[] splits = PreTokenizer.
SplitText
(text, _specialTokensRegex!).ToArray();
PreTokenizer\RobertaPreTokenizer.cs (1)
47
return
SplitText
(text, TiktokenTokenizer.P50kBaseRegex());