9 references to SplitText
Microsoft.ML.Tokenizers (9)
Model\SentencePieceBpeModel.cs (4)
116
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
375
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
652
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
907
(int Offset, int Length)[] splits = PreTokenizer.
SplitText
(text, SpecialTokensRegex!).ToArray();
Model\SentencePieceUnigramModel.cs (4)
228
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
651
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
1022
foreach ((int Offset, int Length) in PreTokenizer.
SplitText
(text, SpecialTokensRegex!))
1283
(int Offset, int Length)[] splits = PreTokenizer.
SplitText
(text, SpecialTokensRegex!).ToArray();
PreTokenizer\RobertaPreTokenizer.cs (1)
47
return
SplitText
(text, TiktokenTokenizer.P50kBaseRegex());