9 references to SplitText
Microsoft.ML.Tokenizers (9)
Model\SentencePieceBpeModel.cs (4)
116: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
375: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
652: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
907: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();
Model\SentencePieceUnigramModel.cs (4)
228: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
651: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
1022: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
1283: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();
PreTokenizer\RobertaPreTokenizer.cs (1)
47: return SplitText(text, TiktokenTokenizer.P50kBaseRegex());