1 write to SpecialTokensRegex
Microsoft.ML.Tokenizers (1)
Model\SentencePieceBaseModel.cs (1)
51: SpecialTokensRegex = new Regex(string.Join("|", specialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
24 references to SpecialTokensRegex
Microsoft.ML.Tokenizers (24)
Model\SentencePieceBpeModel.cs (12)
93: if (SpecialTokensRegex is not null)
107: Debug.Assert(SpecialTokensRegex is not null);
116: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
345: if (SpecialTokensRegex is not null)
359: Debug.Assert(SpecialTokensRegex is not null);
375: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
630: return SpecialTokensRegex is not null ?
637: Debug.Assert(SpecialTokensRegex is not null);
652: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
881: if (SpecialTokensRegex is not null)
895: Debug.Assert(SpecialTokensRegex is not null);
907: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();
Model\SentencePieceUnigramModel.cs (12)
117: if (SpecialTokensRegex is not null)
217: Debug.Assert(SpecialTokensRegex is not null);
228: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
589: if (SpecialTokensRegex is not null)
642: Debug.Assert(SpecialTokensRegex is not null);
651: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
979: if (SpecialTokensRegex is not null)
1013: Debug.Assert(SpecialTokensRegex is not null);
1022: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
1248: if (SpecialTokensRegex is not null)
1277: Debug.Assert(SpecialTokensRegex is not null);
1283: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();