2 writes to SpecialTokensRegex
Microsoft.ML.Tokenizers (2)
Model\SentencePieceBaseModel.cs (2)
line 51: SpecialTokensRegex = new Regex(string.Join("|", specialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
line 112: SpecialTokensRegex = new Regex(string.Join("|", SpecialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
24 references to SpecialTokensRegex
Microsoft.ML.Tokenizers (24)
Model\SentencePieceBpeModel.cs (12)
line 139: if (SpecialTokensRegex is not null)
line 153: Debug.Assert(SpecialTokensRegex is not null);
line 162: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 391: if (SpecialTokensRegex is not null)
line 405: Debug.Assert(SpecialTokensRegex is not null);
line 421: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 676: return SpecialTokensRegex is not null ?
line 683: Debug.Assert(SpecialTokensRegex is not null);
line 698: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 927: if (SpecialTokensRegex is not null)
line 941: Debug.Assert(SpecialTokensRegex is not null);
line 953: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();
Model\SentencePieceUnigramModel.cs (12)
line 167: if (SpecialTokensRegex is not null)
line 267: Debug.Assert(SpecialTokensRegex is not null);
line 278: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 639: if (SpecialTokensRegex is not null)
line 692: Debug.Assert(SpecialTokensRegex is not null);
line 701: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 1029: if (SpecialTokensRegex is not null)
line 1063: Debug.Assert(SpecialTokensRegex is not null);
line 1072: foreach ((int Offset, int Length) in PreTokenizer.SplitText(text, SpecialTokensRegex!))
line 1298: if (SpecialTokensRegex is not null)
line 1327: Debug.Assert(SpecialTokensRegex is not null);
line 1333: (int Offset, int Length)[] splits = PreTokenizer.SplitText(text, SpecialTokensRegex!).ToArray();