2 writes to SpecialTokensRegex
Microsoft.ML.Tokenizers (2)
Model\SentencePieceBaseModel.cs (2)
51
SpecialTokensRegex
= new Regex(string.Join("|", specialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
112
SpecialTokensRegex
= new Regex(string.Join("|", SpecialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
24 references to SpecialTokensRegex
Microsoft.ML.Tokenizers (24)
Model\SentencePieceBpeModel.cs (12)
139
if (
SpecialTokensRegex
is not null)
153
Debug.Assert(
SpecialTokensRegex
is not null);
162
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
391
if (
SpecialTokensRegex
is not null)
405
Debug.Assert(
SpecialTokensRegex
is not null);
421
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
676
return
SpecialTokensRegex
is not null ?
683
Debug.Assert(
SpecialTokensRegex
is not null);
698
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
927
if (
SpecialTokensRegex
is not null)
941
Debug.Assert(
SpecialTokensRegex
is not null);
953
(int Offset, int Length)[] splits = PreTokenizer.SplitText(text,
SpecialTokensRegex
!).ToArray();
Model\SentencePieceUnigramModel.cs (12)
167
if (
SpecialTokensRegex
is not null)
267
Debug.Assert(
SpecialTokensRegex
is not null);
278
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
639
if (
SpecialTokensRegex
is not null)
692
Debug.Assert(
SpecialTokensRegex
is not null);
701
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
1029
if (
SpecialTokensRegex
is not null)
1063
Debug.Assert(
SpecialTokensRegex
is not null);
1072
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
1298
if (
SpecialTokensRegex
is not null)
1327
Debug.Assert(
SpecialTokensRegex
is not null);
1333
(int Offset, int Length)[] splits = PreTokenizer.SplitText(text,
SpecialTokensRegex
!).ToArray();