1 write to SpecialTokensRegex
Microsoft.ML.Tokenizers (1)
Model\SentencePieceBaseModel.cs (1)
51
SpecialTokensRegex
= new Regex(string.Join("|", specialTokens.Keys.Select(s => Regex.Escape(s))), RegexOptions.Compiled);
24 references to SpecialTokensRegex
Microsoft.ML.Tokenizers (24)
Model\SentencePieceBpeModel.cs (12)
93
if (
SpecialTokensRegex
is not null)
107
Debug.Assert(
SpecialTokensRegex
is not null);
116
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
345
if (
SpecialTokensRegex
is not null)
359
Debug.Assert(
SpecialTokensRegex
is not null);
375
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
630
return
SpecialTokensRegex
is not null ?
637
Debug.Assert(
SpecialTokensRegex
is not null);
652
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
881
if (
SpecialTokensRegex
is not null)
895
Debug.Assert(
SpecialTokensRegex
is not null);
907
(int Offset, int Length)[] splits = PreTokenizer.SplitText(text,
SpecialTokensRegex
!).ToArray();
Model\SentencePieceUnigramModel.cs (12)
117
if (
SpecialTokensRegex
is not null)
217
Debug.Assert(
SpecialTokensRegex
is not null);
228
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
589
if (
SpecialTokensRegex
is not null)
642
Debug.Assert(
SpecialTokensRegex
is not null);
651
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
979
if (
SpecialTokensRegex
is not null)
1013
Debug.Assert(
SpecialTokensRegex
is not null);
1022
foreach ((int Offset, int Length) in PreTokenizer.SplitText(text,
SpecialTokensRegex
!))
1248
if (
SpecialTokensRegex
is not null)
1277
Debug.Assert(
SpecialTokensRegex
is not null);
1283
(int Offset, int Length)[] splits = PreTokenizer.SplitText(text,
SpecialTokensRegex
!).ToArray();