1 write to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
139
BeginningOfSentenceToken
= beginningOfSentenceToken;
18 references to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (10)
Model\CodeGenTokenizer.cs (10)
156
if (!string.IsNullOrEmpty(
BeginningOfSentenceToken
))
158
if (!_vocab.TryGetValue(
BeginningOfSentenceToken
!, out (int beggingOfSentenceId, string token) value))
160
throw new ArgumentException($"The beginning of sentence token '{
BeginningOfSentenceToken
}' is not found in the vocabulary.");
176
if (AddBeginningOfSentence && string.IsNullOrEmpty(
BeginningOfSentenceToken
))
379
tokens.Add(new EncodedToken(BeginningOfSentenceId.Value,
BeginningOfSentenceToken
!, new Range(0, 0)));
1263
AppendToBytesArray(
BeginningOfSentenceToken
!.AsSpan(), ref bytes, ref bytesIndex);
1376
if (
BeginningOfSentenceToken
!.Length > buffer.Length)
1381
BeginningOfSentenceToken
.AsSpan().CopyTo(buffer);
1382
buffer = buffer.Slice(
BeginningOfSentenceToken
.Length);
1383
charsWritten +=
BeginningOfSentenceToken
.Length;
Microsoft.ML.Tokenizers.Tests (8)
CodeGenTests.cs (8)
294
string targetText = $"{codeGenTokenizer.
BeginningOfSentenceToken
}{text}{codeGenTokenizer.EndOfSentenceToken}";
550
Assert.True(codeGenTokenizer.
BeginningOfSentenceToken
is not null);
555
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
578
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
816
Assert.True(codeGenTokenizer.
BeginningOfSentenceToken
is not null);
822
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
851
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
978
Assert.Equal(DefaultSpecialToken, codeGenTokenizer.
BeginningOfSentenceToken
);