1 write to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
142
BeginningOfSentenceToken
= beginningOfSentenceToken;
18 references to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (10)
Model\CodeGenTokenizer.cs (10)
159
if (!string.IsNullOrEmpty(
BeginningOfSentenceToken
))
161
if (!_vocab.TryGetValue(
BeginningOfSentenceToken
!, out (int beggingOfSentenceId, string token) value))
163
throw new ArgumentException($"The beginning of sentence token '{
BeginningOfSentenceToken
}' is not found in the vocabulary.");
179
if (AddBeginningOfSentence && string.IsNullOrEmpty(
BeginningOfSentenceToken
))
382
tokens.Add(new EncodedToken(BeginningOfSentenceId.Value,
BeginningOfSentenceToken
!, new Range(0, 0)));
1266
Helpers.AppendToBytesArray(
BeginningOfSentenceToken
!.AsSpan(), ref bytes, ref bytesIndex);
1379
if (
BeginningOfSentenceToken
!.Length > buffer.Length)
1384
BeginningOfSentenceToken
.AsSpan().CopyTo(buffer);
1385
buffer = buffer.Slice(
BeginningOfSentenceToken
.Length);
1386
charsWritten +=
BeginningOfSentenceToken
.Length;
Microsoft.ML.Tokenizers.Tests (8)
CodeGenTests.cs (8)
294
string targetText = $"{codeGenTokenizer.
BeginningOfSentenceToken
}{text}{codeGenTokenizer.EndOfSentenceToken}";
550
Assert.True(codeGenTokenizer.
BeginningOfSentenceToken
is not null);
555
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
578
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
816
Assert.True(codeGenTokenizer.
BeginningOfSentenceToken
is not null);
822
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
851
tokensList.Insert(0, codeGenTokenizer.
BeginningOfSentenceToken
!);
978
Assert.Equal(DefaultSpecialToken, codeGenTokenizer.
BeginningOfSentenceToken
);