1 write to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
142BeginningOfSentenceToken = beginningOfSentenceToken;
18 references to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (10)
Model\CodeGenTokenizer.cs (10)
159if (!string.IsNullOrEmpty(BeginningOfSentenceToken)) 161if (!_vocab.TryGetValue(BeginningOfSentenceToken!, out (int beggingOfSentenceId, string token) value)) 163throw new ArgumentException($"The beginning of sentence token '{BeginningOfSentenceToken}' is not found in the vocabulary."); 179if (AddBeginningOfSentence && string.IsNullOrEmpty(BeginningOfSentenceToken)) 382tokens.Add(new EncodedToken(BeginningOfSentenceId.Value, BeginningOfSentenceToken!, new Range(0, 0))); 1266Helpers.AppendToBytesArray(BeginningOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex); 1379if (BeginningOfSentenceToken!.Length > buffer.Length) 1384BeginningOfSentenceToken.AsSpan().CopyTo(buffer); 1385buffer = buffer.Slice(BeginningOfSentenceToken.Length); 1386charsWritten += BeginningOfSentenceToken.Length;
Microsoft.ML.Tokenizers.Tests (8)
CodeGenTests.cs (8)
294string targetText = $"{codeGenTokenizer.BeginningOfSentenceToken}{text}{codeGenTokenizer.EndOfSentenceToken}"; 550Assert.True(codeGenTokenizer.BeginningOfSentenceToken is not null); 555tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 578tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 816Assert.True(codeGenTokenizer.BeginningOfSentenceToken is not null); 822tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 851tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 978Assert.Equal(DefaultSpecialToken, codeGenTokenizer.BeginningOfSentenceToken);