1 write to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
139BeginningOfSentenceToken = beginningOfSentenceToken;
18 references to BeginningOfSentenceToken
Microsoft.ML.Tokenizers (10)
Model\CodeGenTokenizer.cs (10)
156if (!string.IsNullOrEmpty(BeginningOfSentenceToken)) 158if (!_vocab.TryGetValue(BeginningOfSentenceToken!, out (int beggingOfSentenceId, string token) value)) 160throw new ArgumentException($"The beginning of sentence token '{BeginningOfSentenceToken}' is not found in the vocabulary."); 176if (AddBeginningOfSentence && string.IsNullOrEmpty(BeginningOfSentenceToken)) 379tokens.Add(new EncodedToken(BeginningOfSentenceId.Value, BeginningOfSentenceToken!, new Range(0, 0))); 1263AppendToBytesArray(BeginningOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex); 1376if (BeginningOfSentenceToken!.Length > buffer.Length) 1381BeginningOfSentenceToken.AsSpan().CopyTo(buffer); 1382buffer = buffer.Slice(BeginningOfSentenceToken.Length); 1383charsWritten += BeginningOfSentenceToken.Length;
Microsoft.ML.Tokenizers.Tests (8)
CodeGenTests.cs (8)
294string targetText = $"{codeGenTokenizer.BeginningOfSentenceToken}{text}{codeGenTokenizer.EndOfSentenceToken}"; 550Assert.True(codeGenTokenizer.BeginningOfSentenceToken is not null); 555tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 578tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 816Assert.True(codeGenTokenizer.BeginningOfSentenceToken is not null); 822tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 851tokensList.Insert(0, codeGenTokenizer.BeginningOfSentenceToken!); 978Assert.Equal(DefaultSpecialToken, codeGenTokenizer.BeginningOfSentenceToken);