1 write to EndOfSentenceToken
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
140EndOfSentenceToken = endOfSentenceToken;
17 references to EndOfSentenceToken
Microsoft.ML.Tokenizers (10)
Model\CodeGenTokenizer.cs (10)
166if (!string.IsNullOrEmpty(EndOfSentenceToken)) 168if (!_vocab.TryGetValue(EndOfSentenceToken!, out (int endOfSentenceId, string token) value)) 170throw new ArgumentException($"The end of sentence token '{EndOfSentenceToken}' is not found in the vocabulary."); 181if (AddEndOfSentence && string.IsNullOrEmpty(EndOfSentenceToken)) 398tokens.Add(new EncodedToken(EndOfSentenceId.Value, EndOfSentenceToken!, (addPrefixSpace ? Math.Max(0, textSpanToEncode.Length - 1) : textSpanToEncode.Length, 0))); 1270AppendToBytesArray(EndOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex); 1397if (EndOfSentenceToken!.Length > buffer.Length) 1402EndOfSentenceToken.AsSpan().CopyTo(buffer); 1403buffer = buffer.Slice(EndOfSentenceToken.Length); 1404charsWritten += EndOfSentenceToken.Length;
Microsoft.ML.Tokenizers.Tests (7)
CodeGenTests.cs (7)
294string targetText = $"{codeGenTokenizer.BeginningOfSentenceToken}{text}{codeGenTokenizer.EndOfSentenceToken}"; 683Assert.True(codeGenTokenizer.EndOfSentenceToken is not null); 688tokensList.Add(codeGenTokenizer.EndOfSentenceToken!); 711tokensList.Add(codeGenTokenizer.EndOfSentenceToken!); 823tokensList.Add(codeGenTokenizer.EndOfSentenceToken!); 852tokensList.Add(codeGenTokenizer.EndOfSentenceToken!); 979Assert.Equal(DefaultSpecialToken, codeGenTokenizer.EndOfSentenceToken);