1 write to EndOfSentenceId
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
66: EndOfSentenceId = modelProto.TrainerSpec.EosId <= 0 ? 1 : modelProto.TrainerSpec.EosId;
16 references to EndOfSentenceId
Microsoft.ML.GenAI.Phi.Tests (1)
Phi3Tests.cs (1)
122: tokenizer.EndOfSentenceId.Should().Be(2);
Microsoft.ML.Tokenizers (6)
Model\SentencePieceTokenizer.cs (6)
302: tokens.Add(new EncodedToken(EndOfSentenceId, EndOfSentenceToken, new Range(text.Length, text.Length)));
367: tokens.Add(new EncodedToken(EndOfSentenceId, EndOfSentenceToken, new Range(text.Length, text.Length)));
636: accumulatedIds.Add(EndOfSentenceId);
732: accumulatedIds.Add(EndOfSentenceId);
1749: else if (id == tokenizer.EndOfSentenceId)
1990: else if (id == tokenizer.EndOfSentenceId)
Microsoft.ML.Tokenizers.Tests (9)
LlamaTests.cs (9)
277: Assert.Equal(isEmptyInput ? Array.Empty<int>() : ids.Skip(1).Concat(new[] { bpe.EndOfSentenceId }), bpeTokens.Select(token => token.Id));
283: Assert.Equal(isEmptyInput ? Array.Empty<int>() : ids.Skip(1).Concat(new[] { bpe.EndOfSentenceId }), encodedIds);
287: Assert.Equal(isEmptyInput ? Array.Empty<int>() : ids.Concat(new[] { bpe.EndOfSentenceId }), bpeTokens.Select(token => token.Id));
293: Assert.Equal(isEmptyInput ? Array.Empty<int>() : ids.Concat(new[] { bpe.EndOfSentenceId }), encodedIds);
367: Assert.Equal(2, bpe.EndOfSentenceId);
386: Assert.Equal(llamaTokenizer.EndOfSentenceToken, llamaTokenizer.Decode([llamaTokenizer.EndOfSentenceId], considerSpecialTokens: true));
388: Assert.Equal(OperationStatus.Done, llamaTokenizer.Decode([llamaTokenizer.EndOfSentenceId], destinationBuffer, considerSpecialTokens: true, out int idsConsumed, out int charactersWritten));
539: expectedIds1 = addEndOfSentence ? expectedIds1.Concat(new[] { sentencePieceBpe.EndOfSentenceId }).ToArray() : expectedIds1;
573: expectedIds1 = addEndOfSentence ? expectedIds1.Concat(new[] { sentencePieceBpe.EndOfSentenceId }).ToArray() : expectedIds1;