16 references to _unigramTokenizerWithSpecialTokens
Microsoft.ML.Tokenizers.Tests (16)
UnigramTests.cs (16)
453result = _unigramTokenizerWithSpecialTokens.EncodeToTokens(newString, out normalized, addBeginningOfSentence: false, addEndOfSentence: false); 454extracted = ExtractedIds(_unigramTokenizerWithSpecialTokens, result, normalizedText, false, false); 457expectedIds[0] = _unigramTokenizerWithSpecialTokens.BeginningOfSentenceId; 459expectedIds[ids.Length + 1] = _unigramTokenizerWithSpecialTokens.SpecialTokens!["<pad>"]; 461expectedIds[ids.Length * 2 + 2] = _unigramTokenizerWithSpecialTokens.EndOfSentenceId; 465expectedTokens[0] = _unigramTokenizerWithSpecialTokens.BeginningOfSentenceToken; 469expectedTokens[tokens.Length * 2 + 2] = _unigramTokenizerWithSpecialTokens.EndOfSentenceToken; 664inputText = $"{_unigramTokenizerWithSpecialTokens.BeginningOfSentenceToken}{inputText}<pad>{inputText}{_unigramTokenizerWithSpecialTokens.EndOfSentenceToken}"; 666expectedIds[0] = _unigramTokenizerWithSpecialTokens.BeginningOfSentenceId; 668expectedIds[ids.Length + 1] = _unigramTokenizerWithSpecialTokens.SpecialTokens!["<pad>"]; 670expectedIds[ids.Length * 2 + 2] = _unigramTokenizerWithSpecialTokens.EndOfSentenceId; 671string expectedNormalized = $"{_unigramTokenizerWithSpecialTokens.BeginningOfSentenceToken}{normalizedText}<pad>{normalizedText}{_unigramTokenizerWithSpecialTokens.EndOfSentenceToken}"; 675result = _unigramTokenizerWithSpecialTokens.EncodeToIds(inputText, addBeginningOfSentence: false, addEndOfSentence: false, maxTokenCount: i, out string? normalized, out int charConsumed); 679result = _unigramTokenizerWithSpecialTokens.EncodeToIds(inputText.AsSpan(), addBeginningOfSentence: false, addEndOfSentence: false, maxTokenCount: i, out normalized, out charConsumed);