1 write to AddPrefixSpace
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
142AddPrefixSpace = addPrefixSpace;
24 references to AddPrefixSpace
Microsoft.ML.Tokenizers (7)
Model\CodeGenTokenizer.cs (7)
286=> EncodeToTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization); 490Tokens = EncodeToIds(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, 670=> CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount); 718return LastIndexOf(text, textSpan, settings.MaxTokenCount, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, 722tokenCount = CountTokens(text, textSpan, AddPrefixSpace, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedString, out int charsConsumed, settings.MaxTokenCount); 1232public override string? Decode(IEnumerable<int> ids) => Decode(ids, hasPrefixSpace: AddPrefixSpace, considerSpecialTokens: false); 1327=> Decode(ids, destination, hasPrefixSpace: AddPrefixSpace, considerSpecialTokens: false, out idsConsumed, out charsWritten);
Microsoft.ML.Tokenizers.Tests (17)
CodeGenTests.cs (17)
258TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.AddPrefixSpace, considerSpecialTokens: false, text); 260encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: true, addEndOfSentence: false, out _); 263encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: true, addEndOfSentence: false, out _); 266TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.AddPrefixSpace, considerSpecialTokens: false, text); 268encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: false, addEndOfSentence: true, out _); 271encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: false, addEndOfSentence: true, out _); 274TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.AddPrefixSpace, considerSpecialTokens: false, text); 276encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: true, addEndOfSentence: true, out _); 279encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.AddPrefixSpace, addBeginningOfSentence: true, addEndOfSentence: true, out _); 282TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.AddPrefixSpace, considerSpecialTokens: false, text); 348ValidateEncoding(encoding, codeGenTokenizer.AddPrefixSpace, expectedTokens, expectedOffsets, expectedIds, expectedTokensWithSpace, expectedOffsetsWithSpace, expectedIdsWithSpace); 351ValidateEncoding(encoding, codeGenTokenizer.AddPrefixSpace, expectedTokens, expectedOffsets, expectedIds, expectedTokensWithSpace, expectedOffsetsWithSpace, expectedIdsWithSpace); 369var ids = codeGenTokenizer.AddPrefixSpace ? expectedIdsWithSpace : expectedIds; 403var offsets = codeGenTokenizer.AddPrefixSpace ? expectedOffsetsWithSpace : expectedOffsets; 441offsets = codeGenTokenizer.AddPrefixSpace ? expectedOffsetsWithSpace : expectedOffsets; 496var tokens = codeGenTokenizer.AddPrefixSpace ? expectedTokensWithSpace : expectedTokens; 970Assert.False(codeGenTokenizer.AddPrefixSpace);