1 write to AddPrefixSpace
Microsoft.ML.Tokenizers (1)
Model\CodeGenTokenizer.cs (1)
142
AddPrefixSpace
= addPrefixSpace;
24 references to AddPrefixSpace
Microsoft.ML.Tokenizers (7)
Model\CodeGenTokenizer.cs (7)
286
=> EncodeToTokens(text, textSpan,
AddPrefixSpace
, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization);
492
Tokens = EncodeToIds(text, textSpan,
AddPrefixSpace
, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization,
672
=> CountTokens(text, textSpan,
AddPrefixSpace
, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out _, out _, settings.MaxTokenCount);
720
return LastIndexOf(text, textSpan, settings.MaxTokenCount,
AddPrefixSpace
, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization,
724
tokenCount = CountTokens(text, textSpan,
AddPrefixSpace
, AddBeginningOfSentence, AddEndOfSentence, settings.ConsiderPreTokenization, settings.ConsiderNormalization, out normalizedText, out int charsConsumed, settings.MaxTokenCount);
1234
public override string Decode(IEnumerable<int> ids) => Decode(ids, hasPrefixSpace:
AddPrefixSpace
, considerSpecialTokens: false);
1329
=> Decode(ids, destination, hasPrefixSpace:
AddPrefixSpace
, considerSpecialTokens: false, out idsConsumed, out charsWritten);
Microsoft.ML.Tokenizers.Tests (17)
CodeGenTests.cs (17)
258
TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.
AddPrefixSpace
, considerSpecialTokens: false, text);
260
encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: true, addEndOfSentence: false, out _);
263
encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: true, addEndOfSentence: false, out _);
266
TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.
AddPrefixSpace
, considerSpecialTokens: false, text);
268
encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: false, addEndOfSentence: true, out _);
271
encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: false, addEndOfSentence: true, out _);
274
TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.
AddPrefixSpace
, considerSpecialTokens: false, text);
276
encoding = codeGenTokenizer.EncodeToTokens(text, addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: true, addEndOfSentence: true, out _);
279
encoding = codeGenTokenizer.EncodeToTokens(text.AsSpan(), addPrefixSpace: codeGenTokenizer.
AddPrefixSpace
, addBeginningOfSentence: true, addEndOfSentence: true, out _);
282
TestDecodingWithSpan(codeGenTokenizer, ids, codeGenTokenizer.
AddPrefixSpace
, considerSpecialTokens: false, text);
348
ValidateEncoding(encoding, codeGenTokenizer.
AddPrefixSpace
, expectedTokens, expectedOffsets, expectedIds, expectedTokensWithSpace, expectedOffsetsWithSpace, expectedIdsWithSpace);
351
ValidateEncoding(encoding, codeGenTokenizer.
AddPrefixSpace
, expectedTokens, expectedOffsets, expectedIds, expectedTokensWithSpace, expectedOffsetsWithSpace, expectedIdsWithSpace);
369
var ids = codeGenTokenizer.
AddPrefixSpace
? expectedIdsWithSpace : expectedIds;
403
var offsets = codeGenTokenizer.
AddPrefixSpace
? expectedOffsetsWithSpace : expectedOffsets;
441
offsets = codeGenTokenizer.
AddPrefixSpace
? expectedOffsetsWithSpace : expectedOffsets;
496
var tokens = codeGenTokenizer.
AddPrefixSpace
? expectedTokensWithSpace : expectedTokens;
970
Assert.False(codeGenTokenizer.
AddPrefixSpace
);