1 write to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
60
_byteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : _maxByteId;
15 references to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (15)
Model\SentencePieceTokenizer.cs (15)
61
_oneByteUtf8EncodingMaxId =
_byteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
380
int id = (int)c +
_byteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
404
int id = (int)utf8Bytes[j] +
_byteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
750
accumulatedIds.Add((int)c +
_byteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
778
accumulatedIds.Add((int)utf8Bytes[j] +
_byteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
1563
while (enumerator.Current <
_byteCodeToIdOffset
)
1577
EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
1601
if (enumerator.Current <
_byteCodeToIdOffset
)
1625
bytesPoolArray![bytesCount++] = (byte)(enumerator.Current -
_byteCodeToIdOffset
);
1629
EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
1811
while (enumerator.Current <
_byteCodeToIdOffset
)
1830
if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
1877
if (enumerator.Current <
_byteCodeToIdOffset
)
1908
bytesPoolArray![bytesCount++] = (byte)(enumerator.Current -
_byteCodeToIdOffset
);
1912
if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))