1 write to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (1)
Model\SentencePieceTokenizer.cs (1)
60
_byteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : _maxByteId;
15 references to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (15)
Model\SentencePieceTokenizer.cs (15)
61
_oneByteUtf8EncodingMaxId =
_byteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
381
int id = (int)c +
_byteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
405
int id = (int)utf8Bytes[j] +
_byteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
751
accumulatedIds.Add((int)c +
_byteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
779
accumulatedIds.Add((int)utf8Bytes[j] +
_byteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
1564
while (enumerator.Current <
_byteCodeToIdOffset
)
1578
EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
1602
if (enumerator.Current <
_byteCodeToIdOffset
)
1626
bytesPoolArray![bytesCount++] = (byte)(enumerator.Current -
_byteCodeToIdOffset
);
1630
EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
1812
while (enumerator.Current <
_byteCodeToIdOffset
)
1831
if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
1878
if (enumerator.Current <
_byteCodeToIdOffset
)
1909
bytesPoolArray![bytesCount++] = (byte)(enumerator.Current -
_byteCodeToIdOffset
);
1913
if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId,
_byteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))