2 writes to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (2)
Model\SentencePieceBpeModel.cs (1)
40
ByteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : MaxByteId;
Model\SentencePieceUnigramModel.cs (1)
67
ByteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out int id) ? id : MaxByteId;
20 references to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (20)
Model\SentencePieceBaseModel.cs (10)
172
while (current <
ByteCodeToIdOffset
)
188
EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
205
if (current <
ByteCodeToIdOffset
)
229
bytesPoolArray![bytesCount++] = (byte)(current -
ByteCodeToIdOffset
);
233
EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
396
while (current <
ByteCodeToIdOffset
)
417
if (!EncodeByte(enumerator.Current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
471
if (current <
ByteCodeToIdOffset
)
502
bytesPoolArray![bytesCount++] = (byte)(current -
ByteCodeToIdOffset
);
506
if (!EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
Model\SentencePieceBpeModel.cs (5)
41
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
216
int id = (int)c +
ByteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
240
int id = (int)utf8Bytes[j] +
ByteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
515
accumulatedIds.Add((int)c +
ByteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
543
accumulatedIds.Add((int)utf8Bytes[j] +
ByteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
Model\SentencePieceUnigramModel.cs (5)
68
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
69
MaxIdByteFallbackId =
ByteCodeToIdOffset
+ 0xFF; // from <0x00> to <0xFF>.
433
int id =
ByteCodeToIdOffset
+ destination[0];
438
id =
ByteCodeToIdOffset
+ destination[j];
752
ids.Insert(IdsIndex,
ByteCodeToIdOffset
+ normalizationSpan[Utf8Index + j]);