4 writes to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (4)
Model\SentencePieceBpeModel.cs (2)
40
ByteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : MaxByteId;
67
ByteCodeToIdOffset
= value.Id;
Model\SentencePieceUnigramModel.cs (2)
59
ByteCodeToIdOffset
= _vocab.TryGetValue("<0x00>", out int id) ? id : MaxByteId;
117
ByteCodeToIdOffset
= id;
23 references to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (23)
Model\SentencePieceBaseModel.cs (10)
233
while (current <
ByteCodeToIdOffset
)
249
EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
266
if (current <
ByteCodeToIdOffset
)
290
bytesPoolArray![bytesCount++] = (byte)(current -
ByteCodeToIdOffset
);
294
EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, ref bytesPoolArray, ref sb);
457
while (current <
ByteCodeToIdOffset
)
478
if (!EncodeByte(enumerator.Current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
532
if (current <
ByteCodeToIdOffset
)
563
bytesPoolArray![bytesCount++] = (byte)(current -
ByteCodeToIdOffset
);
567
if (!EncodeByte(current, OneByteUtf8EncodingMaxId,
ByteCodeToIdOffset
, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
Model\SentencePieceBpeModel.cs (6)
41
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
68
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
262
int id = (int)c +
ByteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
286
int id = (int)utf8Bytes[j] +
ByteCodeToIdOffset
; // byte code is mapped to the Ids starting from 4.
561
accumulatedIds.Add((int)c +
ByteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
589
accumulatedIds.Add((int)utf8Bytes[j] +
ByteCodeToIdOffset
); // byte code is mapped to the Ids starting from 4.
Model\SentencePieceUnigramModel.cs (7)
60
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
61
MaxIdByteFallbackId =
ByteCodeToIdOffset
+ 0xFF; // from <0x00> to <0xFF>.
118
OneByteUtf8EncodingMaxId =
ByteCodeToIdOffset
+ 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
119
MaxIdByteFallbackId =
ByteCodeToIdOffset
+ 0xFF; // from <0x00> to <0xFF>.
483
int id =
ByteCodeToIdOffset
+ destination[0];
488
id =
ByteCodeToIdOffset
+ destination[j];
802
ids.Insert(IdsIndex,
ByteCodeToIdOffset
+ normalizationSpan[Utf8Index + j]);