4 writes to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (4)
Model\SentencePieceBpeModel.cs (2)
40ByteCodeToIdOffset = _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : MaxByteId; 67ByteCodeToIdOffset = value.Id;
Model\SentencePieceUnigramModel.cs (2)
59ByteCodeToIdOffset = _vocab.TryGetValue("<0x00>", out int id) ? id : MaxByteId; 117ByteCodeToIdOffset = id;
23 references to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (23)
Model\SentencePieceBaseModel.cs (10)
233while (current < ByteCodeToIdOffset) 249EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb); 266if (current < ByteCodeToIdOffset) 290bytesPoolArray![bytesCount++] = (byte)(current - ByteCodeToIdOffset); 294EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb); 457while (current < ByteCodeToIdOffset) 478if (!EncodeByte(enumerator.Current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray)) 532if (current < ByteCodeToIdOffset) 563bytesPoolArray![bytesCount++] = (byte)(current - ByteCodeToIdOffset); 567if (!EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
Model\SentencePieceBpeModel.cs (6)
41OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 68OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 262int id = (int)c + ByteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4. 286int id = (int)utf8Bytes[j] + ByteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4. 561accumulatedIds.Add((int)c + ByteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4. 589accumulatedIds.Add((int)utf8Bytes[j] + ByteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4.
Model\SentencePieceUnigramModel.cs (7)
60OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 61MaxIdByteFallbackId = ByteCodeToIdOffset + 0xFF; // from <0x00> to <0xFF>. 118OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 119MaxIdByteFallbackId = ByteCodeToIdOffset + 0xFF; // from <0x00> to <0xFF>. 483int id = ByteCodeToIdOffset + destination[0]; 488id = ByteCodeToIdOffset + destination[j]; 802ids.Insert(IdsIndex, ByteCodeToIdOffset + normalizationSpan[Utf8Index + j]);