2 writes to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (2)
Model\SentencePieceBpeModel.cs (1)
40ByteCodeToIdOffset = _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : MaxByteId;
Model\SentencePieceUnigramModel.cs (1)
67ByteCodeToIdOffset = _vocab.TryGetValue("<0x00>", out int id) ? id : MaxByteId;
20 references to ByteCodeToIdOffset
Microsoft.ML.Tokenizers (20)
Model\SentencePieceBaseModel.cs (10)
172while (current < ByteCodeToIdOffset) 188EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb); 205if (current < ByteCodeToIdOffset) 229bytesPoolArray![bytesCount++] = (byte)(current - ByteCodeToIdOffset); 233EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb); 396while (current < ByteCodeToIdOffset) 417if (!EncodeByte(enumerator.Current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray)) 471if (current < ByteCodeToIdOffset) 502bytesPoolArray![bytesCount++] = (byte)(current - ByteCodeToIdOffset); 506if (!EncodeByte(current, OneByteUtf8EncodingMaxId, ByteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
Model\SentencePieceBpeModel.cs (5)
41OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 216int id = (int)c + ByteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4. 240int id = (int)utf8Bytes[j] + ByteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4. 515accumulatedIds.Add((int)c + ByteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4. 543accumulatedIds.Add((int)utf8Bytes[j] + ByteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4.
Model\SentencePieceUnigramModel.cs (5)
68OneByteUtf8EncodingMaxId = ByteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character. 69MaxIdByteFallbackId = ByteCodeToIdOffset + 0xFF; // from <0x00> to <0xFF>. 433int id = ByteCodeToIdOffset + destination[0]; 438id = ByteCodeToIdOffset + destination[j]; 752ids.Insert(IdsIndex, ByteCodeToIdOffset + normalizationSpan[Utf8Index + j]);