1 write to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (1)
Model\SentencePieceBpeTokenizer.cs (1)
60: _byteCodeToIdOffset = _vocab.TryGetValue("<0x00>", out (int Id, float Score, byte Type) value) ? value.Id : _maxByteId;
15 references to _byteCodeToIdOffset
Microsoft.ML.Tokenizers (15)
Model\SentencePieceBpeTokenizer.cs (15)
61: _oneByteUtf8EncodingMaxId = _byteCodeToIdOffset + 0x7F; // 0x7F is the maximum value of the one byte UTF-8 character.
380: int id = (int)c + _byteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4.
404: int id = (int)utf8Bytes[j] + _byteCodeToIdOffset; // byte code is mapped to the to the Ids starting from 4.
750: accumulatedIds.Add((int)c + _byteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4.
778: accumulatedIds.Add((int)utf8Bytes[j] + _byteCodeToIdOffset); // byte code is mapped to the to the Ids starting from 4.
1563: while (enumerator.Current < _byteCodeToIdOffset)
1577: EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId, _byteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb);
1601: if (enumerator.Current < _byteCodeToIdOffset)
1625: bytesPoolArray![bytesCount++] = (byte)(enumerator.Current - _byteCodeToIdOffset);
1629: EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId, _byteCodeToIdOffset, ref bytesCount, ref bytesPoolArray, ref sb);
1811: while (enumerator.Current < _byteCodeToIdOffset)
1830: if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId, _byteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))
1877: if (enumerator.Current < _byteCodeToIdOffset)
1908: bytesPoolArray![bytesCount++] = (byte)(enumerator.Current - _byteCodeToIdOffset);
1912: if (!EncodeByte(enumerator.Current, _oneByteUtf8EncodingMaxId, _byteCodeToIdOffset, ref bytesCount, buffer, ref charsWritten, ref idsConsumed, ref bytesPoolArray))