1 write to ByteToUnicode
Microsoft.ML.Tokenizers (1)
Utils\ByteToUnicodeEncoding.cs (1)
35
ByteToUnicode
= byteToUnicodeMapping;
16 references to ByteToUnicode
Microsoft.ML.Tokenizers (16)
Model\CodeGenTokenizer.cs (1)
1522
private static readonly char _transformedSpace = ByteToUnicodeEncoding.Instance.
ByteToUnicode
[' '];
Model\EnglishRobertaTokenizer.cs (4)
367
IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.
ByteToUnicode
;
679
IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.
ByteToUnicode
;
724
IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.
ByteToUnicode
;
1083
public bool IsSupportedChar(char ch) => ByteToUnicodeEncoding.Instance.
ByteToUnicode
.ContainsKey(ch);
Utils\ByteToUnicodeEncoding.cs (1)
36
UnicodeToByte =
ByteToUnicode
.ToDictionary(kv => kv.Value, kv => kv.Key);
Utils\Helpers.cs (10)
90
destination[targetIndex] = byteToUnicodeEncoder.
ByteToUnicode
[(char)c];
99
destination[targetIndex] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((c + (0b110u << 11)) >> 6)];
100
destination[targetIndex + 1] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((c & 0x3Fu) + 0x80u)];
110
destination[targetIndex] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((value + (0b11110 << 21)) >> 18)];
111
destination[targetIndex + 1] = byteToUnicodeEncoder.
ByteToUnicode
[(char)(((value & (0x3Fu << 12)) >> 12) + 0x80u)];
112
destination[targetIndex + 2] = byteToUnicodeEncoder.
ByteToUnicode
[(char)(((value & (0x3Fu << 6)) >> 6) + 0x80u)];
113
destination[targetIndex + 3] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((value & 0x3Fu) + 0x80u)];
121
destination[targetIndex] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((c + (0b1110 << 16)) >> 12)];
122
destination[targetIndex + 1] = byteToUnicodeEncoder.
ByteToUnicode
[(char)(((c & (0x3Fu << 6)) >> 6) + 0x80u)];
123
destination[targetIndex + 2] = byteToUnicodeEncoder.
ByteToUnicode
[(char)((c & 0x3Fu) + 0x80u)];