1 write to ByteToUnicode
Microsoft.ML.Tokenizers (1)
Utils\ByteToUnicodeEncoding.cs (1)
35ByteToUnicode = byteToUnicodeMapping;
16 references to ByteToUnicode
Microsoft.ML.Tokenizers (16)
Model\CodeGenTokenizer.cs (1)
1522private static readonly char _transformedSpace = ByteToUnicodeEncoding.Instance.ByteToUnicode[' '];
Model\EnglishRobertaTokenizer.cs (4)
367IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.ByteToUnicode; 679IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.ByteToUnicode; 724IReadOnlyDictionary<char, char> byteToUnicode = ByteToUnicodeEncoding.Instance.ByteToUnicode; 1083public bool IsSupportedChar(char ch) => ByteToUnicodeEncoding.Instance.ByteToUnicode.ContainsKey(ch);
Utils\ByteToUnicodeEncoding.cs (1)
36UnicodeToByte = ByteToUnicode.ToDictionary(kv => kv.Value, kv => kv.Key);
Utils\Helpers.cs (10)
90destination[targetIndex] = byteToUnicodeEncoder.ByteToUnicode[(char)c]; 99destination[targetIndex] = byteToUnicodeEncoder.ByteToUnicode[(char)((c + (0b110u << 11)) >> 6)]; 100destination[targetIndex + 1] = byteToUnicodeEncoder.ByteToUnicode[(char)((c & 0x3Fu) + 0x80u)]; 110destination[targetIndex] = byteToUnicodeEncoder.ByteToUnicode[(char)((value + (0b11110 << 21)) >> 18)]; 111destination[targetIndex + 1] = byteToUnicodeEncoder.ByteToUnicode[(char)(((value & (0x3Fu << 12)) >> 12) + 0x80u)]; 112destination[targetIndex + 2] = byteToUnicodeEncoder.ByteToUnicode[(char)(((value & (0x3Fu << 6)) >> 6) + 0x80u)]; 113destination[targetIndex + 3] = byteToUnicodeEncoder.ByteToUnicode[(char)((value & 0x3Fu) + 0x80u)]; 121destination[targetIndex] = byteToUnicodeEncoder.ByteToUnicode[(char)((c + (0b1110 << 16)) >> 12)]; 122destination[targetIndex + 1] = byteToUnicodeEncoder.ByteToUnicode[(char)(((c & (0x3Fu << 6)) >> 6) + 0x80u)]; 123destination[targetIndex + 2] = byteToUnicodeEncoder.ByteToUnicode[(char)((c & 0x3Fu) + 0x80u)];