81 references to Helpers
Microsoft.ML.Tokenizers (81)
Model\BPETokenizer.cs (8)
847
Helpers
.AppendToBytesArray(token.AsSpan(), ref bytes, ref bytesIndex);
864
Helpers
.AppendToBytesArray(span, ref bytes, ref bytesIndex);
868
string decodedString =
Helpers
.GetString(bytes.AsSpan(0, bytesIndex));
1053
if (!
Helpers
.ConvertUtf8ToUtf16(current, vocabBuffer.Slice(utf16CharsInBuffer), out int utf8BytesConsumed, out int utf16CharsWritten))
1146
await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) :
1334
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(text, token, mapping);
1447
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(text, token, mapping);
1527
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(text, token, mapping);
Model\CodeGenTokenizer.cs (10)
462
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(textSpan, token, mapping);
1145
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(textSpan, token, mapping);
1209
int encodedLength =
Helpers
.EncodeToUtf8AndTransform(textSpan, token, mapping);
1266
Helpers
.AppendToBytesArray(BeginningOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex);
1275
Helpers
.AppendToBytesArray(EndOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex);
1284
Helpers
.AppendToBytesArray(UnknownToken!.AsSpan(), ref bytes, ref bytesIndex);
1294
Helpers
.ArrayPoolGrow(ref bytes, (bytes.Length + bytesCountToEncode) * 2);
1298
bytesIndex +=
Helpers
.GetUtf8Bytes(removePrefixSpace ? specialToken.AsSpan().Slice(1) : specialToken.AsSpan(), bytes.AsSpan().Slice(bytesIndex));
1309
Helpers
.AppendToBytesArray(span, ref bytes, ref bytesIndex);
1483
if (!
Helpers
.ConvertUtf8ToUtf16(current, vocabBuffer.Slice(utf16CharsInBuffer), out int utf8BytesConsumed, out int utf16CharsWritten))
Model\SentencePieceBaseModel.cs (9)
226
Helpers
.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2);
286
Helpers
.ArrayPoolGrow(ref charPoolArray, len);
289
int charCount =
Helpers
.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray);
499
Helpers
.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2);
624
Helpers
.ArrayPoolGrow(ref charPoolArray, len);
627
int charCount =
Helpers
.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray);
685
Helpers
.Replace(tokenSpan, buffer, prefixSuffixChar, ' ');
712
Helpers
.Replace(tokenSpan, buffer, prefixSuffixChar, ' ');
734
Helpers
.Replace(tokenSpan, buffer, prefixSuffixChar, ' ');
Model\SentencePieceBpeModel.cs (4)
236
int bytesWritten =
Helpers
.GetUtf8Bytes(text.Slice(i), utf8Bytes);
536
int bytesWritten =
Helpers
.GetUtf8Bytes(text.Slice(i), utf8Bytes);
799
int encodedCount =
Helpers
.GetUtf8Bytes(text.Slice(i), utf8Bytes);
1075
int encodedCount =
Helpers
.GetUtf8Bytes(text.Slice(0, i + 1), utf8Bytes);
Model\SentencePieceUnigramModel.cs (14)
162
int expectedCount =
Helpers
.GetUtf16LengthFromUtf8Bytes(utf8Bytes);
172
bool res =
Helpers
.ConvertUtf8ToUtf16(utf8Bytes, normalizedString.AsSpan(normalizedString.Length - normalizedStringCountFromEnd - expectedCount), out int bytesConsumed, out int charsWritten);
185
Helpers
.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1);
198
Helpers
.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1);
202
bool res =
Helpers
.ConvertUtf8ToUtf16(normalizationSpan, normalizedUtf16Span, out int bytesConsumed, out int charsWritten);
299
int byteCount =
Helpers
.GetUtf8Bytes(text, byteSpan);
362
string stringToken = node.Id == UnknownId ?
Helpers
.GetString(normalizationSpan.Slice(node.StartsAt, endsAt - node.StartsAt)) : _vocabReverse[node.Id].Piece;
428
int codepointLength =
Helpers
.EncodeNextUtf8(normalizationSpan.Slice(offsetStart), destination);
495
int mbLen =
Helpers
.OneCharLen(normalized[startsAt]);
812
Helpers
.ArrayPoolGrow(ref unknownTokensTracking, unknownTokensCount << 1);
899
charsConsumed +=
Helpers
.GetUtf16LengthFromUtf8Bytes(utf8UnknownBytes);
1147
charsConsumed +=
Helpers
.GetUtf16LengthFromUtf8Bytes(normalizationSpan);
1184
charsConsumed +=
Helpers
.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(ids[i].UtfStartOffset, ids[i].Utf8Length));
1395
consumedCharacters +=
Helpers
.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(node.StartsAt, length));
Model\TiktokenTokenizer.cs (15)
168
string? line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
174
if (!
Helpers
.TryParseInt32(line, capacity.Length, out suggestedCapacity))
179
line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
189
line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
203
} while ((line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine()) is not null);
216
if (
Helpers
.TryParseInt32(line, spaceIndex + 1, out int rank))
218
AddData(
Helpers
.FromBase64String(line, 0, spaceIndex), rank);
226
await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) :
327
int encodedLength =
Helpers
.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan);
444
int encodedLength =
Helpers
.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan);
609
int encodedLength =
Helpers
.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan);
735
int encodedLength =
Helpers
.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan);
829
return
Helpers
.GetString(utf8Bytes.Slice(0, utf8ByteCount));
912
if (!
Helpers
.ConvertUtf8ToUtf16(tokenBytes.Span, tempBuffer, out int bytesConsumed, out incompleteCharsWritten))
945
if (!
Helpers
.ConvertUtf8ToUtf16(utf8Bytes.Slice(utf8BytesIncompleteIndex, utf8BytesIncompleteCount + tokenBytes.Length), tempBuffer, out int bytesConsumed, out int charsConsumed))
Model\WordPieceTokenizer.cs (2)
103
string? line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
115
line = useAsync ? await
Helpers
.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
Normalizer\BertNormalizer.cs (2)
154
Helpers
.ArrayPoolGrow(ref buffer, index + 40);
165
Helpers
.ArrayPoolGrow(ref buffer, index + buffer.Length + 10);
Normalizer\SentencePieceNormalizer.cs (7)
304
if (!
Helpers
.IsValidDecodeUtf8(input, out int length))
405
Helpers
.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1);
417
Helpers
.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1);
469
Helpers
.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1);
478
Helpers
.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1);
502
trieBlobSize =
Helpers
.Swap32(trieBlobSize);
521
data[i] =
Helpers
.Swap32(data[i]);
Utils\DoubleArrayTrie.cs (1)
758
encodingLength =
Helpers
.GetUtf8Bytes(pair.Key.AsSpan(), bytes);
Utils\Helpers.cs (1)
223
Helpers
.ArrayPoolGrow<byte>(ref bytes, bytes.Length * 2);
Utils\Helpers.netstandard.cs (4)
147
Helpers
.ArrayPoolGrow(ref destination, destination.Length * 2);
159
Helpers
.ArrayPoolGrow(ref destination, destination.Length * 2);
172
Helpers
.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 4) * 2);
186
Helpers
.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 3) * 2);
Utils\OrdinalUtf8StringComparer.cs (2)
73
xLen =
Helpers
.GetUtf8Bytes(x.AsSpan(i), buffer1);
74
yLen =
Helpers
.GetUtf8Bytes(y.AsSpan(i), buffer2);
Utils\StringSpanOrdinalKey.cs (1)
46
public override int GetHashCode() =>
Helpers
.GetHashCode(Span);
Utils\ValueStringBuilder.cs (1)
96
Helpers
.Replace(span, oldValue, newValue);