71 references to Helpers
Microsoft.ML.Tokenizers (71)
Model\BPETokenizer.cs (1)
824await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) :
Model\CodeGenTokenizer.cs (8)
462int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1145int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1209int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1294Helpers.ArrayPoolGrow(ref bytes, (bytes.Length + bytesCountToEncode) * 2); 1298bytesIndex += Helpers.GetUtf8Bytes(removePrefixSpace ? specialToken.AsSpan().Slice(1) : specialToken.AsSpan(), bytes.AsSpan().Slice(bytesIndex)); 1483if (!Helpers.ConvertUtf8ToUtf16(current, vocabBuffer.Slice(utf16CharsInBuffer), out int utf8BytesConsumed, out int utf16CharsWritten)) 1576Helpers.ArrayPoolGrow<byte>(ref bytes, bytes.Length * 2); 1584i += Helpers.EncodeCodePointToUtf8(text, i, ref bytes, ref bytesIndex) - 1;
Model\SentencePieceBaseModel.cs (9)
287Helpers.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2); 347Helpers.ArrayPoolGrow(ref charPoolArray, len); 350int charCount = Helpers.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray); 560Helpers.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2); 685Helpers.ArrayPoolGrow(ref charPoolArray, len); 688int charCount = Helpers.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray); 746Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' '); 773Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' '); 795Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' ');
Model\SentencePieceBpeModel.cs (4)
282int bytesWritten = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 582int bytesWritten = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 845int encodedCount = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 1121int encodedCount = Helpers.GetUtf8Bytes(text.Slice(0, i + 1), utf8Bytes);
Model\SentencePieceUnigramModel.cs (14)
212int expectedCount = Helpers.GetUtf16LengthFromUtf8Bytes(utf8Bytes); 222bool res = Helpers.ConvertUtf8ToUtf16(utf8Bytes, normalizedString.AsSpan(normalizedString.Length - normalizedStringCountFromEnd - expectedCount), out int bytesConsumed, out int charsWritten); 235Helpers.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1); 248Helpers.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1); 252bool res = Helpers.ConvertUtf8ToUtf16(normalizationSpan, normalizedUtf16Span, out int bytesConsumed, out int charsWritten); 349int byteCount = Helpers.GetUtf8Bytes(text, byteSpan); 412string stringToken = node.Id == UnknownId ? Helpers.GetString(normalizationSpan.Slice(node.StartsAt, endsAt - node.StartsAt)) : _vocabReverse[node.Id].Piece; 478int codepointLength = Helpers.EncodeNextUtf8(normalizationSpan.Slice(offsetStart), destination); 545int mbLen = Helpers.OneCharLen(normalized[startsAt]); 862Helpers.ArrayPoolGrow(ref unknownTokensTracking, unknownTokensCount << 1); 949charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(utf8UnknownBytes); 1197charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan); 1234charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(ids[i].UtfStartOffset, ids[i].Utf8Length)); 1445consumedCharacters += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(node.StartsAt, length));
Model\TiktokenTokenizer.cs (15)
168string? line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 174if (!Helpers.TryParseInt32(line, capacity.Length, out suggestedCapacity)) 179line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 189line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 203} while ((line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine()) is not null); 216if (Helpers.TryParseInt32(line, spaceIndex + 1, out int rank)) 218AddData(Helpers.FromBase64String(line, 0, spaceIndex), rank); 226await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : 327int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 444int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 609int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 735int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 829return Helpers.GetString(utf8Bytes.Slice(0, utf8ByteCount)); 912if (!Helpers.ConvertUtf8ToUtf16(tokenBytes.Span, tempBuffer, out int bytesConsumed, out incompleteCharsWritten)) 945if (!Helpers.ConvertUtf8ToUtf16(utf8Bytes.Slice(utf8BytesIncompleteIndex, utf8BytesIncompleteCount + tokenBytes.Length), tempBuffer, out int bytesConsumed, out int charsConsumed))
Model\WordPieceTokenizer.cs (2)
103string? line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 115line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
Normalizer\BertNormalizer.cs (2)
154Helpers.ArrayPoolGrow(ref buffer, index + 40); 165Helpers.ArrayPoolGrow(ref buffer, index + buffer.Length + 10);
Normalizer\SentencePieceNormalizer.cs (7)
304if (!Helpers.IsValidDecodeUtf8(input, out int length)) 405Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1); 417Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1); 469Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1); 478Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1); 502trieBlobSize = Helpers.Swap32(trieBlobSize); 521data[i] = Helpers.Swap32(data[i]);
Utils\DoubleArrayTrie.cs (1)
758encodingLength = Helpers.GetUtf8Bytes(pair.Key.AsSpan(), bytes);
Utils\Helpers.netstandard.cs (4)
147Helpers.ArrayPoolGrow(ref destination, destination.Length * 2); 159Helpers.ArrayPoolGrow(ref destination, destination.Length * 2); 172Helpers.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 4) * 2); 186Helpers.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 3) * 2);
Utils\OrdinalUtf8StringComparer.cs (2)
73xLen = Helpers.GetUtf8Bytes(x.AsSpan(i), buffer1); 74yLen = Helpers.GetUtf8Bytes(y.AsSpan(i), buffer2);
Utils\StringSpanOrdinalKey.cs (1)
46public override int GetHashCode() => Helpers.GetHashCode(Span);
Utils\ValueStringBuilder.cs (1)
96Helpers.Replace(span, oldValue, newValue);