81 references to Helpers
Microsoft.ML.Tokenizers (81)
Model\BPETokenizer.cs (8)
847Helpers.AppendToBytesArray(token.AsSpan(), ref bytes, ref bytesIndex); 864Helpers.AppendToBytesArray(span, ref bytes, ref bytesIndex); 868string decodedString = Helpers.GetString(bytes.AsSpan(0, bytesIndex)); 1053if (!Helpers.ConvertUtf8ToUtf16(current, vocabBuffer.Slice(utf16CharsInBuffer), out int utf8BytesConsumed, out int utf16CharsWritten)) 1146await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : 1334int encodedLength = Helpers.EncodeToUtf8AndTransform(text, token, mapping); 1447int encodedLength = Helpers.EncodeToUtf8AndTransform(text, token, mapping); 1527int encodedLength = Helpers.EncodeToUtf8AndTransform(text, token, mapping);
Model\CodeGenTokenizer.cs (10)
462int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1145int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1209int encodedLength = Helpers.EncodeToUtf8AndTransform(textSpan, token, mapping); 1266Helpers.AppendToBytesArray(BeginningOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex); 1275Helpers.AppendToBytesArray(EndOfSentenceToken!.AsSpan(), ref bytes, ref bytesIndex); 1284Helpers.AppendToBytesArray(UnknownToken!.AsSpan(), ref bytes, ref bytesIndex); 1294Helpers.ArrayPoolGrow(ref bytes, (bytes.Length + bytesCountToEncode) * 2); 1298bytesIndex += Helpers.GetUtf8Bytes(removePrefixSpace ? specialToken.AsSpan().Slice(1) : specialToken.AsSpan(), bytes.AsSpan().Slice(bytesIndex)); 1309Helpers.AppendToBytesArray(span, ref bytes, ref bytesIndex); 1483if (!Helpers.ConvertUtf8ToUtf16(current, vocabBuffer.Slice(utf16CharsInBuffer), out int utf8BytesConsumed, out int utf16CharsWritten))
Model\SentencePieceBaseModel.cs (9)
226Helpers.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2); 286Helpers.ArrayPoolGrow(ref charPoolArray, len); 289int charCount = Helpers.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray); 499Helpers.ArrayPoolGrow(ref bytesPoolArray, bytesCount * 2); 624Helpers.ArrayPoolGrow(ref charPoolArray, len); 627int charCount = Helpers.GetChars(bytesPoolArray.AsSpan(0, bytesCount), charPoolArray); 685Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' '); 712Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' '); 734Helpers.Replace(tokenSpan, buffer, prefixSuffixChar, ' ');
Model\SentencePieceBpeModel.cs (4)
236int bytesWritten = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 536int bytesWritten = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 799int encodedCount = Helpers.GetUtf8Bytes(text.Slice(i), utf8Bytes); 1075int encodedCount = Helpers.GetUtf8Bytes(text.Slice(0, i + 1), utf8Bytes);
Model\SentencePieceUnigramModel.cs (14)
162int expectedCount = Helpers.GetUtf16LengthFromUtf8Bytes(utf8Bytes); 172bool res = Helpers.ConvertUtf8ToUtf16(utf8Bytes, normalizedString.AsSpan(normalizedString.Length - normalizedStringCountFromEnd - expectedCount), out int bytesConsumed, out int charsWritten); 185Helpers.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1); 198Helpers.ArrayPoolGrow(ref normalizedString, normalizedString.Length << 1); 202bool res = Helpers.ConvertUtf8ToUtf16(normalizationSpan, normalizedUtf16Span, out int bytesConsumed, out int charsWritten); 299int byteCount = Helpers.GetUtf8Bytes(text, byteSpan); 362string stringToken = node.Id == UnknownId ? Helpers.GetString(normalizationSpan.Slice(node.StartsAt, endsAt - node.StartsAt)) : _vocabReverse[node.Id].Piece; 428int codepointLength = Helpers.EncodeNextUtf8(normalizationSpan.Slice(offsetStart), destination); 495int mbLen = Helpers.OneCharLen(normalized[startsAt]); 812Helpers.ArrayPoolGrow(ref unknownTokensTracking, unknownTokensCount << 1); 899charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(utf8UnknownBytes); 1147charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan); 1184charsConsumed += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(ids[i].UtfStartOffset, ids[i].Utf8Length)); 1395consumedCharacters += Helpers.GetUtf16LengthFromUtf8Bytes(normalizationSpan.Slice(node.StartsAt, length));
Model\TiktokenTokenizer.cs (15)
168string? line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 174if (!Helpers.TryParseInt32(line, capacity.Length, out suggestedCapacity)) 179line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 189line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 203} while ((line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine()) is not null); 216if (Helpers.TryParseInt32(line, spaceIndex + 1, out int rank)) 218AddData(Helpers.FromBase64String(line, 0, spaceIndex), rank); 226await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : 327int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 444int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 609int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 735int encodedLength = Helpers.EncodeToUtf8(text, arrayPoolArray, indexMappingSpan); 829return Helpers.GetString(utf8Bytes.Slice(0, utf8ByteCount)); 912if (!Helpers.ConvertUtf8ToUtf16(tokenBytes.Span, tempBuffer, out int bytesConsumed, out incompleteCharsWritten)) 945if (!Helpers.ConvertUtf8ToUtf16(utf8Bytes.Slice(utf8BytesIncompleteIndex, utf8BytesIncompleteCount + tokenBytes.Length), tempBuffer, out int bytesConsumed, out int charsConsumed))
Model\WordPieceTokenizer.cs (2)
103string? line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine(); 115line = useAsync ? await Helpers.ReadLineAsync(reader, cancellationToken).ConfigureAwait(false) : reader.ReadLine();
Normalizer\BertNormalizer.cs (2)
154Helpers.ArrayPoolGrow(ref buffer, index + 40); 165Helpers.ArrayPoolGrow(ref buffer, index + buffer.Length + 10);
Normalizer\SentencePieceNormalizer.cs (7)
304if (!Helpers.IsValidDecodeUtf8(input, out int length)) 405Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1); 417Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1); 469Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + _spaceSymbol.Length) << 1); 478Helpers.ArrayPoolGrow(ref normalized, ref poolArray, (normalizedIndex + 1) << 1); 502trieBlobSize = Helpers.Swap32(trieBlobSize); 521data[i] = Helpers.Swap32(data[i]);
Utils\DoubleArrayTrie.cs (1)
758encodingLength = Helpers.GetUtf8Bytes(pair.Key.AsSpan(), bytes);
Utils\Helpers.cs (1)
223Helpers.ArrayPoolGrow<byte>(ref bytes, bytes.Length * 2);
Utils\Helpers.netstandard.cs (4)
147Helpers.ArrayPoolGrow(ref destination, destination.Length * 2); 159Helpers.ArrayPoolGrow(ref destination, destination.Length * 2); 172Helpers.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 4) * 2); 186Helpers.ArrayPoolGrow(ref destination, Math.Max(destination.Length, 3) * 2);
Utils\OrdinalUtf8StringComparer.cs (2)
73xLen = Helpers.GetUtf8Bytes(x.AsSpan(i), buffer1); 74yLen = Helpers.GetUtf8Bytes(y.AsSpan(i), buffer2);
Utils\StringSpanOrdinalKey.cs (1)
46public override int GetHashCode() => Helpers.GetHashCode(Span);
Utils\ValueStringBuilder.cs (1)
96Helpers.Replace(span, oldValue, newValue);