155 references to Length
Microsoft.ML.Tokenizers (123)
Model\BPETokenizer.cs (8)
466EncodeWithCache(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset, ref priorityQueue); 527EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), ids, maxTokenCount - ids.Count, out int length, ref priorityQueue); 530if (length < split.Length || ids.Count >= maxTokenCount) 588count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue); 591if (length < split.Length || count >= maxTokenCount) 668count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue); 671if (length < split.Length || count >= maxTokenCount) 722tokenCount += EncodeToIdsFromEndWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - tokenCount, out int textIndex, ref priorityQueue);
Model\CodeGenTokenizer.cs (19)
391EncodeInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), tokens, addPrefixSpace, split.Offset, agenda); 632EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), ids, agenda, out int length, maxTokenCount - ids.Count); 635if (length < split.Length || ids.Count >= maxTokenCount) 844count += EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int length, maxTokenCount - count); 847if (length < split.Length || count >= maxTokenCount) 983tokenCount += EncodeToIdsFromEndInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int textIndex, maxTokenCount - tokenCount); 1636if (symbols[top.Left].pieceSpan.Length == 0 || symbols[top.Right].pieceSpan.Length == 0 || 1637symbols[top.Left].pieceSpan.Length + symbols[top.Right].pieceSpan.Length != top.Length) 1643symbols[top.Left].pieceSpan = (symbols[top.Left].pieceSpan.Index, symbols[top.Left].pieceSpan.Length + symbols[top.Right].pieceSpan.Length); 1663if (_vocab.TryGetValue(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), out (int Id, string Token) value)) 1665result.Add(GetToken(value.Id, value.Token, symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length, originalText, mapping)); 1669result.Add(GetToken(UnknownTokenId.Value, UnknownToken!, symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length, originalText, mapping)); 1693if (!_mergeRanks.TryGetValue(textSpan.Slice(symbols[left].pieceSpan.Index, symbols[left].pieceSpan.Length), textSpan.Slice(symbols[right].pieceSpan.Index, symbols[right].pieceSpan.Length), out int rank)) 1698SymbolPair pair = new(left, right, rank, symbols[left].pieceSpan.Length + symbols[right].pieceSpan.Length);
Model\EnglishRobertaTokenizer.cs (6)
337foreach (EncodedToken t in EncodeInternal(textSpanToEncode.Slice(split.Offset, split.Length))) 441EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 444if (length < split.Length || ids.Count >= maxTokenCount) 525count += EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int length, maxTokenCount - count); 528if (length < split.Length || count >= maxTokenCount) 572tokenCount += EncodeToIdsFromEndInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int textIndex, maxTokenCount - tokenCount);
Model\SentencePieceBpeModel.cs (45)
168if (_vocab.TryGetValue(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), out (int Id, float Score, byte Type) tokenInfo)) 184EncodeAsBytes(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), symbols[index].pieceSpan.Index); 190GetTokenString(id, symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length, text), 191new Range(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Index + symbols[index].pieceSpan.Length))); 262if (!_vocab.TryGetValue(text.Slice(pieceSpan.Index, pieceSpan.Length), out (int Id, float Score, byte Type) id)) 264EncodeAsBytes(text.Slice(pieceSpan.Index, pieceSpan.Length), pieceSpan.Index); 270!revMerge.TryGetValue((pieceSpan.Index, pieceSpan.Length), out (int LeftIndex, int LeftLen, int RightIndex, int RightLen) merge)) 272tokens.Add(new EncodedToken(id.Id, text.Slice(pieceSpan.Index, pieceSpan.Length).ToString(), new Range(pieceSpan.Index, pieceSpan.Index + pieceSpan.Length))); 446if (_vocab.TryGetValue(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), out (int Id, float Score, byte Type) tokenInfo)) 462if (!EncodeAsBytes(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), symbols[index].pieceSpan.Index, ref charsConsumed)) 473charsConsumed += symbols[index].pieceSpan.Length; 568if (!_vocab.TryGetValue(text.Slice(pieceSpan.Index, pieceSpan.Length), out (int Id, float Score, byte Type) id)) 570return EncodeAsBytes(text.Slice(pieceSpan.Index, pieceSpan.Length), pieceSpan.Index, ref charsConsumed); 575!revMerge.TryGetValue((pieceSpan.Index, pieceSpan.Length), out (int LeftIndex, int LeftLen, int RightIndex, int RightLen) merge)) 580charsConsumed += pieceSpan.Length; 714if (_vocab.TryGetValue(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), out (int Id, float Score, byte Type) tokenInfo)) 730if (!EncodeAsBytes(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), symbols[index].pieceSpan.Index, ref charsConsumed)) 740charsConsumed += symbols[index].pieceSpan.Length; 827if (!_vocab.TryGetValue(text.Slice(pieceSpan.Index, pieceSpan.Length), out (int Id, float Score, byte Type) id)) 829return EncodeAsBytes(text.Slice(pieceSpan.Index, pieceSpan.Length), pieceSpan.Index, ref charsConsumed); 834!revMerge.TryGetValue((pieceSpan.Index, pieceSpan.Length), out (int LeftIndex, int LeftLen, int RightIndex, int RightLen) merge)) 839charsConsumed += pieceSpan.Length; 919if (current.Offset + current.Length < text.Length) 921splitText = text.Slice(current.Offset + current.Length); 930if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id)) 934textIndex -= current.Length; 938int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0; 990if (_vocab.TryGetValue(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), out (int Id, float Score, byte Type) tokenInfo)) 1006if (!EncodeAsBytesFromEnd(text.Slice(symbols[index].pieceSpan.Index, symbols[index].pieceSpan.Length), symbols[index].pieceSpan.Index, ref textIndex)) 1016textIndex -= symbols[index].pieceSpan.Length; 1103if (!_vocab.TryGetValue(text.Slice(pieceSpan.Index, pieceSpan.Length), out (int Id, float Score, byte Type) id)) 1105return EncodeAsBytesFromEnd(text.Slice(pieceSpan.Index, pieceSpan.Length), pieceSpan.Index, ref textIndex); 1110!revMerge.TryGetValue((pieceSpan.Index, pieceSpan.Length), out (int LeftIndex, int LeftLen, int RightIndex, int RightLen) merge)) 1115textIndex -= pieceSpan.Length; 1164if (symbols[top.Left].pieceSpan.Length == 0 || symbols[top.Right].pieceSpan.Length == 0 || 1165symbols[top.Left].pieceSpan.Length + symbols[top.Right].pieceSpan.Length != top.Length) 1171symbols[top.Left].pieceSpan = (symbols[top.Left].pieceSpan.Index, symbols[top.Left].pieceSpan.Length + symbols[top.Right].pieceSpan.Length); 1197int pieceLength = symbols[left].pieceSpan.Length + symbols[right].pieceSpan.Length; 1211revMerge.Add((symbols[left].pieceSpan.Index, pieceLength), (symbols[left].pieceSpan.Index, symbols[left].pieceSpan.Length, symbols[right].pieceSpan.Index, symbols[right].pieceSpan.Length));
Model\SentencePieceUnigramModel.cs (6)
1295if (current.Offset + current.Length < text.Length) 1297GetIndexByTokenCountFromEndInternal(text.Slice(current.Offset + current.Length), considerNormalization, ref tokenCount, buffer, ref normalizedString, ref normalizedStringCountFromEnd, ref charConsumedFromEnd, maxTokenCount); 1306if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id)) 1311charConsumedFromEnd += current.Length; 1316StoreNormalizedTextFromEnd(text.Slice(current.Offset, current.Length), ref normalizedString, ref normalizedStringCountFromEnd); 1321int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0;
Model\TiktokenTokenizer.cs (6)
282EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset); 393EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 396if (length < split.Length || ids.Count >= maxTokenCount) 560count += CountTokens(textSpanToEncode.Slice(split.Offset, split.Length), out int length, maxTokenCount - count); 563if (length < split.Length || count >= maxTokenCount) 687tokenCount += CountTokensFromEnd(textSpanToEncode.Slice(split.Offset, split.Length), out int textIndex, maxTokenCount - tokenCount);
Model\WordPieceTokenizer.cs (9)
296EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset); 420EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 422if (length < split.Length || ids.Count >= maxTokenCount) 570count += EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out int length, maxTokenCount - count); 572if (length < split.Length || count >= maxTokenCount) 649int count = EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out charsConsumed, settings.MaxTokenCount - tokenCount); 650if (charsConsumed != split.Length) 652return fromEnd ? split.Offset + split.Length : split.Offset; 659return fromEnd ? split.Offset : split.Offset + split.Length;
PreTokenizer\CompositePreTokenizer.cs (8)
88yield return (range.Offset, range.Length); 89beginning += range.Length; 116beginning = range.Offset + range.Length; 118yield return (offset + range.Offset, range.Length); 166yield return (range.Offset, range.Length); 167beginning += range.Length; 194beginning = range.Offset + range.Length; 196yield return (offset + range.Offset, range.Length);
PreTokenizer\PreTokenizer.cs (4)
38yield return (match.Offset, match.Length); 39beginning = match.Offset + match.Length; 146yield return (match.Offset, match.Length); 147beginning = match.Offset + match.Length;
PreTokenizer\RegexPreTokenizer.cs (12)
74yield return (match.Offset, match.Length); 75beginning = match.Offset + match.Length; 78yield return (specialMatch.Offset, specialMatch.Length); 79beginning = specialMatch.Offset + specialMatch.Length; 85yield return (match.Offset, match.Length); 86beginning = match.Length + match.Offset; 125yield return (match.Offset, match.Length); 126beginning = match.Offset + match.Length; 129yield return (specialMatch.Offset, specialMatch.Length); 130beginning = specialMatch.Offset + specialMatch.Length; 136yield return (match.Offset, match.Length); 137beginning = match.Length + match.Offset;
Microsoft.ML.Tokenizers.Tests (23)
BpeTests.cs (3)
497int expectedLength = expectedOffsets[expectedOffsets.Length - 3].Index + expectedOffsets[expectedOffsets.Length - 3].Length; 506Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount)); 509Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIds.Length - 3, out normalizedText, out tokenCount));
CodeGenTests.cs (8)
404int expectedLength = offsets.Length > expectedTokensToExclude ? offsets[offsets.Length - expectedTokensToExclude - 1].Index + offsets[offsets.Length - expectedTokensToExclude - 1].Length : 0; 443Assert.Equal(offsets[offsets.Length - 1].Index + offsets[offsets.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text, ids.Length, out normalizedText, out int tokenCount)); 446Assert.Equal(offsets[offsets.Length - 1].Index + offsets[offsets.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text.AsSpan(), ids.Length, out normalizedText, out tokenCount)); 450Assert.Equal(expectedOffsets[expectedOffsets.Length - 1].Index + expectedOffsets[expectedOffsets.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text, expectedIds.Length, addPrefixSpace: false, addBeginningOfSentence: false, addEndOfSentence: false, out normalizedText, out tokenCount)); 453Assert.Equal(expectedOffsets[expectedOffsets.Length - 1].Index + expectedOffsets[expectedOffsets.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIds.Length, addPrefixSpace: false, addBeginningOfSentence: false, addEndOfSentence: false, out normalizedText, out tokenCount)); 457Assert.Equal(expectedOffsetsWithSpace[expectedOffsetsWithSpace.Length - 1].Index + expectedOffsetsWithSpace[expectedOffsetsWithSpace.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text, expectedIdsWithSpace.Length, addPrefixSpace: true, addBeginningOfSentence: false, addEndOfSentence: false, out normalizedText, out tokenCount)); 460Assert.Equal(expectedOffsetsWithSpace[expectedOffsetsWithSpace.Length - 1].Index + expectedOffsetsWithSpace[expectedOffsetsWithSpace.Length - 1].Length, codeGenTokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIdsWithSpace.Length, addPrefixSpace: true, addBeginningOfSentence: false, addEndOfSentence: false, out normalizedText, out tokenCount)); 995int calculatedLengthUsingOffsets = expectedTokenCount > 0 ? offsets[expectedTokenCount - 1].Index + offsets[expectedTokenCount - 1].Length : 0;
EnglishRobertaTests.cs (3)
203int expectedLength = expectedOffsets[expectedOffsets.Length - 3].Index + expectedOffsets[expectedOffsets.Length - 3].Length; 212Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount)); 215Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIds.Length - 3, out normalizedText, out tokenCount));
LlamaTests.cs (3)
608int expectedLength = expectedOffsets1[expectedOffsets1.Length - 1].Index + expectedOffsets1[expectedOffsets1.Length - 1].Length; 635Assert.Equal(expectedOffsets[expectedOffsets.Length - 7].Index + expectedOffsets[expectedOffsets.Length - 7].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 6, out string? normalizedString, out int tokenCount)); 638Assert.Equal(expectedOffsets[expectedOffsets.Length - 7].Index + expectedOffsets[expectedOffsets.Length - 7].Length, tokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIds.Length - 6, out normalizedString, out tokenCount));
TiktokenTests.cs (6)
652int expectedLength = expectedOffsets[expectedOffsets.Length - 5].Index + expectedOffsets[expectedOffsets.Length - 5].Length; 661Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text, expectedIds.Length - 3, out normalizedText, out int tokenCount)); 664Assert.Equal(expectedOffsets[expectedOffsets.Length - 4].Index + expectedOffsets[expectedOffsets.Length - 4].Length, tokenizer.GetIndexByTokenCount(text.AsSpan(), expectedIds.Length - 3, out normalizedText, out tokenCount)); 762Assert.True(offsets[count + 1].Index < offsets[count].Index + offsets[count].Length); 767Assert.Equal(offsets[count - 1].Index + offsets[count - 1].Length, length); 780Assert.True(offsets[offsets.Length - tokenCount].Index < offsets[offsets.Length - tokenCount - 1].Index + offsets[offsets.Length - tokenCount - 1].Length);
System.Formats.Cbor (4)
System\Formats\Cbor\Reader\CborReader.Map.cs (3)
139ReadOnlySpan<byte> previousKeyEncoding = buffer.Slice(previousKeyEncodingRange.Offset, previousKeyEncodingRange.Length); 140ReadOnlySpan<byte> currentKeyEncoding = buffer.Slice(currentKeyEncodingRange.Offset, currentKeyEncodingRange.Length); 210return _reader._data.Span.Slice(range.Offset, range.Length);
System\Formats\Cbor\Writer\CborWriter.Map.cs (1)
272return _writer._buffer.AsSpan(range.Offset, range.Length);
System.Text.Json (2)
System\Text\Json\Document\JsonDocument.PropertyNameSet.cs (2)
89ReadOnlySpan<byte> previousPropertyName = utf8Json.Span.Slice(range.Start, range.Length); 124ReadOnlyMemory<byte> propertyName = utf8Json.Slice(range.Start, range.Length);
System.Text.RegularExpressions (3)
System\Text\RegularExpressions\Regex.EnumerateSplits.cs (3)
224_currentSplit = !_regex.RightToLeft ? (_lastMatch.Index + _lastMatch.Length).._input.Length : 0.._lastMatch.Index; 229(bool Success, int Index, int Length, int TextPosition) match = _regex.RunSingleMatch(RegexRunnerMode.BoundsRequired, _lastMatch.Length, _input, _startAt); 235int start = _lastMatch.Index + Math.Max(_lastMatch.Length, 0);