103 references to Offset
Microsoft.ML.Tokenizers (89)
Model\BPETokenizer.cs (10)
466EncodeWithCache(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset, ref priorityQueue); 527EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), ids, maxTokenCount - ids.Count, out int length, ref priorityQueue); 528charsConsumed = split.Offset + length; 588count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue); 589textLength = split.Offset + length; 668count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue); 669charsConsumed = split.Offset + length; 722tokenCount += EncodeToIdsFromEndWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - tokenCount, out int textIndex, ref priorityQueue); 725return split.Offset + textIndex;
Model\CodeGenTokenizer.cs (9)
391EncodeInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), tokens, addPrefixSpace, split.Offset, agenda); 632EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), ids, agenda, out int length, maxTokenCount - ids.Count); 633charsConsumed = split.Offset + length; 844count += EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int length, maxTokenCount - count); 845charsConsumed = split.Offset + length; 983tokenCount += EncodeToIdsFromEndInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int textIndex, maxTokenCount - tokenCount); 986retIndex = addPrefixSpace ? split.Offset + textIndex - 1 : split.Offset + textIndex;
Model\EnglishRobertaTokenizer.cs (9)
337foreach (EncodedToken t in EncodeInternal(textSpanToEncode.Slice(split.Offset, split.Length))) 339tokens.Add(new EncodedToken(t.Id, t.Value, new Range(split.Offset + t.Offset.Start.Value, split.Offset + t.Offset.End.Value))); 441EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 442textLength = split.Offset + length; 525count += EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int length, maxTokenCount - count); 526charsConsumed = split.Offset + length; 572tokenCount += EncodeToIdsFromEndInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int textIndex, maxTokenCount - tokenCount); 575return split.Offset + textIndex;
Model\SentencePieceBpeModel.cs (6)
919if (current.Offset + current.Length < text.Length) 921splitText = text.Slice(current.Offset + current.Length); 930if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id)) 936if (current.Offset > 0 && idsCount < maxTokens) 938int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0; 939splitText = text.Slice(start, current.Offset - start);
Model\SentencePieceUnigramModel.cs (7)
1295if (current.Offset + current.Length < text.Length) 1297GetIndexByTokenCountFromEndInternal(text.Slice(current.Offset + current.Length), considerNormalization, ref tokenCount, buffer, ref normalizedString, ref normalizedStringCountFromEnd, ref charConsumedFromEnd, maxTokenCount); 1306if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id)) 1316StoreNormalizedTextFromEnd(text.Slice(current.Offset, current.Length), ref normalizedString, ref normalizedStringCountFromEnd); 1319if (current.Offset > 0) 1321int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0; 1322GetIndexByTokenCountFromEndInternal(text.Slice(start, current.Offset - start), considerNormalization, ref tokenCount, buffer, ref normalizedString, ref normalizedStringCountFromEnd, ref charConsumedFromEnd, maxTokenCount);
Model\TiktokenTokenizer.cs (8)
282EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset); 393EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 394charsConsumed = split.Offset + length; 560count += CountTokens(textSpanToEncode.Slice(split.Offset, split.Length), out int length, maxTokenCount - count); 561charsConsumed = split.Offset + length; 687tokenCount += CountTokensFromEnd(textSpanToEncode.Slice(split.Offset, split.Length), out int textIndex, maxTokenCount - tokenCount); 690return split.Offset + textIndex;
Model\WordPieceTokenizer.cs (10)
296EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset); 420EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count); 427charsConsumed = split.Offset + length; 570count += EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out int length, maxTokenCount - count); 649int count = EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out charsConsumed, settings.MaxTokenCount - tokenCount); 652return fromEnd ? split.Offset + split.Length : split.Offset; 659return fromEnd ? split.Offset : split.Offset + split.Length;
PreTokenizer\CompositePreTokenizer.cs (12)
88yield return (range.Offset, range.Length); 100if (range.Offset > beginning) 105foreach ((int Offset, int Length) subRange in SplitText(text, preTokenizers, preTokenizerIndex + 1, offset + beginning, range.Offset - beginning)) 112yield return (offset + beginning, range.Offset); 116beginning = range.Offset + range.Length; 118yield return (offset + range.Offset, range.Length); 166yield return (range.Offset, range.Length); 178if (range.Offset > beginning) 183foreach ((int Offset, int Length) subRange in SplitText(text, preTokenizers, preTokenizerIndex + 1, offset + beginning, range.Offset - beginning)) 190yield return (offset + beginning, range.Offset); 194beginning = range.Offset + range.Length; 196yield return (offset + range.Offset, range.Length);
PreTokenizer\PreTokenizer.cs (4)
38yield return (match.Offset, match.Length); 39beginning = match.Offset + match.Length; 146yield return (match.Offset, match.Length); 147beginning = match.Offset + match.Length;
PreTokenizer\RegexPreTokenizer.cs (14)
72while (TryGetMatch(regex, text, beginning, specialMatch.Offset - beginning, out match)) 74yield return (match.Offset, match.Length); 75beginning = match.Offset + match.Length; 78yield return (specialMatch.Offset, specialMatch.Length); 79beginning = specialMatch.Offset + specialMatch.Length; 85yield return (match.Offset, match.Length); 86beginning = match.Length + match.Offset; 123while (TryGetMatch(regex, text.AsSpan(), beginning, specialMatch.Offset - beginning, out match)) 125yield return (match.Offset, match.Length); 126beginning = match.Offset + match.Length; 129yield return (specialMatch.Offset, specialMatch.Length); 130beginning = specialMatch.Offset + specialMatch.Length; 136yield return (match.Offset, match.Length); 137beginning = match.Length + match.Offset;
System.Formats.Cbor (10)
System\Formats\Cbor\Reader\CborReader.cs (1)
411_keyEncodingRanges.RemoveWhere(key => key.Offset >= checkpointOffset);
System\Formats\Cbor\Reader\CborReader.Map.cs (6)
139ReadOnlySpan<byte> previousKeyEncoding = buffer.Slice(previousKeyEncodingRange.Offset, previousKeyEncodingRange.Length); 140ReadOnlySpan<byte> currentKeyEncoding = buffer.Slice(currentKeyEncodingRange.Offset, currentKeyEncodingRange.Length); 145ResetBuffer(currentKeyEncodingRange.Offset); 150ResetBuffer(currentKeyEncodingRange.Offset); 166ResetBuffer(currentKeyEncodingRange.Offset); 210return _reader._data.Span.Slice(range.Offset, range.Length);
System\Formats\Cbor\Writer\CborWriter.Map.cs (3)
106_buffer.AsSpan(currentKey.Offset, _offset).Clear(); 107_offset = currentKey.Offset; 272return _writer._buffer.AsSpan(range.Offset, range.Length);
System.Private.Windows.Core (4)
System\Value.cs (4)
782? new(byteArray, _union.Segment.Offset, _union.Segment.Count) 798? new(charArray, _union.Segment.Offset, _union.Segment.Count) 1003? (T)(object)new ArraySegment<char>(chars, _union.Segment.Offset, _union.Segment.Count) 1010? (T)(object)new ArraySegment<byte>(bytes, _union.Segment.Offset, _union.Segment.Count)