103 references to Offset
Microsoft.ML.Tokenizers (89)
Model\BPETokenizer.cs (10)
  466: EncodeWithCache(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset, ref priorityQueue);
  527: EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), ids, maxTokenCount - ids.Count, out int length, ref priorityQueue);
  528: charsConsumed = split.Offset + length;
  588: count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue);
  589: textLength = split.Offset + length;
  668: count += EncodeToIdsWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - count, out int length, ref priorityQueue);
  669: charsConsumed = split.Offset + length;
  722: tokenCount += EncodeToIdsFromEndWithCache(textSpanToEncode.Slice(split.Offset, split.Length), null, maxTokenCount - tokenCount, out int textIndex, ref priorityQueue);
  725: return split.Offset + textIndex;
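
All of the Model\* call sites in this assembly share one pattern: a pre-tokenizer supplies (Offset, Length) splits over the input, the encoder processes each slice in isolation, and every slice-relative index it reports is mapped back into the full text by adding split.Offset. A minimal C# sketch of that pattern, assuming a hypothetical EncodeSlice helper (not the actual Microsoft.ML.Tokenizers API):

    using System;
    using System.Collections.Generic;

    static class SplitEncoding
    {
        // Count tokens split by split, tracking how many chars of the original
        // text have been consumed, in the style of the call sites above.
        public static int CountTokens(ReadOnlySpan<char> text,
                                      IEnumerable<(int Offset, int Length)> splits,
                                      int maxTokenCount,
                                      out int charsConsumed)
        {
            int count = 0;
            charsConsumed = 0;
            foreach ((int Offset, int Length) split in splits)
            {
                // Encode the slice in isolation; `length` comes back slice-relative.
                count += EncodeSlice(text.Slice(split.Offset, split.Length),
                                     maxTokenCount - count, out int length);

                // Re-base into the original text: `charsConsumed = split.Offset + length;`
                charsConsumed = split.Offset + length;

                if (count >= maxTokenCount)
                    break;
            }
            return count;
        }

        // Hypothetical encoder: pretend every character yields one token.
        private static int EncodeSlice(ReadOnlySpan<char> slice, int budget, out int length)
        {
            length = Math.Min(slice.Length, budget);
            return length;
        }
    }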
Model\CodeGenTokenizer.cs (9)
  391: EncodeInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), tokens, addPrefixSpace, split.Offset, agenda);
  632: EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), ids, agenda, out int length, maxTokenCount - ids.Count);
  633: charsConsumed = split.Offset + length;
  844: count += EncodeToIdsInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int length, maxTokenCount - count);
  845: charsConsumed = split.Offset + length;
  983: tokenCount += EncodeToIdsFromEndInternal(null, textSpanToEncode.Slice(split.Offset, split.Length), null, agenda, out int textIndex, maxTokenCount - tokenCount);
  986: retIndex = addPrefixSpace ? split.Offset + textIndex - 1 : split.Offset + textIndex;
Model\EnglishRobertaTokenizer.cs (9)
  337: foreach (EncodedToken t in EncodeInternal(textSpanToEncode.Slice(split.Offset, split.Length)))
  339: tokens.Add(new EncodedToken(t.Id, t.Value, new Range(split.Offset + t.Offset.Start.Value, split.Offset + t.Offset.End.Value)));
  441: EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count);
  442: textLength = split.Offset + length;
  525: count += EncodeToIdsInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int length, maxTokenCount - count);
  526: charsConsumed = split.Offset + length;
  572: tokenCount += EncodeToIdsFromEndInternal(textSpanToEncode.Slice(split.Offset, split.Length), null, out int textIndex, maxTokenCount - tokenCount);
  575: return split.Offset + textIndex;
Model\SentencePieceBpeModel.cs (6)
  919: if (current.Offset + current.Length < text.Length)
  921: splitText = text.Slice(current.Offset + current.Length);
  930: if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id))
  936: if (current.Offset > 0 && idsCount < maxTokens)
  938: int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0;
  939: splitText = text.Slice(start, current.Offset - start);
Model\SentencePieceUnigramModel.cs (7)
  1295: if (current.Offset + current.Length < text.Length)
  1297: GetIndexByTokenCountFromEndInternal(text.Slice(current.Offset + current.Length), considerNormalization, ref tokenCount, buffer, ref normalizedString, ref normalizedStringCountFromEnd, ref charConsumedFromEnd, maxTokenCount);
  1306: if (InternalSpecialTokens!.TryGetValue(text.Slice(current.Offset, current.Length), out int id))
  1316: StoreNormalizedTextFromEnd(text.Slice(current.Offset, current.Length), ref normalizedString, ref normalizedStringCountFromEnd);
  1319: if (current.Offset > 0)
  1321: int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0;
  1322: GetIndexByTokenCountFromEndInternal(text.Slice(start, current.Offset - start), considerNormalization, ref tokenCount, buffer, ref normalizedString, ref normalizedStringCountFromEnd, ref charConsumedFromEnd, maxTokenCount);
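
The two SentencePiece models walk the splits differently: here each (Offset, Length) entry marks a special-token occurrence, and the ordinary text to encode is whatever lies before, between, and after them. A hedged sketch of that gap-walking logic, with hypothetical handleText/handleSpecialToken callbacks:

    using System;

    static class SpecialTokenSplits
    {
        // Walk the text around special-token splits, as in the SentencePiece
        // models above. handleText/handleSpecialToken are hypothetical callbacks.
        public static void Walk(ReadOnlySpan<char> text,
                                (int Offset, int Length)[] splits,
                                Action<string> handleText,
                                Action<string> handleSpecialToken)
        {
            for (int i = 0; i < splits.Length; i++)
            {
                (int Offset, int Length) current = splits[i];

                // Gap before this special token:
                // `start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0;`
                int start = i > 0 ? splits[i - 1].Offset + splits[i - 1].Length : 0;
                if (current.Offset > start)
                    handleText(text.Slice(start, current.Offset - start).ToString());

                // The special token itself: `text.Slice(current.Offset, current.Length)`.
                handleSpecialToken(text.Slice(current.Offset, current.Length).ToString());
            }

            // Tail after the last split: `current.Offset + current.Length < text.Length`.
            int end = splits.Length > 0 ? splits[^1].Offset + splits[^1].Length : 0;
            if (end < text.Length)
                handleText(text.Slice(end).ToString());
        }
    }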
Model\TiktokenTokenizer.cs (8)
  282: EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset);
  393: EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count);
  394: charsConsumed = split.Offset + length;
  560: count += CountTokens(textSpanToEncode.Slice(split.Offset, split.Length), out int length, maxTokenCount - count);
  561: charsConsumed = split.Offset + length;
  687: tokenCount += CountTokensFromEnd(textSpanToEncode.Slice(split.Offset, split.Length), out int textIndex, maxTokenCount - tokenCount);
  690: return split.Offset + textIndex;
Model\WordPieceTokenizer.cs (10)
  296: EncodeToTokens(textSpanToEncode.Slice(split.Offset, split.Length), tokens, split.Offset);
  420: EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), ids, out int length, maxTokenCount - ids.Count);
  427: charsConsumed = split.Offset + length;
  570: count += EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out int length, maxTokenCount - count);
  649: int count = EncodeToIds(textSpanToEncode.Slice(split.Offset, split.Length), accumulatedIds: null, out charsConsumed, settings.MaxTokenCount - tokenCount);
  652: return fromEnd ? split.Offset + split.Length : split.Offset;
  659: return fromEnd ? split.Offset : split.Offset + split.Length;
PreTokenizer\CompositePreTokenizer.cs (12)
  88: yield return (range.Offset, range.Length);
  100: if (range.Offset > beginning)
  105: foreach ((int Offset, int Length) subRange in SplitText(text, preTokenizers, preTokenizerIndex + 1, offset + beginning, range.Offset - beginning))
  112: yield return (offset + beginning, range.Offset);
  116: beginning = range.Offset + range.Length;
  118: yield return (offset + range.Offset, range.Length);
  166: yield return (range.Offset, range.Length);
  178: if (range.Offset > beginning)
  183: foreach ((int Offset, int Length) subRange in SplitText(text, preTokenizers, preTokenizerIndex + 1, offset + beginning, range.Offset - beginning))
  190: yield return (offset + beginning, range.Offset);
  194: beginning = range.Offset + range.Length;
  196: yield return (offset + range.Offset, range.Length);
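
The CompositePreTokenizer call sites show a recursive scheme: each pre-tokenizer yields its own ranges, and the gaps between them are handed to the next pre-tokenizer in the chain, with sub-ranges re-based into the original coordinates. A simplified sketch under those assumptions (not the library's actual signature, which passes offset/length instead of substrings):

    using System.Collections.Generic;

    static class CompositeSplitting
    {
        public delegate IEnumerable<(int Offset, int Length)> PreTokenizer(string text);

        public static IEnumerable<(int Offset, int Length)> SplitText(
            string text, IReadOnlyList<PreTokenizer> preTokenizers, int preTokenizerIndex)
        {
            if (preTokenizerIndex >= preTokenizers.Count)
            {
                // No pre-tokenizers left: the whole text is one range.
                yield return (0, text.Length);
                yield break;
            }

            int beginning = 0;
            foreach ((int Offset, int Length) range in preTokenizers[preTokenizerIndex](text))
            {
                if (range.Offset > beginning)
                {
                    // Hand the gap before this range to the next pre-tokenizer and
                    // re-base its sub-ranges, as the nested SplitText calls above do.
                    string gap = text.Substring(beginning, range.Offset - beginning);
                    foreach ((int Offset, int Length) subRange in SplitText(gap, preTokenizers, preTokenizerIndex + 1))
                        yield return (beginning + subRange.Offset, subRange.Length);
                }

                yield return (range.Offset, range.Length);
                beginning = range.Offset + range.Length;
            }

            if (beginning < text.Length)
            {
                // Trailing gap after the last range.
                foreach ((int Offset, int Length) subRange in SplitText(text.Substring(beginning), preTokenizers, preTokenizerIndex + 1))
                    yield return (beginning + subRange.Offset, subRange.Length);
            }
        }
    }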
PreTokenizer\PreTokenizer.cs (4)
  38: yield return (match.Offset, match.Length);
  39: beginning = match.Offset + match.Length;
  146: yield return (match.Offset, match.Length);
  147: beginning = match.Offset + match.Length;
PreTokenizer\RegexPreTokenizer.cs (14)
  72: while (TryGetMatch(regex, text, beginning, specialMatch.Offset - beginning, out match))
  74: yield return (match.Offset, match.Length);
  75: beginning = match.Offset + match.Length;
  78: yield return (specialMatch.Offset, specialMatch.Length);
  79: beginning = specialMatch.Offset + specialMatch.Length;
  85: yield return (match.Offset, match.Length);
  86: beginning = match.Length + match.Offset;
  123: while (TryGetMatch(regex, text.AsSpan(), beginning, specialMatch.Offset - beginning, out match))
  125: yield return (match.Offset, match.Length);
  126: beginning = match.Offset + match.Length;
  129: yield return (specialMatch.Offset, specialMatch.Length);
  130: beginning = specialMatch.Offset + specialMatch.Length;
  136: yield return (match.Offset, match.Length);
  137: beginning = match.Length + match.Offset;
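
The pre-tokenizer side of the contract is simple: scan matches starting from beginning, yield each as an (Offset, Length) pair, and advance beginning past the match. A hedged sketch using System.Text.RegularExpressions (the BCL's Match.Index plays the role of match.Offset above; the real code also interleaves special-token matches, seen as specialMatch in the listing):

    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    static class RegexSplitting
    {
        // Yield each regex match over `text` as an (Offset, Length) pair.
        public static IEnumerable<(int Offset, int Length)> SplitText(string text, Regex regex)
        {
            int beginning = 0;
            Match match = regex.Match(text, beginning);
            while (match.Success)
            {
                if (match.Length == 0)
                    break; // guard against looping forever on zero-length matches

                yield return (match.Index, match.Length);

                // Advance past the match: `beginning = match.Offset + match.Length;`
                beginning = match.Index + match.Length;
                match = regex.Match(text, beginning);
            }
        }
    }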
System.Formats.Cbor (10)
System\Formats\Cbor\Reader\CborReader.cs (1)
  411: _keyEncodingRanges.RemoveWhere(key => key.Offset >= checkpointOffset);
System\Formats\Cbor\Reader\CborReader.Map.cs (6)
  139: ReadOnlySpan<byte> previousKeyEncoding = buffer.Slice(previousKeyEncodingRange.Offset, previousKeyEncodingRange.Length);
  140: ReadOnlySpan<byte> currentKeyEncoding = buffer.Slice(currentKeyEncodingRange.Offset, currentKeyEncodingRange.Length);
  145: ResetBuffer(currentKeyEncodingRange.Offset);
  150: ResetBuffer(currentKeyEncodingRange.Offset);
  166: ResetBuffer(currentKeyEncodingRange.Offset);
  210: return _reader._data.Span.Slice(range.Offset, range.Length);
System\Formats\Cbor\Writer\CborWriter.Map.cs (3)
  106: _buffer.AsSpan(currentKey.Offset, _offset).Clear();
  107: _offset = currentKey.Offset;
  272: return _writer._buffer.AsSpan(range.Offset, range.Length);
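
In System.Formats.Cbor the (Offset, Length) pairs index into the raw CBOR buffer: each map key's encoding is remembered as a range, sliced back out to compare keys byte-for-byte, and discarded when the reader or writer rolls back past a checkpoint. A simplified sketch of that bookkeeping (the type below is a stand-in, not the library's internals):

    using System;
    using System.Collections.Generic;

    sealed class KeyRangeTracker
    {
        private readonly byte[] _buffer;
        private readonly List<(int Offset, int Length)> _keyRanges = new();

        public KeyRangeTracker(byte[] buffer) => _buffer = buffer;

        // Record a key's encoded range unless an identical key was already seen.
        public bool TryAddKey(int offset, int length)
        {
            ReadOnlySpan<byte> current = _buffer.AsSpan(offset, length);
            foreach ((int Offset, int Length) range in _keyRanges)
            {
                // Compare raw encodings, as CborReader.Map.cs does with buffer.Slice(...).
                if (current.SequenceEqual(_buffer.AsSpan(range.Offset, range.Length)))
                    return false; // duplicate key
            }
            _keyRanges.Add((offset, length));
            return true;
        }

        // Rolling back to a checkpoint discards ranges recorded past it, mirroring
        // `_keyEncodingRanges.RemoveWhere(key => key.Offset >= checkpointOffset);`
        public void Rollback(int checkpointOffset) =>
            _keyRanges.RemoveAll(key => key.Offset >= checkpointOffset);
    }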
System.Private.Windows.Core (4)
System\Value.cs (4)
  782: ? new(byteArray, _union.Segment.Offset, _union.Segment.Count)
  798: ? new(charArray, _union.Segment.Offset, _union.Segment.Count)
  1003: ? (T)(object)new ArraySegment<char>(chars, _union.Segment.Offset, _union.Segment.Count)
  1010: ? (T)(object)new ArraySegment<byte>(bytes, _union.Segment.Offset, _union.Segment.Count)
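
The Value.cs references all reconstruct an ArraySegment<T> from a stored array plus the segment's Offset and Count. A small sketch of that round trip, with a hypothetical StoredSegment standing in for the internal union field:

    using System;

    // Hypothetical stand-in for the Offset/Count pair kept in the internal union.
    readonly record struct StoredSegment(int Offset, int Count);

    static class SegmentRoundTrip
    {
        // Rebuild the segment with the same constructor the call sites use:
        // `new ArraySegment<byte>(bytes, _union.Segment.Offset, _union.Segment.Count)`.
        public static ArraySegment<byte> Rehydrate(byte[] bytes, StoredSegment segment) =>
            new(bytes, segment.Offset, segment.Count);
    }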