1 write to Offset
Microsoft.ML.Tokenizers (1)
EncodedToken.cs (1)
39
Offset
= offset;
151 references to Offset
Microsoft.ML.Tokenizers (36)
EncodedToken.cs (2)
44
public bool Equals(EncodedToken other) => Id == other.Id && Value == other.Value &&
Offset
.Equals(other.
Offset
);
Model\CodeGenTokenizer.cs (20)
1032
if (tokens[tokenCount].
Offset
.Start.Value == tokens[tokenCount + 1].
Offset
.Start.Value)
1036
while (j < tokens.Count && tokens[j].
Offset
.Start.Value == tokens[tokenCount].
Offset
.Start.Value)
1047
charsConsumed += tokens[k].
Offset
.End.Value - tokens[k].
Offset
.Start.Value;
1059
charsConsumed += tokens[tokenCount].
Offset
.End.Value - tokens[tokenCount].
Offset
.Start.Value;
1087
while (index < tokens.Count && tokens[index].
Offset
.Start.Value == tokens[index - 1].
Offset
.Start.Value)
1095
textIndex -= tokens[i].
Offset
.End.Value - tokens[i].
Offset
.Start.Value;
1595
(int s, int e) r = offset == 0 ? (tokensToAdd[0].
Offset
.Start.Value, tokensToAdd[0].
Offset
.End.Value - 1) : (tokensToAdd[0].
Offset
.Start.Value + offset - 1, tokensToAdd[0].
Offset
.End.Value + offset - 1);
1600
tokens.Add(new EncodedToken(tokensToAdd[i].Id, tokensToAdd[i].Value, new Range(tokensToAdd[i].
Offset
.Start.Value + offset - 1, tokensToAdd[i].
Offset
.End.Value + offset - 1)));
1608
tokens.Add(new EncodedToken(t.Id, t.Value, new Range(t.
Offset
.Start.Value + offset, t.
Offset
.End.Value + offset)));
Model\EnglishRobertaTokenizer.cs (12)
339
tokens.Add(new EncodedToken(t.Id, t.Value, new Range(split.Offset + t.
Offset
.Start.Value, split.Offset + t.
Offset
.End.Value)));
611
charsConsumed += tokens[i].
Offset
.End.Value - tokens[i].
Offset
.Start.Value;
618
charsConsumed += tokens[i].
Offset
.End.Value - tokens[i].
Offset
.Start.Value;
648
textIndex -= tokens[i].
Offset
.End.Value - tokens[i].
Offset
.Start.Value;
655
textIndex -= tokens[i].
Offset
.End.Value - tokens[i].
Offset
.Start.Value;
919
if (tokens[i].
Offset
.Start.Value != indexMapping[index] || tokens[i].
Offset
.End.Value != indexMapping[index] + tokens[i].Value.Length)
Tokenizer.cs (2)
244
return token.
Offset
.End.Value;
254
return token.
Offset
.Start.Value;
Microsoft.ML.Tokenizers.Tests (115)
BpeTests.cs (9)
277
Assert.Equal(offsets[i], (encoding[i].
Offset
.Start.Value, encoding[i].
Offset
.End.Value - encoding[i].
Offset
.Start.Value));
433
Assert.Equal(expectedOffsets, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
437
Assert.Equal(expectedOffsets, encoding1.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
CodeGenTests.cs (60)
238
Assert.Equal(expectedOffsetsWithSpace, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
244
Assert.Equal(expectedOffsets, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
558
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
563
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
568
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
573
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
582
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
587
Assert.Equal((0, 0), (encoding[0].
Offset
.Start.Value, encoding[0].
Offset
.End.Value));
592
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
597
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
602
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
607
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
691
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
696
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
701
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
706
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
715
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
720
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
725
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
730
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
735
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
740
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
826
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
827
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
832
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
833
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
838
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
839
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
844
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
845
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
856
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
857
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
862
Assert.Equal(new Range(0, 0), encoding[0].
Offset
);
863
Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
868
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
869
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
874
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
875
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
880
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
881
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
886
Assert.True(!encoding[0].
Offset
.Equals(new Range(0, 0)) || !encoding[1].
Offset
.Equals(new Range(0, 0)));
887
Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].
Offset
);
EnglishRobertaTests.cs (9)
185
Assert.Equal(expectedOffsets, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
189
Assert.Equal(expectedOffsets, encoding1.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
267
(int, int)[] offsets = encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray();
LlamaTests.cs (12)
247
Assert.Equal(offsets, result.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
504
Assert.Equal(expectedOffsets, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
508
Assert.Equal(expectedOffsets, encoding1.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
542
Assert.Equal(expectedOffsets1, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
TiktokenTests.cs (22)
148
Range[] offsets = result.Select(token => token.
Offset
).ToArray();
201
(int, int)[] offsets = result.Select(token => (token.
Offset
.Start.Value, token.
Offset
.End.Value - token.
Offset
.Start.Value)).ToArray();
242
(int, int)[] offsets = result.Select(token => (token.
Offset
.Start.Value, token.
Offset
.End.Value - token.
Offset
.Start.Value)).ToArray();
278
Assert.Equal(new List<(int, int)> { (0, 12), (12, 5), (17, 2), (18, 1), (19, 6), (25, 10) }, result.Select(token => (token.
Offset
.Start.Value, token.
Offset
.End.Value - token.
Offset
.Start.Value)).ToArray());
313
Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.
Offset
.Start.Value, token.
Offset
.End.Value - token.
Offset
.Start.Value)).ToArray());
572
Assert.Equal(expectedOffsets, encoding.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
576
Assert.Equal(expectedOffsets, encoding1.Select(t => (t.
Offset
.Start.Value, t.
Offset
.End.Value - t.
Offset
.Start.Value)).ToArray());
685
(int Index, int Length)[] offsets = result.Select(r => (r.
Offset
.Start.Value, r.
Offset
.End.Value - r.
Offset
.Start.Value)).ToArray();
WordPieceTests.cs (3)
107
while (i >= tokens[j].
Offset
.End.Value)
113
Assert.Equal(j == 0 ? 0 : tokens[j - 1].
Offset
.End.Value, charsWritten);
114
Assert.Equal(j == 0 ? "" : text.Substring(0, tokens[j - 1].
Offset
.End.Value), bufferSlice.Slice(0, charsWritten).ToString());