1 write to Offset
Microsoft.ML.Tokenizers (1)
EncodedToken.cs (1)
37
Offset
= offset;
95 references to Offset
Microsoft.ML.Tokenizers (27)
Model\CodeGenTokenizer.cs (17)
1030
if (tokens[tokenCount].
Offset
.Index == tokens[tokenCount + 1].
Offset
.Index)
1034
while (j < tokens.Count && tokens[j].
Offset
.Index == tokens[tokenCount].
Offset
.Index)
1045
charsConsumed += tokens[k].
Offset
.Length;
1057
charsConsumed += tokens[tokenCount].
Offset
.Length;
1085
while (index < tokens.Count && tokens[index].
Offset
.Index == tokens[index - 1].
Offset
.Index)
1093
textIndex -= tokens[i].
Offset
.Length;
1593
tokens.Add(new EncodedToken(tokensToAdd[0].Id, tokensToAdd[0].Value, (offset == 0 ? tokensToAdd[0].
Offset
.Index : tokensToAdd[0].
Offset
.Index + offset - 1, offset == 0 ? tokensToAdd[0].
Offset
.Length - 1 : tokensToAdd[0].
Offset
.Length)));
1597
tokens.Add(new EncodedToken(tokensToAdd[i].Id, tokensToAdd[i].Value, (tokensToAdd[i].
Offset
.Index + offset - 1, tokensToAdd[i].
Offset
.Length)));
1605
tokens.Add(new EncodedToken(t.Id, t.Value, (t.
Offset
.Index + offset, t.
Offset
.Length)));
Model\EnglishRobertaTokenizer.cs (7)
328
tokens.Add(new EncodedToken(t.Id, t.Value, (split.Offset + t.
Offset
.Index, t.
Offset
.Length)));
600
charsConsumed += tokens[i].
Offset
.Length;
607
charsConsumed += tokens[i].
Offset
.Length;
637
textIndex -= tokens[i].
Offset
.Length;
644
textIndex -= tokens[i].
Offset
.Length;
908
if (tokens[i].
Offset
!= (indexMapping[index], tokens[i].Value.Length))
Tokenizer.cs (3)
244
return token.
Offset
.Index + token.
Offset
.Length;
254
return token.
Offset
.Index;
Microsoft.ML.Tokenizers.Tests (68)
BpeTests.cs (3)
277
Assert.Equal(offsets[i], encoding[i].
Offset
);
433
Assert.Equal(expectedOffsets, encoding.Select(t => t.
Offset
).ToArray());
437
Assert.Equal(expectedOffsets, encoding1.Select(t => t.
Offset
).ToArray());
CodeGenTests.cs (50)
238
Assert.Equal(expectedOffsetsWithSpace, encoding.Select(t => t.
Offset
).ToArray());
244
Assert.Equal(expectedOffsets, encoding.Select(t => t.
Offset
).ToArray());
558
Assert.Equal((0, 0), encoding[0].
Offset
);
563
Assert.Equal((0, 0), encoding[0].
Offset
);
568
Assert.Equal((0, 0), encoding[0].
Offset
);
573
Assert.Equal((0, 0), encoding[0].
Offset
);
582
Assert.Equal((0, 0), encoding[0].
Offset
);
587
Assert.Equal((0, 0), encoding[0].
Offset
);
592
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
597
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
602
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
607
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
691
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
696
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
701
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
706
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
715
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
720
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
725
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
730
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
735
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
740
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
826
Assert.Equal((0, 0), encoding[0].
Offset
);
827
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
832
Assert.Equal((0, 0), encoding[0].
Offset
);
833
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
838
Assert.Equal((0, 0), encoding[0].
Offset
);
839
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
844
Assert.Equal((0, 0), encoding[0].
Offset
);
845
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
856
Assert.Equal((0, 0), encoding[0].
Offset
);
857
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
862
Assert.Equal((0, 0), encoding[0].
Offset
);
863
Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
868
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
869
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
874
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
875
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
880
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
881
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
886
Assert.True(encoding[0].
Offset
!= (0, 0) || encoding[1].
Offset
!= (0, 0));
887
Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].
Offset
);
EnglishRobertaTests.cs (3)
185
Assert.Equal(expectedOffsets, encoding.Select(t => t.
Offset
).ToArray());
189
Assert.Equal(expectedOffsets, encoding1.Select(t => t.
Offset
).ToArray());
267
(int, int)[] offsets = encoding.Select(t => t.
Offset
).ToArray();
LlamaTests.cs (4)
247
Assert.Equal(offsets, result.Select(t => t.
Offset
).ToArray());
504
Assert.Equal(expectedOffsets, encoding.Select(t => t.
Offset
).ToArray());
508
Assert.Equal(expectedOffsets, encoding1.Select(t => t.
Offset
).ToArray());
542
Assert.Equal(expectedOffsets1, encoding.Select(t => t.
Offset
).ToArray());
TitokenTests.cs (8)
147
(int, int)[] offsets = result.Select(token => token.
Offset
).ToArray();
200
(int, int)[] offsets = result.Select(token => token.
Offset
).ToArray();
241
(int, int)[] offsets = result.Select(token => token.
Offset
).ToArray();
277
Assert.Equal(new List<(int, int)> { (0, 12), (12, 5), (17, 2), (18, 1), (19, 6), (25, 10) }, result.Select(token => token.
Offset
).ToArray());
312
Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => token.
Offset
).ToArray());
568
Assert.Equal(expectedOffsets, encoding.Select(t => t.
Offset
).ToArray());
572
Assert.Equal(expectedOffsets, encoding1.Select(t => t.
Offset
).ToArray());
681
(int Index, int Length)[] offsets = result.Select(r => r.
Offset
).ToArray();