1 write to Offset
Microsoft.ML.Tokenizers (1)
EncodedToken.cs (1)
37Offset = offset;
95 references to Offset
Microsoft.ML.Tokenizers (27)
Model\CodeGenTokenizer.cs (17)
1030if (tokens[tokenCount].Offset.Index == tokens[tokenCount + 1].Offset.Index) 1034while (j < tokens.Count && tokens[j].Offset.Index == tokens[tokenCount].Offset.Index) 1045charsConsumed += tokens[k].Offset.Length; 1057charsConsumed += tokens[tokenCount].Offset.Length; 1085while (index < tokens.Count && tokens[index].Offset.Index == tokens[index - 1].Offset.Index) 1093textIndex -= tokens[i].Offset.Length; 1593tokens.Add(new EncodedToken(tokensToAdd[0].Id, tokensToAdd[0].Value, (offset == 0 ? tokensToAdd[0].Offset.Index : tokensToAdd[0].Offset.Index + offset - 1, offset == 0 ? tokensToAdd[0].Offset.Length - 1 : tokensToAdd[0].Offset.Length))); 1597tokens.Add(new EncodedToken(tokensToAdd[i].Id, tokensToAdd[i].Value, (tokensToAdd[i].Offset.Index + offset - 1, tokensToAdd[i].Offset.Length))); 1605tokens.Add(new EncodedToken(t.Id, t.Value, (t.Offset.Index + offset, t.Offset.Length)));
Model\EnglishRobertaTokenizer.cs (7)
328tokens.Add(new EncodedToken(t.Id, t.Value, (split.Offset + t.Offset.Index, t.Offset.Length))); 600charsConsumed += tokens[i].Offset.Length; 607charsConsumed += tokens[i].Offset.Length; 637textIndex -= tokens[i].Offset.Length; 644textIndex -= tokens[i].Offset.Length; 908if (tokens[i].Offset != (indexMapping[index], tokens[i].Value.Length))
Tokenizer.cs (3)
244return token.Offset.Index + token.Offset.Length; 254return token.Offset.Index;
Microsoft.ML.Tokenizers.Tests (68)
BpeTests.cs (3)
277Assert.Equal(offsets[i], encoding[i].Offset); 433Assert.Equal(expectedOffsets, encoding.Select(t => t.Offset).ToArray()); 437Assert.Equal(expectedOffsets, encoding1.Select(t => t.Offset).ToArray());
CodeGenTests.cs (50)
238Assert.Equal(expectedOffsetsWithSpace, encoding.Select(t => t.Offset).ToArray()); 244Assert.Equal(expectedOffsets, encoding.Select(t => t.Offset).ToArray()); 558Assert.Equal((0, 0), encoding[0].Offset); 563Assert.Equal((0, 0), encoding[0].Offset); 568Assert.Equal((0, 0), encoding[0].Offset); 573Assert.Equal((0, 0), encoding[0].Offset); 582Assert.Equal((0, 0), encoding[0].Offset); 587Assert.Equal((0, 0), encoding[0].Offset); 592Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 597Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 602Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 607Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 691Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 696Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 701Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 706Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 715Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 720Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 725Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 730Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 735Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 740Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 826Assert.Equal((0, 0), encoding[0].Offset); 827Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 832Assert.Equal((0, 0), encoding[0].Offset); 833Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 838Assert.Equal((0, 0), encoding[0].Offset); 839Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 844Assert.Equal((0, 0), encoding[0].Offset); 845Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 856Assert.Equal((0, 0), encoding[0].Offset); 857Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 862Assert.Equal((0, 0), encoding[0].Offset); 863Assert.Equal((text.Length, 0), encoding[encoding.Count - 1].Offset); 868Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 869Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 874Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 875Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 880Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 881Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset); 886Assert.True(encoding[0].Offset != (0, 0) || encoding[1].Offset != (0, 0)); 887Assert.NotEqual((text.Length, 0), encoding[encoding.Count - 1].Offset);
EnglishRobertaTests.cs (3)
185Assert.Equal(expectedOffsets, encoding.Select(t => t.Offset).ToArray()); 189Assert.Equal(expectedOffsets, encoding1.Select(t => t.Offset).ToArray()); 267(int, int)[] offsets = encoding.Select(t => t.Offset).ToArray();
LlamaTests.cs (4)
247Assert.Equal(offsets, result.Select(t => t.Offset).ToArray()); 504Assert.Equal(expectedOffsets, encoding.Select(t => t.Offset).ToArray()); 508Assert.Equal(expectedOffsets, encoding1.Select(t => t.Offset).ToArray()); 542Assert.Equal(expectedOffsets1, encoding.Select(t => t.Offset).ToArray());
TitokenTests.cs (8)
147(int, int)[] offsets = result.Select(token => token.Offset).ToArray(); 200(int, int)[] offsets = result.Select(token => token.Offset).ToArray(); 241(int, int)[] offsets = result.Select(token => token.Offset).ToArray(); 277Assert.Equal(new List<(int, int)> { (0, 12), (12, 5), (17, 2), (18, 1), (19, 6), (25, 10) }, result.Select(token => token.Offset).ToArray()); 312Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => token.Offset).ToArray()); 568Assert.Equal(expectedOffsets, encoding.Select(t => t.Offset).ToArray()); 572Assert.Equal(expectedOffsets, encoding1.Select(t => t.Offset).ToArray()); 681(int Index, int Length)[] offsets = result.Select(r => r.Offset).ToArray();