1 write to Offset
Microsoft.ML.Tokenizers (1)
EncodedToken.cs (1)
39Offset = offset;
151 references to Offset
Microsoft.ML.Tokenizers (36)
EncodedToken.cs (2)
44public bool Equals(EncodedToken other) => Id == other.Id && Value == other.Value && Offset.Equals(other.Offset);
Model\CodeGenTokenizer.cs (20)
1032if (tokens[tokenCount].Offset.Start.Value == tokens[tokenCount + 1].Offset.Start.Value) 1036while (j < tokens.Count && tokens[j].Offset.Start.Value == tokens[tokenCount].Offset.Start.Value) 1047charsConsumed += tokens[k].Offset.End.Value - tokens[k].Offset.Start.Value; 1059charsConsumed += tokens[tokenCount].Offset.End.Value - tokens[tokenCount].Offset.Start.Value; 1087while (index < tokens.Count && tokens[index].Offset.Start.Value == tokens[index - 1].Offset.Start.Value) 1095textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 1595(int s, int e) r = offset == 0 ? (tokensToAdd[0].Offset.Start.Value, tokensToAdd[0].Offset.End.Value - 1) : (tokensToAdd[0].Offset.Start.Value + offset - 1, tokensToAdd[0].Offset.End.Value + offset - 1); 1600tokens.Add(new EncodedToken(tokensToAdd[i].Id, tokensToAdd[i].Value, new Range(tokensToAdd[i].Offset.Start.Value + offset - 1, tokensToAdd[i].Offset.End.Value + offset - 1))); 1608tokens.Add(new EncodedToken(t.Id, t.Value, new Range(t.Offset.Start.Value + offset, t.Offset.End.Value + offset)));
Model\EnglishRobertaTokenizer.cs (12)
339tokens.Add(new EncodedToken(t.Id, t.Value, new Range(split.Offset + t.Offset.Start.Value, split.Offset + t.Offset.End.Value))); 611charsConsumed += tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 618charsConsumed += tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 648textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 655textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 919if (tokens[i].Offset.Start.Value != indexMapping[index] || tokens[i].Offset.End.Value != indexMapping[index] + tokens[i].Value.Length)
Tokenizer.cs (2)
244return token.Offset.End.Value; 254return token.Offset.Start.Value;
Microsoft.ML.Tokenizers.Tests (115)
BpeTests.cs (9)
277Assert.Equal(offsets[i], (encoding[i].Offset.Start.Value, encoding[i].Offset.End.Value - encoding[i].Offset.Start.Value)); 433Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 437Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray());
CodeGenTests.cs (60)
238Assert.Equal(expectedOffsetsWithSpace, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 244Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 558Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 563Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 568Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 573Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 582Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 587Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 592Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 597Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 602Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 607Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 691Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 696Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 701Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 706Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 715Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 720Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 725Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 730Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 735Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 740Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 826Assert.Equal(new Range(0, 0), encoding[0].Offset); 827Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 832Assert.Equal(new Range(0, 0), encoding[0].Offset); 833Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 838Assert.Equal(new Range(0, 0), encoding[0].Offset); 839Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 844Assert.Equal(new Range(0, 0), encoding[0].Offset); 845Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 856Assert.Equal(new Range(0, 0), encoding[0].Offset); 857Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 862Assert.Equal(new Range(0, 0), encoding[0].Offset); 863Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 868Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 869Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 874Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 875Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 880Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 881Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 886Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 887Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset);
EnglishRobertaTests.cs (9)
185Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 189Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 267(int, int)[] offsets = encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray();
LlamaTests.cs (12)
247Assert.Equal(offsets, result.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 504Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 508Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 542Assert.Equal(expectedOffsets1, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray());
TiktokenTests.cs (22)
148Range[] offsets = result.Select(token => token.Offset).ToArray(); 201(int, int)[] offsets = result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray(); 242(int, int)[] offsets = result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray(); 278Assert.Equal(new List<(int, int)> { (0, 12), (12, 5), (17, 2), (18, 1), (19, 6), (25, 10) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); 313Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); 572Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 576Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 685(int Index, int Length)[] offsets = result.Select(r => (r.Offset.Start.Value, r.Offset.End.Value - r.Offset.Start.Value)).ToArray();
WordPieceTests.cs (3)
107while (i >= tokens[j].Offset.End.Value) 113Assert.Equal(j == 0 ? 0 : tokens[j - 1].Offset.End.Value, charsWritten); 114Assert.Equal(j == 0 ? "" : text.Substring(0, tokens[j - 1].Offset.End.Value), bufferSlice.Slice(0, charsWritten).ToString());