1 write to Offset
Microsoft.ML.Tokenizers (1)
EncodedToken.cs (1)
39Offset = offset;
158 references to Offset
Microsoft.ML.Tokenizers (40)
EncodedToken.cs (2)
44public bool Equals(EncodedToken other) => Id == other.Id && Value == other.Value && Offset.Equals(other.Offset);
Model\CodeGenTokenizer.cs (20)
1035if (tokens[tokenCount].Offset.Start.Value == tokens[tokenCount + 1].Offset.Start.Value) 1039while (j < tokens.Count && tokens[j].Offset.Start.Value == tokens[tokenCount].Offset.Start.Value) 1050charsConsumed += tokens[k].Offset.End.Value - tokens[k].Offset.Start.Value; 1062charsConsumed += tokens[tokenCount].Offset.End.Value - tokens[tokenCount].Offset.Start.Value; 1090while (index < tokens.Count && tokens[index].Offset.Start.Value == tokens[index - 1].Offset.Start.Value) 1098textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 1598(int s, int e) r = offset == 0 ? (tokensToAdd[0].Offset.Start.Value, tokensToAdd[0].Offset.End.Value - 1) : (tokensToAdd[0].Offset.Start.Value + offset - 1, tokensToAdd[0].Offset.End.Value + offset - 1); 1603tokens.Add(new EncodedToken(tokensToAdd[i].Id, tokensToAdd[i].Value, new Range(tokensToAdd[i].Offset.Start.Value + offset - 1, tokensToAdd[i].Offset.End.Value + offset - 1))); 1611tokens.Add(new EncodedToken(t.Id, t.Value, new Range(t.Offset.Start.Value + offset, t.Offset.End.Value + offset)));
Model\EnglishRobertaTokenizer.cs (12)
339tokens.Add(new EncodedToken(t.Id, t.Value, new Range(split.Offset + t.Offset.Start.Value, split.Offset + t.Offset.End.Value))); 611charsConsumed += tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 618charsConsumed += tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 648textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 655textIndex -= tokens[i].Offset.End.Value - tokens[i].Offset.Start.Value; 919if (tokens[i].Offset.Start.Value != indexMapping[index] || tokens[i].Offset.End.Value != indexMapping[index] + tokens[i].Value.Length)
Model\SentencePieceUnigramModel.cs (4)
430int tokenLength = tokens[start].Offset.End.Value; 441int tokenLength = tokens[start].Offset.End.Value; 470int offsetStart = tokens[insertionStartPosition].Offset.Start.Value; 471int tokenLength = tokens[insertionStartPosition].Offset.End.Value - offsetStart;
Tokenizer.cs (2)
244return token.Offset.End.Value; 254return token.Offset.Start.Value;
Microsoft.ML.Tokenizers.Tests (118)
BpeTests.cs (9)
277Assert.Equal(offsets[i], (encoding[i].Offset.Start.Value, encoding[i].Offset.End.Value - encoding[i].Offset.Start.Value)); 433Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 437Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray());
CodeGenTests.cs (60)
238Assert.Equal(expectedOffsetsWithSpace, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 244Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 558Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 563Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 568Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 573Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 582Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 587Assert.Equal((0, 0), (encoding[0].Offset.Start.Value, encoding[0].Offset.End.Value)); 592Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 597Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 602Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 607Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 691Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 696Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 701Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 706Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 715Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 720Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 725Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 730Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 735Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 740Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 826Assert.Equal(new Range(0, 0), encoding[0].Offset); 827Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 832Assert.Equal(new Range(0, 0), encoding[0].Offset); 833Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 838Assert.Equal(new Range(0, 0), encoding[0].Offset); 839Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 844Assert.Equal(new Range(0, 0), encoding[0].Offset); 845Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 856Assert.Equal(new Range(0, 0), encoding[0].Offset); 857Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 862Assert.Equal(new Range(0, 0), encoding[0].Offset); 863Assert.Equal(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 868Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 869Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 874Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 875Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 880Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 881Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset); 886Assert.True(!encoding[0].Offset.Equals(new Range(0, 0)) || !encoding[1].Offset.Equals(new Range(0, 0))); 887Assert.NotEqual(new Range(text.Length, text.Length), encoding[encoding.Count - 1].Offset);
EnglishRobertaTests.cs (9)
185Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 189Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 267(int, int)[] offsets = encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray();
LlamaTests.cs (12)
247Assert.Equal(offsets, result.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 504Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 508Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 542Assert.Equal(expectedOffsets1, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray());
TiktokenTests.cs (22)
151Range[] offsets = result.Select(token => token.Offset).ToArray(); 204(int, int)[] offsets = result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray(); 245(int, int)[] offsets = result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray(); 281Assert.Equal(new List<(int, int)> { (0, 12), (12, 5), (17, 2), (18, 1), (19, 6), (25, 10) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); 316Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); 586Assert.Equal(expectedOffsets, encoding.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 590Assert.Equal(expectedOffsets, encoding1.Select(t => (t.Offset.Start.Value, t.Offset.End.Value - t.Offset.Start.Value)).ToArray()); 699(int Index, int Length)[] offsets = result.Select(r => (r.Offset.Start.Value, r.Offset.End.Value - r.Offset.Start.Value)).ToArray();
UnigramTests.cs (3)
336Assert.True(writableTokens[0].Offset.Equals(new Range(0, 0))); 347Assert.True(writableTokens[writableTokens.Count - 1].Offset.Equals(new Range(normalized.Length, normalized.Length))); 355writableTokens.Select(t => t.Offset)
WordPieceTests.cs (3)
107while (i >= tokens[j].Offset.End.Value) 113Assert.Equal(j == 0 ? 0 : tokens[j - 1].Offset.End.Value, charsWritten); 114Assert.Equal(j == 0 ? "" : text.Substring(0, tokens[j - 1].Offset.End.Value), bufferSlice.Slice(0, charsWritten).ToString());