43 references to
Microsoft.ML.Tokenizers (43)
Model\BPETokenizer.cs (1)
270
(string a, string b) mergeValues = merges
[
i];
Model\Word.cs (42)
42
_symbols
[
len - 1].Next = len;
62
if (_symbols
[
i].C == c1 && i + 1 < _symbols.Count && _symbols
[
i + 1].C == c2)
64
Symbol first = _symbols
[
i];
65
Symbol second = _symbols
[
i + 1];
70
changes.Push((Pair<int>.Create(_symbols
[
i - 1].C, first.C), -1));
71
changes.Push((Pair<int>.Create(_symbols
[
i - 1].C, replacement), 1));
79
_symbols
[
i].C = replacement;
80
_symbols
[
i].Prev = first.Prev;
81
_symbols
[
i].Next = second.Next;
82
_symbols
[
i].Len = first.Len + second.Len;
89
changes.Push((Pair<int>.Create(second.C, _symbols
[
i + 1].C), -1));
90
changes.Push((Pair<int>.Create(replacement, _symbols
[
i + 1].C), 1));
109
if (merges.TryGetValue(Pair<int>.Create(_symbols
[
i].C, _symbols
[
i + 1].C), out (int m1, int m2) value))
127
priorityQueue.Enqueue(skip
[
i]);
132
if (_symbols.Count == 0 || _symbols
[
top.Pos].Len == 0 || _symbols
[
top.Pos].Next == -1)
137
int nextPos = _symbols
[
top.Pos].Next;
138
Symbol right = _symbols
[
nextPos];
141
Pair<int> targetNewPair = Pair<int>.Create(_symbols
[
top.Pos].C, right.C);
148
_symbols
[
top.Pos].MergeWith(ref right, top.NewId);
151
_symbols
[
nextPos].Len = 0;
156
_symbols
[
right.Next].Prev = top.Pos;
160
Symbol current = _symbols
[
top.Pos];
164
Symbol prevSymbol = _symbols
[
prev];
177
Symbol nextSymbol = _symbols
[
next];
190
if (_symbols
[
i].Len == 0)
201
accumulatedIds.Add(_symbols
[
i].C);
213
accumulatedIds.Add(_symbols
[
i].C);
214
charsConsumed += _symbols
[
i].Len;
228
accumulatedIds.Add(_symbols
[
i].C);
229
textIndex -= _symbols
[
i].Len;
243
charsConsumed += _symbols
[
i].Len;
257
textIndex -= _symbols
[
i].Len;
268
chars.Push(_symbols
[
i].C);
286
sb.Append($", {_symbols
[
i].C}");
298
int endIndex = index + _symbols
[
i].Len;
299
tokens.Add(new EncodedToken(_symbols
[
i].C, vocabReverse[_symbols
[
i].C], new Range(index + offset, index + offset + _symbols
[
i].Len)));
300
index += _symbols
[
i].Len;