43 references to
Microsoft.ML.Tokenizers (43)
Model\BPETokenizer.cs (1)
270(string a, string b) mergeValues = merges[i];
Model\Word.cs (42)
42_symbols[len - 1].Next = len; 62if (_symbols[i].C == c1 && i + 1 < _symbols.Count && _symbols[i + 1].C == c2) 64Symbol first = _symbols[i]; 65Symbol second = _symbols[i + 1]; 70changes.Push((Pair<int>.Create(_symbols[i - 1].C, first.C), -1)); 71changes.Push((Pair<int>.Create(_symbols[i - 1].C, replacement), 1)); 79_symbols[i].C = replacement; 80_symbols[i].Prev = first.Prev; 81_symbols[i].Next = second.Next; 82_symbols[i].Len = first.Len + second.Len; 89changes.Push((Pair<int>.Create(second.C, _symbols[i + 1].C), -1)); 90changes.Push((Pair<int>.Create(replacement, _symbols[i + 1].C), 1)); 109if (merges.TryGetValue(Pair<int>.Create(_symbols[i].C, _symbols[i + 1].C), out (int m1, int m2) value)) 127priorityQueue.Enqueue(skip[i]); 132if (_symbols.Count == 0 || _symbols[top.Pos].Len == 0 || _symbols[top.Pos].Next == -1) 137int nextPos = _symbols[top.Pos].Next; 138Symbol right = _symbols[nextPos]; 141Pair<int> targetNewPair = Pair<int>.Create(_symbols[top.Pos].C, right.C); 148_symbols[top.Pos].MergeWith(ref right, top.NewId); 151_symbols[nextPos].Len = 0; 156_symbols[right.Next].Prev = top.Pos; 160Symbol current = _symbols[top.Pos]; 164Symbol prevSymbol = _symbols[prev]; 177Symbol nextSymbol = _symbols[next]; 190if (_symbols[i].Len == 0) 201accumulatedIds.Add(_symbols[i].C); 213accumulatedIds.Add(_symbols[i].C); 214charsConsumed += _symbols[i].Len; 228accumulatedIds.Add(_symbols[i].C); 229textIndex -= _symbols[i].Len; 243charsConsumed += _symbols[i].Len; 257textIndex -= _symbols[i].Len; 268chars.Push(_symbols[i].C); 286sb.Append($", {_symbols[i].C}"); 298int endIndex = index + _symbols[i].Len; 299tokens.Add(new EncodedToken(_symbols[i].C, vocabReverse[_symbols[i].C], new Range(index + offset, index + offset + _symbols[i].Len))); 300index += _symbols[i].Len;