2 writes to cache_v
Microsoft.ML.GenAI.Phi (2)
Module\Phi2Attention.cs (2)
89
this.
cache_v
= torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this._headDim, dtype: config.Dtype);
105
this.
cache_v
= this.cache_v.to(hiddenStates.device, disposeAfter: true).DetachFromDisposeScope();
3 references to cache_v
Microsoft.ML.GenAI.Phi (3)
Module\Phi2Attention.cs (3)
105
this.cache_v = this.
cache_v
.to(hiddenStates.device, disposeAfter: true).DetachFromDisposeScope();
140
this.
cache_v
[..batchSize, .., pastKeyValueLength..kvSeqLen, ..] = valueStates;
142
valueStates = this.
cache_v
[..batchSize, .., ..kvSeqLen, ..];