2 writes to cache_k
Microsoft.ML.GenAI.Phi (2)
Module\Phi2Attention.cs (2)
88
this.
cache_k
= torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this._headDim, dtype: config.Dtype);
104
this.
cache_k
= this.cache_k.to(hiddenStates.device, disposeAfter: true).DetachFromDisposeScope();
4 references to cache_k
Microsoft.ML.GenAI.Phi (4)
Module\Phi2Attention.cs (4)
102
if (this.
cache_k
.device != hiddenStates.device)
104
this.cache_k = this.
cache_k
.to(hiddenStates.device, disposeAfter: true).DetachFromDisposeScope();
139
this.
cache_k
[..batchSize, .., pastKeyValueLength..kvSeqLen, ..] = keyStates;
141
keyStates = this.
cache_k
[..batchSize, .., ..kvSeqLen, ..];