1 write to _headDim
Microsoft.ML.GenAI.Phi (1)
Module\Phi2Attention.cs (1)
63 this._headDim = this._hiddenSize / this._numAttentionHeads;
12 references to _headDim
Microsoft.ML.GenAI.Phi (12)
Module\Phi2Attention.cs (12)
70 Contract.Assert(this._hiddenSize % (this._headDim * this._numAttentionHeads) == 0, "hidden_size must be divisible by num_attention_heads");
71 this.q_proj = new GenAILinear(this._hiddenSize, this._numAttentionHeads * this._headDim, hasBias: true, dtype: config.Dtype);
72 this.k_proj = new GenAILinear(this._hiddenSize, this._numKeyValueHeads * this._headDim, hasBias: true, dtype: config.Dtype);
73 this.v_proj = new GenAILinear(this._hiddenSize, this._numKeyValueHeads * this._headDim, hasBias: true, dtype: config.Dtype);
74 this.dense = new GenAILinear(this._numAttentionHeads * this._headDim, this._hiddenSize, hasBias: true, dtype: config.Dtype);
85 dim: (int)(this._partialRotaryFactor * this._headDim),
88 this.cache_k = torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this._headDim, dtype: config.Dtype);
89 this.cache_v = torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this._headDim, dtype: config.Dtype);
121 queryStates = queryStates.view(batchSize, seqLen, this._numAttentionHeads, this._headDim).transpose_(1, 2);
122 keyStates = keyStates.view(batchSize, seqLen, this._numKeyValueHeads, this._headDim).transpose_(1, 2);
123 valueStates = valueStates.view(batchSize, seqLen, this._numKeyValueHeads, this._headDim).transpose_(1, 2);
147 attnWeights = attnWeights / Math.Sqrt(this._headDim);