1 write to _headDim
Microsoft.ML.GenAI.Phi (1)
Module\Phi2Attention.cs (1)
63
this.
_headDim
= this._hiddenSize / this._numAttentionHeads;
12 references to _headDim
Microsoft.ML.GenAI.Phi (12)
Module\Phi2Attention.cs (12)
70
Contract.Assert(this._hiddenSize % (this.
_headDim
* this._numAttentionHeads) == 0, "hidden_size must be divisible by num_attention_heads");
71
this.q_proj = new GenAILinear(this._hiddenSize, this._numAttentionHeads * this.
_headDim
, hasBias: true, dtype: config.Dtype);
72
this.k_proj = new GenAILinear(this._hiddenSize, this._numKeyValueHeads * this.
_headDim
, hasBias: true, dtype: config.Dtype);
73
this.v_proj = new GenAILinear(this._hiddenSize, this._numKeyValueHeads * this.
_headDim
, hasBias: true, dtype: config.Dtype);
74
this.dense = new GenAILinear(this._numAttentionHeads * this.
_headDim
, this._hiddenSize, hasBias: true, dtype: config.Dtype);
85
dim: (int)(this._partialRotaryFactor * this.
_headDim
),
88
this.cache_k = torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this.
_headDim
, dtype: config.Dtype);
89
this.cache_v = torch.zeros(maxBatch, this._numKeyValueHeads, maxLength, this.
_headDim
, dtype: config.Dtype);
121
queryStates = queryStates.view(batchSize, seqLen, this._numAttentionHeads, this.
_headDim
).transpose_(1, 2);
122
keyStates = keyStates.view(batchSize, seqLen, this._numKeyValueHeads, this.
_headDim
).transpose_(1, 2);
123
valueStates = valueStates.view(batchSize, seqLen, this._numKeyValueHeads, this.
_headDim
).transpose_(1, 2);
147
attnWeights = attnWeights / Math.Sqrt(this.
_headDim
);