1 write to _headDim
Microsoft.ML.GenAI.Core (1)
13 references to _headDim
Microsoft.ML.GenAI.Core (13)
Module\Attention.cs (13)
112Contract.Assert(this._hiddenSize % (this._headDim * this._numHeads) == 0, "hidden_size must be divisible by num_heads");
117var opSize = this._numHeads * this._headDim + 2 * (this._numKeyValueHeads * this._headDim);
122this.q_proj = new QuantizedLinear(this._hiddenSize, this._numHeads * this._headDim, hasBias: attentionBias, dtype: dtype);
123this.k_proj = new QuantizedLinear(this._hiddenSize, this._numKeyValueHeads * this._headDim, hasBias: attentionBias, dtype: dtype);
124this.v_proj = new QuantizedLinear(this._hiddenSize, this._numKeyValueHeads * this._headDim, hasBias: attentionBias, dtype: dtype);
147var queryPos = this._numHeads * this._headDim;
149keyStates = qkv[.., .., queryPos..(queryPos + this._numKeyValueHeads * this._headDim)];
150valueStates = qkv[.., .., (queryPos + this._numKeyValueHeads * this._headDim)..];
163queryStates = queryStates.view(bsz, qLen, this._numHeads, this._headDim).transpose(1, 2);
164keyStates = keyStates.view(bsz, qLen, this._numKeyValueHeads, this._headDim).transpose(1, 2);
165valueStates = valueStates.view(bsz, qLen, this._numKeyValueHeads, this._headDim).transpose(1, 2);
185attnWeights = attnWeights / Math.Sqrt(this._headDim);