1 write to _headDim
Microsoft.ML.TorchSharp (1)
NasBert\Modules\MultiHeadAttention.cs (1)
73
_headDim
= _embeddingDim / _numHeads;
10 references to _headDim
Microsoft.ML.TorchSharp (10)
NasBert\Modules\MultiHeadAttention.cs (10)
74
_scaling = Math.Pow(
_headDim
, -0.5);
75
if (
_headDim
* _numHeads != _embeddingDim)
212
q = q.view(tgtLen, batchSize * _numHeads,
_headDim
).transpose_(0, 1);
213
k = k?.view(-1, batchSize * _numHeads,
_headDim
).transpose_(0, 1);
214
v = v?.view(-1, batchSize * _numHeads,
_headDim
).transpose_(0, 1);
221
var prevKey = savedState[PrevKeyKey].view(batchSize * _numHeads, -1,
_headDim
);
229
var prevValue = savedState[PrevValueKey].view(batchSize * _numHeads, -1,
_headDim
);
236
savedState[PrevKeyKey] = k?.view(batchSize, _numHeads, -1,
_headDim
);
238
savedState[PrevValueKey] = v?.view(batchSize, _numHeads, -1,
_headDim
);
294
Debug.Assert(attention.size().SequenceEqual(new[] { batchSize * _numHeads, tgtLen,
_headDim
}));