1 write to _headDim
Microsoft.ML.TorchSharp (1)
NasBert\Modules\MultiHeadAttention.cs (1)
73: _headDim = _embeddingDim / _numHeads;
10 references to _headDim
Microsoft.ML.TorchSharp (10)
NasBert\Modules\MultiHeadAttention.cs (10)
74: _scaling = Math.Pow(_headDim, -0.5);
75: if (_headDim * _numHeads != _embeddingDim)
212: q = q.view(tgtLen, batchSize * _numHeads, _headDim).transpose_(0, 1);
213: k = k?.view(-1, batchSize * _numHeads, _headDim).transpose_(0, 1);
214: v = v?.view(-1, batchSize * _numHeads, _headDim).transpose_(0, 1);
221: var prevKey = savedState[PrevKeyKey].view(batchSize * _numHeads, -1, _headDim);
229: var prevValue = savedState[PrevValueKey].view(batchSize * _numHeads, -1, _headDim);
236: savedState[PrevKeyKey] = k?.view(batchSize, _numHeads, -1, _headDim);
238: savedState[PrevValueKey] = v?.view(batchSize, _numHeads, -1, _headDim);
294: Debug.Assert(attention.size().SequenceEqual(new[] { batchSize * _numHeads, tgtLen, _headDim }));