42 references to ModelUtils
Microsoft.ML.TorchSharp (42)
NasBert\Models\NasBertEncoder.cs (11)
98ModelUtils.InitNormal(TokenEmbedding.weight, mean: 0.0, std: 0.02); 99ModelUtils.InitZeros(TokenEmbedding.weight[paddingIdx]); 102ModelUtils.InitNormal(SegmentEmbedding.weight, mean: 0.0, std: 0.02); 131ModelUtils.FreezeModuleParams(TokenEmbedding); 132ModelUtils.FreezeModuleParams(PositionalEmbedding); 133ModelUtils.FreezeModuleParams(SegmentEmbedding); 134ModelUtils.FreezeModuleParams(EmbeddingLayerNorm); 139ModelUtils.FreezeModuleParams(Layers); 140ModelUtils.FreezeModuleParams(HiddenTransferList); 145ModelUtils.FreezeModuleParams(HiddenTransferList); 150ModelUtils.FreezeModuleParams(Layers[i]);
NasBert\Models\NasBertModel.cs (1)
44ModelUtils.FreezeModuleParams(Encoder);
NasBert\Models\PredictionHead.cs (2)
23ModelUtils.InitXavierUniform(dense.weight); 24ModelUtils.InitZeros(dense.bias);
NasBert\Models\SequenceLabelHead.cs (2)
25ModelUtils.InitXavierUniform(dense.weight); 26ModelUtils.InitZeros(dense.bias);
NasBert\Modules\ConvSeparable.cs (3)
29ModelUtils.InitNormal(conv1.weight, mean: 0, std: std); 30ModelUtils.InitNormal(conv2.weight, mean: 0, std: std); 31ModelUtils.InitConstant(conv2.bias, 0);
NasBert\Modules\Embedding\LearnedPositionalEmbedding.cs (2)
28ModelUtils.InitNormal(Embedding.weight, mean: 0, std: Math.Pow(EmbeddingDim, -0.5)); 29ModelUtils.InitZeros(Embedding.weight[PadPositionIndex]);
NasBert\Modules\EmbedTransfer.cs (2)
78ModelUtils.InitXavierUniform(HiddenTransfer.weight); 79ModelUtils.InitZeros(HiddenTransfer.bias);
NasBert\Modules\HiddenTransfer.cs (2)
37ModelUtils.InitNormal(InHiddenTransfer.weight, mean: 0.0, std: 0.02); 38ModelUtils.InitZeros(InHiddenTransfer.bias);
NasBert\Modules\Layers\FeedForwardLayer.cs (4)
48ModelUtils.InitNormal(fullConnected1.weight, mean: 0.0, std: 0.02); 49ModelUtils.InitZeros(fullConnected1.bias); 50ModelUtils.InitNormal(fullConnected2.weight, mean: 0.0, std: 0.02); 51ModelUtils.InitZeros(fullConnected2.bias);
NasBert\Modules\MultiHeadAttention.cs (13)
112ModelUtils.InitXavierUniform(QProjection.weight, 1.0 / Math.Sqrt(2.0)); 113ModelUtils.InitXavierUniform(KProjection.weight, 1.0 / Math.Sqrt(2.0)); 114ModelUtils.InitXavierUniform(VProjection.weight, 1.0 / Math.Sqrt(2.0)); 118ModelUtils.InitXavierUniform(QProjection.weight); 119ModelUtils.InitXavierUniform(KProjection.weight); 120ModelUtils.InitXavierUniform(VProjection.weight); 123ModelUtils.InitXavierUniform(OutProjLinear.weight); 127ModelUtils.InitConstant(QProjection.bias, 0); 128ModelUtils.InitConstant(KProjection.bias, 0); 129ModelUtils.InitConstant(VProjection.bias, 0); 130ModelUtils.InitConstant(OutProjLinear.bias, 0); 135ModelUtils.InitXavierUniform(KBias); 136ModelUtils.InitXavierUniform(VBias);