File: NasBert\Modules\ActivationFunction.cs
Project: src\src\Microsoft.ML.TorchSharp\Microsoft.ML.TorchSharp.csproj (Microsoft.ML.TorchSharp)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System;
using System.Collections.Generic;
using System.Text;
using TorchSharp;
using TorchSharp.Modules;
 
namespace Microsoft.ML.TorchSharp.NasBert.Modules
{
 
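    /// <summary>
    /// Wraps a TorchSharp activation module selected by name ("relu", "gelu", "gelu_fast",
    /// "tanh", or "linear") so NAS-BERT layers can choose the activation from a configuration string.
    /// </summary>
    /// <example>
    /// A minimal usage sketch (the tensor shape is illustrative, not taken from this file):
    /// <code>
    /// using var activation = new ActivationFunction("gelu_fast");
    /// using var input = torch.randn(2, 8);
    /// using var output = activation.forward(input);
    /// </code>
    /// </example>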
    internal sealed class ActivationFunction : torch.nn.Module<torch.Tensor, torch.Tensor>
    {
        private readonly torch.nn.Module<torch.Tensor, torch.Tensor> _function;
        private bool _disposedValue;
 
        public ActivationFunction(string name) : base(name)
        {
            _function = name?.ToLower() switch
            {
                "relu" => torch.nn.ReLU(),
                "gelu" => torch.nn.GELU(),
                "gelu_fast" => new GeLUFast(),
                "tanh" => torch.nn.Tanh(),
                "linear" => torch.nn.Identity(),
                _ => throw new NotSupportedException($"Activation function {name} not supported.")
            };
        }
 
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_GeneralName:This name should be PascalCased", Justification = "Need to match TorchSharp.")]
        public override torch.Tensor forward(torch.Tensor x)
        {
            return _function.forward(x);
        }
 
        public override string GetName()
        {
            return _function.GetName();
        }
 
        protected override void Dispose(bool disposing)
        {
            if (!_disposedValue)
            {
                if (disposing)
                {
                    _function.Dispose();
                    _disposedValue = true;
                }
            }
 
            base.Dispose(disposing);
        }
    }
 
    /// <summary>
    /// See https://arxiv.org/pdf/1606.08415.pdf:
    /// y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715 x^3)))
    /// </summary>
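    /// <remarks>
    /// This is the tanh approximation of GELU (x * Phi(x)); it avoids evaluating the exact
    /// Gaussian CDF at a small cost in numerical accuracy.
    /// </remarks>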
    public class GeLUFast : torch.nn.Module<torch.Tensor, torch.Tensor>
    {
        private readonly double _alpha = Math.Sqrt(2 / Math.PI);
        private readonly double _beta = 0.044715;
 
        public GeLUFast() : base(nameof(GeLUFast)) { }
 
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_GeneralName:This name should be PascalCased", Justification = "Need to match TorchSharp.")]
        public override torch.Tensor forward(torch.Tensor x)
        {
            using var disposeScope = torch.NewDisposeScope();
            var x1 = torch.pow(x, 3).mul_(_beta).add_(x).mul_(_alpha);  // sqrt(2/Pi) * (x + 0.044715 x^3)
            var y = torch.nn.functional.tanh(x1).add_(1.0).mul_(0.5).mul_(x);  // 0.5x * (1 + tanh(x1))
            return y.MoveToOuterDisposeScope();
        }
    }
}