
Commit

fixed APTx activation (#214)
pasq-cat authored May 14, 2024
1 parent a722d79 commit 0ee81fd
Showing 2 changed files with 53 additions and 0 deletions.
34 changes: 34 additions & 0 deletions neurodiffeq/networks.py
@@ -173,3 +173,37 @@ def __init__(self, beta=1.0, trainable=False):

    def forward(self, x):
        return x * torch.sigmoid(self.beta * x)

class APTx(nn.Module):
    r"""The APTx (Alpha Plus Tanh Times) activation function: :math:`\mathrm{APTx}(x) = (\alpha + \tanh{(\beta x)}) \gamma x`.
    It behaves similarly to the Mish activation function but requires fewer mathematical
    operations to compute. The lower computational cost of APTx speeds up model training
    and thus also reduces the hardware requirements of the deep learning model.

    :param alpha: The :math:`\alpha` parameter in the APTx activation.
    :type alpha: float
    :param beta: The :math:`\beta` parameter in the APTx activation.
    :type beta: float
    :param gamma: The :math:`\gamma` parameter in the APTx activation.
    :type gamma: float
    :param trainable: Whether the scalars :math:`\alpha`, :math:`\beta`, and :math:`\gamma` can be trained.
    :type trainable: bool
    """

    def __init__(self, alpha=1.0, beta=1.0, gamma=1.0, trainable=False):
        super(APTx, self).__init__()
        alpha = float(alpha)
        beta = float(beta)
        gamma = float(gamma)
        self.trainable = trainable
        if trainable:
            # register the scalars as learnable parameters
            self.alpha = nn.Parameter(torch.tensor(alpha))
            self.beta = nn.Parameter(torch.tensor(beta))
            self.gamma = nn.Parameter(torch.tensor(gamma))
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma

    def forward(self, x):
        # torch.tanh replaces the deprecated torch.nn.functional.tanh
        return (self.alpha + torch.tanh(self.beta * x)) * self.gamma * x
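
For reference, a minimal usage sketch of the new activation (illustrative, not part of this commit; the tensor `x` and the parameter values are arbitrary examples):

import torch
from neurodiffeq.networks import APTx

x = torch.linspace(-3.0, 3.0, 7)

# fixed (non-trainable) scalars: APTx(x) = (alpha + tanh(beta * x)) * gamma * x
f = APTx(alpha=1.0, beta=1.0, gamma=0.5)
y = f(x)

# with the defaults alpha = beta = gamma = 1, the output reduces to (1 + tanh(x)) * x
g = APTx()
assert torch.allclose(g(x), (1 + torch.tanh(x)) * x)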
19 changes: 19 additions & 0 deletions tests/test_networks.py
@@ -7,6 +7,7 @@
from neurodiffeq.networks import MonomialNN
from neurodiffeq.networks import SinActv
from neurodiffeq.networks import Swish
from neurodiffeq.networks import APTx

MAGIC = 42
torch.manual_seed(MAGIC)
@@ -147,3 +148,21 @@ def test_swish():
    assert len(list(f.parameters())) == 1
    assert list(f.parameters())[0].shape == ()
    assert torch.isclose(f(x), x * torch.sigmoid(beta * x)).all()



def test_APTx():
    x = torch.rand(10, 5)

    f = APTx()
    assert len(list(f.parameters())) == 0
    assert torch.isclose(f(x), (1 + torch.tanh(x)) * x).all()

    alpha = 1.0
    beta = 1.0
    gamma = 0.5
    f = APTx(alpha, beta, gamma, trainable=True)
    assert len(list(f.parameters())) == 3
    assert list(f.parameters())[0].shape == ()
    assert torch.isclose(f(x), (alpha + torch.tanh(beta * x)) * gamma * x).all()
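
A natural follow-up test, sketched here but not included in the commit, would confirm that the trainable scalars actually receive gradients (the test name is hypothetical):

def test_APTx_trainable_grads():
    x = torch.rand(10, 5)
    f = APTx(trainable=True)
    f(x).sum().backward()
    # alpha, beta, and gamma should all participate in autograd
    assert all(p.grad is not None for p in f.parameters())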
