Merge branch 'main' into go_splits
a-r-j authored Feb 9, 2024
2 parents fc3c999 + 0177167 commit 10e9a28
Showing 7 changed files with 98 additions and 7 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,8 +1,17 @@
### 0.2.6 (UNRELEASED)

### Datasets
* Add stage-based conditions to `setup` in `ProteinDataModule` [#72](https://github.com/a-r-j/ProteinWorkshop/pull/72)
* Improves support for datamodules with multiple test sets, generalising this to GO and FOLD. Also adds multiple sequence identity-based splits for GO. [#72](https://github.com/a-r-j/ProteinWorkshop/pull/72)

### Models

* Adds missing `pos` attribute to GearNet `required_batch_attributes` (fixes [#73](https://github.com/a-r-j/ProteinWorkshop/issues/73)) [#74](https://github.com/a-r-j/ProteinWorkshop/pull/74)

### Framework

* Adds `InverseSquareRoot` LR scheduler [#71](https://github.com/a-r-j/ProteinWorkshop/pull/71)


### 0.2.5 (28/12/2023)

2 changes: 1 addition & 1 deletion proteinworkshop/config/env/default.yaml
@@ -22,7 +22,7 @@ env:
# path to working directory
work_dir: ${hydra:runtime.cwd}
# path to logging directory
- log_dir: ${env.paths.root_dir}/logs/
+ log_dir: ${oc.env:RUNS_PATH}
runs: ${oc.env:RUNS_PATH}
run_dir: ${env.paths.runs}/${name}/${env.init_time}

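For context on this change: OmegaConf's `oc.env` resolver reads `RUNS_PATH` from the environment when the config is resolved, so the variable must be set before the config is composed. A minimal sketch of the behaviour (the path and the trimmed-down config below are illustrative, not the repository's full `env/default.yaml`):

```python
import os

from omegaconf import OmegaConf

# Illustrative only: RUNS_PATH would normally be set in the shell or a .env file.
os.environ["RUNS_PATH"] = "/tmp/proteinworkshop_runs"

cfg = OmegaConf.create(
    {"env": {"paths": {"log_dir": "${oc.env:RUNS_PATH}", "runs": "${oc.env:RUNS_PATH}"}}}
)

# Interpolations resolve lazily on access; an unset RUNS_PATH would raise here.
print(cfg.env.paths.log_dir)  # -> /tmp/proteinworkshop_runs
```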
32 changes: 32 additions & 0 deletions proteinworkshop/config/scheduler/inverse_square_root.yaml
@@ -0,0 +1,32 @@
scheduler:
_target_: proteinworkshop.utils.schedulers.InverseSquareRootLR
_partial_: true
warmup_steps: 1
last_epoch: -1

# The unit of the scheduler's step size: either 'epoch' or 'step'.
# 'epoch' updates the scheduler on epoch end, whereas 'step'
# updates it after an optimizer update.

# It is recommended to call step() for InverseSquareRootLR after each
# iteration, as calling it after each epoch would keep the learning rate
# at its starting value (0 during warmup) for the entire first epoch.
interval: "step"

# How many epochs/steps should pass between calls to
# `scheduler.step()`. 1 corresponds to updating the learning
# rate after every epoch/step.
frequency: 1

# Metric to monitor for schedulers like `ReduceLROnPlateau`
monitor: "val/loss/total"

# If set to `True`, will enforce that the value specified in 'monitor'
# is available when the scheduler is updated, thus stopping
# training if not found. If set to `False`, it will only produce a warning.
strict: True

# If using the `LearningRateMonitor` callback to monitor the
# learning rate progress, this keyword can be used to specify
# a custom logged name
name: learning_rate
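For reference, the keys outside the `scheduler` block (`interval`, `frequency`, `monitor`, `strict`, `name`) mirror the fields of PyTorch Lightning's `lr_scheduler` configuration dict. A minimal sketch of how a LightningModule could wire this config together, assuming the file is composed under a `scheduler` config group (the module, optimiser, and `self.cfg` attribute are illustrative, not the framework's exact code):

```python
import torch
from hydra.utils import instantiate
from lightning.pytorch import LightningModule


class LitModel(LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg  # illustrative: the composed Hydra config
        self.layer = torch.nn.Linear(8, 1)  # placeholder parameters

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        # `_partial_: true` makes instantiate return a partial that is then
        # completed with the optimizer.
        scheduler = instantiate(self.cfg.scheduler.scheduler)(optimizer)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": self.cfg.scheduler.interval,    # "step"
                "frequency": self.cfg.scheduler.frequency,  # 1
                "monitor": self.cfg.scheduler.monitor,      # "val/loss/total"
                "strict": self.cfg.scheduler.strict,
                "name": self.cfg.scheduler.name,
            },
        }
```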
7 changes: 2 additions & 5 deletions proteinworkshop/config/sweeps/ec_reaction.yaml
@@ -13,10 +13,7 @@ parameters:
values: [ec_reaction]

encoder:
- values: [schnet, gear_net_edge, egnn, gcpnet, tfn, mace, esm]
-
- optimiser.optimizer.lr:
- values: [0.0001, 0.001]
+ values: [schnet, gear_net_edge, egnn, gcpnet, tfn, mace]

features:
values: [ca_base, ca_seq, ca_angles, ca_bb, ca_sc]
@@ -42,7 +39,7 @@ parameters:
optimiser.optimizer.lr:
value: ${hparams.hparams.lr}

- decoder.node_label.dropout:
+ decoder.graph_label.dropout:
value: ${hparams.hparams.decoder_dropout}

name:
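The sweep file follows the Weights & Biases sweep grammar (`parameters` entries with `values`/`value`). Assuming it is consumed by W&B directly, and that the `${...}` interpolations are resolved before launch, a sweep could be registered programmatically as sketched below; the project name is a placeholder:

```python
import wandb
import yaml

# Load the sweep definition edited above.
with open("proteinworkshop/config/sweeps/ec_reaction.yaml") as f:
    sweep_config = yaml.safe_load(f)

# Register the sweep; "proteinworkshop" is a placeholder project name.
sweep_id = wandb.sweep(sweep=sweep_config, project="proteinworkshop")

# An agent would then pull parameter combinations and run trials:
# wandb.agent(sweep_id, count=1)
```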
1 change: 1 addition & 0 deletions proteinworkshop/models/graph_encoders/gear_net.py
@@ -125,6 +125,7 @@ def required_batch_attributes(self) -> Set[str]:
"""
return {
"x",
"pos",
"edge_index",
"edge_type",
"edge_attr",
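For context, `required_batch_attributes` declares which fields a featurised batch must carry for GearNet's forward pass; the missing `pos` entry is what caused #73. A minimal sketch of the kind of check such a property enables (this validation helper is hypothetical, not the repository's actual code):

```python
from typing import Set

from torch_geometric.data import Batch


def validate_batch(batch: Batch, required: Set[str]) -> None:
    """Raise if the batch lacks any attribute the encoder declares as required.

    Hypothetical helper for illustration; the real pipeline may check this differently.
    """
    missing = {attr for attr in required if getattr(batch, attr, None) is None}
    if missing:
        raise AttributeError(
            f"Batch is missing required attributes: {sorted(missing)}. "
            "Check that the featuriser provides them."
        )


# Usage sketch (encoder construction is hypothetical):
# validate_batch(batch, encoder.required_batch_attributes)
```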
2 changes: 1 addition & 1 deletion proteinworkshop/scripts/download_processed_data.py
@@ -91,7 +91,7 @@ def download_processed_data(dataset_name: str, data_dir: Optional[str] = None):

if not os.path.exists(data_dir):
logger.info(f"Creating data directory at {data_dir}")
- os.makedirs(parents=True, exist_ok=True)
+ data_dir.mkdir(parents=True, exist_ok=True)

fname = dataset_fname_map[dataset_name]
save_file = data_dir / f"{fname}.tar.gz"
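The replaced call would fail at runtime: `os.makedirs` requires a path argument and has no `parents` keyword, while `pathlib.Path.mkdir` accepts both `parents` and `exist_ok`. A minimal sketch of the fixed behaviour (the path is illustrative):

```python
from pathlib import Path

data_dir = Path("/tmp/proteinworkshop_data/ec_reaction")  # illustrative path

# Create the directory and any missing parents; do nothing if it already exists.
data_dir.mkdir(parents=True, exist_ok=True)
```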
52 changes: 52 additions & 0 deletions proteinworkshop/utils/schedulers.py
@@ -0,0 +1,52 @@
"""Implement custom learning rate schedulers."""

import warnings

from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer


class InverseSquareRootLR(_LRScheduler):
"""Implement the InverseSquareRootLR learning rate scheduler.
:param optimizer: The optimizer.
:type optimizer: Optimizer
:param warmup_steps: The number of warmup steps.
:type warmup_steps: int
:param last_epoch: The index of the last epoch. If -1, the scheduler will
start at the initial learning rate.
:type last_epoch: int
"""

def __init__(
self, optimizer: Optimizer, warmup_steps: int, last_epoch: int = -1
):
if warmup_steps <= 0:
raise ValueError("warmup_steps must be > 0")
self._warmup_steps = warmup_steps
self._lr_steps = [
param_group["lr"] / warmup_steps
for param_group in optimizer.param_groups
]
self._decay_factors = [
param_group["lr"] * warmup_steps**0.5
for param_group in optimizer.param_groups
]

super().__init__(optimizer, last_epoch)

def get_lr(self):
if not self._get_lr_called_within_step:
warnings.warn(
"To get the last learning rate computed by the scheduler, "
"please use `get_last_lr()`.",
UserWarning,
)

if self.last_epoch < self._warmup_steps:
return [self.last_epoch * lr_step for lr_step in self._lr_steps]
else:
return [
decay_factor * self.last_epoch**-0.5
for decay_factor in self._decay_factors
]
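A quick usage sketch of `InverseSquareRootLR` (the model and optimiser below are placeholders): the learning rate ramps linearly from 0 to the base LR over `warmup_steps`, then decays as `base_lr * sqrt(warmup_steps / step)`, so `step()` is intended to be called once per optimizer update, matching `interval: "step"` in the new config:

```python
import torch

from proteinworkshop.utils.schedulers import InverseSquareRootLR

model = torch.nn.Linear(16, 1)  # placeholder model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = InverseSquareRootLR(optimizer, warmup_steps=1000)

for step in range(5000):
    # ... forward, backward and optimizer.step() would go here ...
    optimizer.step()
    scheduler.step()  # stepped per optimizer update

print(scheduler.get_last_lr())  # ~1e-3 * sqrt(1000 / 5000) ≈ 4.5e-4
```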
