
Commit

Merge pull request #195 from ai4co/fjsp
[Minor] Some final adjustments for scheduling models
LTluttmann authored Jun 14, 2024
2 parents 810af10 + 300b0a7 commit 1972b08
Showing 15 changed files with 353 additions and 173 deletions.
1 change: 1 addition & 0 deletions configs/experiment/scheduling/am-pomo.yaml
@@ -14,6 +14,7 @@ model:
_target_: rl4co.models.L2DAttnPolicy
env_name: ${env.name}
scaling_factor: ${scaling_factor}
normalization: "batch"
batch_size: 64
num_starts: 10
num_augment: 0
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/am-ppo.yaml
@@ -43,14 +43,8 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
train_data_size: 2000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 5 additions & 3 deletions configs/experiment/scheduling/base.yaml
@@ -22,17 +22,19 @@ trainer:

seed: 12345678

scaling_factor: 20
scaling_factor: ${env.generator_params.max_processing_time}

model:
_target_: ???
batch_size: ???
train_data_size: 2_000
val_data_size: 1_000
test_data_size: 1_000
test_data_size: 100
optimizer_kwargs:
lr: 1e-4
lr: 2e-4
weight_decay: 1e-6
lr_scheduler: "ExponentialLR"
lr_scheduler_kwargs:
gamma: 0.95
reward_scale: scale
max_grad_norm: 1
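
Note: scaling_factor is no longer a hard-coded constant (20) but a Hydra/OmegaConf interpolation that resolves against the environment's generator parameters. A minimal sketch of how the interpolation resolves, assuming max_processing_time defaults to 20 (illustrative value only):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "env": {"generator_params": {"max_processing_time": 20}},  # assumed default
        "scaling_factor": "${env.generator_params.max_processing_time}",
    }
)
print(cfg.scaling_factor)  # -> 20, resolved from the env generator params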
14 changes: 6 additions & 8 deletions configs/experiment/scheduling/gnn-ppo.yaml
@@ -12,24 +12,22 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: False
normalization: instance
test_decode_type: greedy
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4


trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
1 change: 1 addition & 0 deletions configs/experiment/scheduling/hgnn-pomo.yaml
@@ -18,6 +18,7 @@ model:
stepwise_encoding: False
scaling_factor: ${scaling_factor}
het_emb: True
normalization: instance
num_starts: 10
batch_size: 64
num_augment: 0
16 changes: 4 additions & 12 deletions configs/experiment/scheduling/hgnn-ppo.yaml
@@ -12,24 +12,16 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: True
normalization: instance
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/matnet-ppo.yaml
@@ -36,13 +36,7 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
376 changes: 283 additions & 93 deletions examples/other/2-scheduling.ipynb

Large diffs are not rendered by default.

37 changes: 28 additions & 9 deletions rl4co/envs/scheduling/fjsp/env.py
@@ -79,14 +79,32 @@ def __init__(
else:
generator = FJSPGenerator(**generator_params)
self.generator = generator
self.num_mas = generator.num_mas
self.num_jobs = generator.num_jobs
self.n_ops_max = generator.max_ops_per_job * self.num_jobs
self._num_mas = generator.num_mas
self._num_jobs = generator.num_jobs
self._n_ops_max = generator.max_ops_per_job * self.num_jobs

self.mask_no_ops = mask_no_ops
self.check_mask = check_mask
self.stepwise_reward = stepwise_reward
self._make_spec(self.generator)

@property
def num_mas(self):
return self._num_mas

@property
def num_jobs(self):
return self._num_jobs

@property
def n_ops_max(self):
return self._n_ops_max

def set_instance_params(self, td):
self._num_jobs = td["start_op_per_job"].size(1)
self._num_mas = td["proc_times"].size(1)
self._n_ops_max = td["proc_times"].size(2)

def _decode_graph_structure(self, td: TensorDict):
batch_size = td.batch_size
start_op_per_job = td["start_op_per_job"]
@@ -142,6 +160,8 @@ def _decode_graph_structure(self, td: TensorDict):
return td, n_ops_max

def _reset(self, td: TensorDict = None, batch_size=None) -> TensorDict:
self.set_instance_params(td)

td_reset = td.clone()

td_reset, n_ops_max = self._decode_graph_structure(td_reset)
@@ -333,10 +353,10 @@ def _make_step(self, td: TensorDict) -> TensorDict:
td["ops_sequence_order"] - gather_by_index(td["job_ops_adj"], selected_job, 1)
).clip(0)
# some checks
assert torch.allclose(
td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
(~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
)
# assert torch.allclose(
# td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
# (~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
# )

return td

@@ -483,7 +503,6 @@ def get_num_starts(self, td):
# NOTE in the paper they use N_s = 100
return 100

@staticmethod
def load_data(fpath, batch_size=[]):
def load_data(self, fpath, batch_size=[]):
g = FJSPFileGenerator(fpath)
return g(batch_size=batch_size)
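
Note: together with the read-only properties and set_instance_params above, load_data is now an instance method backed by FJSPFileGenerator, so the environment's dimensions can follow the instances that are actually loaded. A hedged usage sketch (the FJSPEnv class name and generator kwargs are assumed from the surrounding code; the file path is a placeholder):

from rl4co.envs.scheduling.fjsp.env import FJSPEnv  # class name assumed

env = FJSPEnv(generator_params={"num_jobs": 10, "num_machines": 5})  # kwargs assumed

# load_data builds a TensorDict of instances via FJSPFileGenerator ...
td = env.load_data("data/my_benchmark.fjs", batch_size=[1])  # placeholder path

# ... and _reset calls set_instance_params(td), so num_jobs / num_mas / n_ops_max
# now reflect the loaded instances instead of the default generator settings.
td = env.reset(td)
print(env.num_jobs, env.num_mas, env.n_ops_max)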
3 changes: 2 additions & 1 deletion rl4co/envs/scheduling/fjsp/generator.py
@@ -15,7 +15,6 @@


class FJSPGenerator(Generator):

"""Data generator for the Flexible Job-Shop Scheduling Problem (FJSP).
Args:
@@ -209,6 +208,8 @@ def __init__(self, file_path: str, n_ops_max: int = None, **unused_kwargs):
self.num_mas = num_machines
self.num_jobs = num_jobs
self.max_ops_per_job = max_ops_per_job
self.n_ops_max = max_ops_per_job * num_jobs

self.start_idx = 0

def _generate(self, batch_size: List[int]) -> TensorDict:
25 changes: 3 additions & 22 deletions rl4co/models/nn/env_embeddings/init.py
@@ -407,6 +407,7 @@ def _op_features(self, td):
mean_durations = proc_times.sum(1) / (proc_times.gt(0).sum(1) + 1e-9)
feats = [
mean_durations / self.scaling_factor,
# td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["ops_job_map"],
@@ -430,20 +431,10 @@ def forward(self, td):

class FJSPInitEmbedding(JSSPInitEmbedding):
def __init__(self, embed_dim, linear_bias=False, scaling_factor: int = 100):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)
self.edge_embed = nn.Linear(1, embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["op_scheduled"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def forward(self, td: TensorDict):
ops_emb = self._init_ops_embed(td)
ma_emb = self._init_machine_embed(td)
@@ -471,19 +462,9 @@ def __init__(
linear_bias: bool = False,
scaling_factor: int = 1000,
):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["op_scheduled"],
td["num_eligible"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def _init_machine_embed(self, td: TensorDict):
busy_for = (td["busy_until"] - td["time"].unsqueeze(1)) / self.scaling_factor
ma_embeddings = self.init_ma_embed(busy_for.unsqueeze(2))
2 changes: 2 additions & 0 deletions rl4co/models/rl/common/utils.py
@@ -20,6 +20,8 @@ def __init__(self, scale: str = None):
def __call__(self, scores: torch.Tensor):
if self.scale is None:
return scores
elif isinstance(self.scale, int):
return scores / self.scale
# Score scaling
self.update(scores)
tensor_to_kwargs = dict(dtype=scores.dtype, device=scores.device)
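
Note: the scaler now also accepts a plain integer, which simply divides the scores by a constant instead of the running-statistics scaling selected by a string such as "scale". A stand-alone sketch of the resulting branching (not the actual class; the string branch is simplified to a one-off standardisation):

import torch

def scale_rewards(scores: torch.Tensor, scale=None) -> torch.Tensor:
    if scale is None:
        return scores                # no scaling
    if isinstance(scale, int):
        return scores / scale        # new branch in this commit: constant divisor
    # string setting (e.g. "scale"): running mean/std scaling in the real class,
    # simplified here to a single batch standardisation
    return (scores - scores.mean()) / (scores.std() + 1e-8)

print(scale_rewards(torch.tensor([40.0, 80.0]), scale=20))  # tensor([2., 4.])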
21 changes: 12 additions & 9 deletions rl4co/models/rl/ppo/stepwise_ppo.py
@@ -1,13 +1,13 @@
import copy

from typing import Any
from typing import Any, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchrl.data.replay_buffers import (
LazyTensorStorage,
LazyMemmapStorage,
ListStorage,
SamplerWithoutReplacement,
TensorDictReplayBuffer,
@@ -23,13 +23,17 @@

def make_replay_buffer(buffer_size, batch_size, device="cpu"):
if device == "cpu":
storage = LazyTensorStorage(buffer_size, device="cpu")
storage = LazyMemmapStorage(buffer_size, device="cpu")
prefetch = 3
else:
storage = ListStorage(buffer_size)
prefetch = None
return TensorDictReplayBuffer(
storage=storage,
batch_size=batch_size,
sampler=SamplerWithoutReplacement(drop_last=True),
pin_memory=False,
prefetch=prefetch,
)
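
Note: with this change the CPU path memory-maps the buffer to disk (LazyMemmapStorage) and prefetches batches, while other devices fall back to a plain ListStorage without prefetching. A hypothetical usage sketch of the helper (sizes and the stand-in transitions are illustrative only):

from tensordict import TensorDict
import torch

rb = make_replay_buffer(buffer_size=10_000, batch_size=4, device="cpu")

# stand-in for the transition TensorDicts collected in shared_step below
td = TensorDict({"reward": torch.zeros(8, 1)}, batch_size=[8])
rb.extend(td)        # append 8 transitions
batch = rb.sample()  # 4 transitions, drawn without replacement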


@@ -51,7 +55,7 @@ def __init__(
metrics: dict = {
"train": ["loss", "surrogate_loss", "value_loss", "entropy"],
},
reward_scale: str = None,
reward_scale: Union[str, int] = None,
**kwargs,
):
super().__init__(env, policy, metrics=metrics, batch_size=batch_size, **kwargs)
@@ -143,13 +147,12 @@ def shared_step(
while not next_td["done"].all():
with torch.no_grad():
td = self.policy_old.act(next_td, self.env, phase="train")

assert self.env._torchrl_mode, "Use torchrl mode in stepwise PPO"
td = self.env.step(td)
next_td = td.pop("next")
# get next state
next_td = self.env.step(td)["next"]
# get reward of action
reward = self.env.get_reward(next_td, None)
reward = self.scaler(reward)

# add reward to prior state
td.set("reward", reward)
# add tensordict with action, logprobs and reward information to buffer
self.rb.extend(td)
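
Note: put together, the rollout collection now steps the environment directly and reads the successor state from the step output, so the TorchRL-style _torchrl_mode flag (and the corresponding assert) is no longer needed. A condensed sketch of the loop as it reads after this change (the function wrapper is added only for illustration):

import torch

def collect_rollout(policy_old, env, scaler, rb, next_td):
    while not next_td["done"].all():
        with torch.no_grad():
            td = policy_old.act(next_td, env, phase="train")
        # get next state directly from the environment step
        next_td = env.step(td)["next"]
        # reward of the action, scaled and attached to the prior state
        reward = scaler(env.get_reward(next_td, None))
        td.set("reward", reward)
        # store the transition (action, logprobs, reward) in the replay buffer
        rb.extend(td)
    return next_td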
1 change: 0 additions & 1 deletion rl4co/models/zoo/l2d/decoder.py
@@ -178,7 +178,6 @@ def __init__(
actor_hidden_dim: int = 128,
actor_hidden_layers: int = 2,
num_encoder_layers: int = 3,
num_heads: int = 8,
normalization: str = "batch",
het_emb: bool = False,
stepwise: bool = False,
5 changes: 4 additions & 1 deletion rl4co/models/zoo/l2d/policy.py
@@ -35,6 +35,7 @@ def __init__(
env_name: str = "fjsp",
het_emb: bool = True,
scaling_factor: int = 1000,
normalization: str = "batch",
init_embedding: Optional[nn.Module] = None,
stepwise_encoding: bool = False,
tanh_clipping: float = 10,
@@ -77,6 +78,7 @@ def __init__(
het_emb=het_emb,
stepwise=stepwise_encoding,
scaling_factor=scaling_factor,
normalization=normalization,
)

# Pass to constructive policy
@@ -101,6 +103,7 @@ def __init__(
num_heads: int = 8,
num_encoder_layers: int = 4,
scaling_factor: int = 1000,
normalization: str = "batch",
env_name: str = "fjsp",
init_embedding: Optional[nn.Module] = None,
tanh_clipping: float = 10,
@@ -122,7 +125,7 @@ def __init__(
embed_dim=embed_dim,
num_heads=num_heads,
num_layers=num_encoder_layers,
normalization="batch",
normalization=normalization,
feedforward_hidden=embed_dim * 2,
init_embedding=init_embedding,
)
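
Note: normalization is now a constructor argument that is forwarded to the encoder instead of being hard-coded to "batch", which is what lets the configs above select instance normalization. A minimal instantiation sketch (keyword names are taken from this diff; values are illustrative):

from rl4co.models import L2DAttnPolicy  # import path as used in the experiment configs

policy = L2DAttnPolicy(
    env_name="fjsp",
    embed_dim=256,
    num_heads=8,
    num_encoder_layers=4,
    scaling_factor=20,
    normalization="instance",  # previously fixed to "batch" inside the encoder
)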
