
Commit

Merge pull request #195 from ai4co/fjsp
[Minor] Some final adjustments for scheduling models
LTluttmann authored Jun 14, 2024
2 parents 810af10 + 300b0a7 commit 1972b08
Showing 15 changed files with 353 additions and 173 deletions.
1 change: 1 addition & 0 deletions configs/experiment/scheduling/am-pomo.yaml
@@ -14,6 +14,7 @@ model:
_target_: rl4co.models.L2DAttnPolicy
env_name: ${env.name}
scaling_factor: ${scaling_factor}
normalization: "batch"
batch_size: 64
num_starts: 10
num_augment: 0
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/am-ppo.yaml
@@ -43,14 +43,8 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
train_data_size: 2000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 5 additions & 3 deletions configs/experiment/scheduling/base.yaml
@@ -22,17 +22,19 @@ trainer:

seed: 12345678

scaling_factor: 20
scaling_factor: ${env.generator_params.max_processing_time}

model:
_target_: ???
batch_size: ???
train_data_size: 2_000
val_data_size: 1_000
test_data_size: 1_000
test_data_size: 100
optimizer_kwargs:
lr: 1e-4
lr: 2e-4
weight_decay: 1e-6
lr_scheduler: "ExponentialLR"
lr_scheduler_kwargs:
gamma: 0.95
reward_scale: scale
max_grad_norm: 1
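
Note: scaling_factor is no longer a hard-coded constant (20) but a Hydra/OmegaConf interpolation that resolves against the environment's generator parameters. A minimal sketch of how the interpolation resolves, assuming max_processing_time defaults to 20 (illustrative value only):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "env": {"generator_params": {"max_processing_time": 20}},  # assumed default
        "scaling_factor": "${env.generator_params.max_processing_time}",
    }
)
print(cfg.scaling_factor)  # -> 20, resolved from the env generator params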
14 changes: 6 additions & 8 deletions configs/experiment/scheduling/gnn-ppo.yaml
@@ -12,24 +12,22 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: False
normalization: instance
test_decode_type: greedy
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4


trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
1 change: 1 addition & 0 deletions configs/experiment/scheduling/hgnn-pomo.yaml
@@ -18,6 +18,7 @@ model:
stepwise_encoding: False
scaling_factor: ${scaling_factor}
het_emb: True
normalization: instance
num_starts: 10
batch_size: 64
num_augment: 0
16 changes: 4 additions & 12 deletions configs/experiment/scheduling/hgnn-ppo.yaml
@@ -12,24 +12,16 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: True
normalization: instance
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/matnet-ppo.yaml
@@ -36,13 +36,7 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
376 changes: 283 additions & 93 deletions examples/other/2-scheduling.ipynb

Large diffs are not rendered by default.

37 changes: 28 additions & 9 deletions rl4co/envs/scheduling/fjsp/env.py
@@ -79,14 +79,32 @@ def __init__(
else:
generator = FJSPGenerator(**generator_params)
self.generator = generator
self.num_mas = generator.num_mas
self.num_jobs = generator.num_jobs
self.n_ops_max = generator.max_ops_per_job * self.num_jobs
self._num_mas = generator.num_mas
self._num_jobs = generator.num_jobs
self._n_ops_max = generator.max_ops_per_job * self.num_jobs

self.mask_no_ops = mask_no_ops
self.check_mask = check_mask
self.stepwise_reward = stepwise_reward
self._make_spec(self.generator)

@property
def num_mas(self):
return self._num_mas

@property
def num_jobs(self):
return self._num_jobs

@property
def n_ops_max(self):
return self._n_ops_max

def set_instance_params(self, td):
self._num_jobs = td["start_op_per_job"].size(1)
self._num_mas = td["proc_times"].size(1)
self._n_ops_max = td["proc_times"].size(2)

def _decode_graph_structure(self, td: TensorDict):
batch_size = td.batch_size
start_op_per_job = td["start_op_per_job"]
@@ -142,6 +160,8 @@ def _decode_graph_structure(self, td: TensorDict):
return td, n_ops_max

def _reset(self, td: TensorDict = None, batch_size=None) -> TensorDict:
self.set_instance_params(td)

td_reset = td.clone()

td_reset, n_ops_max = self._decode_graph_structure(td_reset)
@@ -333,10 +353,10 @@ def _make_step(self, td: TensorDict) -> TensorDict:
td["ops_sequence_order"] - gather_by_index(td["job_ops_adj"], selected_job, 1)
).clip(0)
# some checks
assert torch.allclose(
td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
(~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
)
# assert torch.allclose(
# td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
# (~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
# )

return td

@@ -483,7 +503,6 @@ def get_num_starts(self, td):
# NOTE in the paper they use N_s = 100
return 100

@staticmethod
def load_data(fpath, batch_size=[]):
def load_data(self, fpath, batch_size=[]):
g = FJSPFileGenerator(fpath)
return g(batch_size=batch_size)
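
Note: together with the read-only properties and set_instance_params above, load_data is now an instance method backed by FJSPFileGenerator, so the environment's dimensions can follow the instances that are actually loaded. A hedged usage sketch (the FJSPEnv class name and generator kwargs are assumed from the surrounding code; the file path is a placeholder):

from rl4co.envs.scheduling.fjsp.env import FJSPEnv  # class name assumed

env = FJSPEnv(generator_params={"num_jobs": 10, "num_machines": 5})  # kwargs assumed

# load_data builds a TensorDict of instances via FJSPFileGenerator ...
td = env.load_data("data/my_benchmark.fjs", batch_size=[1])  # placeholder path

# ... and _reset calls set_instance_params(td), so num_jobs / num_mas / n_ops_max
# now reflect the loaded instances instead of the default generator settings.
td = env.reset(td)
print(env.num_jobs, env.num_mas, env.n_ops_max)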
3 changes: 2 additions & 1 deletion rl4co/envs/scheduling/fjsp/generator.py
@@ -15,7 +15,6 @@


class FJSPGenerator(Generator):

"""Data generator for the Flexible Job-Shop Scheduling Problem (FJSP).
Args:
@@ -209,6 +208,8 @@ def __init__(self, file_path: str, n_ops_max: int = None, **unused_kwargs):
self.num_mas = num_machines
self.num_jobs = num_jobs
self.max_ops_per_job = max_ops_per_job
self.n_ops_max = max_ops_per_job * num_jobs

self.start_idx = 0

def _generate(self, batch_size: List[int]) -> TensorDict:
25 changes: 3 additions & 22 deletions rl4co/models/nn/env_embeddings/init.py
@@ -407,6 +407,7 @@ def _op_features(self, td):
mean_durations = proc_times.sum(1) / (proc_times.gt(0).sum(1) + 1e-9)
feats = [
mean_durations / self.scaling_factor,
# td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["ops_job_map"],
@@ -430,20 +431,10 @@ def forward(self, td):

class FJSPInitEmbedding(JSSPInitEmbedding):
def __init__(self, embed_dim, linear_bias=False, scaling_factor: int = 100):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)
self.edge_embed = nn.Linear(1, embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["op_scheduled"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def forward(self, td: TensorDict):
ops_emb = self._init_ops_embed(td)
ma_emb = self._init_machine_embed(td)
@@ -471,19 +462,9 @@ def __init__(
linear_bias: bool = False,
scaling_factor: int = 1000,
):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["op_scheduled"],
td["num_eligible"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def _init_machine_embed(self, td: TensorDict):
busy_for = (td["busy_until"] - td["time"].unsqueeze(1)) / self.scaling_factor
ma_embeddings = self.init_ma_embed(busy_for.unsqueeze(2))
2 changes: 2 additions & 0 deletions rl4co/models/rl/common/utils.py
@@ -20,6 +20,8 @@ def __init__(self, scale: str = None):
def __call__(self, scores: torch.Tensor):
if self.scale is None:
return scores
elif isinstance(self.scale, int):
return scores / self.scale
# Score scaling
self.update(scores)
tensor_to_kwargs = dict(dtype=scores.dtype, device=scores.device)
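
Note: the scaler now also accepts a plain integer, which simply divides the scores by a constant instead of the running-statistics scaling selected by a string such as "scale". A stand-alone sketch of the resulting branching (not the actual class; the string branch is simplified to a one-off standardisation):

import torch

def scale_rewards(scores: torch.Tensor, scale=None) -> torch.Tensor:
    if scale is None:
        return scores                # no scaling
    if isinstance(scale, int):
        return scores / scale        # new branch in this commit: constant divisor
    # string setting (e.g. "scale"): running mean/std scaling in the real class,
    # simplified here to a single batch standardisation
    return (scores - scores.mean()) / (scores.std() + 1e-8)

print(scale_rewards(torch.tensor([40.0, 80.0]), scale=20))  # tensor([2., 4.])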
21 changes: 12 additions & 9 deletions rl4co/models/rl/ppo/stepwise_ppo.py
@@ -1,13 +1,13 @@
import copy

from typing import Any
from typing import Any, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchrl.data.replay_buffers import (
LazyTensorStorage,
LazyMemmapStorage,
ListStorage,
SamplerWithoutReplacement,
TensorDictReplayBuffer,
@@ -23,13 +23,17 @@

def make_replay_buffer(buffer_size, batch_size, device="cpu"):
if device == "cpu":
storage = LazyTensorStorage(buffer_size, device="cpu")
storage = LazyMemmapStorage(buffer_size, device="cpu")
prefetch = 3
else:
storage = ListStorage(buffer_size)
prefetch = None
return TensorDictReplayBuffer(
storage=storage,
batch_size=batch_size,
sampler=SamplerWithoutReplacement(drop_last=True),
pin_memory=False,
prefetch=prefetch,
)
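
Note: with this change the CPU path memory-maps the buffer to disk (LazyMemmapStorage) and prefetches batches, while other devices fall back to a plain ListStorage without prefetching. A hypothetical usage sketch of the helper (sizes and the stand-in transitions are illustrative only):

from tensordict import TensorDict
import torch

rb = make_replay_buffer(buffer_size=10_000, batch_size=4, device="cpu")

# stand-in for the transition TensorDicts collected in shared_step below
td = TensorDict({"reward": torch.zeros(8, 1)}, batch_size=[8])
rb.extend(td)        # append 8 transitions
batch = rb.sample()  # 4 transitions, drawn without replacement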


@@ -51,7 +55,7 @@ def __init__(
metrics: dict = {
"train": ["loss", "surrogate_loss", "value_loss", "entropy"],
},
reward_scale: str = None,
reward_scale: Union[str, int] = None,
**kwargs,
):
super().__init__(env, policy, metrics=metrics, batch_size=batch_size, **kwargs)
@@ -143,13 +147,12 @@ def shared_step(
while not next_td["done"].all():
with torch.no_grad():
td = self.policy_old.act(next_td, self.env, phase="train")

assert self.env._torchrl_mode, "Use torchrl mode in stepwise PPO"
td = self.env.step(td)
next_td = td.pop("next")
# get next state
next_td = self.env.step(td)["next"]
# get reward of action
reward = self.env.get_reward(next_td, None)
reward = self.scaler(reward)

# add reward to prior state
td.set("reward", reward)
# add tensordict with action, logprobs and reward information to buffer
self.rb.extend(td)
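
Note: put together, the rollout collection now steps the environment directly and reads the successor state from the step output, so the TorchRL-style _torchrl_mode flag (and the corresponding assert) is no longer needed. A condensed sketch of the loop as it reads after this change (the function wrapper is added only for illustration):

import torch

def collect_rollout(policy_old, env, scaler, rb, next_td):
    while not next_td["done"].all():
        with torch.no_grad():
            td = policy_old.act(next_td, env, phase="train")
        # get next state directly from the environment step
        next_td = env.step(td)["next"]
        # reward of the action, scaled and attached to the prior state
        reward = scaler(env.get_reward(next_td, None))
        td.set("reward", reward)
        # store the transition (action, logprobs, reward) in the replay buffer
        rb.extend(td)
    return next_td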
1 change: 0 additions & 1 deletion rl4co/models/zoo/l2d/decoder.py
@@ -178,7 +178,6 @@ def __init__(
actor_hidden_dim: int = 128,
actor_hidden_layers: int = 2,
num_encoder_layers: int = 3,
num_heads: int = 8,
normalization: str = "batch",
het_emb: bool = False,
stepwise: bool = False,
5 changes: 4 additions & 1 deletion rl4co/models/zoo/l2d/policy.py
@@ -35,6 +35,7 @@ def __init__(
env_name: str = "fjsp",
het_emb: bool = True,
scaling_factor: int = 1000,
normalization: str = "batch",
init_embedding: Optional[nn.Module] = None,
stepwise_encoding: bool = False,
tanh_clipping: float = 10,
@@ -77,6 +78,7 @@ def __init__(
het_emb=het_emb,
stepwise=stepwise_encoding,
scaling_factor=scaling_factor,
normalization=normalization,
)

# Pass to constructive policy
@@ -101,6 +103,7 @@ def __init__(
num_heads: int = 8,
num_encoder_layers: int = 4,
scaling_factor: int = 1000,
normalization: str = "batch",
env_name: str = "fjsp",
init_embedding: Optional[nn.Module] = None,
tanh_clipping: float = 10,
@@ -122,7 +125,7 @@ def __init__(
embed_dim=embed_dim,
num_heads=num_heads,
num_layers=num_encoder_layers,
normalization="batch",
normalization=normalization,
feedforward_hidden=embed_dim * 2,
init_embedding=init_embedding,
)
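
Note: normalization is now a constructor argument that is forwarded to the encoder instead of being hard-coded to "batch", which is what lets the configs above select instance normalization. A minimal instantiation sketch (keyword names are taken from this diff; values are illustrative):

from rl4co.models import L2DAttnPolicy  # import path as used in the experiment configs

policy = L2DAttnPolicy(
    env_name="fjsp",
    embed_dim=256,
    num_heads=8,
    num_encoder_layers=4,
    scaling_factor=20,
    normalization="instance",  # previously fixed to "batch" inside the encoder
)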
