diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8a23bda --- /dev/null +++ b/.gitignore @@ -0,0 +1,41 @@ +# Byte-compiled +__pycache__/ +*.egg-info/ + +# Unit test / coverage reports +.pytest_cache/ + +# Vim +*.swp + +# Jupyter Notebook +.ipynb_checkpoints + +# Environments +venv/ + +# Ignore media files +*.mp4 + +# Ignore images +*.png + + +# Matplotlib style +*.mplstyle + +# ctags +tags + +# Ignore intermediate files +**/results.pkl +**/data.json + +# Ignore cache folders +.cache/ + +# Ignore not optimal checkpoints and logs +logs/ +models/**/pre/ +models/**/post/ +models/**/events** diff --git a/README.md b/README.md new file mode 100644 index 0000000..8ae9e5e --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ +# Chemotaxis & Reinforcement learning: spatial-temporal information optimal integration + +This is the repository for the project of studying chemotaxis optimal strategies +when the agent uses both temporal and spatial information. + +## File Structure + +The project is organized as follows: + +``` +├── README.md +├── chemoxrl/ # Folder containing the code related to training the agents and env. +├── chemoxrl_aux/ # Utility functions used in analysis but not needed for training +├── models/ # Trained agent weights. +├── pyproject.toml # Package setup for both chemoxrl and chemoxrl[aux] +├── requirements.txt # Project dependencies +└── train.py # Script to train the agent. +``` + +## Setup + +Create a virtual environment and install the required packages (Note: we use `jaxlib` for GPU) + +``` +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +Likewise, we recommend installing this library locally + +``` +pip install -e .
@dataclass
class EnvParams:
    """Physical and simulation parameters of the single-cell chemotaxis environment.

    Declared with ``flax.struct.dataclass``; ``Cell.reset`` and ``Cell.step``
    receive an instance as a static (hashed) jit argument.
    """

    max_steps_in_episode: int = 256  # Episode is cut off once the step counter reaches this value.
    radius: float = 4  # Radius of the cell (μm).
    n_receptors: int = 8  # Number of discrete receptors along surface.
    speed: float = 4  # Swimming speed (μm/s).
    rotational_diffusion: float = 0.025  # Rotational diffusion constant (1/s).
    decay_rate: float = 0.01  # Average decay rate for the simulation.
    diffusion_coeff: float = 100  # Average diffusion coefficient for the simulation.
    dt: float = 0.1  # Time step of the simulation (presumably seconds, given the units above).
    C_min: int = 4  # Minimum value of the exponent for the concentration (passed to jnp.logspace).
    C_max: int = 5  # Maximum value of the exponent for the concentration (passed to jnp.logspace).
class Cell:
    """Single swimming cell that senses a radially decaying chemical field.

    The source sits at the origin. The cell carries ``n_receptors`` sensors on
    its surface, turns by ``action[0] * pi`` (plus rotational noise) each step
    and swims at constant speed. The class is stateless: all episode state
    lives in a ``CellState`` that is threaded through ``reset``/``step``.
    """

    @partial(jax.jit, static_argnums=(0, 2))
    def reset(self, rng, env_params):
        """Start a new episode.

        Args:
            rng: JAX PRNG key.
            env_params: ``EnvParams`` instance (static under jit).

        Returns:
            ``(obs, state)`` where ``obs`` holds the ``n_receptors`` sensor
            readings followed by a zero in the "previous action" slot.
        """
        # One independent key per random draw. Previously the key used to
        # sample N was also handed to `_init_cell`, i.e. it was reused.
        rng_media, rng_n, rng_cell = jax.random.split(rng, 3)
        Cs = jnp.logspace(env_params.C_min, env_params.C_max, num=100, dtype=jnp.int32)
        N = jax.random.choice(rng_n, Cs, shape=())

        x, theta = self._init_cell(rng_cell, env_params)
        d_init = jnp.hypot(x[0], x[1])
        state = CellState(step=0, x=x, theta=theta, cum_reward=0.0, N=N, d_init=d_init)

        obs = self._get_obs(rng_media, state, env_params)
        # No action has been taken yet, so the action slot is zero.
        obs = jnp.concatenate((obs, jnp.zeros(1)))
        return obs, state

    @partial(jax.jit, static_argnums=(0, 4))
    def step(self, rng, state, action, env_params):
        """Advance the environment by one step.

        Returns:
            ``(obs, state, reward, done, info)``. When ``done`` is true the
            returned observation and every leaf of the returned state are
            zeroed; they stay meaningless until ``reset`` is called again.
            ``info`` is always an empty dict.
        """
        obs_st, state_st, reward, done = self._step(rng, state, action, env_params)
        # Zeroed copies that replace the real values once the episode is over.
        state_re = jax.tree_util.tree_map(lambda x: x*0, state_st)
        state = jax.tree_util.tree_map(lambda x, y: jax.lax.select(done, x, y), state_re, state_st)
        obs_re = jnp.zeros(self.observation_space(env_params).shape)
        obs = jax.lax.select(done, obs_re, obs_st)
        return obs, state, reward, done, {}

    def _init_cell(self, rng, env_params):
        """Sample the initial position and heading of the cell."""
        rng_theta, rng_r, rng_phi = jax.random.split(rng, num=3)
        theta = jax.random.uniform(rng_theta) * 2 * jnp.pi
        # Radius chosen so that 1 - exp(-gradient * r) is uniform in [0.3, 0.5].
        percentile = jax.random.uniform(rng_r, minval=0.3, maxval=0.5)
        r = -jnp.log(1 - percentile) / gradient(env_params)
        phi = jax.random.uniform(rng_phi) * 2 * jnp.pi
        x = polar_to_cartesian(r, phi)
        return x, theta

    def _step(self, rng, state, action, env_params):
        """Un-masked transition: rotate, move, observe and compute the reward."""
        rng_o, rng_a = jax.random.split(rng, 2)
        # Rotational noise with variance 2 * rotational_diffusion * dt.
        noise = jax.random.normal(rng_a) * jnp.sqrt(2 * env_params.rotational_diffusion * env_params.dt)
        dtheta = action[0] * jnp.pi + noise
        theta = (state.theta + dtheta) % (2 * jnp.pi)
        v = polar_to_cartesian(env_params.speed, theta)
        x = state.x + v * env_params.dt
        state = state.replace(x=x, theta=theta)

        obs = self._get_obs(rng_o, state, env_params)
        state = state.replace(step=state.step + 1)

        final_reward = self._get_reward(state, env_params)
        # Target reached once the cell is within -log(0.9) / gradient of the origin.
        has_reached = jnp.hypot(x[0], x[1]) <= (-jnp.log(0.9) / gradient(env_params))
        done = (state.step >= env_params.max_steps_in_episode) | has_reached
        # Sparse reward: only paid on the step where `done` is true.
        reward = jax.lax.select(done, final_reward, 0.0)
        state = state.replace(cum_reward=state.cum_reward + reward)

        # The action just taken is appended to the sensor readings.
        obs = jnp.concatenate((obs, action))
        return obs, state, reward, done

    def _get_obs(self, rng, state, env_params):
        """Return ``log(count + 1)`` of Poisson-sampled hits for every receptor."""
        M = env_params.n_receptors
        a = env_params.radius

        # Receptors are evenly spaced on the cell surface, rotated by the heading.
        angles = jnp.arange(M) * ((2 * jnp.pi) / M) + state.theta
        receptors = state.x + a * jnp.array([jnp.cos(angles), jnp.sin(angles)]).T
        sensor_area = (a * jnp.sin(jnp.pi / M)) ** 2 * jnp.pi
        B = (state.N / (2 * jnp.pi)) * sensor_area  # integration constant
        rate = gradient(env_params)

        @jax.vmap
        def detect(rng, xi):
            d = jnp.hypot(xi[0], xi[1])
            c = rate * jnp.exp(-rate * d)
            M_avg = B * c
            # NOTE: this `M` is the sampled count and shadows the receptor
            # count of the enclosing scope inside `detect` only.
            M = jax.random.poisson(rng, M_avg)
            m = jnp.log(M + 1)
            return m

        m = detect(jax.random.split(rng, M), receptors)
        return m

    def _get_reward(self, state, env_params):
        """Terminal reward: distance term in [-1, 0] plus time term in [0, 1]."""
        max_steps = env_params.max_steps_in_episode
        d = jnp.hypot(state.x[0], state.x[1])
        d_min = -jnp.log(0.9) / gradient(env_params)
        distance_reward = jnp.clip((d_min - d) / (state.d_init - d_min), -1.0, 0.0)
        time_reward = jnp.clip((max_steps - state.step) / max_steps, 0.0, 1.0)
        return distance_reward + time_reward

    @property
    def num_actions(self):
        """Dimension of the action vector (a single turning command)."""
        return 1

    def observation_space(self, env_params):
        """Return an array whose shape is that of one observation."""
        return jnp.empty(shape=(env_params.n_receptors + 1,))
ExperimentConfig(NamedTuple): + n_train_envs: int = 256 # Number of train environments in parallel. + total_steps: int = int(10e6) # Number of train steps. + n_steps: int = 301 # Number of steps per environment before training. + max_grad_norm: float = 0.5 # Global norm to clip gradients by. + eval_interval: int = 1_000_000 # Number of steps until an evaluation is done. + n_eval_envs: int = 2048 # Number of parallel envs to evaluate on. + learning_rate: float = 3e-4 # Learning rate. + n_epochs: int = 4 # Number of epochs at each training step. + n_minibatch: int = 4 # Number of minibatches to split the buffer into. + clip_eps: float = 0.2 # Surrogate clipping loss. + entropy_coeff: float = 0.00 # Entropy loss coefficient + critic_coeff: float = 0.5 # Value loss coefficient + discount: float = 0.99 # Discount factor for the GAE calculation. + gae_lambda: float = 0.95 # "GAE lambda" + logdir: str = "./logs/" # Path to store the logs. + load: str = "" # Load a previous policy before training. + seed: int = 118 # Random state seed. + hidden_cells: int = 25 # Neurons in the recurrent network. 
class MemoryCell(nn.Module):
    """Parameter-free memory made of three cascaded exponential moving averages.

    Each input channel (plus one extra channel holding the mean of the inputs)
    keeps three traces. ``__call__`` is wrapped in ``nn.vmap`` and therefore
    written for a single, un-batched sample.
    """

    decay: float = 0.1  # Mixing weight k of the new value in every moving average.

    @nn.vmap
    @nn.compact
    def __call__(self, h, x):
        # Append the mean of the inputs as an extra channel.
        x = jnp.concatenate((x, x.mean(keepdims=True)))
        # One row per channel, three traces per row; h.size must therefore be
        # 3 * (number of inputs + 1) for the updates below to line up.
        h = h.reshape(-1, 3)

        k = self.decay
        h_new = jnp.empty_like(h)
        # Cascade: trace 0 follows the input, trace 1 follows the *previous*
        # trace 0, trace 2 follows the *previous* trace 1.
        h_new = h_new.at[:, 0].set(k * x + (1 - k) * h[:, 0])
        h_new = h_new.at[:, 1].set(k * h[:, 0] + (1 - k) * h[:, 1])
        h_new = h_new.at[:, 2].set(k * h[:, 1] + (1 - k) * h[:, 2])

        # Normalize
        # Per-input channels are expressed relative to the mean channel ...
        x_normalized = x.at[:-1].add(-x[-1])
        h_new_normalized = h_new.at[:-1, :].add(-h_new[-1, :])
        # ... while each trace of the mean channel is expressed relative to the
        # signal one stage earlier in the cascade (input, trace 0, trace 1).
        h_new_normalized = h_new_normalized.at[-1, 0].add(-x[-1])
        h_new_normalized = h_new_normalized.at[-1, 1].add(-h_new[-1, 0])
        h_new_normalized = h_new_normalized.at[-1, 2].add(-h_new[-1, 1])

        h_new = h_new.flatten()
        h_new_normalized = h_new_normalized.flatten()
        x_input = jnp.concatenate((x_normalized, h_new_normalized))
        # Carry is the raw traces; the output pairs them with the normalised features.
        return h_new, (h_new, x_input)
def loss_fn(params, apply_fn, minibatch, eps=0.2, entropy_coeff=0.001, vf_coeff=0.5):
    """Masked PPO loss (clipped surrogate + value MSE - entropy bonus).

    Args:
        params: Model parameters passed to ``apply_fn``.
        apply_fn: Callable ``(params, states, h0) -> (values, (mu, scale), hs)``.
        minibatch: Tuple ``(s, a, logp_old, target, A, h, mask)``; only the
            first hidden state ``h[0]`` is fed to the model.
        eps: Clipping range of the probability ratio.
        entropy_coeff: Weight of the entropy bonus.
        vf_coeff: Weight of the value loss.

    Returns:
        ``(loss, aux)`` with
        ``aux = (loss, policy_loss, value_loss, entropy_loss, approx_kl_div)``.
    """
    s, a, logp_old, target, A, h, mask = minibatch
    values, (mu, scale), _ = apply_fn(params, s, h[0])
    values = values[..., 0]
    # Guard against a fully masked minibatch: every masked sum below is then 0,
    # so dividing by 1 yields 0 instead of NaN.
    num_entries = jnp.maximum(jnp.sum(mask), 1)

    # Compute Clipped Surrogate Loss on the policy.
    # Normalizing the advantage seems to help.
    Am = A * mask
    Amean = jnp.sum(Am) / num_entries
    # E[A^2] - E[A]^2 can round to a tiny negative number when the advantages
    # are (almost) constant; clamp so the square root never produces NaN.
    Avar = jnp.maximum(jnp.sum(Am**2) / num_entries - Amean**2, 0.0)
    Astd = jnp.sqrt(Avar)
    A = (A - Amean) / (Astd + 1e-8)
    logp = jax.scipy.stats.norm.logpdf(a, loc=mu, scale=scale).sum(-1)
    ratio = jnp.exp(logp - logp_old)
    policy_loss = jnp.minimum(ratio * A, jnp.clip(ratio, 1.0 - eps, 1.0 + eps) * A)
    policy_loss = -jnp.sum(mask * policy_loss) / num_entries

    # Critic MSE loss.
    value_loss = jnp.sum(mask * (values - target) ** 2) / num_entries

    # Differential entropy of a normal distribution: H = 1/2 * ln(2π e σ^2),
    # summed over the action dimensions.
    entropy = jnp.sum(0.5 + 0.5 * jnp.log(2 * jnp.pi) + jnp.log(scale), axis=-1)
    entropy_loss = jnp.sum(mask * entropy) / num_entries

    # Compute KL divergence (approximation)
    approx_kl_div = jnp.sum(mask * ((ratio - 1) - (logp - logp_old))) / num_entries

    loss = policy_loss + vf_coeff * value_loss - entropy_coeff * entropy_loss
    aux = (loss, policy_loss, value_loss, entropy_loss, approx_kl_div)
    return loss, aux
@partial(jax.jit, static_argnums=(1, 2, 3))
def collect_batch(buffer, discount=0.99, A_lambda=0.95, permutate=True):
    """Turn a rollout buffer into the tuple consumed by the training step.

    Returns ``(states, actions, log_probs, target, gae, hidden_state, mask)``
    with the final time step removed from every entry. With ``permutate`` the
    two leading axes of each entry are merged and a singleton leading axis is
    added in front.
    """
    advantages = calculate_gae(buffer.values, buffer.rewards, buffer.dones, discount, A_lambda)
    returns = advantages + buffer.values
    fields = (
        buffer.states,
        buffer.actions,
        buffer.log_probs,
        returns,
        advantages,
        buffer.hidden_state,
        buffer.mask,
    )

    def drop_last(leaf):
        # The last element has no advantage to compute.
        return leaf[:-1]

    def drop_last_and_flatten(leaf):
        trimmed = leaf[:-1]
        merged = trimmed.reshape(-1, *trimmed.shape[2:])
        return merged[None, ...]

    transform = drop_last_and_flatten if permutate else drop_last
    return jax.tree_util.tree_map(transform, fields)
@partial(jax.jit, static_argnums=(1, 2, 3, 4))
def evaluate(state, env_params, num_envs=1024, hidden_size=24, deterministic=True):
    """Roll the current policy out on a fixed-seed batch of environments.

    Args:
        state: Train state providing ``apply_fn`` and ``params``.
        env_params: Environment parameters (static under jit).
        num_envs: Number of parallel evaluation environments.
        hidden_size: Size of the policy's hidden state.
        deterministic: If true the mean action is used instead of a sample.

    Returns:
        ``(mean_reward, reached_percentage, arrival_time)``: the mean reward at
        the first ``done`` of every environment, the fraction (in [0, 1]) of
        environments whose first ``done`` came before the last step, and the
        per-environment first-``done`` index multiplied by ``dt``.
    """
    eval_envs = MultiCell(env_params, num_envs)
    # Fixed seed keeps evaluations comparable; separate keys for the reset and
    # for the rollout so that no key is consumed twice.
    rng_reset, rng_rollout = jax.random.split(jax.random.PRNGKey(0), 2)

    def transition_step(carry, rng):
        s, env_state, h = carry
        rng_action, rng_step = jax.random.split(rng, 2)
        out = state.apply_fn(state.params, s[None, ...], h)
        # The model adds a leading time axis; keep only its last entry.
        _, (mu, scale), next_h = jax.tree_util.tree_map(lambda x: x[-1], out)
        a = jax.random.normal(rng_action, shape=mu.shape) * scale + mu
        a = jax.lax.select(deterministic, mu, a)
        next_s, next_env_state, r, done, __ = eval_envs.step(rng_step, env_state, a)
        return (next_s, next_env_state, next_h), (r, done)

    obs, env_state = eval_envs.reset(rng_reset)
    h = jnp.zeros((*obs.shape[:-1], hidden_size))
    rngs = jax.random.split(rng_rollout, env_params.max_steps_in_episode)
    _, (rewards, dones) = jax.lax.scan(transition_step, (obs, env_state, h), rngs)

    # Index of the first `done` per environment (0 if there is none).
    end_step = jnp.argmax(dones, axis=0)
    mean_reward = rewards[end_step, jnp.arange(num_envs)].mean()
    reached_percentage = (end_step < env_params.max_steps_in_episode-1).mean()
    arrival_time = end_step * env_params.dt
    return mean_reward, reached_percentage, arrival_time
+ envs = MultiCell(env_params, config.n_train_envs) + + # Initialise the network + rng, rng_init = jax.random.split(rng, num=2) + train_state = init_train_state(rng_init, envs, env_params, config) + + @jax.jit + def run_episodes(train_state, rng): + + def step(carry, rng): + train_state, obs, env_state, h, mask = carry + rng_action, rng_step = jax.random.split(rng, 2) + out = train_state.apply_fn(train_state.params, obs[None, ...], h) + value, (mu, scale), next_h = jax.tree_util.tree_map(lambda x: x[-1], out) + action = jax.random.normal(rng_action, shape=mu.shape) * scale + mu + log_prob = jax.scipy.stats.norm.logpdf(action, mu, scale).sum(-1) + next_obs, next_env_state, reward, done, _ = envs.step(rng_step, env_state, action) + rollout = Rollout(obs, action, reward, done, log_prob, value[..., 0], h, 1-mask) + return (train_state, next_obs, next_env_state, next_h, mask | done), rollout + + rng, rng_reset = jax.random.split(rng, 2) + rngs = jax.random.split(rng, env_params.max_steps_in_episode+1) + + obs, env_state = envs.reset(rng_reset) + hidden_state = jnp.zeros(shape=(*obs.shape[:-1], config.hidden_cells)) + mask = jnp.zeros(config.n_train_envs) > 0 + init = (train_state, obs, env_state, hidden_state, mask) + _, buffer = jax.lax.scan(step, init, rngs) + return buffer + + step_size = int(config.n_train_envs * config.n_steps) + eval_interval = max(1, int(config.total_steps / 100 // step_size)) + pbar = tqdm(total=config.total_steps, dynamic_ncols=True) + + steps_passed = 0 + for step in range(config.total_steps // step_size): + + # Perform n_steps for each of the num_train_envs in parallel. + buffer = run_episodes(train_state, jax.random.fold_in(rng, step)) + steps_passed += step_size + + # Collect the buffer from the rollout and perform a training step corresponding + # to n_minibatch*n_epochs gradient descent steps with the current policy. 
def evaluate_policy(model, num_envs, seed):
    """Measure arrival times and chemotactic efficiency of a policy.

    Args:
        model: Object exposing ``env_params``, ``hidden_size`` and a batched
            ``policy(rng, obs, h) -> (action, h)``.
        num_envs: Number of environments simulated in parallel.
        seed: Seed of the PRNG key.

    Returns:
        ``(times, efficiencies)``, two arrays of equal length with one entry
        per environment whose first ``done`` index is greater than zero.
    """
    rng, rng_reset = jax.random.split(jax.random.PRNGKey(seed))
    eval_envs = MultiCell(model.env_params, num_envs)

    @jax.jit
    def transition_step(carry, rng):
        s, env_state, h = carry
        rng_action, rng_step = jax.random.split(rng, 2)
        a, h = model.policy(rng_action, s, h)
        next_s, next_env_state, _, done, __ = eval_envs.step(rng_step, env_state, a)
        # Clear the policy memory of finished environments.
        h = jnp.where(done[:, None], jnp.zeros_like(h), h)
        return (next_s, next_env_state, h), done

    obs, env_state = eval_envs.reset(rng_reset)
    h = jnp.zeros((*obs.shape[:-1], model.hidden_size))
    rngs = jax.random.split(rng, model.env_params.max_steps_in_episode)
    _, dones = jax.lax.scan(transition_step, init=(obs, env_state, h), xs=rngs)

    # Compute the arrival times of the simulations
    last_step = jnp.argmax(dones, axis=0)
    # Index 0 means "no done recorded" (or done on the very first step); such
    # runs are dropped, which also keeps the division below away from zero.
    valid = last_step > 0
    times = last_step[valid] * model.env_params.dt

    # Compute the chemotactic efficiency of the policy
    rate = np.sqrt(model.env_params.decay_rate / model.env_params.diffusion_coeff)
    delta = -np.log(0.9) / rate
    d_init = np.sqrt(np.sum(env_state.x**2, axis=-1))
    # Apply the same filter as for `times`; otherwise the two arrays have
    # different lengths as soon as a single run is dropped.
    d_init = d_init[np.asarray(valid)]
    efficiencies = (d_init - delta) / (model.env_params.speed * times)

    return times, efficiencies
def load_options(checkpoints_dir, pattern="*"):
    """List the entries of ``checkpoints_dir`` that match ``pattern``.

    Returns ``(directories, labels)``: the matching paths ordered from newest
    to oldest ``st_ctime``, and one label per path made of its stem and the
    number of ``checkpoint_*`` entries found inside it.
    """

    def changed_at(entry):
        return entry.stat().st_ctime

    base = pathlib.Path(checkpoints_dir)
    entries = sorted(base.glob(pattern), key=changed_at, reverse=True)

    labels = []
    for entry in entries:
        n_ckpts = sum(1 for _ in entry.glob("checkpoint_*"))
        labels.append(f"{entry.stem} ({n_ckpts} checkpoints)")
    return entries, labels
def random_walk_policy(n=4):
    """Build a memory-less baseline that turns by a random multiple of ``1/n``.

    Args:
        n: Number of equally spaced turning options ``0, 1/n, ..., (n-1)/n``.

    Returns:
        ``(policy, hidden_size)`` where ``policy(rng, obs, h)`` takes a single
        PRNG key plus batched observations and hidden states and returns
        ``(actions, h)`` with ``actions`` of shape ``(batch, 1)``. The hidden
        state is passed through untouched; ``hidden_size`` is 1.
    """
    # `jnp.linspace(0, n, endpoint=False)` silently used the default of 50
    # samples, which made `n` irrelevant; enumerate the n options explicitly.
    options = jnp.arange(n) / n

    def policy(rng, _, h):
        dtheta = jax.random.choice(rng, options)
        return (jnp.array([dtheta]), h)

    def batched_policy(rng, s, h):
        # One key per environment so that the walkers turn independently
        # instead of all drawing the same option from a shared key.
        rngs = jax.random.split(rng, s.shape[0])
        return jax.vmap(policy, in_axes=(0, 0, 0))(rngs, s, h)

    return jax.jit(batched_policy), 1
model.apply(params, s[None], h) + _, (mu, scale), h = jax.tree_util.tree_map(lambda x: x[-1], outs) + a = jax.random.normal(rng, shape=mu.shape) * scale + mu + a = jax.lax.select(deterministic, mu, a) + return a, h + + return jax.jit(policy) diff --git a/models/PPO_R05_SF_MT_4096_20230704-0845_42/checkpoint_1992294400 b/models/PPO_R05_SF_MT_4096_20230704-0845_42/checkpoint_1992294400 new file mode 100644 index 0000000..1e6e979 Binary files /dev/null and b/models/PPO_R05_SF_MT_4096_20230704-0845_42/checkpoint_1992294400 differ diff --git a/models/PPO_R05_SF_MT_4096_20230704-0845_42/config.json b/models/PPO_R05_SF_MT_4096_20230704-0845_42/config.json new file mode 100644 index 0000000..5b4d45f --- /dev/null +++ b/models/PPO_R05_SF_MT_4096_20230704-0845_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R05_SF_MT_4096_20230704-0845_42/env_params.json b/models/PPO_R05_SF_MT_4096_20230704-0845_42/env_params.json new file mode 100644 index 0000000..b8b4d43 --- /dev/null +++ b/models/PPO_R05_SF_MT_4096_20230704-0845_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R05_ST_MF_4096_20230613-1523_42/checkpoint_1912602624 b/models/PPO_R05_ST_MF_4096_20230613-1523_42/checkpoint_1912602624 new file mode 100644 index 0000000..2b6b0de Binary files 
/dev/null and b/models/PPO_R05_ST_MF_4096_20230613-1523_42/checkpoint_1912602624 differ diff --git a/models/PPO_R05_ST_MF_4096_20230613-1523_42/config.json b/models/PPO_R05_ST_MF_4096_20230613-1523_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R05_ST_MF_4096_20230613-1523_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R05_ST_MF_4096_20230613-1523_42/env_params.json b/models/PPO_R05_ST_MF_4096_20230613-1523_42/env_params.json new file mode 100644 index 0000000..b8b4d43 --- /dev/null +++ b/models/PPO_R05_ST_MF_4096_20230613-1523_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R05_ST_MT_4096_20230619-1003_42/checkpoint_2818572288 b/models/PPO_R05_ST_MT_4096_20230619-1003_42/checkpoint_2818572288 new file mode 100644 index 0000000..52459e1 Binary files /dev/null and b/models/PPO_R05_ST_MT_4096_20230619-1003_42/checkpoint_2818572288 differ diff --git a/models/PPO_R05_ST_MT_4096_20230619-1003_42/config.json b/models/PPO_R05_ST_MT_4096_20230619-1003_42/config.json new file mode 100644 index 0000000..2352c04 --- /dev/null +++ b/models/PPO_R05_ST_MT_4096_20230619-1003_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 3000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + 
"eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R05_ST_MT_4096_20230619-1003_42/env_params.json b/models/PPO_R05_ST_MT_4096_20230619-1003_42/env_params.json new file mode 100644 index 0000000..b8b4d43 --- /dev/null +++ b/models/PPO_R05_ST_MT_4096_20230619-1003_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R06_SF_MT_4096_20230704-1011_43/checkpoint_272629760 b/models/PPO_R06_SF_MT_4096_20230704-1011_43/checkpoint_272629760 new file mode 100644 index 0000000..2bf1b0e Binary files /dev/null and b/models/PPO_R06_SF_MT_4096_20230704-1011_43/checkpoint_272629760 differ diff --git a/models/PPO_R06_SF_MT_4096_20230704-1011_43/config.json b/models/PPO_R06_SF_MT_4096_20230704-1011_43/config.json new file mode 100644 index 0000000..ff95c56 --- /dev/null +++ b/models/PPO_R06_SF_MT_4096_20230704-1011_43/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R05_SF_MT_4096_20230704-0845_42", + "seed": 43, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git 
a/models/PPO_R06_SF_MT_4096_20230704-1011_43/env_params.json b/models/PPO_R06_SF_MT_4096_20230704-1011_43/env_params.json new file mode 100644 index 0000000..f3eb746 --- /dev/null +++ b/models/PPO_R06_SF_MT_4096_20230704-1011_43/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R06_ST_MF_4096_20230613-1523_42/checkpoint_1115684864 b/models/PPO_R06_ST_MF_4096_20230613-1523_42/checkpoint_1115684864 new file mode 100644 index 0000000..113aeac Binary files /dev/null and b/models/PPO_R06_ST_MF_4096_20230613-1523_42/checkpoint_1115684864 differ diff --git a/models/PPO_R06_ST_MF_4096_20230613-1523_42/config.json b/models/PPO_R06_ST_MF_4096_20230613-1523_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R06_ST_MF_4096_20230613-1523_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R06_ST_MF_4096_20230613-1523_42/env_params.json b/models/PPO_R06_ST_MF_4096_20230613-1523_42/env_params.json new file mode 100644 index 0000000..f3eb746 --- /dev/null +++ b/models/PPO_R06_ST_MF_4096_20230613-1523_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 
4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R06_ST_MT_4096_20230619-1202_43/checkpoint_273678336 b/models/PPO_R06_ST_MT_4096_20230619-1202_43/checkpoint_273678336 new file mode 100644 index 0000000..5f8c4d1 Binary files /dev/null and b/models/PPO_R06_ST_MT_4096_20230619-1202_43/checkpoint_273678336 differ diff --git a/models/PPO_R06_ST_MT_4096_20230619-1202_43/config.json b/models/PPO_R06_ST_MT_4096_20230619-1202_43/config.json new file mode 100644 index 0000000..f8d354a --- /dev/null +++ b/models/PPO_R06_ST_MT_4096_20230619-1202_43/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R05_ST_MT_4096_20230619-1003_42", + "seed": 43, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R06_ST_MT_4096_20230619-1202_43/env_params.json b/models/PPO_R06_ST_MT_4096_20230619-1202_43/env_params.json new file mode 100644 index 0000000..f3eb746 --- /dev/null +++ b/models/PPO_R06_ST_MT_4096_20230619-1202_43/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R07_SF_MT_4096_20230704-1033_44/checkpoint_297795584 b/models/PPO_R07_SF_MT_4096_20230704-1033_44/checkpoint_297795584 new file mode 100644 index 0000000..3feffed Binary files /dev/null and b/models/PPO_R07_SF_MT_4096_20230704-1033_44/checkpoint_297795584 differ diff --git a/models/PPO_R07_SF_MT_4096_20230704-1033_44/config.json 
b/models/PPO_R07_SF_MT_4096_20230704-1033_44/config.json new file mode 100644 index 0000000..e748f29 --- /dev/null +++ b/models/PPO_R07_SF_MT_4096_20230704-1033_44/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R06_SF_MT_4096_20230704-1011_43", + "seed": 44, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R07_SF_MT_4096_20230704-1033_44/env_params.json b/models/PPO_R07_SF_MT_4096_20230704-1033_44/env_params.json new file mode 100644 index 0000000..0bcb2d3 --- /dev/null +++ b/models/PPO_R07_SF_MT_4096_20230704-1033_44/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R07_ST_MF_4096_20230613-1523_42/checkpoint_1932525568 b/models/PPO_R07_ST_MF_4096_20230613-1523_42/checkpoint_1932525568 new file mode 100644 index 0000000..a783ae6 Binary files /dev/null and b/models/PPO_R07_ST_MF_4096_20230613-1523_42/checkpoint_1932525568 differ diff --git a/models/PPO_R07_ST_MF_4096_20230613-1523_42/config.json b/models/PPO_R07_ST_MF_4096_20230613-1523_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R07_ST_MF_4096_20230613-1523_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + 
"n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R07_ST_MF_4096_20230613-1523_42/env_params.json b/models/PPO_R07_ST_MF_4096_20230613-1523_42/env_params.json new file mode 100644 index 0000000..0bcb2d3 --- /dev/null +++ b/models/PPO_R07_ST_MF_4096_20230613-1523_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R07_ST_MT_4096_20230619-1247_44/checkpoint_537919488 b/models/PPO_R07_ST_MT_4096_20230619-1247_44/checkpoint_537919488 new file mode 100644 index 0000000..24b5242 Binary files /dev/null and b/models/PPO_R07_ST_MT_4096_20230619-1247_44/checkpoint_537919488 differ diff --git a/models/PPO_R07_ST_MT_4096_20230619-1247_44/config.json b/models/PPO_R07_ST_MT_4096_20230619-1247_44/config.json new file mode 100644 index 0000000..0edf0e8 --- /dev/null +++ b/models/PPO_R07_ST_MT_4096_20230619-1247_44/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R06_ST_MT_4096_20230619-1202_43", + "seed": 44, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R07_ST_MT_4096_20230619-1247_44/env_params.json 
b/models/PPO_R07_ST_MT_4096_20230619-1247_44/env_params.json new file mode 100644 index 0000000..0bcb2d3 --- /dev/null +++ b/models/PPO_R07_ST_MT_4096_20230619-1247_44/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R08_SF_MT_4096_20230704-1054_45/checkpoint_293601280 b/models/PPO_R08_SF_MT_4096_20230704-1054_45/checkpoint_293601280 new file mode 100644 index 0000000..1e9d11e Binary files /dev/null and b/models/PPO_R08_SF_MT_4096_20230704-1054_45/checkpoint_293601280 differ diff --git a/models/PPO_R08_SF_MT_4096_20230704-1054_45/config.json b/models/PPO_R08_SF_MT_4096_20230704-1054_45/config.json new file mode 100644 index 0000000..ed4d1f1 --- /dev/null +++ b/models/PPO_R08_SF_MT_4096_20230704-1054_45/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R07_SF_MT_4096_20230704-1033_44", + "seed": 45, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R08_SF_MT_4096_20230704-1054_45/env_params.json b/models/PPO_R08_SF_MT_4096_20230704-1054_45/env_params.json new file mode 100644 index 0000000..16ae156 --- /dev/null +++ b/models/PPO_R08_SF_MT_4096_20230704-1054_45/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + 
"C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R08_ST_MF_4096_20230613-1522_42/checkpoint_1992294400 b/models/PPO_R08_ST_MF_4096_20230613-1522_42/checkpoint_1992294400 new file mode 100644 index 0000000..675e606 Binary files /dev/null and b/models/PPO_R08_ST_MF_4096_20230613-1522_42/checkpoint_1992294400 differ diff --git a/models/PPO_R08_ST_MF_4096_20230613-1522_42/config.json b/models/PPO_R08_ST_MF_4096_20230613-1522_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R08_ST_MF_4096_20230613-1522_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R08_ST_MF_4096_20230613-1522_42/env_params.json b/models/PPO_R08_ST_MF_4096_20230613-1522_42/env_params.json new file mode 100644 index 0000000..16ae156 --- /dev/null +++ b/models/PPO_R08_ST_MF_4096_20230613-1522_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R08_ST_MT_4096_20230619-1331_45/checkpoint_679477248 b/models/PPO_R08_ST_MT_4096_20230619-1331_45/checkpoint_679477248 new file mode 100644 index 0000000..5984c05 Binary files /dev/null and b/models/PPO_R08_ST_MT_4096_20230619-1331_45/checkpoint_679477248 differ diff --git a/models/PPO_R08_ST_MT_4096_20230619-1331_45/config.json 
b/models/PPO_R08_ST_MT_4096_20230619-1331_45/config.json new file mode 100644 index 0000000..69ccc03 --- /dev/null +++ b/models/PPO_R08_ST_MT_4096_20230619-1331_45/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R07_ST_MT_4096_20230619-1247_44", + "seed": 45, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R08_ST_MT_4096_20230619-1331_45/env_params.json b/models/PPO_R08_ST_MT_4096_20230619-1331_45/env_params.json new file mode 100644 index 0000000..16ae156 --- /dev/null +++ b/models/PPO_R08_ST_MT_4096_20230619-1331_45/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R09_SF_MT_4096_20230704-1116_46/checkpoint_239075328 b/models/PPO_R09_SF_MT_4096_20230704-1116_46/checkpoint_239075328 new file mode 100644 index 0000000..e54d633 Binary files /dev/null and b/models/PPO_R09_SF_MT_4096_20230704-1116_46/checkpoint_239075328 differ diff --git a/models/PPO_R09_SF_MT_4096_20230704-1116_46/config.json b/models/PPO_R09_SF_MT_4096_20230704-1116_46/config.json new file mode 100644 index 0000000..98a3137 --- /dev/null +++ b/models/PPO_R09_SF_MT_4096_20230704-1116_46/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + 
"clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R08_SF_MT_4096_20230704-1054_45", + "seed": 46, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R09_SF_MT_4096_20230704-1116_46/env_params.json b/models/PPO_R09_SF_MT_4096_20230704-1116_46/env_params.json new file mode 100644 index 0000000..dbcadc6 --- /dev/null +++ b/models/PPO_R09_SF_MT_4096_20230704-1116_46/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R09_ST_MF_4096_20230613-1522_42/checkpoint_1195376640 b/models/PPO_R09_ST_MF_4096_20230613-1522_42/checkpoint_1195376640 new file mode 100644 index 0000000..4adfc65 Binary files /dev/null and b/models/PPO_R09_ST_MF_4096_20230613-1522_42/checkpoint_1195376640 differ diff --git a/models/PPO_R09_ST_MF_4096_20230613-1522_42/config.json b/models/PPO_R09_ST_MF_4096_20230613-1522_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R09_ST_MF_4096_20230613-1522_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R09_ST_MF_4096_20230613-1522_42/env_params.json 
b/models/PPO_R09_ST_MF_4096_20230613-1522_42/env_params.json new file mode 100644 index 0000000..dbcadc6 --- /dev/null +++ b/models/PPO_R09_ST_MF_4096_20230613-1522_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R09_ST_MT_4096_20230619-1412_46/checkpoint_868220928 b/models/PPO_R09_ST_MT_4096_20230619-1412_46/checkpoint_868220928 new file mode 100644 index 0000000..7635491 Binary files /dev/null and b/models/PPO_R09_ST_MT_4096_20230619-1412_46/checkpoint_868220928 differ diff --git a/models/PPO_R09_ST_MT_4096_20230619-1412_46/config.json b/models/PPO_R09_ST_MT_4096_20230619-1412_46/config.json new file mode 100644 index 0000000..427112e --- /dev/null +++ b/models/PPO_R09_ST_MT_4096_20230619-1412_46/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R08_ST_MT_4096_20230619-1331_45", + "seed": 46, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R09_ST_MT_4096_20230619-1412_46/env_params.json b/models/PPO_R09_ST_MT_4096_20230619-1412_46/env_params.json new file mode 100644 index 0000000..dbcadc6 --- /dev/null +++ b/models/PPO_R09_ST_MT_4096_20230619-1412_46/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 0.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ 
No newline at end of file diff --git a/models/PPO_R10_SF_MT_4096_20230704-1138_47/checkpoint_348127232 b/models/PPO_R10_SF_MT_4096_20230704-1138_47/checkpoint_348127232 new file mode 100644 index 0000000..c50640b Binary files /dev/null and b/models/PPO_R10_SF_MT_4096_20230704-1138_47/checkpoint_348127232 differ diff --git a/models/PPO_R10_SF_MT_4096_20230704-1138_47/config.json b/models/PPO_R10_SF_MT_4096_20230704-1138_47/config.json new file mode 100644 index 0000000..b2badfc --- /dev/null +++ b/models/PPO_R10_SF_MT_4096_20230704-1138_47/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R09_SF_MT_4096_20230704-1116_46", + "seed": 47, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R10_SF_MT_4096_20230704-1138_47/env_params.json b/models/PPO_R10_SF_MT_4096_20230704-1138_47/env_params.json new file mode 100644 index 0000000..e546bee --- /dev/null +++ b/models/PPO_R10_SF_MT_4096_20230704-1138_47/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R10_ST_MF_4096_20230613-1521_42/checkpoint_1832910848 b/models/PPO_R10_ST_MF_4096_20230613-1521_42/checkpoint_1832910848 new file mode 100644 index 0000000..56061e3 Binary files /dev/null and b/models/PPO_R10_ST_MF_4096_20230613-1521_42/checkpoint_1832910848 differ diff --git a/models/PPO_R10_ST_MF_4096_20230613-1521_42/config.json 
b/models/PPO_R10_ST_MF_4096_20230613-1521_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R10_ST_MF_4096_20230613-1521_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R10_ST_MF_4096_20230613-1521_42/env_params.json b/models/PPO_R10_ST_MF_4096_20230613-1521_42/env_params.json new file mode 100644 index 0000000..e546bee --- /dev/null +++ b/models/PPO_R10_ST_MF_4096_20230613-1521_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R10_ST_MT_4096_20230619-1454_47/checkpoint_386924544 b/models/PPO_R10_ST_MT_4096_20230619-1454_47/checkpoint_386924544 new file mode 100644 index 0000000..448305e Binary files /dev/null and b/models/PPO_R10_ST_MT_4096_20230619-1454_47/checkpoint_386924544 differ diff --git a/models/PPO_R10_ST_MT_4096_20230619-1454_47/config.json b/models/PPO_R10_ST_MT_4096_20230619-1454_47/config.json new file mode 100644 index 0000000..03bb9b7 --- /dev/null +++ b/models/PPO_R10_ST_MT_4096_20230619-1454_47/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + 
"critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R09_ST_MT_4096_20230619-1412_46", + "seed": 47, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R10_ST_MT_4096_20230619-1454_47/env_params.json b/models/PPO_R10_ST_MT_4096_20230619-1454_47/env_params.json new file mode 100644 index 0000000..e546bee --- /dev/null +++ b/models/PPO_R10_ST_MT_4096_20230619-1454_47/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R11_SF_MT_4096_20230704-1158_48/checkpoint_297795584 b/models/PPO_R11_SF_MT_4096_20230704-1158_48/checkpoint_297795584 new file mode 100644 index 0000000..7e37dde Binary files /dev/null and b/models/PPO_R11_SF_MT_4096_20230704-1158_48/checkpoint_297795584 differ diff --git a/models/PPO_R11_SF_MT_4096_20230704-1158_48/config.json b/models/PPO_R11_SF_MT_4096_20230704-1158_48/config.json new file mode 100644 index 0000000..1722df7 --- /dev/null +++ b/models/PPO_R11_SF_MT_4096_20230704-1158_48/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R10_SF_MT_4096_20230704-1138_47", + "seed": 48, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R11_SF_MT_4096_20230704-1158_48/env_params.json 
b/models/PPO_R11_SF_MT_4096_20230704-1158_48/env_params.json new file mode 100644 index 0000000..d5d54a1 --- /dev/null +++ b/models/PPO_R11_SF_MT_4096_20230704-1158_48/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R11_ST_MF_4096_20230613-1521_42/checkpoint_1793064960 b/models/PPO_R11_ST_MF_4096_20230613-1521_42/checkpoint_1793064960 new file mode 100644 index 0000000..4aa1f85 Binary files /dev/null and b/models/PPO_R11_ST_MF_4096_20230613-1521_42/checkpoint_1793064960 differ diff --git a/models/PPO_R11_ST_MF_4096_20230613-1521_42/config.json b/models/PPO_R11_ST_MF_4096_20230613-1521_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R11_ST_MF_4096_20230613-1521_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R11_ST_MF_4096_20230613-1521_42/env_params.json b/models/PPO_R11_ST_MF_4096_20230613-1521_42/env_params.json new file mode 100644 index 0000000..d5d54a1 --- /dev/null +++ b/models/PPO_R11_ST_MF_4096_20230613-1521_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git 
a/models/PPO_R11_ST_MT_4096_20230619-1536_48/checkpoint_386924544 b/models/PPO_R11_ST_MT_4096_20230619-1536_48/checkpoint_386924544 new file mode 100644 index 0000000..2842b03 Binary files /dev/null and b/models/PPO_R11_ST_MT_4096_20230619-1536_48/checkpoint_386924544 differ diff --git a/models/PPO_R11_ST_MT_4096_20230619-1536_48/config.json b/models/PPO_R11_ST_MT_4096_20230619-1536_48/config.json new file mode 100644 index 0000000..b41aa43 --- /dev/null +++ b/models/PPO_R11_ST_MT_4096_20230619-1536_48/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R10_ST_MT_4096_20230619-1454_47", + "seed": 48, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R11_ST_MT_4096_20230619-1536_48/env_params.json b/models/PPO_R11_ST_MT_4096_20230619-1536_48/env_params.json new file mode 100644 index 0000000..d5d54a1 --- /dev/null +++ b/models/PPO_R11_ST_MT_4096_20230619-1536_48/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R12_SF_MT_4096_20230704-1219_49/checkpoint_306184192 b/models/PPO_R12_SF_MT_4096_20230704-1219_49/checkpoint_306184192 new file mode 100644 index 0000000..550f8fa Binary files /dev/null and b/models/PPO_R12_SF_MT_4096_20230704-1219_49/checkpoint_306184192 differ diff --git a/models/PPO_R12_SF_MT_4096_20230704-1219_49/config.json b/models/PPO_R12_SF_MT_4096_20230704-1219_49/config.json new file 
mode 100644 index 0000000..99d291e --- /dev/null +++ b/models/PPO_R12_SF_MT_4096_20230704-1219_49/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R11_SF_MT_4096_20230704-1158_48", + "seed": 49, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R12_SF_MT_4096_20230704-1219_49/env_params.json b/models/PPO_R12_SF_MT_4096_20230704-1219_49/env_params.json new file mode 100644 index 0000000..24b7fcc --- /dev/null +++ b/models/PPO_R12_SF_MT_4096_20230704-1219_49/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R12_ST_MF_4096_20230613-1510_42/checkpoint_1912602624 b/models/PPO_R12_ST_MF_4096_20230613-1510_42/checkpoint_1912602624 new file mode 100644 index 0000000..135ac4c Binary files /dev/null and b/models/PPO_R12_ST_MF_4096_20230613-1510_42/checkpoint_1912602624 differ diff --git a/models/PPO_R12_ST_MF_4096_20230613-1510_42/config.json b/models/PPO_R12_ST_MF_4096_20230613-1510_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R12_ST_MF_4096_20230613-1510_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + 
"critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R12_ST_MF_4096_20230613-1510_42/env_params.json b/models/PPO_R12_ST_MF_4096_20230613-1510_42/env_params.json new file mode 100644 index 0000000..24b7fcc --- /dev/null +++ b/models/PPO_R12_ST_MF_4096_20230613-1510_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R12_ST_MT_4096_20230619-1613_49/checkpoint_934281216 b/models/PPO_R12_ST_MT_4096_20230619-1613_49/checkpoint_934281216 new file mode 100644 index 0000000..6277011 Binary files /dev/null and b/models/PPO_R12_ST_MT_4096_20230619-1613_49/checkpoint_934281216 differ diff --git a/models/PPO_R12_ST_MT_4096_20230619-1613_49/config.json b/models/PPO_R12_ST_MT_4096_20230619-1613_49/config.json new file mode 100644 index 0000000..4e0236c --- /dev/null +++ b/models/PPO_R12_ST_MT_4096_20230619-1613_49/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R11_ST_MT_4096_20230619-1536_48", + "seed": 49, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R12_ST_MT_4096_20230619-1613_49/env_params.json b/models/PPO_R12_ST_MT_4096_20230619-1613_49/env_params.json new file mode 100644 index 0000000..24b7fcc --- 
/dev/null +++ b/models/PPO_R12_ST_MT_4096_20230619-1613_49/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R13_SF_MT_4096_20230704-1239_50/checkpoint_322961408 b/models/PPO_R13_SF_MT_4096_20230704-1239_50/checkpoint_322961408 new file mode 100644 index 0000000..b9b4663 Binary files /dev/null and b/models/PPO_R13_SF_MT_4096_20230704-1239_50/checkpoint_322961408 differ diff --git a/models/PPO_R13_SF_MT_4096_20230704-1239_50/config.json b/models/PPO_R13_SF_MT_4096_20230704-1239_50/config.json new file mode 100644 index 0000000..bf5e6c7 --- /dev/null +++ b/models/PPO_R13_SF_MT_4096_20230704-1239_50/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R12_SF_MT_4096_20230704-1219_49", + "seed": 50, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R13_SF_MT_4096_20230704-1239_50/env_params.json b/models/PPO_R13_SF_MT_4096_20230704-1239_50/env_params.json new file mode 100644 index 0000000..b532c88 --- /dev/null +++ b/models/PPO_R13_SF_MT_4096_20230704-1239_50/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git 
a/models/PPO_R13_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 b/models/PPO_R13_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 new file mode 100644 index 0000000..217a084 Binary files /dev/null and b/models/PPO_R13_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 differ diff --git a/models/PPO_R13_ST_MF_4096_20230613-1510_42/config.json b/models/PPO_R13_ST_MF_4096_20230613-1510_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R13_ST_MF_4096_20230613-1510_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R13_ST_MF_4096_20230613-1510_42/env_params.json b/models/PPO_R13_ST_MF_4096_20230613-1510_42/env_params.json new file mode 100644 index 0000000..b532c88 --- /dev/null +++ b/models/PPO_R13_ST_MF_4096_20230613-1510_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R13_ST_MT_4096_20230619-1654_50/checkpoint_764411904 b/models/PPO_R13_ST_MT_4096_20230619-1654_50/checkpoint_764411904 new file mode 100644 index 0000000..86b5682 Binary files /dev/null and b/models/PPO_R13_ST_MT_4096_20230619-1654_50/checkpoint_764411904 differ diff --git a/models/PPO_R13_ST_MT_4096_20230619-1654_50/config.json b/models/PPO_R13_ST_MT_4096_20230619-1654_50/config.json new file mode 100644 index 0000000..6efe349 --- 
/dev/null +++ b/models/PPO_R13_ST_MT_4096_20230619-1654_50/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R12_ST_MT_4096_20230619-1613_49", + "seed": 50, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R13_ST_MT_4096_20230619-1654_50/env_params.json b/models/PPO_R13_ST_MT_4096_20230619-1654_50/env_params.json new file mode 100644 index 0000000..b532c88 --- /dev/null +++ b/models/PPO_R13_ST_MT_4096_20230619-1654_50/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R14_SF_MT_4096_20230704-1300_51/checkpoint_314572800 b/models/PPO_R14_SF_MT_4096_20230704-1300_51/checkpoint_314572800 new file mode 100644 index 0000000..1f35f98 Binary files /dev/null and b/models/PPO_R14_SF_MT_4096_20230704-1300_51/checkpoint_314572800 differ diff --git a/models/PPO_R14_SF_MT_4096_20230704-1300_51/config.json b/models/PPO_R14_SF_MT_4096_20230704-1300_51/config.json new file mode 100644 index 0000000..a14364d --- /dev/null +++ b/models/PPO_R14_SF_MT_4096_20230704-1300_51/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + 
"logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R13_SF_MT_4096_20230704-1239_50", + "seed": 51, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R14_SF_MT_4096_20230704-1300_51/env_params.json b/models/PPO_R14_SF_MT_4096_20230704-1300_51/env_params.json new file mode 100644 index 0000000..0d42421 --- /dev/null +++ b/models/PPO_R14_SF_MT_4096_20230704-1300_51/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R14_ST_MF_4096_20230613-1510_42/checkpoint_1872756736 b/models/PPO_R14_ST_MF_4096_20230613-1510_42/checkpoint_1872756736 new file mode 100644 index 0000000..dbb21a4 Binary files /dev/null and b/models/PPO_R14_ST_MF_4096_20230613-1510_42/checkpoint_1872756736 differ diff --git a/models/PPO_R14_ST_MF_4096_20230613-1510_42/config.json b/models/PPO_R14_ST_MF_4096_20230613-1510_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R14_ST_MF_4096_20230613-1510_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R14_ST_MF_4096_20230613-1510_42/env_params.json b/models/PPO_R14_ST_MF_4096_20230613-1510_42/env_params.json new file mode 100644 index 0000000..0d42421 --- /dev/null +++ 
b/models/PPO_R14_ST_MF_4096_20230613-1510_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R14_ST_MT_4096_20230619-1737_51/checkpoint_811597824 b/models/PPO_R14_ST_MT_4096_20230619-1737_51/checkpoint_811597824 new file mode 100644 index 0000000..811a479 Binary files /dev/null and b/models/PPO_R14_ST_MT_4096_20230619-1737_51/checkpoint_811597824 differ diff --git a/models/PPO_R14_ST_MT_4096_20230619-1737_51/config.json b/models/PPO_R14_ST_MT_4096_20230619-1737_51/config.json new file mode 100644 index 0000000..53d8144 --- /dev/null +++ b/models/PPO_R14_ST_MT_4096_20230619-1737_51/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R13_ST_MT_4096_20230619-1654_50", + "seed": 51, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R14_ST_MT_4096_20230619-1737_51/env_params.json b/models/PPO_R14_ST_MT_4096_20230619-1737_51/env_params.json new file mode 100644 index 0000000..0d42421 --- /dev/null +++ b/models/PPO_R14_ST_MT_4096_20230619-1737_51/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R15_SF_MT_4096_20230704-1320_52/checkpoint_461373440 
b/models/PPO_R15_SF_MT_4096_20230704-1320_52/checkpoint_461373440 new file mode 100644 index 0000000..8d989d7 Binary files /dev/null and b/models/PPO_R15_SF_MT_4096_20230704-1320_52/checkpoint_461373440 differ diff --git a/models/PPO_R15_SF_MT_4096_20230704-1320_52/config.json b/models/PPO_R15_SF_MT_4096_20230704-1320_52/config.json new file mode 100644 index 0000000..59403bd --- /dev/null +++ b/models/PPO_R15_SF_MT_4096_20230704-1320_52/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R14_SF_MT_4096_20230704-1300_51", + "seed": 52, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R15_SF_MT_4096_20230704-1320_52/env_params.json b/models/PPO_R15_SF_MT_4096_20230704-1320_52/env_params.json new file mode 100644 index 0000000..ad12c3e --- /dev/null +++ b/models/PPO_R15_SF_MT_4096_20230704-1320_52/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R15_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 b/models/PPO_R15_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 new file mode 100644 index 0000000..4019954 Binary files /dev/null and b/models/PPO_R15_ST_MF_4096_20230613-1510_42/checkpoint_1892679680 differ diff --git a/models/PPO_R15_ST_MF_4096_20230613-1510_42/config.json b/models/PPO_R15_ST_MF_4096_20230613-1510_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ 
b/models/PPO_R15_ST_MF_4096_20230613-1510_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R15_ST_MF_4096_20230613-1510_42/env_params.json b/models/PPO_R15_ST_MF_4096_20230613-1510_42/env_params.json new file mode 100644 index 0000000..ad12c3e --- /dev/null +++ b/models/PPO_R15_ST_MF_4096_20230613-1510_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R15_ST_MT_4096_20230619-1820_52/checkpoint_990904320 b/models/PPO_R15_ST_MT_4096_20230619-1820_52/checkpoint_990904320 new file mode 100644 index 0000000..6bec99a Binary files /dev/null and b/models/PPO_R15_ST_MT_4096_20230619-1820_52/checkpoint_990904320 differ diff --git a/models/PPO_R15_ST_MT_4096_20230619-1820_52/config.json b/models/PPO_R15_ST_MT_4096_20230619-1820_52/config.json new file mode 100644 index 0000000..8c97aba --- /dev/null +++ b/models/PPO_R15_ST_MT_4096_20230619-1820_52/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": 
"new_rot_6/PPO_R14_ST_MT_4096_20230619-1737_51", + "seed": 52, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R15_ST_MT_4096_20230619-1820_52/env_params.json b/models/PPO_R15_ST_MT_4096_20230619-1820_52/env_params.json new file mode 100644 index 0000000..ad12c3e --- /dev/null +++ b/models/PPO_R15_ST_MT_4096_20230619-1820_52/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R16_SF_MT_4096_20230704-1340_53/checkpoint_348127232 b/models/PPO_R16_SF_MT_4096_20230704-1340_53/checkpoint_348127232 new file mode 100644 index 0000000..1420d1c Binary files /dev/null and b/models/PPO_R16_SF_MT_4096_20230704-1340_53/checkpoint_348127232 differ diff --git a/models/PPO_R16_SF_MT_4096_20230704-1340_53/config.json b/models/PPO_R16_SF_MT_4096_20230704-1340_53/config.json new file mode 100644 index 0000000..396c653 --- /dev/null +++ b/models/PPO_R16_SF_MT_4096_20230704-1340_53/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R15_SF_MT_4096_20230704-1320_52", + "seed": 53, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R16_SF_MT_4096_20230704-1340_53/env_params.json b/models/PPO_R16_SF_MT_4096_20230704-1340_53/env_params.json new file mode 100644 index 0000000..f4278cf --- /dev/null +++ 
b/models/PPO_R16_SF_MT_4096_20230704-1340_53/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R16_ST_MF_4096_20230613-1509_42/checkpoint_1693450240 b/models/PPO_R16_ST_MF_4096_20230613-1509_42/checkpoint_1693450240 new file mode 100644 index 0000000..6560427 Binary files /dev/null and b/models/PPO_R16_ST_MF_4096_20230613-1509_42/checkpoint_1693450240 differ diff --git a/models/PPO_R16_ST_MF_4096_20230613-1509_42/config.json b/models/PPO_R16_ST_MF_4096_20230613-1509_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R16_ST_MF_4096_20230613-1509_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R16_ST_MF_4096_20230613-1509_42/env_params.json b/models/PPO_R16_ST_MF_4096_20230613-1509_42/env_params.json new file mode 100644 index 0000000..f4278cf --- /dev/null +++ b/models/PPO_R16_ST_MF_4096_20230613-1509_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R16_ST_MT_4096_20230619-1901_53/checkpoint_660602880 
b/models/PPO_R16_ST_MT_4096_20230619-1901_53/checkpoint_660602880 new file mode 100644 index 0000000..bb75dee Binary files /dev/null and b/models/PPO_R16_ST_MT_4096_20230619-1901_53/checkpoint_660602880 differ diff --git a/models/PPO_R16_ST_MT_4096_20230619-1901_53/config.json b/models/PPO_R16_ST_MT_4096_20230619-1901_53/config.json new file mode 100644 index 0000000..e742551 --- /dev/null +++ b/models/PPO_R16_ST_MT_4096_20230619-1901_53/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R15_ST_MT_4096_20230619-1820_52", + "seed": 53, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R16_ST_MT_4096_20230619-1901_53/env_params.json b/models/PPO_R16_ST_MT_4096_20230619-1901_53/env_params.json new file mode 100644 index 0000000..f4278cf --- /dev/null +++ b/models/PPO_R16_ST_MT_4096_20230619-1901_53/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R17_SF_MT_4096_20230704-1358_54/checkpoint_268435456 b/models/PPO_R17_SF_MT_4096_20230704-1358_54/checkpoint_268435456 new file mode 100644 index 0000000..b9b830a Binary files /dev/null and b/models/PPO_R17_SF_MT_4096_20230704-1358_54/checkpoint_268435456 differ diff --git a/models/PPO_R17_SF_MT_4096_20230704-1358_54/config.json b/models/PPO_R17_SF_MT_4096_20230704-1358_54/config.json new file mode 100644 index 0000000..0896a7e --- /dev/null +++ 
b/models/PPO_R17_SF_MT_4096_20230704-1358_54/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R16_SF_MT_4096_20230704-1340_53", + "seed": 54, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R17_SF_MT_4096_20230704-1358_54/env_params.json b/models/PPO_R17_SF_MT_4096_20230704-1358_54/env_params.json new file mode 100644 index 0000000..28343dd --- /dev/null +++ b/models/PPO_R17_SF_MT_4096_20230704-1358_54/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R17_ST_MF_4096_20230613-1509_42/checkpoint_1773142016 b/models/PPO_R17_ST_MF_4096_20230613-1509_42/checkpoint_1773142016 new file mode 100644 index 0000000..dc69460 Binary files /dev/null and b/models/PPO_R17_ST_MF_4096_20230613-1509_42/checkpoint_1773142016 differ diff --git a/models/PPO_R17_ST_MF_4096_20230613-1509_42/config.json b/models/PPO_R17_ST_MF_4096_20230613-1509_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R17_ST_MF_4096_20230613-1509_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + 
"logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R17_ST_MF_4096_20230613-1509_42/env_params.json b/models/PPO_R17_ST_MF_4096_20230613-1509_42/env_params.json new file mode 100644 index 0000000..28343dd --- /dev/null +++ b/models/PPO_R17_ST_MF_4096_20230613-1509_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R17_ST_MT_4096_20230619-1940_54/checkpoint_802160640 b/models/PPO_R17_ST_MT_4096_20230619-1940_54/checkpoint_802160640 new file mode 100644 index 0000000..f118ae8 Binary files /dev/null and b/models/PPO_R17_ST_MT_4096_20230619-1940_54/checkpoint_802160640 differ diff --git a/models/PPO_R17_ST_MT_4096_20230619-1940_54/config.json b/models/PPO_R17_ST_MT_4096_20230619-1940_54/config.json new file mode 100644 index 0000000..2188656 --- /dev/null +++ b/models/PPO_R17_ST_MT_4096_20230619-1940_54/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R16_ST_MT_4096_20230619-1901_53", + "seed": 54, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R17_ST_MT_4096_20230619-1940_54/env_params.json b/models/PPO_R17_ST_MT_4096_20230619-1940_54/env_params.json new file mode 100644 index 0000000..28343dd --- /dev/null +++ 
b/models/PPO_R17_ST_MT_4096_20230619-1940_54/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R18_SF_MT_4096_20230704-1416_55/checkpoint_234881024 b/models/PPO_R18_SF_MT_4096_20230704-1416_55/checkpoint_234881024 new file mode 100644 index 0000000..5f7e340 Binary files /dev/null and b/models/PPO_R18_SF_MT_4096_20230704-1416_55/checkpoint_234881024 differ diff --git a/models/PPO_R18_SF_MT_4096_20230704-1416_55/config.json b/models/PPO_R18_SF_MT_4096_20230704-1416_55/config.json new file mode 100644 index 0000000..12ff59b --- /dev/null +++ b/models/PPO_R18_SF_MT_4096_20230704-1416_55/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R17_SF_MT_4096_20230704-1358_54", + "seed": 55, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R18_SF_MT_4096_20230704-1416_55/env_params.json b/models/PPO_R18_SF_MT_4096_20230704-1416_55/env_params.json new file mode 100644 index 0000000..e14ca2a --- /dev/null +++ b/models/PPO_R18_SF_MT_4096_20230704-1416_55/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R18_ST_MF_4096_20230613-1509_42/checkpoint_1852833792 
b/models/PPO_R18_ST_MF_4096_20230613-1509_42/checkpoint_1852833792 new file mode 100644 index 0000000..7b021e2 Binary files /dev/null and b/models/PPO_R18_ST_MF_4096_20230613-1509_42/checkpoint_1852833792 differ diff --git a/models/PPO_R18_ST_MF_4096_20230613-1509_42/config.json b/models/PPO_R18_ST_MF_4096_20230613-1509_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R18_ST_MF_4096_20230613-1509_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R18_ST_MF_4096_20230613-1509_42/env_params.json b/models/PPO_R18_ST_MF_4096_20230613-1509_42/env_params.json new file mode 100644 index 0000000..e14ca2a --- /dev/null +++ b/models/PPO_R18_ST_MF_4096_20230613-1509_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R18_ST_MT_4096_20230619-2016_55/checkpoint_698351616 b/models/PPO_R18_ST_MT_4096_20230619-2016_55/checkpoint_698351616 new file mode 100644 index 0000000..359fcb1 Binary files /dev/null and b/models/PPO_R18_ST_MT_4096_20230619-2016_55/checkpoint_698351616 differ diff --git a/models/PPO_R18_ST_MT_4096_20230619-2016_55/config.json b/models/PPO_R18_ST_MT_4096_20230619-2016_55/config.json new file mode 100644 index 0000000..ae266a8 --- /dev/null +++ 
b/models/PPO_R18_ST_MT_4096_20230619-2016_55/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R17_ST_MT_4096_20230619-1940_54", + "seed": 55, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R18_ST_MT_4096_20230619-2016_55/env_params.json b/models/PPO_R18_ST_MT_4096_20230619-2016_55/env_params.json new file mode 100644 index 0000000..e14ca2a --- /dev/null +++ b/models/PPO_R18_ST_MT_4096_20230619-2016_55/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R19_SF_MT_4096_20230704-1437_56/checkpoint_306184192 b/models/PPO_R19_SF_MT_4096_20230704-1437_56/checkpoint_306184192 new file mode 100644 index 0000000..14ca480 Binary files /dev/null and b/models/PPO_R19_SF_MT_4096_20230704-1437_56/checkpoint_306184192 differ diff --git a/models/PPO_R19_SF_MT_4096_20230704-1437_56/config.json b/models/PPO_R19_SF_MT_4096_20230704-1437_56/config.json new file mode 100644 index 0000000..5694468 --- /dev/null +++ b/models/PPO_R19_SF_MT_4096_20230704-1437_56/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": 
"logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R18_SF_MT_4096_20230704-1416_55", + "seed": 56, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R19_SF_MT_4096_20230704-1437_56/env_params.json b/models/PPO_R19_SF_MT_4096_20230704-1437_56/env_params.json new file mode 100644 index 0000000..6468b94 --- /dev/null +++ b/models/PPO_R19_SF_MT_4096_20230704-1437_56/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R19_ST_MF_4096_20230613-1509_42/checkpoint_1812987904 b/models/PPO_R19_ST_MF_4096_20230613-1509_42/checkpoint_1812987904 new file mode 100644 index 0000000..910a7d6 Binary files /dev/null and b/models/PPO_R19_ST_MF_4096_20230613-1509_42/checkpoint_1812987904 differ diff --git a/models/PPO_R19_ST_MF_4096_20230613-1509_42/config.json b/models/PPO_R19_ST_MF_4096_20230613-1509_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R19_ST_MF_4096_20230613-1509_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R19_ST_MF_4096_20230613-1509_42/env_params.json b/models/PPO_R19_ST_MF_4096_20230613-1509_42/env_params.json new file mode 100644 index 0000000..6468b94 --- /dev/null +++ 
b/models/PPO_R19_ST_MF_4096_20230613-1509_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R19_ST_MT_4096_20230619-2052_56/checkpoint_830472192 b/models/PPO_R19_ST_MT_4096_20230619-2052_56/checkpoint_830472192 new file mode 100644 index 0000000..72b0635 Binary files /dev/null and b/models/PPO_R19_ST_MT_4096_20230619-2052_56/checkpoint_830472192 differ diff --git a/models/PPO_R19_ST_MT_4096_20230619-2052_56/config.json b/models/PPO_R19_ST_MT_4096_20230619-2052_56/config.json new file mode 100644 index 0000000..4d5f46d --- /dev/null +++ b/models/PPO_R19_ST_MT_4096_20230619-2052_56/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R18_ST_MT_4096_20230619-2016_55", + "seed": 56, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R19_ST_MT_4096_20230619-2052_56/env_params.json b/models/PPO_R19_ST_MT_4096_20230619-2052_56/env_params.json new file mode 100644 index 0000000..6468b94 --- /dev/null +++ b/models/PPO_R19_ST_MT_4096_20230619-2052_56/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 1.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R20_SF_MT_4096_20230704-1457_57/checkpoint_272629760 
b/models/PPO_R20_SF_MT_4096_20230704-1457_57/checkpoint_272629760 new file mode 100644 index 0000000..e78fcac Binary files /dev/null and b/models/PPO_R20_SF_MT_4096_20230704-1457_57/checkpoint_272629760 differ diff --git a/models/PPO_R20_SF_MT_4096_20230704-1457_57/config.json b/models/PPO_R20_SF_MT_4096_20230704-1457_57/config.json new file mode 100644 index 0000000..7866a9e --- /dev/null +++ b/models/PPO_R20_SF_MT_4096_20230704-1457_57/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R19_SF_MT_4096_20230704-1437_56", + "seed": 57, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R20_SF_MT_4096_20230704-1457_57/env_params.json b/models/PPO_R20_SF_MT_4096_20230704-1457_57/env_params.json new file mode 100644 index 0000000..1cff71d --- /dev/null +++ b/models/PPO_R20_SF_MT_4096_20230704-1457_57/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R20_ST_MF_4096_20230613-1457_42/checkpoint_1773142016 b/models/PPO_R20_ST_MF_4096_20230613-1457_42/checkpoint_1773142016 new file mode 100644 index 0000000..7f0594f Binary files /dev/null and b/models/PPO_R20_ST_MF_4096_20230613-1457_42/checkpoint_1773142016 differ diff --git a/models/PPO_R20_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R20_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ 
b/models/PPO_R20_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R20_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R20_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..1cff71d --- /dev/null +++ b/models/PPO_R20_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R20_ST_MT_4096_20230619-2133_57/checkpoint_943718400 b/models/PPO_R20_ST_MT_4096_20230619-2133_57/checkpoint_943718400 new file mode 100644 index 0000000..9e3e1cb Binary files /dev/null and b/models/PPO_R20_ST_MT_4096_20230619-2133_57/checkpoint_943718400 differ diff --git a/models/PPO_R20_ST_MT_4096_20230619-2133_57/config.json b/models/PPO_R20_ST_MT_4096_20230619-2133_57/config.json new file mode 100644 index 0000000..041d305 --- /dev/null +++ b/models/PPO_R20_ST_MT_4096_20230619-2133_57/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": 
"new_rot_6/PPO_R19_ST_MT_4096_20230619-2052_56", + "seed": 57, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R20_ST_MT_4096_20230619-2133_57/env_params.json b/models/PPO_R20_ST_MT_4096_20230619-2133_57/env_params.json new file mode 100644 index 0000000..1cff71d --- /dev/null +++ b/models/PPO_R20_ST_MT_4096_20230619-2133_57/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R21_SF_MT_4096_20230704-1517_58/checkpoint_239075328 b/models/PPO_R21_SF_MT_4096_20230704-1517_58/checkpoint_239075328 new file mode 100644 index 0000000..a2d286b Binary files /dev/null and b/models/PPO_R21_SF_MT_4096_20230704-1517_58/checkpoint_239075328 differ diff --git a/models/PPO_R21_SF_MT_4096_20230704-1517_58/config.json b/models/PPO_R21_SF_MT_4096_20230704-1517_58/config.json new file mode 100644 index 0000000..6ae85bc --- /dev/null +++ b/models/PPO_R21_SF_MT_4096_20230704-1517_58/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R20_SF_MT_4096_20230704-1457_57", + "seed": 58, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R21_SF_MT_4096_20230704-1517_58/env_params.json b/models/PPO_R21_SF_MT_4096_20230704-1517_58/env_params.json new file mode 100644 index 0000000..3e9f5f8 --- /dev/null +++ 
b/models/PPO_R21_SF_MT_4096_20230704-1517_58/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R21_ST_MF_4096_20230613-1457_42/checkpoint_1892679680 b/models/PPO_R21_ST_MF_4096_20230613-1457_42/checkpoint_1892679680 new file mode 100644 index 0000000..7b89b47 Binary files /dev/null and b/models/PPO_R21_ST_MF_4096_20230613-1457_42/checkpoint_1892679680 differ diff --git a/models/PPO_R21_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R21_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R21_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R21_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R21_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..3e9f5f8 --- /dev/null +++ b/models/PPO_R21_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R21_ST_MT_4096_20230619-2218_58/checkpoint_905969664 
b/models/PPO_R21_ST_MT_4096_20230619-2218_58/checkpoint_905969664 new file mode 100644 index 0000000..e32172f Binary files /dev/null and b/models/PPO_R21_ST_MT_4096_20230619-2218_58/checkpoint_905969664 differ diff --git a/models/PPO_R21_ST_MT_4096_20230619-2218_58/config.json b/models/PPO_R21_ST_MT_4096_20230619-2218_58/config.json new file mode 100644 index 0000000..44f6481 --- /dev/null +++ b/models/PPO_R21_ST_MT_4096_20230619-2218_58/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R20_ST_MT_4096_20230619-2133_57", + "seed": 58, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R21_ST_MT_4096_20230619-2218_58/env_params.json b/models/PPO_R21_ST_MT_4096_20230619-2218_58/env_params.json new file mode 100644 index 0000000..3e9f5f8 --- /dev/null +++ b/models/PPO_R21_ST_MT_4096_20230619-2218_58/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R22_SF_MT_4096_20230704-1537_59/checkpoint_440401920 b/models/PPO_R22_SF_MT_4096_20230704-1537_59/checkpoint_440401920 new file mode 100644 index 0000000..ce4e254 Binary files /dev/null and b/models/PPO_R22_SF_MT_4096_20230704-1537_59/checkpoint_440401920 differ diff --git a/models/PPO_R22_SF_MT_4096_20230704-1537_59/config.json b/models/PPO_R22_SF_MT_4096_20230704-1537_59/config.json new file mode 100644 index 0000000..b405257 --- /dev/null +++ 
b/models/PPO_R22_SF_MT_4096_20230704-1537_59/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R21_SF_MT_4096_20230704-1517_58", + "seed": 59, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R22_SF_MT_4096_20230704-1537_59/env_params.json b/models/PPO_R22_SF_MT_4096_20230704-1537_59/env_params.json new file mode 100644 index 0000000..dc184d2 --- /dev/null +++ b/models/PPO_R22_SF_MT_4096_20230704-1537_59/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R22_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 b/models/PPO_R22_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 new file mode 100644 index 0000000..54ac56f Binary files /dev/null and b/models/PPO_R22_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 differ diff --git a/models/PPO_R22_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R22_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R22_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + 
"logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R22_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R22_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..dc184d2 --- /dev/null +++ b/models/PPO_R22_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R22_ST_MT_4096_20230619-2259_59/checkpoint_839909376 b/models/PPO_R22_ST_MT_4096_20230619-2259_59/checkpoint_839909376 new file mode 100644 index 0000000..e4aad59 Binary files /dev/null and b/models/PPO_R22_ST_MT_4096_20230619-2259_59/checkpoint_839909376 differ diff --git a/models/PPO_R22_ST_MT_4096_20230619-2259_59/config.json b/models/PPO_R22_ST_MT_4096_20230619-2259_59/config.json new file mode 100644 index 0000000..8245381 --- /dev/null +++ b/models/PPO_R22_ST_MT_4096_20230619-2259_59/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R21_ST_MT_4096_20230619-2218_58", + "seed": 59, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R22_ST_MT_4096_20230619-2259_59/env_params.json b/models/PPO_R22_ST_MT_4096_20230619-2259_59/env_params.json new file mode 100644 index 0000000..dc184d2 --- /dev/null +++ 
b/models/PPO_R22_ST_MT_4096_20230619-2259_59/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R23_SF_MT_4096_20230704-1555_60/checkpoint_301989888 b/models/PPO_R23_SF_MT_4096_20230704-1555_60/checkpoint_301989888 new file mode 100644 index 0000000..2b7a69e Binary files /dev/null and b/models/PPO_R23_SF_MT_4096_20230704-1555_60/checkpoint_301989888 differ diff --git a/models/PPO_R23_SF_MT_4096_20230704-1555_60/config.json b/models/PPO_R23_SF_MT_4096_20230704-1555_60/config.json new file mode 100644 index 0000000..70c0a0c --- /dev/null +++ b/models/PPO_R23_SF_MT_4096_20230704-1555_60/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R22_SF_MT_4096_20230704-1537_59", + "seed": 60, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R23_SF_MT_4096_20230704-1555_60/env_params.json b/models/PPO_R23_SF_MT_4096_20230704-1555_60/env_params.json new file mode 100644 index 0000000..ff17204 --- /dev/null +++ b/models/PPO_R23_SF_MT_4096_20230704-1555_60/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R23_ST_MF_4096_20230613-1457_42/checkpoint_1733296128 
b/models/PPO_R23_ST_MF_4096_20230613-1457_42/checkpoint_1733296128 new file mode 100644 index 0000000..069228d Binary files /dev/null and b/models/PPO_R23_ST_MF_4096_20230613-1457_42/checkpoint_1733296128 differ diff --git a/models/PPO_R23_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R23_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R23_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R23_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R23_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..ff17204 --- /dev/null +++ b/models/PPO_R23_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R23_ST_MT_4096_20230619-2340_60/checkpoint_821035008 b/models/PPO_R23_ST_MT_4096_20230619-2340_60/checkpoint_821035008 new file mode 100644 index 0000000..5bbce68 Binary files /dev/null and b/models/PPO_R23_ST_MT_4096_20230619-2340_60/checkpoint_821035008 differ diff --git a/models/PPO_R23_ST_MT_4096_20230619-2340_60/config.json b/models/PPO_R23_ST_MT_4096_20230619-2340_60/config.json new file mode 100644 index 0000000..9bb53c2 --- /dev/null +++ 
b/models/PPO_R23_ST_MT_4096_20230619-2340_60/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R22_ST_MT_4096_20230619-2259_59", + "seed": 60, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R23_ST_MT_4096_20230619-2340_60/env_params.json b/models/PPO_R23_ST_MT_4096_20230619-2340_60/env_params.json new file mode 100644 index 0000000..ff17204 --- /dev/null +++ b/models/PPO_R23_ST_MT_4096_20230619-2340_60/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R24_SF_MT_4096_20230704-1614_61/checkpoint_448790528 b/models/PPO_R24_SF_MT_4096_20230704-1614_61/checkpoint_448790528 new file mode 100644 index 0000000..d5003e7 Binary files /dev/null and b/models/PPO_R24_SF_MT_4096_20230704-1614_61/checkpoint_448790528 differ diff --git a/models/PPO_R24_SF_MT_4096_20230704-1614_61/config.json b/models/PPO_R24_SF_MT_4096_20230704-1614_61/config.json new file mode 100644 index 0000000..86251db --- /dev/null +++ b/models/PPO_R24_SF_MT_4096_20230704-1614_61/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": 
"logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R23_SF_MT_4096_20230704-1555_60", + "seed": 61, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R24_SF_MT_4096_20230704-1614_61/env_params.json b/models/PPO_R24_SF_MT_4096_20230704-1614_61/env_params.json new file mode 100644 index 0000000..50a33cc --- /dev/null +++ b/models/PPO_R24_SF_MT_4096_20230704-1614_61/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R24_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 b/models/PPO_R24_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 new file mode 100644 index 0000000..d7a60c8 Binary files /dev/null and b/models/PPO_R24_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 differ diff --git a/models/PPO_R24_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R24_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R24_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R24_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R24_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..50a33cc --- /dev/null +++ 
b/models/PPO_R24_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R24_ST_MT_4096_20230620-0021_61/checkpoint_736100352 b/models/PPO_R24_ST_MT_4096_20230620-0021_61/checkpoint_736100352 new file mode 100644 index 0000000..247ceb2 Binary files /dev/null and b/models/PPO_R24_ST_MT_4096_20230620-0021_61/checkpoint_736100352 differ diff --git a/models/PPO_R24_ST_MT_4096_20230620-0021_61/config.json b/models/PPO_R24_ST_MT_4096_20230620-0021_61/config.json new file mode 100644 index 0000000..a75f3f9 --- /dev/null +++ b/models/PPO_R24_ST_MT_4096_20230620-0021_61/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R23_ST_MT_4096_20230619-2340_60", + "seed": 61, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R24_ST_MT_4096_20230620-0021_61/env_params.json b/models/PPO_R24_ST_MT_4096_20230620-0021_61/env_params.json new file mode 100644 index 0000000..50a33cc --- /dev/null +++ b/models/PPO_R24_ST_MT_4096_20230620-0021_61/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R25_SF_MT_4096_20230704-1635_62/checkpoint_331350016 
b/models/PPO_R25_SF_MT_4096_20230704-1635_62/checkpoint_331350016 new file mode 100644 index 0000000..a4e82fa Binary files /dev/null and b/models/PPO_R25_SF_MT_4096_20230704-1635_62/checkpoint_331350016 differ diff --git a/models/PPO_R25_SF_MT_4096_20230704-1635_62/config.json b/models/PPO_R25_SF_MT_4096_20230704-1635_62/config.json new file mode 100644 index 0000000..dd6d678 --- /dev/null +++ b/models/PPO_R25_SF_MT_4096_20230704-1635_62/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R24_SF_MT_4096_20230704-1614_61", + "seed": 62, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R25_SF_MT_4096_20230704-1635_62/env_params.json b/models/PPO_R25_SF_MT_4096_20230704-1635_62/env_params.json new file mode 100644 index 0000000..f2877ba --- /dev/null +++ b/models/PPO_R25_SF_MT_4096_20230704-1635_62/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R25_ST_MF_4096_20230613-1457_42/checkpoint_1753219072 b/models/PPO_R25_ST_MF_4096_20230613-1457_42/checkpoint_1753219072 new file mode 100644 index 0000000..fc94334 Binary files /dev/null and b/models/PPO_R25_ST_MF_4096_20230613-1457_42/checkpoint_1753219072 differ diff --git a/models/PPO_R25_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R25_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ 
b/models/PPO_R25_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R25_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R25_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..f2877ba --- /dev/null +++ b/models/PPO_R25_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R25_ST_MT_4096_20230620-0101_62/checkpoint_726663168 b/models/PPO_R25_ST_MT_4096_20230620-0101_62/checkpoint_726663168 new file mode 100644 index 0000000..3d43a1f Binary files /dev/null and b/models/PPO_R25_ST_MT_4096_20230620-0101_62/checkpoint_726663168 differ diff --git a/models/PPO_R25_ST_MT_4096_20230620-0101_62/config.json b/models/PPO_R25_ST_MT_4096_20230620-0101_62/config.json new file mode 100644 index 0000000..550fcd8 --- /dev/null +++ b/models/PPO_R25_ST_MT_4096_20230620-0101_62/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": 
"new_rot_6/PPO_R24_ST_MT_4096_20230620-0021_61", + "seed": 62, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R25_ST_MT_4096_20230620-0101_62/env_params.json b/models/PPO_R25_ST_MT_4096_20230620-0101_62/env_params.json new file mode 100644 index 0000000..f2877ba --- /dev/null +++ b/models/PPO_R25_ST_MT_4096_20230620-0101_62/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.5, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R26_SF_MT_4096_20230704-1653_63/checkpoint_222298112 b/models/PPO_R26_SF_MT_4096_20230704-1653_63/checkpoint_222298112 new file mode 100644 index 0000000..5b4b3f3 Binary files /dev/null and b/models/PPO_R26_SF_MT_4096_20230704-1653_63/checkpoint_222298112 differ diff --git a/models/PPO_R26_SF_MT_4096_20230704-1653_63/config.json b/models/PPO_R26_SF_MT_4096_20230704-1653_63/config.json new file mode 100644 index 0000000..f30206f --- /dev/null +++ b/models/PPO_R26_SF_MT_4096_20230704-1653_63/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R25_SF_MT_4096_20230704-1635_62", + "seed": 63, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R26_SF_MT_4096_20230704-1653_63/env_params.json b/models/PPO_R26_SF_MT_4096_20230704-1653_63/env_params.json new file mode 100644 index 0000000..dcdea0a --- /dev/null +++ 
b/models/PPO_R26_SF_MT_4096_20230704-1653_63/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R26_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 b/models/PPO_R26_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 new file mode 100644 index 0000000..550eb90 Binary files /dev/null and b/models/PPO_R26_ST_MF_4096_20230613-1457_42/checkpoint_1972371456 differ diff --git a/models/PPO_R26_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R26_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R26_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R26_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R26_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..dcdea0a --- /dev/null +++ b/models/PPO_R26_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R26_ST_MT_4096_20230620-0137_63/checkpoint_839909376 
b/models/PPO_R26_ST_MT_4096_20230620-0137_63/checkpoint_839909376 new file mode 100644 index 0000000..ebba01e Binary files /dev/null and b/models/PPO_R26_ST_MT_4096_20230620-0137_63/checkpoint_839909376 differ diff --git a/models/PPO_R26_ST_MT_4096_20230620-0137_63/config.json b/models/PPO_R26_ST_MT_4096_20230620-0137_63/config.json new file mode 100644 index 0000000..146cc7e --- /dev/null +++ b/models/PPO_R26_ST_MT_4096_20230620-0137_63/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R25_ST_MT_4096_20230620-0101_62", + "seed": 63, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R26_ST_MT_4096_20230620-0137_63/env_params.json b/models/PPO_R26_ST_MT_4096_20230620-0137_63/env_params.json new file mode 100644 index 0000000..dcdea0a --- /dev/null +++ b/models/PPO_R26_ST_MT_4096_20230620-0137_63/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.6, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R27_SF_MT_4096_20230704-1714_64/checkpoint_239075328 b/models/PPO_R27_SF_MT_4096_20230704-1714_64/checkpoint_239075328 new file mode 100644 index 0000000..842b08c Binary files /dev/null and b/models/PPO_R27_SF_MT_4096_20230704-1714_64/checkpoint_239075328 differ diff --git a/models/PPO_R27_SF_MT_4096_20230704-1714_64/config.json b/models/PPO_R27_SF_MT_4096_20230704-1714_64/config.json new file mode 100644 index 0000000..587b23e --- /dev/null +++ 
b/models/PPO_R27_SF_MT_4096_20230704-1714_64/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R26_SF_MT_4096_20230704-1653_63", + "seed": 64, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R27_SF_MT_4096_20230704-1714_64/env_params.json b/models/PPO_R27_SF_MT_4096_20230704-1714_64/env_params.json new file mode 100644 index 0000000..55d0312 --- /dev/null +++ b/models/PPO_R27_SF_MT_4096_20230704-1714_64/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R27_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 b/models/PPO_R27_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 new file mode 100644 index 0000000..59ca552 Binary files /dev/null and b/models/PPO_R27_ST_MF_4096_20230613-1457_42/checkpoint_1952448512 differ diff --git a/models/PPO_R27_ST_MF_4096_20230613-1457_42/config.json b/models/PPO_R27_ST_MF_4096_20230613-1457_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R27_ST_MF_4096_20230613-1457_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + 
"logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R27_ST_MF_4096_20230613-1457_42/env_params.json b/models/PPO_R27_ST_MF_4096_20230613-1457_42/env_params.json new file mode 100644 index 0000000..55d0312 --- /dev/null +++ b/models/PPO_R27_ST_MF_4096_20230613-1457_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R27_ST_MT_4096_20230620-0214_64/checkpoint_830472192 b/models/PPO_R27_ST_MT_4096_20230620-0214_64/checkpoint_830472192 new file mode 100644 index 0000000..69672e2 Binary files /dev/null and b/models/PPO_R27_ST_MT_4096_20230620-0214_64/checkpoint_830472192 differ diff --git a/models/PPO_R27_ST_MT_4096_20230620-0214_64/config.json b/models/PPO_R27_ST_MT_4096_20230620-0214_64/config.json new file mode 100644 index 0000000..978f859 --- /dev/null +++ b/models/PPO_R27_ST_MT_4096_20230620-0214_64/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R26_ST_MT_4096_20230620-0137_63", + "seed": 64, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R27_ST_MT_4096_20230620-0214_64/env_params.json b/models/PPO_R27_ST_MT_4096_20230620-0214_64/env_params.json new file mode 100644 index 0000000..55d0312 --- /dev/null +++ 
b/models/PPO_R27_ST_MT_4096_20230620-0214_64/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.7, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R28_SF_MT_4096_20230704-1732_65/checkpoint_356515840 b/models/PPO_R28_SF_MT_4096_20230704-1732_65/checkpoint_356515840 new file mode 100644 index 0000000..627d279 Binary files /dev/null and b/models/PPO_R28_SF_MT_4096_20230704-1732_65/checkpoint_356515840 differ diff --git a/models/PPO_R28_SF_MT_4096_20230704-1732_65/config.json b/models/PPO_R28_SF_MT_4096_20230704-1732_65/config.json new file mode 100644 index 0000000..0f52edc --- /dev/null +++ b/models/PPO_R28_SF_MT_4096_20230704-1732_65/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R27_SF_MT_4096_20230704-1714_64", + "seed": 65, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R28_SF_MT_4096_20230704-1732_65/env_params.json b/models/PPO_R28_SF_MT_4096_20230704-1732_65/env_params.json new file mode 100644 index 0000000..37dda37 --- /dev/null +++ b/models/PPO_R28_SF_MT_4096_20230704-1732_65/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R28_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 
b/models/PPO_R28_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 new file mode 100644 index 0000000..a876b58 Binary files /dev/null and b/models/PPO_R28_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 differ diff --git a/models/PPO_R28_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R28_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R28_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R28_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R28_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..37dda37 --- /dev/null +++ b/models/PPO_R28_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R28_ST_MT_4096_20230620-0253_65/checkpoint_905969664 b/models/PPO_R28_ST_MT_4096_20230620-0253_65/checkpoint_905969664 new file mode 100644 index 0000000..f49baa5 Binary files /dev/null and b/models/PPO_R28_ST_MT_4096_20230620-0253_65/checkpoint_905969664 differ diff --git a/models/PPO_R28_ST_MT_4096_20230620-0253_65/config.json b/models/PPO_R28_ST_MT_4096_20230620-0253_65/config.json new file mode 100644 index 0000000..aa5a818 --- /dev/null +++ 
b/models/PPO_R28_ST_MT_4096_20230620-0253_65/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R27_ST_MT_4096_20230620-0214_64", + "seed": 65, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R28_ST_MT_4096_20230620-0253_65/env_params.json b/models/PPO_R28_ST_MT_4096_20230620-0253_65/env_params.json new file mode 100644 index 0000000..37dda37 --- /dev/null +++ b/models/PPO_R28_ST_MT_4096_20230620-0253_65/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.8, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R29_SF_MT_4096_20230704-1755_66/checkpoint_289406976 b/models/PPO_R29_SF_MT_4096_20230704-1755_66/checkpoint_289406976 new file mode 100644 index 0000000..dee82d7 Binary files /dev/null and b/models/PPO_R29_SF_MT_4096_20230704-1755_66/checkpoint_289406976 differ diff --git a/models/PPO_R29_SF_MT_4096_20230704-1755_66/config.json b/models/PPO_R29_SF_MT_4096_20230704-1755_66/config.json new file mode 100644 index 0000000..22e3256 --- /dev/null +++ b/models/PPO_R29_SF_MT_4096_20230704-1755_66/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": 
"logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R28_SF_MT_4096_20230704-1732_65", + "seed": 66, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R29_SF_MT_4096_20230704-1755_66/env_params.json b/models/PPO_R29_SF_MT_4096_20230704-1755_66/env_params.json new file mode 100644 index 0000000..2b5a5d7 --- /dev/null +++ b/models/PPO_R29_SF_MT_4096_20230704-1755_66/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R29_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 b/models/PPO_R29_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 new file mode 100644 index 0000000..8e97d3c Binary files /dev/null and b/models/PPO_R29_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 differ diff --git a/models/PPO_R29_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R29_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R29_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R29_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R29_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..2b5a5d7 --- /dev/null +++ 
b/models/PPO_R29_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R29_ST_MT_4096_20230620-0329_66/checkpoint_235929600 b/models/PPO_R29_ST_MT_4096_20230620-0329_66/checkpoint_235929600 new file mode 100644 index 0000000..0a4e224 Binary files /dev/null and b/models/PPO_R29_ST_MT_4096_20230620-0329_66/checkpoint_235929600 differ diff --git a/models/PPO_R29_ST_MT_4096_20230620-0329_66/config.json b/models/PPO_R29_ST_MT_4096_20230620-0329_66/config.json new file mode 100644 index 0000000..d76eb46 --- /dev/null +++ b/models/PPO_R29_ST_MT_4096_20230620-0329_66/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R28_ST_MT_4096_20230620-0253_65", + "seed": 66, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R29_ST_MT_4096_20230620-0329_66/env_params.json b/models/PPO_R29_ST_MT_4096_20230620-0329_66/env_params.json new file mode 100644 index 0000000..2b5a5d7 --- /dev/null +++ b/models/PPO_R29_ST_MT_4096_20230620-0329_66/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 2.9, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R30_SF_MT_4096_20230704-1815_67/checkpoint_289406976 
b/models/PPO_R30_SF_MT_4096_20230704-1815_67/checkpoint_289406976 new file mode 100644 index 0000000..5e28895 Binary files /dev/null and b/models/PPO_R30_SF_MT_4096_20230704-1815_67/checkpoint_289406976 differ diff --git a/models/PPO_R30_SF_MT_4096_20230704-1815_67/config.json b/models/PPO_R30_SF_MT_4096_20230704-1815_67/config.json new file mode 100644 index 0000000..e1e128c --- /dev/null +++ b/models/PPO_R30_SF_MT_4096_20230704-1815_67/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R29_SF_MT_4096_20230704-1755_66", + "seed": 67, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R30_SF_MT_4096_20230704-1815_67/env_params.json b/models/PPO_R30_SF_MT_4096_20230704-1815_67/env_params.json new file mode 100644 index 0000000..87bae78 --- /dev/null +++ b/models/PPO_R30_SF_MT_4096_20230704-1815_67/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R30_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 b/models/PPO_R30_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 new file mode 100644 index 0000000..396430a Binary files /dev/null and b/models/PPO_R30_ST_MF_4096_20230613-1444_42/checkpoint_1912602624 differ diff --git a/models/PPO_R30_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R30_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ 
b/models/PPO_R30_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R30_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R30_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..87bae78 --- /dev/null +++ b/models/PPO_R30_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R30_ST_MT_4096_20230620-0409_67/checkpoint_434110464 b/models/PPO_R30_ST_MT_4096_20230620-0409_67/checkpoint_434110464 new file mode 100644 index 0000000..a6059f8 Binary files /dev/null and b/models/PPO_R30_ST_MT_4096_20230620-0409_67/checkpoint_434110464 differ diff --git a/models/PPO_R30_ST_MT_4096_20230620-0409_67/config.json b/models/PPO_R30_ST_MT_4096_20230620-0409_67/config.json new file mode 100644 index 0000000..0f5913a --- /dev/null +++ b/models/PPO_R30_ST_MT_4096_20230620-0409_67/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": 
"new_rot_6/PPO_R29_ST_MT_4096_20230620-0329_66", + "seed": 67, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R30_ST_MT_4096_20230620-0409_67/env_params.json b/models/PPO_R30_ST_MT_4096_20230620-0409_67/env_params.json new file mode 100644 index 0000000..87bae78 --- /dev/null +++ b/models/PPO_R30_ST_MT_4096_20230620-0409_67/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.0, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R31_SF_MT_4096_20230704-1836_68/checkpoint_331350016 b/models/PPO_R31_SF_MT_4096_20230704-1836_68/checkpoint_331350016 new file mode 100644 index 0000000..2666fc6 Binary files /dev/null and b/models/PPO_R31_SF_MT_4096_20230704-1836_68/checkpoint_331350016 differ diff --git a/models/PPO_R31_SF_MT_4096_20230704-1836_68/config.json b/models/PPO_R31_SF_MT_4096_20230704-1836_68/config.json new file mode 100644 index 0000000..fcfb391 --- /dev/null +++ b/models/PPO_R31_SF_MT_4096_20230704-1836_68/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R30_SF_MT_4096_20230704-1815_67", + "seed": 68, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R31_SF_MT_4096_20230704-1836_68/env_params.json b/models/PPO_R31_SF_MT_4096_20230704-1836_68/env_params.json new file mode 100644 index 0000000..704bd05 --- /dev/null +++ 
b/models/PPO_R31_SF_MT_4096_20230704-1836_68/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R31_ST_MF_4096_20230613-1444_42/checkpoint_1972371456 b/models/PPO_R31_ST_MF_4096_20230613-1444_42/checkpoint_1972371456 new file mode 100644 index 0000000..219588f Binary files /dev/null and b/models/PPO_R31_ST_MF_4096_20230613-1444_42/checkpoint_1972371456 differ diff --git a/models/PPO_R31_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R31_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R31_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R31_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R31_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..704bd05 --- /dev/null +++ b/models/PPO_R31_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R31_ST_MT_4096_20230620-0451_68/checkpoint_283115520 
b/models/PPO_R31_ST_MT_4096_20230620-0451_68/checkpoint_283115520 new file mode 100644 index 0000000..929e498 Binary files /dev/null and b/models/PPO_R31_ST_MT_4096_20230620-0451_68/checkpoint_283115520 differ diff --git a/models/PPO_R31_ST_MT_4096_20230620-0451_68/config.json b/models/PPO_R31_ST_MT_4096_20230620-0451_68/config.json new file mode 100644 index 0000000..7967525 --- /dev/null +++ b/models/PPO_R31_ST_MT_4096_20230620-0451_68/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R30_ST_MT_4096_20230620-0409_67", + "seed": 68, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R31_ST_MT_4096_20230620-0451_68/env_params.json b/models/PPO_R31_ST_MT_4096_20230620-0451_68/env_params.json new file mode 100644 index 0000000..704bd05 --- /dev/null +++ b/models/PPO_R31_ST_MT_4096_20230620-0451_68/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.1, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R32_SF_MT_4096_20230704-1859_69/checkpoint_234881024 b/models/PPO_R32_SF_MT_4096_20230704-1859_69/checkpoint_234881024 new file mode 100644 index 0000000..1a539dc Binary files /dev/null and b/models/PPO_R32_SF_MT_4096_20230704-1859_69/checkpoint_234881024 differ diff --git a/models/PPO_R32_SF_MT_4096_20230704-1859_69/config.json b/models/PPO_R32_SF_MT_4096_20230704-1859_69/config.json new file mode 100644 index 0000000..b7df993 --- /dev/null +++ 
b/models/PPO_R32_SF_MT_4096_20230704-1859_69/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R31_SF_MT_4096_20230704-1836_68", + "seed": 69, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R32_SF_MT_4096_20230704-1859_69/env_params.json b/models/PPO_R32_SF_MT_4096_20230704-1859_69/env_params.json new file mode 100644 index 0000000..ecb67a4 --- /dev/null +++ b/models/PPO_R32_SF_MT_4096_20230704-1859_69/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R32_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 b/models/PPO_R32_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 new file mode 100644 index 0000000..52e6851 Binary files /dev/null and b/models/PPO_R32_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 differ diff --git a/models/PPO_R32_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R32_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R32_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + 
"logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R32_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R32_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..ecb67a4 --- /dev/null +++ b/models/PPO_R32_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R32_ST_MT_4096_20230620-0529_69/checkpoint_235929600 b/models/PPO_R32_ST_MT_4096_20230620-0529_69/checkpoint_235929600 new file mode 100644 index 0000000..996a7aa Binary files /dev/null and b/models/PPO_R32_ST_MT_4096_20230620-0529_69/checkpoint_235929600 differ diff --git a/models/PPO_R32_ST_MT_4096_20230620-0529_69/config.json b/models/PPO_R32_ST_MT_4096_20230620-0529_69/config.json new file mode 100644 index 0000000..1f396af --- /dev/null +++ b/models/PPO_R32_ST_MT_4096_20230620-0529_69/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R31_ST_MT_4096_20230620-0451_68", + "seed": 69, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R32_ST_MT_4096_20230620-0529_69/env_params.json b/models/PPO_R32_ST_MT_4096_20230620-0529_69/env_params.json new file mode 100644 index 0000000..ecb67a4 --- /dev/null +++ 
b/models/PPO_R32_ST_MT_4096_20230620-0529_69/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.2, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R33_SF_MT_4096_20230704-1918_70/checkpoint_352321536 b/models/PPO_R33_SF_MT_4096_20230704-1918_70/checkpoint_352321536 new file mode 100644 index 0000000..6ba81f0 Binary files /dev/null and b/models/PPO_R33_SF_MT_4096_20230704-1918_70/checkpoint_352321536 differ diff --git a/models/PPO_R33_SF_MT_4096_20230704-1918_70/config.json b/models/PPO_R33_SF_MT_4096_20230704-1918_70/config.json new file mode 100644 index 0000000..390016d --- /dev/null +++ b/models/PPO_R33_SF_MT_4096_20230704-1918_70/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R32_SF_MT_4096_20230704-1859_69", + "seed": 70, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R33_SF_MT_4096_20230704-1918_70/env_params.json b/models/PPO_R33_SF_MT_4096_20230704-1918_70/env_params.json new file mode 100644 index 0000000..7d00f6d --- /dev/null +++ b/models/PPO_R33_SF_MT_4096_20230704-1918_70/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R33_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 
b/models/PPO_R33_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 new file mode 100644 index 0000000..6993b97 Binary files /dev/null and b/models/PPO_R33_ST_MF_4096_20230613-1444_42/checkpoint_1952448512 differ diff --git a/models/PPO_R33_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R33_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R33_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R33_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R33_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..7d00f6d --- /dev/null +++ b/models/PPO_R33_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R33_ST_MT_4096_20230620-0606_70/checkpoint_226492416 b/models/PPO_R33_ST_MT_4096_20230620-0606_70/checkpoint_226492416 new file mode 100644 index 0000000..826dd89 Binary files /dev/null and b/models/PPO_R33_ST_MT_4096_20230620-0606_70/checkpoint_226492416 differ diff --git a/models/PPO_R33_ST_MT_4096_20230620-0606_70/config.json b/models/PPO_R33_ST_MT_4096_20230620-0606_70/config.json new file mode 100644 index 0000000..010eb5f --- /dev/null +++ 
b/models/PPO_R33_ST_MT_4096_20230620-0606_70/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R32_ST_MT_4096_20230620-0529_69", + "seed": 70, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R33_ST_MT_4096_20230620-0606_70/env_params.json b/models/PPO_R33_ST_MT_4096_20230620-0606_70/env_params.json new file mode 100644 index 0000000..7d00f6d --- /dev/null +++ b/models/PPO_R33_ST_MT_4096_20230620-0606_70/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.3, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R34_SF_MT_4096_20230704-1936_71/checkpoint_306184192 b/models/PPO_R34_SF_MT_4096_20230704-1936_71/checkpoint_306184192 new file mode 100644 index 0000000..586b858 Binary files /dev/null and b/models/PPO_R34_SF_MT_4096_20230704-1936_71/checkpoint_306184192 differ diff --git a/models/PPO_R34_SF_MT_4096_20230704-1936_71/config.json b/models/PPO_R34_SF_MT_4096_20230704-1936_71/config.json new file mode 100644 index 0000000..3fd7463 --- /dev/null +++ b/models/PPO_R34_SF_MT_4096_20230704-1936_71/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 500000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": 
"logs_2023_07_04", + "load": "logs_2023_07_04/PPO_R33_SF_MT_4096_20230704-1918_70", + "seed": 71, + "hidden_cells": 25, + "spatial": false, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R34_SF_MT_4096_20230704-1936_71/env_params.json b/models/PPO_R34_SF_MT_4096_20230704-1936_71/env_params.json new file mode 100644 index 0000000..9dde15f --- /dev/null +++ b/models/PPO_R34_SF_MT_4096_20230704-1936_71/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R34_ST_MF_4096_20230613-1444_42/checkpoint_1773142016 b/models/PPO_R34_ST_MF_4096_20230613-1444_42/checkpoint_1773142016 new file mode 100644 index 0000000..ab04bd4 Binary files /dev/null and b/models/PPO_R34_ST_MF_4096_20230613-1444_42/checkpoint_1773142016 differ diff --git a/models/PPO_R34_ST_MF_4096_20230613-1444_42/config.json b/models/PPO_R34_ST_MF_4096_20230613-1444_42/config.json new file mode 100644 index 0000000..714377c --- /dev/null +++ b/models/PPO_R34_ST_MF_4096_20230613-1444_42/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 2000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_2", + "load": "", + "seed": 42, + "hidden_cells": 25, + "spatial": true, + "memory": false, + "recurrent": false +} \ No newline at end of file diff --git a/models/PPO_R34_ST_MF_4096_20230613-1444_42/env_params.json b/models/PPO_R34_ST_MF_4096_20230613-1444_42/env_params.json new file mode 100644 index 0000000..9dde15f --- /dev/null +++ 
b/models/PPO_R34_ST_MF_4096_20230613-1444_42/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/models/PPO_R34_ST_MT_4096_20230620-0643_71/checkpoint_226492416 b/models/PPO_R34_ST_MT_4096_20230620-0643_71/checkpoint_226492416 new file mode 100644 index 0000000..081cd43 Binary files /dev/null and b/models/PPO_R34_ST_MT_4096_20230620-0643_71/checkpoint_226492416 differ diff --git a/models/PPO_R34_ST_MT_4096_20230620-0643_71/config.json b/models/PPO_R34_ST_MT_4096_20230620-0643_71/config.json new file mode 100644 index 0000000..d4ac322 --- /dev/null +++ b/models/PPO_R34_ST_MT_4096_20230620-0643_71/config.json @@ -0,0 +1,23 @@ +{ + "n_train_envs": 4096, + "total_steps": 1000000000, + "n_steps": 256, + "max_grad_norm": 0.5, + "eval_interval": 1000000, + "n_eval_envs": 4096, + "learning_rate": 0.0003, + "n_epochs": 8, + "n_minibatch": 8, + "clip_eps": 0.2, + "entropy_coeff": 0.01, + "critic_coeff": 0.5, + "discount": 1.0, + "gae_lambda": 1.0, + "logdir": "./new_rot_6", + "load": "new_rot_6/PPO_R33_ST_MT_4096_20230620-0606_70", + "seed": 71, + "hidden_cells": 25, + "spatial": true, + "memory": true, + "recurrent": true +} \ No newline at end of file diff --git a/models/PPO_R34_ST_MT_4096_20230620-0643_71/env_params.json b/models/PPO_R34_ST_MT_4096_20230620-0643_71/env_params.json new file mode 100644 index 0000000..9dde15f --- /dev/null +++ b/models/PPO_R34_ST_MT_4096_20230620-0643_71/env_params.json @@ -0,0 +1,12 @@ +{ + "max_steps_in_episode": 256, + "radius": 3.4, + "n_receptors": 5, + "speed": 5, + "rotational_diffusion": 0.025, + "decay_rate": 0.01, + "diffusion_coeff": 100, + "dt": 0.1, + "C_min": 4, + "C_max": 5 +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..61cfde0 --- 
/dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "chemoxrl" +description = "Code for the Reinforcement learning on chemotaxis strategies paper." +version = "0.1.0" +authors = [ + { name = "Albert Alonso", email = "albert.alonso@nbi.ku.dk" }, + { name = "Julius B. Kirkegaard", email = "julius.kirkegaard@nbi.ku.dk" } +] +license = {file = "LICENSE"} +readme = "README.md" + + +[tool.setuptools] +packages = ["chemoxrl", "chemoxrl_aux"] + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7e8434b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +flax==0.6.11 +jax==0.4.11 +jaxlib==0.4.11+cuda12.cudnn88 +matplotlib==3.7.1 +numpy==1.25.0 +optax==0.1.5 +orbax-checkpoint==0.2.6 +packaging==23.1 +pick==2.2.0 +rich==13.4.2 +scipy==1.10.1 +tqdm==4.65.0 +typing_extensions==4.6.3 diff --git a/train.py b/train.py new file mode 100644 index 0000000..73c298c --- /dev/null +++ b/train.py @@ -0,0 +1,94 @@ +import argparse +from collections import namedtuple +import json +import pathlib +import time +import dataclasses + +from flax.metrics import tensorboard +import jax + +from chemoxrl import rppo +from chemoxrl.cell import EnvParams + + +parser = argparse.ArgumentParser(description="Script to run experiment.") + +# ExperimentConfig parameters +parser.add_argument("--n_train_envs", type=int, default=4096) +parser.add_argument("--total_steps", type=int, default=int(1e9)) +parser.add_argument("--n_steps", type=int, default=256) +parser.add_argument("--max_grad_norm", type=float, default=0.5) +parser.add_argument("--n_eval_envs", type=int, default=4096) +parser.add_argument("--learning_rate", type=float, default=3e-4) +parser.add_argument("--n_epochs", type=int, default=8) +parser.add_argument("--n_minibatch", type=int, default=8) +parser.add_argument("--clip_eps", type=float, default=0.2) +parser.add_argument("--entropy_coeff", type=float, 
default=0.01) +parser.add_argument("--critic_coeff", type=float, default=0.5) +parser.add_argument("--discount", type=float, default=1.0) +parser.add_argument("--gae_lambda", type=float, default=1.0) +parser.add_argument("--logdir", type=str, default="./logs/") +parser.add_argument("--load", type=str, default="") +parser.add_argument("--seed", type=int, default=42) +parser.add_argument("--hidden_cells", type=int, default=25) +parser.add_argument("--memory", type=bool, default=False, action=argparse.BooleanOptionalAction) +parser.add_argument("--spatial", type=bool, default=True, action=argparse.BooleanOptionalAction) +parser.add_argument("--recurrent", default=True, action=argparse.BooleanOptionalAction) + +# EnvParams parameters +parser.add_argument("--max_steps_in_episode", type=int, default=256) +parser.add_argument("--radius", type=float, default=1.0) +parser.add_argument("--n_receptors", type=int, default=5) +parser.add_argument("--speed", type=float, default=5) +parser.add_argument("--rotational_diffusion", type=float, default=0.025) +parser.add_argument("--decay_rate", type=float, default=0.01) +parser.add_argument("--diffusion_coeff", type=float, default=100) +parser.add_argument("--dt", type=float, default=0.1) +parser.add_argument("--C_min", type=int, default=4) +parser.add_argument("--C_max", type=int, default=5) + + +# Training flags +parser.add_argument("--noise", type=bool, default=True, action=argparse.BooleanOptionalAction) +args = parser.parse_args() + +env_args = {k: v for k, v in vars(args).items() if k in EnvParams.__dict__} +if not args.noise: + env_args["C_max"] = 9 + env_args["C_min"] = 8 +env_params = EnvParams(**env_args) +print(env_params) + +ec_args = {k: v for k, v in vars(args).items() if k in rppo.ExperimentConfig.__dict__} +if args.memory and not args.recurrent: + ec_args['hidden_cells'] = 3 * (args.n_receptors+1) +config = rppo.ExperimentConfig(**ec_args) +print(config._asdict()) + +Checkpointer = namedtuple("Checkpointer", ["dir", 
"writer"]) + +def init_logger(config, params): + # Create the directory with the logs and checkpoints. + # Define the experiment name. + cell_type = f"S{'T' if config.spatial else 'F'}_M{'T' if config.memory else 'F'}" + radius = f"PPO_R{int(params.radius*10):02d}".replace(".", "-") + timestamp = time.strftime("%Y%m%d-%H%M") + experiment_name = f"{radius}_{cell_type}_{config.n_train_envs}_{timestamp}_{config.seed}" + model_dir = pathlib.Path(config.logdir) / experiment_name + + # Initialise the tensorboard logger and save the config files. + summary_writer = tensorboard.SummaryWriter(log_dir=model_dir) + with open(model_dir / "env_params.json", "w") as f: + f.write(json.dumps(dataclasses.asdict(params), indent=4)) + with open(model_dir / "config.json", "w") as f: + f.write(json.dumps(config._asdict(), indent=4)) + print(f"Writting logs to: {model_dir}") + return Checkpointer(model_dir, summary_writer) + +# NOTE: Apparently jax needs to run before tensorboard.SummaryWriter, +# otherwise there is some libcudnn error. +rng = jax.random.PRNGKey(config.seed) +ckpt_manager = init_logger(config, env_params) + +rppo.train_loop(rng, config, env_params, ckpt_manager)