Add local search sampler #208
Changes from 9 commits
@@ -5,9 +5,8 @@
 if TYPE_CHECKING:
     from gfn.actions import Actions
     from gfn.env import Env
-    from gfn.states import States, DiscreteStates
+    from gfn.states import States

 import numpy as np
 import torch

 from gfn.containers.base import Container
@@ -101,7 +100,7 @@ def __init__(
             and self._log_rewards.dtype == torch.float
         )

-        if log_probs is not None:
+        if log_probs is not None and log_probs.shape != (0, 0):
             assert (
                 log_probs.shape == (self.max_length, self.n_trajectories)
                 and log_probs.dtype == torch.float
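A plausible reading of the new guard (an assumption on my part; the placeholder convention is not shown in this diff): an empty (0, 0) float tensor stands in for "no log-probs recorded", and the shape/dtype assertion must not fire for that sentinel:

    import torch

    # Hypothetical sentinel assumed by the guard above: an empty (0, 0) tensor
    # means "no log-probs", so the (max_length, n_trajectories) check is skipped.
    log_probs = torch.full(size=(0, 0), fill_value=0.0, dtype=torch.float)
    assert log_probs is not None and log_probs.shape == (0, 0)  # assertion branch not taken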
@@ -122,15 +121,15 @@ def __repr__(self) -> str:
         for traj in states[:10]:
             one_traj_repr = []
             for step in traj:
-                one_traj_repr.append(str(step.numpy()))
+                one_traj_repr.append(str(step.cpu().numpy()))
                 if step.equal(self.env.s0 if self.is_backward else self.env.sf):
                     break
             trajectories_representation += "-> ".join(one_traj_repr) + "\n"
         return (
             f"Trajectories(n_trajectories={self.n_trajectories}, max_length={self.max_length}, First 10 trajectories:"
             + f"states=\n{trajectories_representation}"
             # + f"actions=\n{self.actions.tensor.squeeze().transpose(0, 1)[:10].numpy()}, "
-            + f"when_is_done={self.when_is_done[:10].numpy()})"
+            + f"when_is_done={self.when_is_done[:10].cpu().numpy()})"
         )

     @property
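The two `.cpu()` additions fix `__repr__` for GPU-resident trajectories: PyTorch raises a TypeError when converting a CUDA tensor directly to NumPy. A minimal reproduction (assuming a CUDA device is available):

    import torch

    if torch.cuda.is_available():
        t = torch.zeros(3, device="cuda")
        # t.numpy()  # TypeError: can't convert cuda:0 device type tensor to numpy
        arr = t.cpu().numpy()  # copy to host memory first, then convert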
@@ -428,6 +427,68 @@ def to_non_initial_intermediary_and_terminating_states(
             conditioning,
         )

+    @staticmethod
+    def reverse_backward_trajectories(trajectories: Trajectories) -> Trajectories:
+        """Reverses a backward trajectory."""
+        # FIXME: This method is not compatible with continuous GFN.
+        assert trajectories.is_backward, "Trajectories must be backward."
+        new_actions = torch.full(
+            (
+                trajectories.max_length + 1,
+                len(trajectories),
+                *trajectories.actions.action_shape,
+            ),
+            -1,
+        )
+
+        # env.sf should never be None unless something went wrong during class
+        # instantiation.
+        if trajectories.env.sf is None:
+            raise AttributeError(
+                "Something went wrong during the instantiation of environment {}".format(
+                    trajectories.env
+                )
+            )
+
+        new_when_is_done = trajectories.when_is_done + 1
+        new_states = trajectories.env.sf.repeat(
+            new_when_is_done.max() + 1, len(trajectories), 1
+        )
+
+        # FIXME: Can we vectorize this?
+        # FIXME: Also, loop over batch or sequence?
+        for i in range(len(trajectories)):

Review comment: Can we flip the full trajectory tensor in one call, and then use indexing to resolve the padding instead? It should be much faster.
Reply: Of course, yes. I'll check and let you know if I need your help.
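A sketch of the reviewer's suggestion (mine, not from the PR; it assumes the `(max_length, batch, ...)` tensor layout used above and a `lengths` vector playing the role of `when_is_done`): build one index tensor and gather, instead of flipping each trajectory in a Python loop.

    import torch

    def flip_padded_prefix(x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
        """Reverse the first lengths[i] steps of column i of x; leave padding untouched."""
        max_len, batch = x.shape[0], x.shape[1]
        steps = torch.arange(max_len, device=x.device).unsqueeze(1).expand(max_len, batch)
        # Inside the prefix, read from the mirrored position; past it, read from itself.
        mirrored = lengths.unsqueeze(0) - 1 - steps
        idx = torch.where(steps < lengths.unsqueeze(0), mirrored, steps)
        while idx.dim() < x.dim():  # align index rank with x before gathering along dim 0
            idx = idx.unsqueeze(-1)
        return x.gather(0, idx.expand_as(x))

For instance, `flip_padded_prefix(trajectories.states.tensor, trajectories.when_is_done + 1)` would reverse every state sequence in one call; the exit-action writes would still need a separate index assignment.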
+            new_actions[trajectories.when_is_done[i], i] = (
+                trajectories.env.n_actions - 1
+            )

Review comment: The problem is here. Actions are not always integers.

+            new_actions[
+                : trajectories.when_is_done[i], i
+            ] = trajectories.actions.tensor[: trajectories.when_is_done[i], i].flip(0)
+
+            new_states[
+                : trajectories.when_is_done[i] + 1, i
+            ] = trajectories.states.tensor[: trajectories.when_is_done[i] + 1, i].flip(
+                0
+            )
+
+        trajectories_states = trajectories.env.states_from_tensor(new_states)
+        trajectories_actions = trajectories.env.actions_from_tensor(new_actions)
+
+        return Trajectories(
+            env=trajectories.env,
+            states=trajectories_states,
+            conditioning=trajectories.conditioning,
+            actions=trajectories_actions,
+            when_is_done=new_when_is_done,
+            is_backward=False,
+            log_rewards=trajectories.log_rewards,
+            log_probs=None,  # We can't simply pass trajectories.log_probs,
+            # since `log_probs` is assumed to hold the forward log-probabilities.
+            # FIXME: To resolve this, we could save log_pfs and log_pbs in the trajectories object.
+            estimator_outputs=None,  # Same as `log_probs`.
+        )


 def pad_dim0_to_target(a: torch.Tensor, target_dim0: int) -> torch.Tensor:
     """Pads tensor a along dim 0 to match target_dim0."""
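Following the "Actions are not always integers" comment, one defensive option (my sketch, not part of this PR) is to fail fast on non-discrete environments, since the method bakes in the integer exit action `n_actions - 1`:

    from gfn.env import DiscreteEnv

    # Hypothetical guard at the top of reverse_backward_trajectories:
    if not isinstance(trajectories.env, DiscreteEnv):
        raise NotImplementedError(
            "reverse_backward_trajectories assumes integer actions and the "
            "discrete exit action n_actions - 1; continuous envs are unsupported."
        )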
@@ -61,6 +61,8 @@ def __init__(
         self.dummy_action = dummy_action
         self.exit_action = exit_action

+        # Warning: don't use self.States or self.Actions to initialize an instance of the class.
+        # Use self.states_from_tensor or self.actions_from_tensor instead.
         self.States = self.make_states_class()
         self.Actions = self.make_actions_class()

Review comment: Who is this warning intended for?
Reply: Maybe us?? Regarding this, what about making them into private variables (e.g., …)?
Reply: It seems like we should not initialize a States object using …
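The warning's recommended pattern, as a sketch (assuming `env` is an instantiated Env subclass and the tensors match the environment's state and action shapes):

    # Preferred: let the env wrap raw tensors.
    states = env.states_from_tensor(state_tensor)
    actions = env.actions_from_tensor(action_tensor)
    # Discouraged per the warning: env.States(state_tensor), env.Actions(action_tensor)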
@@ -85,7 +87,9 @@ def states_from_tensor(self, tensor: torch.Tensor): | |
""" | ||
return self.States(tensor) | ||
|
||
def states_from_batch_shape(self, batch_shape: Tuple): | ||
def states_from_batch_shape( | ||
self, batch_shape: Tuple, random: bool = False, sink: bool = False | ||
): | ||
         """Returns a batch of s0 states with a given batch_shape.

         Args:
@@ -94,7 +98,7 @@ def states_from_batch_shape(self, batch_shape: Tuple):
         Returns:
             States: A batch of initial states.
         """
-        return self.States.from_batch_shape(batch_shape)
+        return self.States.from_batch_shape(batch_shape, random=random, sink=sink)

     def actions_from_tensor(self, tensor: torch.Tensor):
         """Wraps the supplied Tensor in an Actions instance.
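Usage of the extended helper, sketched under the assumption that `env` is a concrete Env instance and that `random`/`sink` select random states and sf states respectively, as in `States.from_batch_shape`:

    s0_batch = env.states_from_batch_shape((16,))                  # batch of s0 states
    rand_batch = env.states_from_batch_shape((16,), random=True)   # random states
    sink_batch = env.states_from_batch_shape((16,), sink=True)     # batch of sf states

This is what lets both `reset` implementations below delegate to the wrapper instead of calling `self.States.from_batch_shape` directly.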
@@ -218,7 +222,7 @@ def reset(
             batch_shape = (1,)
         if isinstance(batch_shape, int):
             batch_shape = (batch_shape,)
-        return self.States.from_batch_shape(
+        return self.states_from_batch_shape(
             batch_shape=batch_shape, random=random, sink=sink
         )
@@ -441,21 +445,21 @@ def reset(
             batch_shape = (1,)
         if isinstance(batch_shape, int):
             batch_shape = (batch_shape,)
-        states = self.States.from_batch_shape(
+        states = self.states_from_batch_shape(
             batch_shape=batch_shape, random=random, sink=sink
         )
         self.update_masks(states)

         return states

     @abstractmethod
-    def update_masks(self, states: type[States]) -> None:
+    def update_masks(self, states: States) -> None:
         """Updates the masks in States.

         Called automatically after each step for discrete environments.
         """

-    def make_states_class(self) -> type[States]:
+    def make_states_class(self) -> type[DiscreteStates]:
         env = self

         class DiscreteEnvStates(DiscreteStates):
Review comment: What's the major blocker here? Anyone know?
Reply: I'm not sure either... This was from here.
Reply: One guess is this: in lines 436-443, and also in lines 461-463, the code assumes that the action is an integer, which is not true for the continuous case, right?
Reply: The blocker: see my response to line 462 of this file in the PR.
Reply: this function will not work on non-discrete environments!
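To make the reviewers' point concrete, a toy contrast (hypothetical tensors, not from the PR): discrete actions are integer indices, so a reserved exit index exists; continuous actions are float vectors, so `n_actions - 1` has no analogue.

    import torch

    # Discrete: actions are integer indices; n_actions - 1 can encode "exit".
    discrete_actions = torch.full((5, 4), -1, dtype=torch.long)
    discrete_actions[2, :] = 10 - 1  # e.g., exit index for a hypothetical n_actions = 10

    # Continuous: actions are float vectors; there is no integer index to
    # reserve for "exit", so the same encoding is ill-defined.
    continuous_actions = torch.randn(5, 4, 2)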