From a85f78d8607e7e9613ba5e4619e06b7cc1a44fa3 Mon Sep 17 00:00:00 2001
From: Pariterre
Date: Mon, 13 Jan 2025 15:55:15 -0500
Subject: [PATCH] Finalized the torch example and added a test

---
 .github/workflows/run_tests_linux.yml       |  8 ++
 .gitmodules                                 |  3 +
 bioptim/examples/__main__.py                |  8 ++
 .../pytorch_ocp.py}                         | 60 +++++++--------
 .../temporary_non_working_example.py        | 40 ----------
 bioptim/interfaces/interface_utils.py       |  4 +-
 bioptim/models/torch/torch_model.py         | 74 +++++++++++++------
 external/l4casadi                           |  1 +
 tests/shard3/test_global_getting_started.py | 14 ++++
 9 files changed, 115 insertions(+), 97 deletions(-)
 rename bioptim/examples/{getting_started/custom_non_casadi_dynamics.py => deep_neural_network/pytorch_ocp.py} (86%)
 delete mode 100644 bioptim/examples/getting_started/temporary_non_working_example.py
 create mode 160000 external/l4casadi

diff --git a/.github/workflows/run_tests_linux.yml b/.github/workflows/run_tests_linux.yml
index 01b77774f..3876090e5 100644
--- a/.github/workflows/run_tests_linux.yml
+++ b/.github/workflows/run_tests_linux.yml
@@ -59,6 +59,14 @@ jobs:
           python -c "import bioptim"
         if: matrix.shard == 1
 
+      - name: Install PyTorch on Linux
+        run: |
+          pip install "torch>=2.0"
+          cd external/l4casadi
+          pip install . --no-build-isolation
+          cd ../..
+        if: matrix.shard == 3
+
       - name: Run tests with code coverage
         run: pytest -v --color=yes --cov-report term-missing --cov=bioptim --cov-report=xml:coverage.xml tests/shard${{ matrix.shard }}
         if: matrix.os == 'ubuntu-latest'
diff --git a/.gitmodules b/.gitmodules
index 3df878b8e..633e50b64 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "external/acados"]
 	path = external/acados
 	url = https://github.com/acados/acados.git
+[submodule "external/l4casadi"]
+	path = external/l4casadi
+	url = https://github.com/pariterre/l4casadi
diff --git a/bioptim/examples/__main__.py b/bioptim/examples/__main__.py
index c10f451a3..c4174b6ee 100644
--- a/bioptim/examples/__main__.py
+++ b/bioptim/examples/__main__.py
@@ -135,6 +135,14 @@
             ]
         ),
     ),
+    (
+        "deep_neural_network",
+        OrderedDict(
+            [
+                ("pytorch ocp", "pytorch_ocp.py"),
+            ]
+        ),
+    ),
 ]
 )
diff --git a/bioptim/examples/getting_started/custom_non_casadi_dynamics.py b/bioptim/examples/deep_neural_network/pytorch_ocp.py
similarity index 86%
rename from bioptim/examples/getting_started/custom_non_casadi_dynamics.py
rename to bioptim/examples/deep_neural_network/pytorch_ocp.py
index 0906879a2..e4a24cb75 100644
--- a/bioptim/examples/getting_started/custom_non_casadi_dynamics.py
+++ b/bioptim/examples/deep_neural_network/pytorch_ocp.py
@@ -1,16 +1,21 @@
 """
-TODO: Explain what is this example about
-TODO: All the documentation
-
-This example is similar to the getting_started/pendulum.py example, but the dynamics are computed using a non-casadi
-based model. This is useful when the dynamics are computed using a different library (e.g. TensorFlow, PyTorch, etc.)
+This example is similar to the getting_started/pendulum.py example, but the dynamics are computed using a deep neural
+network driven by PyTorch. For this example to work, we create a neural network model that is trained against the
+biorbd library to predict the acceleration of the pendulum. Obviously, one can replace the prediction model with any
+other PyTorch model.
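+
+As a quick illustration of the input/output contract the network must satisfy: it maps the stacked vector
+[q, qdot, tau] to the joint accelerations. A minimal sketch (the shapes follow this example's two-degree-of-freedom
+pendulum; `model` is assumed to be an already-constructed NeuralNetworkModel, defined below):
+```python
+import torch
+
+x = torch.rand(1, 6)  # q (2), qdot (2) and tau (2) stacked in a single input vector
+qddot = model(x)  # predicted joint accelerations, shape (1, 2)
+```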
+The TorchModel class is then used to wrap the model, through L4CasADi, so that it can be used in the bioptim
+framework. If one needs a more flexible way to define the dynamics, objective functions or constraints, they can
+start from that class and modify it to their needs.
+
+Extra information:
+All information regarding the installation, limitations, credits and known problems can be found in the header of the
+TorchModel class.
 """
 
 import os
 from typing import Self
 
 import biorbd
-from bioptim import OptimalControlProgram, DynamicsFcn, BoundsList, Dynamics
+from bioptim import OptimalControlProgram, DynamicsFcn, BoundsList, Dynamics, Objective, ObjectiveFcn, Solver
 from bioptim.models.torch.torch_model import TorchModel
 import numpy as np
 import torch
@@ -35,12 +40,7 @@ def early_stop(self, validation_loss):
 
 
 class NeuralNetworkModel(torch.nn.Module):
-    def __init__(
-        self,
-        layer_node_count: tuple[int],
-        dropout_probability: float,
-        use_batch_norm: bool,
-    ):
+    def __init__(self, layer_node_count: tuple[int], dropout_probability: float):
         super(NeuralNetworkModel, self).__init__()
         activations = torch.nn.GELU()
 
@@ -53,9 +53,6 @@ def __init__(
             layers.append(
                 torch.nn.Linear(first_and_hidden_layers_node_count[i], first_and_hidden_layers_node_count[i + 1])
             )
-            if use_batch_norm:
-                raise NotImplementedError("Batch normalization is not yet implemented")
-                layers.append(torch.nn.BatchNorm1d(first_and_hidden_layers_node_count[i + 1]))
             layers.append(activations)
             layers.append(torch.nn.Dropout(dropout_probability))
         layers.append(torch.nn.Linear(first_and_hidden_layers_node_count[-1], layer_node_count[-1]))
@@ -88,7 +85,7 @@ def get_torch_device() -> torch.device:
         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     def train_me(
-        self, training_data: list[torch.Tensor], validation_data: list[torch.Tensor], max_epochs: int = 5
+        self, training_data: list[torch.Tensor], validation_data: list[torch.Tensor], max_epochs: int = 100
     ) -> None:
         # More details about scheduler in documentation
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
@@ -121,15 +118,16 @@ def save_me(self, path: str) -> None:
             if not all(prob == dropout_probability[0] for prob in dropout_probability):
                 raise ValueError("Different dropout probabilities for different layers")
             dropout_probability = dropout_probability[0]
-
-        use_batch_norm = any(isinstance(model, torch.nn.BatchNorm1d) for model in self._forward_model)
+        else:
+            dropout_probability = dropout_probability[0]
 
         dico = {
             "layer_node_count": layer_node_count,
             "dropout_probability": dropout_probability,
-            "use_batch_norm": use_batch_norm,
             "state_dict": self.state_dict(),
         }
+        if not os.path.isdir(os.path.dirname(path)):
+            os.makedirs(os.path.dirname(path))
         torch.save(dico, path)
 
     @classmethod
@@ -138,7 +136,6 @@ def load_me(cls, path: str) -> Self:
         inputs = {
             "layer_node_count": data["layer_node_count"],
             "dropout_probability": data["dropout_probability"],
-            "use_batch_norm": data["use_batch_norm"],
         }
         model = NeuralNetworkModel(**inputs)
         model.load_state_dict(data["state_dict"])
@@ -196,14 +193,6 @@ def prepare_ocp(
     dynamics = Dynamics(DynamicsFcn.TORQUE_DRIVEN)
     torch_model = TorchModel(torch_model=model)
 
-    q = np.array([0, 0])
-    qdot = np.array([0, 0])
-    tau = np.array([0, 0])
-    qddot = torch_model.forward_dynamics()(q, qdot, tau, [], [])
-    biorbd_model = biorbd.Model("models/pendulum.bioMod")
-    qddot2 = biorbd_model.ForwardDynamics(q, qdot, tau).to_array()
-    print(qddot - qddot2)
-
     # Path bounds
     x_bounds = BoundsList()
     x_bounds["q"] = [-3.14 * 1.5] *
torch_model.nb_q, [3.14 * 1.5] * torch_model.nb_q @@ -217,6 +206,9 @@ def prepare_ocp( u_bounds["tau"] = [-100] * torch_model.nb_tau, [100] * torch_model.nb_tau u_bounds["tau"][1, :] = 0 # ...but remove the capability to actively rotate + # Add objective functions + objective_functions = Objective(ObjectiveFcn.Lagrange.MINIMIZE_CONTROL, key="tau") + return OptimalControlProgram( torch_model, dynamics, @@ -224,6 +216,7 @@ def prepare_ocp( final_time, x_bounds=x_bounds, u_bounds=u_bounds, + objective_functions=objective_functions, use_sx=False, ) @@ -239,6 +232,7 @@ def generate_dataset(biorbd_model: biorbd.Model, data_point_count: int) -> list[ ] ).squeeze() tau_ranges = np.array([-100, 100] * biorbd_model.nbGeneralizedTorque()).reshape(-1, 2) + tau_ranges[1, :] = 0 q = torch.rand(data_point_count, biorbd_model.nbQ()) * (q_ranges[:, 1] - q_ranges[:, 0]) + q_ranges[:, 0] qdot = ( @@ -266,12 +260,12 @@ def generate_dataset(biorbd_model: biorbd.Model, data_point_count: int) -> list[ def main(): # --- Prepare a predictive model --- # force_new_training = False - biorbd_model = biorbd.Model("models/pendulum.bioMod") + biorbd_model = biorbd.Model("../getting_started/models/pendulum.bioMod") training_data = generate_dataset(biorbd_model, data_point_count=30000) validation_data = generate_dataset(biorbd_model, data_point_count=3000) if force_new_training or not os.path.isfile("models/trained_pendulum_model.pt"): - model = NeuralNetworkModel(layer_node_count=(6, 512, 512, 2), dropout_probability=0.2, use_batch_norm=False) + model = NeuralNetworkModel(layer_node_count=(6, 8, 2), dropout_probability=0.2) model.train_me(training_data, validation_data, max_epochs=300) model.save_me("models/trained_pendulum_model.pt") else: @@ -280,11 +274,7 @@ def main(): ocp = prepare_ocp(model=model, final_time=1, n_shooting=40) # --- Solve the ocp --- # - sol = ocp.solve() - - # --- Show the results graph --- # - # sol.print_cost() - sol.graphs(show_bounds=True) + ocp.solve(Solver.IPOPT(show_online_optim=True)) if __name__ == "__main__": diff --git a/bioptim/examples/getting_started/temporary_non_working_example.py b/bioptim/examples/getting_started/temporary_non_working_example.py deleted file mode 100644 index 762ac8735..000000000 --- a/bioptim/examples/getting_started/temporary_non_working_example.py +++ /dev/null @@ -1,40 +0,0 @@ -from casadi import Opti, MX, Function -import l4casadi as l4c -import torch - - -class NeuralNetworkModel(torch.nn.Module): - def __init__(self, layer_node_count: tuple[int]): - super(NeuralNetworkModel, self).__init__() - layers = torch.nn.ModuleList() - layers.append(torch.nn.Linear(layer_node_count[0], layer_node_count[-1])) - self._forward_model = torch.nn.Sequential(*layers) - self.eval() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return self._forward_model(x) - - -def main(): - opti = Opti() - nx = nu = 1 - torch_model = NeuralNetworkModel(layer_node_count=(nu, nx)) - - # ---- decision variables --------- - x = opti.variable(nx, 1) # state - u = opti.variable(nu, 1) # control - - # ---- dynamic constraints -------- - x_sym = MX.sym("x", nx, 1) - u_sym = MX.sym("u", nu, 1) - forward_model = l4c.L4CasADi(torch_model, device="cpu") - f = Function("xdot", [x_sym, u_sym], [x_sym - forward_model(u_sym)]) - opti.subject_to(f(x, u) == 0) # Adding this line yields the error : jac_adj_i0_adj_o0 is not provided by L4CasADi. 
-
-    # ---- solve NLP ------
-    opti.solver("ipopt")
-    opti.solve()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/bioptim/interfaces/interface_utils.py b/bioptim/interfaces/interface_utils.py
index 83962aa9f..d103e3759 100644
--- a/bioptim/interfaces/interface_utils.py
+++ b/bioptim/interfaces/interface_utils.py
@@ -1,3 +1,4 @@
+import warnings
 from time import perf_counter
 
 import numpy as np
@@ -172,7 +173,8 @@ def _shake_tree_for_penalties(ocp, penalties_cx, v, v_bounds, expand):
         try:
             penalty = penalty.expand()
         except RuntimeError:
-            # This happens mostly when there is a Newton decent in the penalty
+            # This happens mostly when there is a Newton descent in the penalty. Raise a warning to notify the user
+            warnings.warn("Could not expand during the shake_tree.")
             pass
 
     return penalty(vertcat(*dt, v[len(dt) :]))
diff --git a/bioptim/models/torch/torch_model.py b/bioptim/models/torch/torch_model.py
index 585170b88..270dbeeea 100644
--- a/bioptim/models/torch/torch_model.py
+++ b/bioptim/models/torch/torch_model.py
@@ -1,33 +1,62 @@
-from typing import Callable
-
-from casadi import SX, MX, vertcat, horzcat, norm_fro, Function
+"""
+This wrapper can be used to wrap a PyTorch model and use it in bioptim. It is, however, a much less complete class
+than the BiorbdModel class. The main reason is that, as opposed to biorbd, the dynamics produced by a PyTorch model
+can be of any nature. This means this wrapper should be viewed more as an example of how to wrap a PyTorch model in
+bioptim than as an actual generic wrapper.
+
+This wrapper is based on the l4casadi library (https://github.com/Tim-Salzmann/l4casadi), which is a bridge between
+CasADi and PyTorch.
+
+INSTALLATION:
+Note that these instructions may be outdated. Please refer to the l4casadi documentation for the most up-to-date
+instructions.
+
+First, make sure PyTorch is installed by running the following command:
+    pip install "torch>=2.0"
+Please note that some dependencies are required. At the time of writing, the following packages were required:
+    pip install "setuptools>=68.1" "scikit-build>=0.17" "cmake>=3.27" "ninja>=1.11"
+Then, install l4casadi as the interface between CasADi and PyTorch, by running the following command:
+    pip install l4casadi --no-build-isolation
+
+LIMITATIONS:
+Since PyTorch is wrapped using l4casadi, the CasADi functions are generated using External. This means SX variables
+and expanding functions are not supported, which is computationally intensive when solving, making this approach
+rather slow compared to a programmatic approach. Still, it uses much less memory than the symbolic approach, so it
+has its own advantages.
+
+KNOWN ISSUES:
+    On Windows (and possibly other platforms), you may randomly get the following error when running this example:
+    ```python
+    OMP: Error #15: Initializing libomp.dll, but found libiomp5md.dll already initialized.
+    ```
+    This error comes from the fact that installing the libraries copies the libiomp5md.dll file to two different
+    locations. When the second copy is loaded, it notices that "another" library is already loaded. If you are 100%
+    sure that both copies are the exact same version, you can safely ignore this error. To do so, you can add the
+    following lines at the beginning of your script:
+    ```python
+    import os
+    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+    ```
+    That being said, this can be very problematic if the two copies are not the exact same version. The safer
+    approach is to delete one of the files.
+    To do so, simply navigate to the folders where the libiomp5md.dll files are located and delete all but one of
+    them (or back them up by adding ".bak" to their names); keeping the one in site-packages seems to work fine.
+"""
+
+from casadi import MX, vertcat, Function
 import l4casadi as l4c
-import numpy as np
 import torch
 
-# """
-# INSTALLATION:
-# First, make sure pytorch is installed
-
-# pip install torch>=2.0 --index-url https://download.pytorch.org/whl/cpu/torch_stable.html
-# setuptools>=68.1
-# scikit-build>=0.17
-# cmake>=3.27
-# ninja>=1.11
-# Then, install l4casadi as the interface between CasADi and PyTorch
-
-# pip install l4casadi --no-build-isolation
-
-# """
-
 
 class TorchModel:
     """
     This class wraps a pytorch model and allows the user to call some useful functions on it.
     """
 
-    def __init__(self, torch_model: torch.nn.Module):
-        self._dynamic_model = l4c.L4CasADi(torch_model, device="cpu")  # device='cuda' for GPU
+    def __init__(self, torch_model: torch.nn.Module, device="cuda" if torch.cuda.is_available() else "cpu"):
+        self._dynamic_model = l4c.L4CasADi(
+            torch_model, device=device, generate_jac_jac=True, generate_adj1=False, generate_jac_adj1=False
+        )
         self._nb_dof = torch_model.size_in // 3
         self._symbolic_variables()
 
@@ -66,6 +95,9 @@ def nb_tau(self) -> int:
         return self.nb_dof
 
     def forward_dynamics(self, with_contact: bool = False) -> Function:
+        if with_contact:
+            raise NotImplementedError("Contact dynamics are not implemented for torch models")
+
         return Function(
             "forward_dynamics",
             [self.q, self.qdot, self.tau, self.external_forces, self.parameters],
diff --git a/external/l4casadi b/external/l4casadi
new file mode 160000
index 000000000..f01a85d6e
--- /dev/null
+++ b/external/l4casadi
@@ -0,0 +1 @@
+Subproject commit f01a85d6e0151b6178c257aa0e9691219ea05c10
diff --git a/tests/shard3/test_global_getting_started.py b/tests/shard3/test_global_getting_started.py
index 54c8911b9..453ec0941 100644
--- a/tests/shard3/test_global_getting_started.py
+++ b/tests/shard3/test_global_getting_started.py
@@ -22,6 +22,7 @@
     ControlType,
     PhaseDynamics,
     SolutionMerge,
+    Solver,
 )
 
 from tests.utils import TestUtils
@@ -1488,3 +1489,16 @@ def test_example_variable_scaling(phase_dynamics):
     # initial and final controls
     npt.assert_almost_equal(tau[:, 0], np.array([-1000.00000999, 0.0]))
     npt.assert_almost_equal(tau[:, -1], np.array([-1000.00000999, 0.0]))
+
+
+def test_deep_neural_network():
+    from bioptim.examples.deep_neural_network import pytorch_ocp as ocp_module
+
+    model = ocp_module.NeuralNetworkModel(layer_node_count=(6, 8, 2), dropout_probability=0.2)
+    ocp = ocp_module.prepare_ocp(model=model, final_time=1, n_shooting=3)
+    solver = Solver.IPOPT()
+    solver.set_maximum_iterations(1)
+
+    # We can launch the solve, but we cannot check the results: the model is untrained, so its output is random
+    os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
+    ocp.solve(solver=solver)
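
A quick by-hand sanity check of the wrapper is the debug block that this patch removes from prepare_ocp(); it can be
reused as a standalone sketch. This is illustrative only and not part of the patch: it assumes the script is run from
bioptim/examples/deep_neural_network/ (for the pendulum model path) and uses an untrained network, so the predicted
accelerations will not match the biorbd reference. Incidentally, constructing L4CasADi with generate_jac_jac=True and
the adjoint generation disabled is presumably what avoids the "jac_adj_i0_adj_o0 is not provided by L4CasADi" error
documented in the deleted temporary example.

```python
import biorbd
import numpy as np

from bioptim.examples.deep_neural_network.pytorch_ocp import NeuralNetworkModel
from bioptim.models.torch.torch_model import TorchModel

# An untrained network is enough to exercise the wrapper; its predictions are random
model = NeuralNetworkModel(layer_node_count=(6, 8, 2), dropout_probability=0.2)
torch_model = TorchModel(torch_model=model)

q = np.array([0, 0])
qdot = np.array([0, 0])
tau = np.array([0, 0])

# Evaluate the wrapped dynamics; the last two arguments (external forces and parameters) are unused here
qddot_predicted = torch_model.forward_dynamics()(q, qdot, tau, [], [])

# Reference accelerations from biorbd for the same state and joint torques
biorbd_model = biorbd.Model("../getting_started/models/pendulum.bioMod")
qddot_reference = biorbd_model.ForwardDynamics(q, qdot, tau).to_array()

print(qddot_predicted, qddot_reference)
```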