From 7801c0fe80e377f1fa2eb0eddb78472c0afff036 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Wed, 15 May 2024 07:03:35 +0200 Subject: [PATCH] Fixes and improvements --- notebooks/nb_20_dynamic_programming.ipynb | 44 +- notebooks/nb_40_LQR.ipynb | 64 ++- .../nb_70_machine_learning_control.ipynb | 499 ++++++++---------- notebooks/nb_90_practice.ipynb | 193 ++++++- src/training_ml_control/control.py | 17 +- .../environments/acrobot.py | 6 - src/training_ml_control/model.py | 13 +- src/training_ml_control/plots.py | 26 +- 8 files changed, 496 insertions(+), 366 deletions(-) diff --git a/notebooks/nb_20_dynamic_programming.ipynb b/notebooks/nb_20_dynamic_programming.ipynb index ede502b..7b67c1e 100644 --- a/notebooks/nb_20_dynamic_programming.ipynb +++ b/notebooks/nb_20_dynamic_programming.ipynb @@ -133,7 +133,9 @@ "\n", "Dynamic programming (DP) is a method that in general solves optimization problems that involve making a sequence of decisions (multi-stage decision making problems) by determining, for each decision, subproblems that can be solved similarily, such that an optimal solution of the original problem can be found from optimal solutions of subproblems. This method is based on *Bellman’s Principle of Optimality*:\n", "\n", - "> An optimal policy has the property that whatever the initial state and initial decision are, the remaining decisions must constitute an optimal policy with regard to the state resulting from the first decision." + "> An optimal policy has the property that whatever the initial state and initial decision are, the remaining decisions must constitute an optimal policy with regard to the state resulting from the first decision.\n", + "\n", + "A problem is said to satisfy the Principle of Optimality if the sub-solutions of an optimal solution of the problem are themselves optimal solutions for their subproblems." ] }, { @@ -696,7 +698,7 @@ "It can be show that for every $s, \\tau \\in [t_0, t_f]$, $s \\leq \\tau$ , and $\\mathbf{x} \\in \\mathbf{X}$, we have:\n", "\n", "$$\n", - "V(s, \\mathbf{x}) = \\underset{}{\\min} \\left[ \\int\\limits_{s}^{\\tau} c(\\mathbf{x}(t), \\mathbf{u}(t)) d\\tau + V(\\tau, \\mathbf{x}(\\tau)) \\right]\n", + "V(s, \\mathbf{x}) = \\underset{\\mathbf{u(t)}}{min} \\left[ \\int\\limits_{s}^{\\tau} c(\\mathbf{x}(t), \\mathbf{u}(t)) d\\tau + V(\\tau, \\mathbf{x}(\\tau)) \\right]\n", "$$\n", "\n", "Which is another version of the Bellman equation." @@ -708,47 +710,13 @@ "source": [ "### Hamilton-Jacobi-Bellman Equation\n", "\n", - "The Hamilton-Jacobi-Bellman (HJB) equation is the following:\n", + "The Hamilton-Jacobi-Bellman (HJB) equation is given by:\n", "\n", "$$\n", "- \\frac{\\partial V}{\\partial t} = \\underset{\\mathbf{u(t)}}{min} \\left[ \\left( \\frac{\\partial V}{\\partial \\mathbf{x}} \\right)^T f(\\mathbf{x}(t), \\mathbf{u}(t)) + c(\\mathbf{x}(t), \\mathbf{u}(t)) \\right]\n", "$$\n", "\n", - "\n", - ":::{note} Mathematical Derivation\n", - ":class: dropdown\n", - "\n", - "It can be shown that the optimal condition is also satisfied in this case:\n", - "\n", - "$$\n", - "V(x(t_0), t_0, t_f) = \\underset{\\mathbf{u(t)}}{min} \\left[ c(t + \\right]\n", - "$$\n", - "\n", - "$$\n", - "V(x(t_0), t_0, t_f) = V(x(t_0), t_0, t) + V(x(t), t, t_f)\n", - "$$\n", - "\n", - "$$\n", - "\\frac{d}{dt}(V(\\mathbf{x}(t), t, t_f) = \\frac{\\partial V}{\\partial t} + \\left( \\frac{\\partial V}{\\partial \\mathbf{x}} \\right)^T \\underbrace{\\frac{d\\mathbf{x}}{dt}}_{= f(\\mathbf{x}(t), \\mathbf{u}(t))}\n", - "$$\n", - "\n", - "The term on the left can be simplified to:\n", - "\n", - "$$\n", - "\\frac{d}{dt}(V(\\mathbf{x}(t), t, t_f) &= \\underset{\\mathbf{u(t)}}{min} \\frac{d}{dt} \\left[ c(\\mathbf{x}(t_f), t_f) + \\int\\limits_{t_0}^{t_f} c(\\mathbf{x}(t), \\mathbf{u}(t)) d\\tau \\right] \\\\\n", - "&= \\underset{\\mathbf{u(t)}}{min} \\left[ \\frac{d}{dt} \\int\\limits_{t_0}^{t_f} c(\\mathbf{x}(t), \\mathbf{u}(t)) d\\tau \\right] \\\\\n", - "&= \\underset{\\mathbf{u(t)}}{min} \\left[ -c(\\mathbf{x}(t), \\mathbf{u}(t)) \\right]\n", - "$$\n", - "\n", - "Replacing this new expression into the original one and moving some terms aronud we get:\n", - "\n", - "$$\n", - "- \\frac{\\partial V}{\\partial t} = \\underset{\\mathbf{u(t)}}{min} \\left[ \\left( \\frac{\\partial V}{\\partial \\mathbf{x}} \\right)^T f(\\mathbf{x}(t), \\mathbf{u}(t)) + c(\\mathbf{x}(t), \\mathbf{u}(t)) \\right]\n", - "$$\n", - "\n", - "This is called the Hamilton-Jacobi-Bellman (HJB) equation.\n", - "\n", - ":::" + "It is a sufficient condition of optimality i.e., that if $V$ satisfies the HJB, it must be the value function." ] } ], diff --git a/notebooks/nb_40_LQR.ipynb b/notebooks/nb_40_LQR.ipynb index e759892..f875554 100644 --- a/notebooks/nb_40_LQR.ipynb +++ b/notebooks/nb_40_LQR.ipynb @@ -169,59 +169,67 @@ "\\mathbf{x}_{t+1} = A \\mathbf{x}_t + B \\mathbf{u}_t\n", "$$\n", "\n", - "where $\\mathbf{x} \\in \\mathbb {R} ^{n}$ (that is, $\\mathbf{x}$ is an $n$-dimensional real-valued vector) is the state of the system and $\\mathbf{u} \\in \\mathbb {R} ^{m}$ is the control input.\n", - "\n", - "Given a quadratic cost function for the system in the infinite-horizon case, defined as:\n", - "\n", - "$$\n", - "J(\\mathbf{x}_0, \\mathbf{u}) = \\sum \\limits _{k = 0}^{\\infty} \\mathbf{x}_k^{T}Q \\mathbf{x}_k + \\mathbf{u}_k^{T} R \\mathbf{u}_k\n", - "$$\n", - "\n", - "With $Q = Q^T \\succeq 0$, $R = R^T \\succeq 0$." + "where $\\mathbf{x} \\in \\mathbb {R} ^{n}$ (that is, $\\mathbf{x}$ is an $n$-dimensional real-valued vector) is the state of the system and $\\mathbf{u} \\in \\mathbb {R} ^{m}$ is the control input." ] }, { "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, + "metadata": {}, "source": [ + "## Finite Horizon\n", + "\n", + "Given a quadratic cost function for the system in the finite-horizon case, defined as:\n", + "|\n", + "$$\n", + "J(\\mathbf{x}_0, \\mathbf{u}, N) = \\mathbf{x}_N^{T} Q_N \\mathbf{x}_N + \\sum \\limits _{k = 0}^{N-1} \\mathbf{x}_k^{T}Q \\mathbf{x}_k + \\mathbf{u}_k^{T} R \\mathbf{u}_k\n", + "$$\n", + "\n", + "With $Q_N = Q_N^T \\succeq 0$, $Q = Q^T \\succeq 0$, $R = R^T \\succeq 0$.\n", + "\n", "It can be shown that the control law that minizes the cost is given by:\n", "\n", "$$\n", - "\\mathbf{u}_k = -K \\mathbf{x}_k\n", + "\\mathbf{u}_k = K_k \\mathbf{x}_k\n", "$$\n", "\n", - "With: $K = (R + B^T P B)^{-1} B^T P B$\n", + "With: $K_k = -(R + B^T P_k B)^{-1} B^T P_k B$\n", "\n", - "and $P$ is found by solving the discrete time algebraic Riccati equation (DARE):\n", + "and $P_k$ is found by solving the discrete time dynamic Riccati equation:\n", "\n", "$$\n", - "Q + A^{T}PA-(A^{T}PB)(R+B^{T}PB)^{-1}(B^{T}PA) = P.\n", - "$$" + "P_{k-1} = Q + A^{T}P_k A - (A^{T} P_k B)(R+B^{T}P_k B)^{-1}(B^{T}P_k A)\n", + "$$\n", + "\n", + "With $P_N = Q_N$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - ":::{admonition} Derivation\n", - ":class: tip\n", + "## Infinite Horizon\n", + "\n", + "Given a quadratic cost function for the system in the infinite-horizon case, defined as:\n", "\n", "$$\n", - "V_k(x) = \\min_{u \\in \\mathbf{U}} \\left[ c(x, u) + V_{k+1}(f(x, u)) \\right]\n", + "J(\\mathbf{x}_0, \\mathbf{u}) = \\sum \\limits _{k = 0}^{\\infty} \\mathbf{x}_k^{T}Q \\mathbf{x}_k + \\mathbf{u}_k^{T} R \\mathbf{u}_k\n", "$$\n", "\n", + "With $Q = Q^T \\succeq 0$, $R = R^T \\succeq 0$.\n", + "\n", + "It can be shown that the control law that minizes the cost is given by:\n", + "\n", "$$\n", - "V_k(x) = \\min_{u \\in \\mathbf{U}} \\left[ \\mathbf{x}_k^{T}Q \\mathbf{x}_k + \\mathbf{u}_k^{T} R \\mathbf{u}_k + V_{k+1}(f(x, u)) \\right]\n", + "\\mathbf{u}_k = K \\mathbf{x}_k\n", "$$\n", "\n", + "With: $K = -(R + B^T P B)^{-1} B^T P B$\n", + "\n", + "and $P$ is found by solving the discrete time algebraic Riccati equation (DARE):\n", + "\n", "$$\n", - "V_0(x) = \\min_{u \\in \\mathbf{U}} \\left[ \\mathbf{x}_0^{T}Q \\mathbf{x}_0 + \\mathbf{u}_0^{T} R \\mathbf{u}_0 + V_{k+1}(f(x_0, u_0)) \\right]\n", - "$$\n", - ":::" + "P = Q + A^{T}PA-(A^{T}PB)(R+B^{T}PB)^{-1}(B^{T}PA)\n", + "$$" ] }, { @@ -529,7 +537,7 @@ ")\n", ":::\n", "\n", - "#### Simulation\n", + "**Simulation**\n", "\n", ":::{code-cell} ipython3\n", "inverted_pendulum_lqr_controller.reset_history()\n", @@ -545,7 +553,7 @@ "animate_inverted_pendulum_simulation(inverted_pendulum_lqr_controller.data)\n", ":::\n", "\n", - "#### Evaluation\n", + "**Evaluation**\n", "\n", ":::{code-cell} ipython3\n", "class LQRController:\n", diff --git a/notebooks/nb_70_machine_learning_control.ipynb b/notebooks/nb_70_machine_learning_control.ipynb index cd61dea..e57720b 100644 --- a/notebooks/nb_70_machine_learning_control.ipynb +++ b/notebooks/nb_70_machine_learning_control.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "8497a4b2-a020-4fba-8539-bc1361a34023", "metadata": { "editable": true, @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f7339d3d-eae4-4e05-a7fb-050df96782e7", "metadata": { "init_cell": true, @@ -41,184 +41,14 @@ "ActiveScene" ] }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "%presentation_style" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "6e892149-098a-42c7-b554-bbb83918888d", "metadata": { "init_cell": true, @@ -236,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "cd78d373-34af-4b57-8d16-dbf6f662331d", "metadata": { "init_cell": true, @@ -249,16 +79,7 @@ "ActiveScene" ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)\n", - "Hello from the pygame community. https://www.pygame.org/contribute.html\n" - ] - } - ], + "outputs": [], "source": [ "%autoreload\n", "\n", @@ -274,6 +95,8 @@ " ConstantController,\n", " SineController,\n", " SumOfSineController,\n", + " PRBSController,\n", + " SchroederSweepController,\n", " RandomController,\n", ")\n", "from training_ml_control.environments import (\n", @@ -284,7 +107,7 @@ "from training_ml_control.nb_utils import show_video, display_array\n", "\n", "sns.set_theme()\n", - "plt.rcParams[\"figure.figsize\"] = [12, 8]" + "plt.rcParams[\"figure.figsize\"] = [14, 8]" ] }, { @@ -412,7 +235,24 @@ "- Pseudorandom binary sequence (PRBS)\n", "- Autoregressive, moving average process\n", "- Periodic signals: sum of sinusoids\n", - "- Schroeder Sweep\n", + "- Schroeder Sweep (Sum of phase shifted sinusoids)\n", + "\n", + " $$\n", + " u(t) = \\sum\\limits_{k=1}^M A_k \\cos(\\frac{2\\pi k \\tau(t)}{T} + \\phi_k), \\quad t = 0, 1, \\dots , N − 1\n", + " $$\n", + "\n", + " Where:\n", + "\n", + " $$\n", + " \\begin{array}{l}\n", + " k = \\sqrt{\\frac{P}{M}}\\\\\n", + " \\phi_1 = 0\\\\\n", + " \\phi_k = \\phi_{k-1} − \\frac{\\pi k^2}{N}, \\quad k = 2, 3, \\dots , M\n", + " \\end{array}\n", + " $$\n", + "\n", + " With $M$ the number of harmonically related frequencies, $\\phi_k$ the phase angles of the harmonic components to produce low Peak Factor (PF),\n", + " $N$ the number of time steps, $P$ the total desired input power.\n", "\n", "For certain systems in many safety-critical tasks such as space exploration or robot-assisted surgery we have to rely on human experts to control the system and avoid damaging it or its environment." ] @@ -432,7 +272,7 @@ "metadata": {}, "outputs": [], "source": [ - "cart_env = create_cart_environment(max_steps=300, goal_position=20, max_position=30)" + "cart_env = create_cart_environment(max_steps=500, goal_position=20, max_position=30)" ] }, { @@ -442,9 +282,9 @@ "metadata": {}, "outputs": [], "source": [ - "cart_observations = []\n", - "cart_actions = []\n", - "cart_frames = []\n", + "cart_observations = {}\n", + "cart_actions = {}\n", + "cart_frames = {}\n", "\n", "controllers = {\n", " f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n", @@ -456,17 +296,22 @@ " \"Sinusoid @ 0.5Hz\": SineController(\n", " cart_env, np.asarray([cart_env.max_action]), frequency=0.5\n", " ),\n", - " \"Sum of Sinusoids @ [2Hz, 10Hz]\": SumOfSineController(\n", - " cart_env, np.asarray([cart_env.max_action]), frequencies=[2, 10]\n", + " \"Schroeder Sweep\": SchroederSweepController(\n", + " cart_env,\n", + " np.asarray([cart_env.max_action]),\n", + " n_time_steps=500,\n", + " n_harmonics=5,\n", + " frequency=2,\n", " ),\n", + " \"PRBS\": PRBSController(np.asarray([cart_env.max_action])),\n", " \"Random\": RandomController(cart_env),\n", "}\n", "\n", - "for controller in controllers.values():\n", + "for controller_name, controller in controllers.items():\n", " result = simulate_environment(cart_env, controller=controller)\n", - " cart_observations.append(result.observations)\n", - " cart_actions.append(result.actions)\n", - " cart_frames.append(result.frames)" + " cart_observations[controller_name] = result.observations\n", + " cart_actions[controller_name] = result.actions\n", + " cart_frames[controller_name] = result.frames" ] }, { @@ -476,11 +321,11 @@ "metadata": {}, "outputs": [], "source": [ - "fig, axes = plt.subplots(len(controllers))\n", + "fig, axes = plt.subplots(len(cart_actions), figsize=(12, 10))\n", "axes = axes.ravel()\n", - "for i, label in enumerate(controllers):\n", - " t = np.arange(len(cart_actions[i])) * cart_env.dt\n", - " axes[i].plot(t, cart_actions[i], label=label)\n", + "for i, (label, actions) in enumerate(cart_actions.items()):\n", + " t = np.arange(len(actions)) * cart_env.dt\n", + " axes[i].plot(t, actions, label=label)\n", " axes[i].set_xlabel(\"Time\")\n", " axes[i].set_title(label)\n", "fig.tight_layout()\n", @@ -497,9 +342,9 @@ "fig, axes = plt.subplots(1, 2, sharex=True)\n", "axes = axes.ravel()\n", "for i, label in zip(range(2), [\"$x$\", r\"$\\dot{x}$\"]):\n", - " for j, controller_name in enumerate(controllers):\n", - " t = np.arange(len(cart_observations[j][:])) * cart_env.dt\n", - " axes[i].plot(t, cart_observations[j][:, i], label=controller_name)\n", + " for j, (controller_name, observations) in enumerate(cart_observations.items()):\n", + " t = np.arange(len(observations[:])) * cart_env.dt\n", + " axes[i].plot(t, observations[:, i], label=controller_name)\n", " axes[i].set_xlabel(\"Time\")\n", " axes[i].set_title(label)\n", "plt.legend()\n", @@ -526,10 +371,8 @@ "source": [ "fig, ax = plt.subplots(1, 1, sharex=True)\n", "\n", - "for j, controller_name in enumerate(controllers):\n", - " ax.plot(\n", - " cart_observations[j][:, 0], cart_observations[j][:, 1], label=controller_name\n", - " )\n", + "for j, (controller_name, observations) in enumerate(cart_observations.items()):\n", + " ax.plot(observations[:, 0], observations[:, 1], label=controller_name)\n", " ax.set_xlabel(\"$x$\")\n", " ax.set_ylabel(\"$\\dot{x}$\")\n", "plt.legend()\n", @@ -555,8 +398,8 @@ "fig, axes = plt.subplots(1, 2, sharex=True)\n", "axes = axes.ravel()\n", "for i, label in zip(range(2), [\"$x$\", r\"$\\dot{x}$\"]):\n", - " for j, controller_name in enumerate(controllers):\n", - " f, Pxx_den = periodogram(cart_observations[j][:, i], 1 / cart_env.dt)\n", + " for controller_name, observations in cart_observations.items():\n", + " f, Pxx_den = periodogram(observations[:, i], 1 / cart_env.dt)\n", " axes[i].semilogy(f, Pxx_den, label=controller_name)\n", " axes[i].set_xlabel(\"frequency [Hz]\")\n", " axes[i].set_ylabel(\"Power Spectral Density [V**2/Hz]\")\n", @@ -668,8 +511,93 @@ "source": [ "## Sparse Identification of Nonlinear Dynamics (SINDy)\n", "\n", + "The SINDY algorithm identifies fully nonlinear dynamical systems from measurement data. This relies on the fact that many dynamical systems have relatively few terms in the right hand side of the governing equations:\n", + "\n", + "$$\n", + "\\dot{\\mathbf{x}} = f(\\mathbf{x})\n", + "$$\n", + "\n", + "To determine the function $f$ from data, we collect a time history of the state $\\mathbf{x}$ and either measure the derivative $\\dot{\\mathbf{x}}$\n", + "or approximate it numerically. The data are sampled at several times $t_1, t_2, \\dots, t_m$ and arranged into two matrices:\n", + "\n", + "$$\n", + "\\mathbf{X} = \\begin{bmatrix}\n", + "\\mathbf{x}^T(t_1)\\\\\n", + "\\mathbf{x}^T(t_2)\\\\\n", + "\\vdots\\\\\n", + "\\mathbf{x}^T(t_m)\n", + "\\end{bmatrix}\n", + "= \\begin{bmatrix}\n", + "x_1(t_1) & x_2(t_1) & \\dots & x_n(t_1)\\\\\n", + "x_1(t_2) & x_2(t_2) & \\dots & x_n(t_2)\\\\\n", + "\\vdots & \\vdots & \\ddots & \\vdots\\\\\n", + "x_1(t_m) & x_2(t_m) & \\dots & x_n(t_m)\\\\\n", + "\\end{bmatrix}\n", + "$$\n", + "\n", + "$$\n", + "\\dot{\\mathbf{X}} = \\begin{bmatrix}\n", + "\\dot{\\mathbf{x}}^T(t_1)\\\\\n", + "\\dot{\\mathbf{x}}^T(t_2)\\\\\n", + "\\vdots\\\\\n", + "\\dot{\\mathbf{x}}^T(t_m)\n", + "\\end{bmatrix}\n", + "= \\begin{bmatrix}\n", + "\\dot{x}_1(t_1) & \\dot{x}_2(t_1) & \\dots & \\dot{x}_n(t_1)\\\\\n", + "\\dot{x}_1(t_2) & \\dot{x}_2(t_2) & \\dots & \\dot{x}_n(t_2)\\\\\n", + "\\vdots & \\vdots & \\ddots & \\vdots\\\\\n", + "\\dot{x}_1(t_m) & \\dot{x}_2(t_m) & \\dots & \\dot{x}_n(t_m)\\\\\n", + "\\end{bmatrix}\n", + "$$\n", + "\n", + "Next, we construct a library $\\Theta(\\mathbf{X})$ consisting of candidate non-linear functions of the columns of $\\mathbf{X}$.\n", + "For example, \\Theta(\\mathbf{X})$ may consist of constant, polynomial, and trigonometric terms:\n", + "\n", + "$$\n", + "\\Theta(\\mathbf{X}) = \\begin{bmatrix}\n", + "\\mathbf{1} & \\mathbf{X} & \\mathbf{X}^{P_2} & \\mathbf{X}^{P_3} & \\dots & \\sin(\\mathbf{X}) & \\cos(\\mathbf{X}) \n", + "\\end{bmatrix}\n", + "$$\n", + "\n", + "Here, higher polynomials are denoted as $\\mathbf{X}^{P_2}$, $\\mathbf{X}^{P_3}$, etc.,\n", + "where $\\mathbf{X}^{P_2}$ denotes the quadratic nonlinearities in the state x:\n", + "\n", + "$$\n", + "\\mathbf{X}^{P_2} = \\begin{bmatrix}\n", + "x_1^2(t_1) & x_1(t_1)x_2(t_1) & \\dots & x_n^2(t_1)\\\\\n", + "x_1^2(t_2) & x_1(t_2)x_2(t_2) & \\dots & x_n^2(t_2)\\\\\n", + "\\vdots & \\vdots & \\ddots & \\vdots\\\\\n", + "x_1^2(t_m) & x_1(t_m)x_2(t_m) & \\dots & x_n^2(t_m)\\\\\n", + "\\end{bmatrix}\n", + "$$\n", + "\n", + "Each column of $\\Theta(\\mathbf{X})$ represents a candidate function for the right-hand side of the system's ODE.\n", + "There is tremendous freedom in choosing the entries in this matrix of nonlinearities.\n", + "\n", + "Because we assume that only a few of these nonlinearities are active in each row of $f$,\n", + "we may set up a sparse regression problem to determine the sparse vectors of coefficients\n", + "$\\Xi = \\begin{bmatrix} \\xi_1 ^ \\xi_2 & \\dots & \\xi_n \\end{bmatrix}$ that determine which nonlinearities are active:\n", + "\n", + "$$\n", + "\\dot{\\mathbf{X}} = \\Theta(\\mathbf{X})\\Xi\n", + "$$\n", + "\n", + "This is illustrated in {ref}`sindy-diagram`. Each column $\\xi_k$ of $Xi$ is a sparse vector of coefficients determining\n", + "which terms are active in the right-hand side for one of the row equations:\n", + "\n", + "$$\n", + "\\dot{x}_k = f_k(x) = \\Theta(x^T)\\xi_k\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "id": "c217e8f7-11ed-48a6-913a-428ea9bb6ded", + "metadata": {}, + "source": [ ":::{figure} _static/images/70_sindy_diagram.svg\n", - ":width: 90%\n", + ":width: 80%\n", + ":label: sindy-diagram\n", "\n", "Schematic of the SINDy algorithm, demonstrated on the Lorenz equations. Data are collected from the system, including a time history of the states $\\mathbf{X}$ and derivatives $\\dot{\\mathbf{X}}$. Next, a library of nonlinear functions of the states, $\\Theta(\\mathbf{X})$, is constructed. This is then used to find the fewest terms needed to satisfy $\\dot{\\mathbf{X}} = \\Theta(\\mathbf{X})\\Xi$. The few entries in the vectors of $\\Xi$, solved for by sparse regression, denote the relevant\n", "terms in the right-hand side of the dynamics. *Taken from {cite}`brunton_discovering_2016`*\n", @@ -681,7 +609,17 @@ "id": "6390573f-cc23-4a2d-a6ea-b3a4dc6c63cc", "metadata": {}, "source": [ - "### SINDy with Control (SINDyc)" + "### SINDy with Control (SINDyc)\n", + "\n", + "SINDYc generalizes the SINDY method to include inputs and control. In particular, it considers the nonlinear dynamical system with inputs $\\mathbf{u}$:\n", + "\n", + "$$\n", + "\\dot{\\mathbf{x}} = f(\\mathbf{x}, \\mathbf{u})\n", + "$$\n", + "\n", + "The SINDY algorithm is readily generalized to include actuation, as this merely requires building a larger library $\\Theta(\\mathbf{X}, \\mathbf{U})$\n", + "of candidate functions that include $u$; these functions can include nonlinear cross terms in $\\mathbf{x}$ and $\\mathbf{u}$.\n", + "This extension requires measurements of the state $\\mathbf{x}$ as well as the input signal $\\mathbf{u}$." ] }, { @@ -944,9 +882,10 @@ "metadata": {}, "outputs": [], "source": [ - "cart_env = create_cart_environment(max_steps=300, goal_position=29, max_position=30)\n", - "cart_observations = []\n", - "cart_actions = []\n", + "cart_env = create_cart_environment(max_steps=500, goal_position=29, max_position=30)\n", + "\n", + "cart_observations = {}\n", + "cart_actions = {}\n", "\n", "controllers = {\n", " f\"Step @ {cart_env.max_action / 2}\": ConstantController(\n", @@ -958,16 +897,43 @@ " \"Sinusoid @ 0.5Hz\": SineController(\n", " cart_env, np.asarray([cart_env.max_action]), frequency=0.5\n", " ),\n", - " \"Sum of Sinusoids @ [2Hz, 10Hz]\": SumOfSineController(\n", - " cart_env, np.asarray([cart_env.max_action]), frequencies=[2, 10]\n", + " \"Schroeder Sweep\": SchroederSweepController(\n", + " cart_env,\n", + " np.asarray([cart_env.max_action]),\n", + " n_time_steps=500,\n", + " n_harmonics=5,\n", + " frequency=2,\n", " ),\n", + " \"PRBS\": PRBSController(np.asarray([cart_env.max_action])),\n", " \"Random\": RandomController(cart_env),\n", "}\n", "\n", - "for controller in controllers.values():\n", + "for controller_name, controller in controllers.items():\n", " result = simulate_environment(cart_env, controller=controller)\n", - " cart_observations.append(result.observations)\n", - " cart_actions.append(result.actions)" + " cart_observations[controller_name] = result.observations\n", + " cart_actions[controller_name] = result.actions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78e0f204-0e80-4a8e-854e-1926ad6c3843", + "metadata": {}, + "outputs": [], + "source": [ + "# We use the data from PRBS\n", + "X = cart_observations[\"PRBS\"][:-1].copy()\n", + "U = cart_actions[\"PRBS\"].copy()\n", + "t = np.arange(0, len(X)) * cart_env.dt\n", + "# Train with first half of the data\n", + "train_size = len(X) // 2\n", + "X_train = X[:train_size]\n", + "U_train = U[:train_size]\n", + "t_train = t[:train_size]\n", + "# Test with rest of data\n", + "X_test = X[train_size:]\n", + "U_test = U[train_size:]\n", + "t_test = t[train_size:]" ] }, { @@ -975,45 +941,49 @@ "id": "d3bb4af8-22b2-4857-98c4-81d9d978d2d8", "metadata": {}, "source": [ - "### SINDYc" + "### SINDYc\n", + "\n", + "For the SINDYc method we use the [pysindy](https://github.com/dynamicslab/pysindy) package.\n", + "\n", + "Refer to its [introduction](https://pysindy.readthedocs.io/en/latest/examples/2_introduction_to_sindy/example.html) page for usage information." ] }, { "cell_type": "code", "execution_count": null, - "id": "78e0f204-0e80-4a8e-854e-1926ad6c3843", + "id": "af3d0377-fadd-4844-8217-a4da41d72d51", "metadata": {}, "outputs": [], "source": [ - "# Train with random\n", - "X_train = cart_observations[-1][:-1].copy()\n", - "U_train = cart_actions[-1].copy()\n", - "t_train = np.arange(0, len(X_train)) * cart_env.dt\n", - "# Test with step\n", - "X_test = cart_observations[0][:-1].copy()\n", - "U_test = cart_actions[0].copy()\n", - "t_test = np.arange(0, len(X_test)) * cart_env.dt" + "opt = ps.STLSQ(threshold=0.1, max_iter=100)\n", + "feature_library = ps.IdentityLibrary()\n", + "differentiation_method = ps.FiniteDifference(order=1)\n", + "model = ps.SINDy(\n", + " optimizer=opt,\n", + " feature_library=feature_library,\n", + " differentiation_method=differentiation_method,\n", + ")\n", + "model.fit(X_train, u=U_train, t=t_train)" ] }, { "cell_type": "code", "execution_count": null, - "id": "af3d0377-fadd-4844-8217-a4da41d72d51", + "id": "3f4878e9-df19-4541-93cc-e205d6fdc173", "metadata": {}, "outputs": [], "source": [ - "model = ps.SINDy()\n", - "model.fit(X_train, u=U_train, t=t_train)" + "model.print()" ] }, { "cell_type": "code", "execution_count": null, - "id": "3f4878e9-df19-4541-93cc-e205d6fdc173", + "id": "2f6a25d4-694a-4d3a-bf8a-10c3929692aa", "metadata": {}, "outputs": [], "source": [ - "model.print()" + "print(\"Model score: %f\" % model.score(X_test, u=U_test, t=cart_env.dt))" ] }, { @@ -1033,14 +1003,14 @@ "metadata": {}, "outputs": [], "source": [ - "fig, axs = plt.subplots(X_test.shape[1], 1, sharex=True, figsize=(7, 9))\n", + "fig, axs = plt.subplots(1, X_test.shape[1], sharex=True)\n", "for i in range(X_test.shape[1]):\n", " axs[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", " axs[i].plot(t_test[1:], X_test_sim[:, i], \"r--\", label=\"Model\")\n", " axs[i].legend()\n", " axs[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", "\n", - "fig = plt.figure(figsize=(10, 4.5))\n", + "fig = plt.figure()\n", "ax1 = fig.add_subplot(1, 2, 1)\n", "ax1.plot(X_test[:, 0], X_test[:, 1], \"k\")\n", "ax1.set(xlabel=\"$x_1$\", ylabel=\"$x_2$\", title=\"true simulation\")\n", @@ -1058,24 +1028,11 @@ "id": "82c5089b-49e7-49e6-8824-2fc1c71bd08b", "metadata": {}, "source": [ - "### DMDc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0233a5d7-45a2-4a27-a48f-cce3894c2fec", - "metadata": {}, - "outputs": [], - "source": [ - "# Train with random\n", - "X_train = cart_observations[-1][:-1].copy()\n", - "U_train = cart_actions[-1].copy()\n", - "t_train = np.arange(0, len(X_train)) * cart_env.dt\n", - "# Test with step\n", - "X_test = cart_observations[0][:-1].copy()\n", - "U_test = cart_actions[0].copy()\n", - "t_test = np.arange(0, len(X_test)) * cart_env.dt" + "### DMDc\n", + "\n", + "For the SINDYc method we use the [pykoopman](https://github.com/dynamicslab/pykoopman/) package.\n", + "\n", + "Refer to its [documentation](https://pykoopman.readthedocs.io/en/master/) page for usage information." ] }, { @@ -1133,22 +1090,28 @@ { "cell_type": "code", "execution_count": null, - "id": "c49ef6c6-783c-4be3-8790-e52ca7bac051", + "id": "b1342ac6-85b1-4d15-a95b-26a3ec0541d6", "metadata": {}, "outputs": [], "source": [ - "t = np.arange(0, len(U))\n", - "fig, axs = plt.subplots(3, 1, sharex=True, figsize=(16, 8))\n", - "axs[0].plot(t_test, U_test, \"-k\")\n", - "axs[0].set(ylabel=r\"$u$\")\n", - "axs[1].plot(t_test, X_test[:, 0], \"-\", color=\"b\", label=\"True\")\n", - "axs[1].plot(t_test, Xkoop[:, 0], \"--r\", label=\"DMDc\")\n", - "axs[1].set(ylabel=r\"$x_1$\")\n", - "axs[2].plot(t_test, X_test[:, 1], \"-\", color=\"b\", label=\"True\")\n", - "axs[2].plot(t_test, Xkoop[:, 1], \"--r\", label=\"DMDc\")\n", - "axs[2].set(ylabel=r\"$x_2$\", xlabel=r\"$t$\")\n", - "axs[1].legend(loc=\"best\")\n", - "fig.tight_layout();" + "fig, axs = plt.subplots(1, X_test.shape[1], sharex=True)\n", + "for i in range(X_test.shape[1]):\n", + " axs[i].plot(t_test, X_test[:, i], \"k\", label=\"Measured\")\n", + " axs[i].plot(t_test, Xkoop[:, i], \"r--\", label=\"Model\")\n", + " axs[i].legend()\n", + " axs[i].set(xlabel=\"t\", ylabel=\"$x_{}$\".format(i + 1))\n", + "\n", + "fig = plt.figure()\n", + "ax1 = fig.add_subplot(1, 2, 1)\n", + "ax1.plot(X_test[:, 0], X_test[:, 1], \"k\")\n", + "ax1.set(xlabel=\"$x_1$\", ylabel=\"$x_2$\", title=\"true simulation\")\n", + "\n", + "ax2 = fig.add_subplot(1, 2, 2)\n", + "ax2.plot(Xkoop[:, 0], Xkoop[:, 1], \"r--\")\n", + "ax2.set(xlabel=\"$x_1$\", ylabel=\"$x_2$\", title=\"model simulation\")\n", + "\n", + "plt.tight_layout()\n", + "fig.show();" ] }, { diff --git a/notebooks/nb_90_practice.ipynb b/notebooks/nb_90_practice.ipynb index 4c2eb72..949f536 100644 --- a/notebooks/nb_90_practice.ipynb +++ b/notebooks/nb_90_practice.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "8497a4b2-a020-4fba-8539-bc1361a34023", "metadata": { "editable": true, @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "f7339d3d-eae4-4e05-a7fb-050df96782e7", "metadata": { "init_cell": true, @@ -41,14 +41,184 @@ "ActiveScene" ] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%presentation_style" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "6e892149-098a-42c7-b554-bbb83918888d", "metadata": { "init_cell": true, @@ -66,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "cd78d373-34af-4b57-8d16-dbf6f662331d", "metadata": { "init_cell": true, @@ -79,7 +249,16 @@ "ActiveScene" ] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)\n", + "Hello from the pygame community. https://www.pygame.org/contribute.html\n" + ] + } + ], "source": [ "%autoreload\n", "\n", @@ -153,7 +332,7 @@ "\n", "As seen below, the links are represented in blue and are connected by two joints represented in green (or yellow?).\n", "\n", - "If you would like to use a mathematical model of the system, then please refer to [the following page](http://incompleteideas.net/book/11/node4.html) for the equations.\n", + "If you would like to use a mathematical model of the system, then please refer to [the following page](http://incompleteideas.net/book/first/ebook/node110.html) for the equations.\n", "\n", "**First Goal**\n", "\n", diff --git a/src/training_ml_control/control.py b/src/training_ml_control/control.py index 1b6991c..0e9b035 100644 --- a/src/training_ml_control/control.py +++ b/src/training_ml_control/control.py @@ -12,6 +12,8 @@ "ConstantController", "SineController", "SumOfSineController", + "SchroederSweepController", + "PRBSController", "RandomController", "build_lqr_controller", "build_mpc_controller", @@ -82,12 +84,14 @@ def __init__( n_time_steps: int = 200, input_power: float = 10, n_harmonics: int = 3, + frequency: float = 1, ) -> None: self.dt = env.unwrapped.dt self.u_max = u_max self.input_power = input_power self.n_time_steps = n_time_steps self.n_harmonics = n_harmonics + self.frequency = frequency self.amplitude = np.sqrt(self.input_power / self.n_harmonics) self.phis = np.zeros(self.n_harmonics) for k in range(1, self.n_harmonics): @@ -99,16 +103,25 @@ def act(self, measurement: NDArray) -> NDArray: self.i += 1 u = np.asarray([0.0]) for k, phi in enumerate(self.phis): - u += np.cos(2 * np.pi * (k + 1) * t + phi) + u += np.cos(2 * np.pi * (k + 1) * self.frequency * t + phi) u *= self.amplitude return u +class PRBSController: + def __init__(self, u_max: NDArray = np.asarray([10]), seed: int = 16) -> None: + self.u_max = u_max + self.rng = np.random.default_rng(seed) + + def act(self, measurement: NDArray) -> NDArray: + return self.rng.choice([self.u_max, -self.u_max]) + + class RandomController: def __init__(self, env: Env) -> None: self.action_space = env.action_space - def act(self, measurment: NDArray) -> NDArray: + def act(self, measurement: NDArray) -> NDArray: return self.action_space.sample() diff --git a/src/training_ml_control/environments/acrobot.py b/src/training_ml_control/environments/acrobot.py index fb153d7..de68918 100644 --- a/src/training_ml_control/environments/acrobot.py +++ b/src/training_ml_control/environments/acrobot.py @@ -44,12 +44,6 @@ def __init__( self.target_height = target_height self.max_torque = max_torque - if self.target_height > self.LINK_LENGTH_1 + self.LINK_LENGTH_2: - raise ValueError( - f"Target height, {self.target_height} is too high." - f"It should be less than {self.LINK_LENGTH_1 + self.LINK_LENGTH_2}" - ) - high = np.array( [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32 ) diff --git a/src/training_ml_control/model.py b/src/training_ml_control/model.py index 45f6c1d..8fac790 100644 --- a/src/training_ml_control/model.py +++ b/src/training_ml_control/model.py @@ -96,12 +96,17 @@ def build_inverted_pendulum_nonlinear_model( half_length = l / 2 polemass_length = m_p * half_length - temp = (force + polemass_length * dtheta**2 * casadi.sin(theta)) / total_mass - ddtheta = (g * casadi.sin(theta) - casadi.cos(theta) * temp) / ( - half_length * (4.0 / 3.0 - m_p * casadi.cos(theta) ** 2 / total_mass) + numerator = (total_mass) * g * casadi.sin(theta) - casadi.cos(theta) * ( + force + m_p * l * dtheta**2 * casadi.sin(theta) ) + denominator = (4 / 3) * (total_mass) * l - m_p * l * casadi.cos(theta) ** 2 + ddtheta = numerator / denominator - ddpos = temp - polemass_length * ddtheta * casadi.cos(theta) / total_mass + numerator = m_c * g * casadi.cos(theta) * casadi.sin(theta) - (4 / 3) * ( + force + m_c * l * dtheta**2 * casadi.sin(theta) + ) + denominator = m_c * casadi.cos(theta) ** 2 - (4 / 3) * (total_mass) + ddpos = numerator / denominator model.set_rhs("position", dpos) model.set_rhs("velocity", ddpos) diff --git a/src/training_ml_control/plots.py b/src/training_ml_control/plots.py index 92cc44c..ffd76aa 100644 --- a/src/training_ml_control/plots.py +++ b/src/training_ml_control/plots.py @@ -191,8 +191,8 @@ def animate_inverted_pendulum_simulation( ax1.axhline(0, color="black") bar = ax1.plot([], [], "-o", linewidth=5, markersize=10) - ax1.set_xlim(-1, 1) - ax1.set_ylim(-1, 1) + ax1.set_xlim(-3, 3) + ax1.set_ylim(-2, 2) ax1.set_axis_off() fig.align_ylabels() @@ -219,13 +219,13 @@ def update(t_ind): line_x = np.array( [ 0, - 0.3 * np.sin(x[0]), + 0.7 * np.sin(x[0]), ], ) line_y = np.array( [ 0, - 0.3 * np.cos(x[0]), + 0.7 * np.cos(x[0]), ], ) line = np.stack([line_x, line_y]) @@ -256,9 +256,9 @@ def animate_full_inverted_pendulum_simulation( ax5 = plt.subplot2grid((5, 2), (3, 1)) ax6 = plt.subplot2grid((5, 2), (4, 1)) - ax2.set_ylabel("$y$") - ax3.set_ylabel("$\\theta$") - ax4.set_ylabel("$\\dot{y}$") + ax2.set_ylabel("$x$") + ax3.set_ylabel("$\\dot{x}$") + ax4.set_ylabel("$\\theta$") ax5.set_ylabel("$\\dot{\\theta}$") ax6.set_ylabel("Force") @@ -272,8 +272,8 @@ def animate_full_inverted_pendulum_simulation( ax6.set_xlabel("Time") graphics.add_line(var_type="_x", var_name="position", axis=ax2) - graphics.add_line(var_type="_x", var_name="theta", axis=ax3) - graphics.add_line(var_type="_x", var_name="velocity", axis=ax4) + graphics.add_line(var_type="_x", var_name="velocity", axis=ax3) + graphics.add_line(var_type="_x", var_name="theta", axis=ax4) graphics.add_line(var_type="_x", var_name="dtheta", axis=ax5) graphics.add_line(var_type="_u", var_name="force", axis=ax6) @@ -285,8 +285,8 @@ def animate_full_inverted_pendulum_simulation( ax1.axhline(0, color="black") bar = ax1.plot([], [], "-o", linewidth=5, markersize=10) - ax1.set_xlim(-1, 1) - ax1.set_ylim(-1, 1) + ax1.set_xlim(-3, 3) + ax1.set_ylim(-2, 2) ax1.set_axis_off() fig.align_ylabels() @@ -317,13 +317,13 @@ def update(t_ind): line_x = np.array( [ x[0], - x[0] + 0.3 * np.sin(x[1]), + x[0] + 0.7 * np.sin(x[2]), ], ) line_y = np.array( [ 0, - 0.3 * np.cos(x[1]), + 0.7 * np.cos(x[2]), ], ) line = np.stack([line_x, line_y])