From 0b51f4dad54559ed924511af60e7cca7e7747dc0 Mon Sep 17 00:00:00 2001 From: odow Date: Mon, 21 Oct 2024 15:05:19 +1300 Subject: [PATCH 01/15] Add tutorial Example: inventory management --- docs/make.jl | 1 + docs/src/tutorial/inventory.jl | 172 +++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 docs/src/tutorial/inventory.jl diff --git a/docs/make.jl b/docs/make.jl index 3c663485a..01bbce04d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -157,6 +157,7 @@ Documenter.makedocs(; "tutorial/example_newsvendor.md", "tutorial/example_reservoir.md", "tutorial/example_milk_producer.md", + "tutorial/inventory.md", ], "How-to guides" => [ "guides/access_previous_variables.md", diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl new file mode 100644 index 000000000..3bc3b2c48 --- /dev/null +++ b/docs/src/tutorial/inventory.jl @@ -0,0 +1,172 @@ +# Copyright (c) 2017-24, Oscar Dowson and SDDP.jl contributors. #src +# This Source Code Form is subject to the terms of the Mozilla Public #src +# License, v. 2.0. If a copy of the MPL was not distributed with this #src +# file, You can obtain one at http://mozilla.org/MPL/2.0/. #src + +# # Example: inventory management + +# The purpose of this tutorial is to demonstrate a well-known inventory +# management problem with a finite- and infinite-horizon policy. + +# ## Required packages + +# This tutorial requires the following packages: + +using SDDP +import Distributions +import HiGHS +import Plots +import Statistics + +# ## Background + +# Consider a periodic review inventory problem involving a single product. The +# initial inventory is denoted by $x_0 \geq 0$, and a decision-maker can place +# an order at the start of each stage. The objective is to minimize expected +# costs over the planning horizon. The following parameters define the cost +# structure: +# +# * $c$ is the unit cost for purchasing each unit +# * $h$ is the holding cost per unit remaining at the end of each stage +# * $p$ is the shortage cost per unit of unsatisfied demand at the end of each +# stage +# +# There are no fixed ordering costs, and the demand at each stage is assumed to +# follow an independent and identically distributed random variable with +# cumulative distribution function (CDF) $\Phi(\cdot)$. Any unsatisfied demand +# is backlogged and carried forward to the next stage. + +# At each stage, an agent must decide how many items to order or, equivalently, +# what the inventory level should be at the beginning of the next stage. The +# per-stage costs are the sum of the order costs, shortage and holding costs +# incurred at the end of the stage, after demand is realized. If $x$ represents +# the inventory level at the beginning of a stage, and $y$ is the level of +# inventory after ordering, the stage costs are given by +# +# ```math +# c(y-x) + \int_{0}^y h(y-\xi) d\Phi(\xi) + \int_{y}^{\infty} p(\xi - y) d\Phi(\xi). +# ``` + +# Following Chapter 19 of Introduction to Operations Research by Hillier and +# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$, +# and demand follows a continuous uniform distribution between 0 and 800. + +x0 = 100 # Initial inventory +α = 0.995 # discount factor +c = 35 # unit inventory cost +h = 1 # unit inventory holding cost +p = 15 # unit order cost +UD = 800 # upper bound for demand +N = 10 # size of sample space +Ω = rand(Distributions.Uniform(0, UD), N); + +# This is a well-known inventory problem with a closed-form solution. 
The +# optimal policy is a simple order-up-to policy: if the inventory level is +# below 741 units, the decision-maker orders up to 741 units. Otherwise, no +# order is placed. For a detailed analysis, refer to Foundations of Stochastic +# Inventory Theory by Evan Porteus (Stanford Business Books, 2002). + +# ## Finite horizon + +# For a finite horizon of length $T$, the problem is to minimize the total +# expected cost over all stages, with a terminal value function given by: +# ```math +# v_T(x) = cx. +# ``` +# This reflects that at the end of the last stage, the decision-maker can +# recover the unit cost for each leftover item while incurring any backlog costs +# and shortage penalties from the previous period. + +T = 10 # number of stages + +model = SDDP.LinearPolicyGraph( + stages = T + 1, + sense = :Min, + lower_bound = 0.0, + optimizer = HiGHS.Optimizer, +) do sp, t + @variable(sp, x, SDDP.State, initial_value = x0) + @variable(sp, y, SDDP.State, initial_value = x0) + @constraint(sp, y.out >= x.out) + @constraint(sp, con_balance, x.out == y.in) + if t == 1 + @stageobjective(sp, 0) + elseif t == T + 1 + @stageobjective(sp, α^(t - 1) * (-c * x.in)) + else + @stageobjective( + sp, + α^(t - 1) * ( + p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 + ) + ) + SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + end + return +end + +# Train and simulate the policy: + +SDDP.train(model; iteration_limit = 100) +simulations = SDDP.simulate(model, 200, [:y]) +objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] +μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) +lower_bound = round(SDDP.calculate_bound(model); digits = 2) +println("Confidence interval: ", μ, " ± ", ci) +println("Lower bound: ", lower_bound) + +# Plot the optimal inventory levels: + +SDDP.publication_plot( + simulations; + title = "Inventory level", + xlabel = "Stage", + ylabel = "Inventory level", +) do data + return data[:y].in +end + +# ## Infinite horizon + +# For the infinite horizon case, assume a discount factor $\alpha=0.995$. The +# objective in this case is to minimize the discounted expected costs over an +# infinite planning horizon. 
+ +model = SDDP.PolicyGraph( + SDDP.UnicyclicGraph(α; num_nodes = 1); + sense = :Min, + lower_bound = 0.0, + optimizer = HiGHS.Optimizer, +) do sp, t + @variable(sp, x, SDDP.State, initial_value = x0) + @variable(sp, y, SDDP.State, initial_value = x0) + @constraint(sp, y.out >= x.out) + @constraint(sp, con_balance, x.out == y.in) + @stageobjective( + sp, + p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 + ) + SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + return +end + +# Train and simulate the policy: + +SDDP.train(model; iteration_limit = 100) +simulations = SDDP.simulate(model, 200, [:y]) +objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] +μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) +lower_bound = round(SDDP.calculate_bound(model); digits = 2) +println("Confidence interval: ", μ, " ± ", ci) +println("Lower bound: ", lower_bound) + +# Plot the optimal inventory levels: + +SDDP.publication_plot( + simulations; + title = "Inventory level", + xlabel = "Stage", + ylabel = "Inventory level", +) do data + return data[:y].in +end From 1ce774919cddaf66ed875f7117dcfc934d6a3953 Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:06:54 +1300 Subject: [PATCH 02/15] Update --- docs/src/tutorial/inventory.jl | 111 +++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 47 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 3bc3b2c48..a7b78e8bb 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -26,9 +26,9 @@ import Statistics # costs over the planning horizon. The following parameters define the cost # structure: # -# * $c$ is the unit cost for purchasing each unit -# * $h$ is the holding cost per unit remaining at the end of each stage -# * $p$ is the shortage cost per unit of unsatisfied demand at the end of each +# * `c` is the unit cost for purchasing each unit +# * `h` is the holding cost per unit remaining at the end of each stage +# * `p` is the shortage cost per unit of unsatisfied demand at the end of each # stage # # There are no fixed ordering costs, and the demand at each stage is assumed to @@ -78,29 +78,30 @@ N = 10 # size of sample space # and shortage penalties from the previous period. T = 10 # number of stages - model = SDDP.LinearPolicyGraph( stages = T + 1, sense = :Min, lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x, SDDP.State, initial_value = x0) - @variable(sp, y, SDDP.State, initial_value = x0) - @constraint(sp, y.out >= x.out) - @constraint(sp, con_balance, x.out == y.in) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) + ## u_buy is a Decision-Hazard control variable. 
We decide u.out for use in + ## the next stage + @variable(sp, u_buy >= 0, SDDP.State, initial_value = 0) + @variable(sp, u_sell >= 0) + @variable(sp, w_demand == 0) + @constraint(sp, x_inventory.out == x_inventory.in + u_buy.in - u_sell) + @constraint(sp, x_demand.out == x_demand.in + w_demand - u_sell) if t == 1 - @stageobjective(sp, 0) - elseif t == T + 1 - @stageobjective(sp, α^(t - 1) * (-c * x.in)) + fix(u_sell, 0; force = true) + @stageobjective(sp, c * u_buy.out) else @stageobjective( sp, - α^(t - 1) * ( - p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 - ) + c * u_buy.out + h * x_inventory.out + p * x_demand.out, ) - SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return end @@ -117,14 +118,15 @@ println("Lower bound: ", lower_bound) # Plot the optimal inventory levels: -SDDP.publication_plot( - simulations; - title = "Inventory level", +Plots.plot( + SDDP.publication_plot(simulations; title = "x_inventory.out") do data + return data[:x_inventory].out + end; + SDDP.publication_plot(simulations; title = "u_buy.out") do data + return data[:u_buy].out + end; xlabel = "Stage", - ylabel = "Inventory level", -) do data - return data[:y].in -end +) # ## Infinite horizon @@ -132,41 +134,56 @@ end # objective in this case is to minimize the discounted expected costs over an # infinite planning horizon. +graph = SDDP.LinearGraph(2) +SDDP.add_edge(graph, 2 => 1, α) model = SDDP.PolicyGraph( - SDDP.UnicyclicGraph(α; num_nodes = 1); + graph; sense = :Min, lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x, SDDP.State, initial_value = x0) - @variable(sp, y, SDDP.State, initial_value = x0) - @constraint(sp, y.out >= x.out) - @constraint(sp, con_balance, x.out == y.in) - @stageobjective( - sp, - p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 - ) - SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) + ## u_buy is a Decision-Hazard control variable. 
We decide u.out for use in + ## the next stage + @variable(sp, u_buy >= 0, SDDP.State, initial_value = 0) + @variable(sp, u_sell >= 0) + @variable(sp, w_demand == 0) + @constraint(sp, x_inventory.out == x_inventory.in + u_buy.in - u_sell) + @constraint(sp, x_demand.out == x_demand.in + w_demand - u_sell) + if t == 1 + fix(u_sell, 0; force = true) + @stageobjective(sp, c * u_buy.out) + else + @stageobjective( + sp, + c * u_buy.out + h * x_inventory.out + p * x_demand.out, + ) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) + end return end -# Train and simulate the policy: +SDDP.train(model; iteration_limit = 100, log_every_iteration = true) -SDDP.train(model; iteration_limit = 100) -simulations = SDDP.simulate(model, 200, [:y]) -objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] -μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) -lower_bound = round(SDDP.calculate_bound(model); digits = 2) -println("Confidence interval: ", μ, " ± ", ci) -println("Lower bound: ", lower_bound) +simulations = SDDP.simulate( + model, + 200, + [:x_inventory, :u_buy]; + sampling_scheme = SDDP.InSampleMonteCarlo(; + max_depth = 50, + terminate_on_dummy_leaf = false, + ), +); # Plot the optimal inventory levels: -SDDP.publication_plot( - simulations; - title = "Inventory level", +Plots.plot( + SDDP.publication_plot(simulations; title = "x_inventory.out") do data + return data[:x_inventory].out + end; + SDDP.publication_plot(simulations; title = "u_buy.out") do data + return data[:u_buy].out + end; xlabel = "Stage", - ylabel = "Inventory level", -) do data - return data[:y].in -end +) From cf2f386ebd561bed7a9212b5b2bdc48b5b90d17b Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:21:36 +1300 Subject: [PATCH 03/15] Update --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index a7b78e8bb..ce2abb376 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -109,7 +109,7 @@ end # Train and simulate the policy: SDDP.train(model; iteration_limit = 100) -simulations = SDDP.simulate(model, 200, [:y]) +simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) lower_bound = round(SDDP.calculate_bound(model); digits = 2) From 54871d2ed5e31f09cf8c7d5ad8826832cd830dff Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:45:42 +1300 Subject: [PATCH 04/15] Update --- docs/src/tutorial/inventory.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index ce2abb376..fbf5c23cd 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -121,7 +121,7 @@ println("Lower bound: ", lower_bound) Plots.plot( SDDP.publication_plot(simulations; title = "x_inventory.out") do data return data[:x_inventory].out - end; + end, SDDP.publication_plot(simulations; title = "u_buy.out") do data return data[:u_buy].out end; @@ -181,7 +181,7 @@ simulations = SDDP.simulate( Plots.plot( SDDP.publication_plot(simulations; title = "x_inventory.out") do data return data[:x_inventory].out - end; + end, SDDP.publication_plot(simulations; title = "u_buy.out") do data return data[:u_buy].out end; From 3a6b6c40e4f1222bff2495f60a6b1ece02119f6f Mon Sep 17 00:00:00 2001 From: Oscar 
Dowson Date: Thu, 24 Oct 2024 13:58:06 +1300 Subject: [PATCH 05/15] Update inventory.jl --- docs/src/tutorial/inventory.jl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index fbf5c23cd..25df7bfce 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -78,7 +78,7 @@ N = 10 # size of sample space # and shortage penalties from the previous period. T = 10 # number of stages -model = SDDP.LinearPolicyGraph( +model = SDDP.LinearPolicyGraph(; stages = T + 1, sense = :Min, lower_bound = 0.0, @@ -97,10 +97,7 @@ model = SDDP.LinearPolicyGraph( fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) else - @stageobjective( - sp, - c * u_buy.out + h * x_inventory.out + p * x_demand.out, - ) + @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return @@ -108,7 +105,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 100) +SDDP.train(model; iteration_limit = 200) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -155,16 +152,13 @@ model = SDDP.PolicyGraph( fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) else - @stageobjective( - sp, - c * u_buy.out + h * x_inventory.out + p * x_demand.out, - ) + @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return end -SDDP.train(model; iteration_limit = 100, log_every_iteration = true) +SDDP.train(model; iteration_limit = 200) simulations = SDDP.simulate( model, From 82c48c868fd47206390cf830e229ba98b8d5c02f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 15:00:32 +1300 Subject: [PATCH 06/15] Update --- docs/src/tutorial/inventory.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 25df7bfce..1c38f8669 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -96,6 +96,10 @@ model = SDDP.LinearPolicyGraph(; if t == 1 fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) + elseif t == T + 1 + fix(u_buy.out, 0; force = true) + @stageobjective(sp, -c * x_inventory.out + c * x_demand.out) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) else @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) @@ -105,7 +109,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 200) +SDDP.train(model; iteration_limit = 300) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -158,8 +162,11 @@ model = SDDP.PolicyGraph( return end -SDDP.train(model; iteration_limit = 200) - +SDDP.train( + model; + iteration_limit = 300, + sampling_scheme = SDDP.InSampleMonteCarlo(; rollout_limit = i -> i), +) simulations = SDDP.simulate( model, 200, From fbfc6aba9548aa13ede6b6029c7aa5a0d556dc6f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:03:49 +1300 Subject: [PATCH 07/15] Update --- docs/src/tutorial/inventory.jl | 42 ++++++++++++++++++---------------- 1 file 
changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 1c38f8669..251d2634f 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -52,7 +52,7 @@ import Statistics # and demand follows a continuous uniform distribution between 0 and 800. x0 = 100 # Initial inventory -α = 0.995 # discount factor +α = 0.95 # discount factor c = 35 # unit inventory cost h = 1 # unit inventory holding cost p = 15 # unit order cost @@ -109,7 +109,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 300) +SDDP.train(model) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -119,15 +119,15 @@ println("Lower bound: ", lower_bound) # Plot the optimal inventory levels: -Plots.plot( - SDDP.publication_plot(simulations; title = "x_inventory.out") do data - return data[:x_inventory].out - end, - SDDP.publication_plot(simulations; title = "u_buy.out") do data - return data[:u_buy].out - end; +plt = SDDP.publication_plot( + simulations; + title = "x_inventory.out + u_buy.out", xlabel = "Stage", -) + ylabel = "Quantity", +) do data + return data[:x_inventory].out + data[:u_buy].out +end +Plots.hline!(plt, [741]; label = "Analytic solution") # ## Infinite horizon @@ -136,7 +136,7 @@ Plots.plot( # infinite planning horizon. graph = SDDP.LinearGraph(2) -SDDP.add_edge(graph, 2 => 1, α) +SDDP.add_edge(graph, 2 => 2, α) model = SDDP.PolicyGraph( graph; sense = :Min, @@ -165,7 +165,9 @@ end SDDP.train( model; iteration_limit = 300, - sampling_scheme = SDDP.InSampleMonteCarlo(; rollout_limit = i -> i), + sampling_scheme = SDDP.InSampleMonteCarlo(; + rollout_limit = i -> min(i, 20 * ceil(Int, i / 100)), + ), ) simulations = SDDP.simulate( model, @@ -179,12 +181,12 @@ simulations = SDDP.simulate( # Plot the optimal inventory levels: -Plots.plot( - SDDP.publication_plot(simulations; title = "x_inventory.out") do data - return data[:x_inventory].out - end, - SDDP.publication_plot(simulations; title = "u_buy.out") do data - return data[:u_buy].out - end; +plt = SDDP.publication_plot( + simulations; + title = "x_inventory.out + u_buy.out", xlabel = "Stage", -) + ylabel = "Quantity", +) do data + return data[:x_inventory].out + data[:u_buy].out +end +Plots.hline!(plt, [741]; label = "Analytic solution") From 953b4b2bae0749ea7a41a813f73dbefb0a2e4ec7 Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:08:29 +1300 Subject: [PATCH 08/15] Update --- docs/src/tutorial/inventory.jl | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 251d2634f..d477b74a9 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -39,26 +39,20 @@ import Statistics # At each stage, an agent must decide how many items to order or, equivalently, # what the inventory level should be at the beginning of the next stage. The # per-stage costs are the sum of the order costs, shortage and holding costs -# incurred at the end of the stage, after demand is realized. If $x$ represents -# the inventory level at the beginning of a stage, and $y$ is the level of -# inventory after ordering, the stage costs are given by -# -# ```math -# c(y-x) + \int_{0}^y h(y-\xi) d\Phi(\xi) + \int_{y}^{\infty} p(\xi - y) d\Phi(\xi). 
-# ``` +# incurred at the end of the stage, after demand is realized. # Following Chapter 19 of Introduction to Operations Research by Hillier and -# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$, -# and demand follows a continuous uniform distribution between 0 and 800. +# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$. -x0 = 100 # Initial inventory -α = 0.95 # discount factor +x_0 = 10 # initial inventory c = 35 # unit inventory cost h = 1 # unit inventory holding cost p = 15 # unit order cost -UD = 800 # upper bound for demand -N = 10 # size of sample space -Ω = rand(Distributions.Uniform(0, UD), N); + +# Demand follows a continuous uniform distribution between 0 and 800. We +# construct a sample average approximation with 10 scenarios: + +Ω = rand(Distributions.Uniform(0, 800), 10); # This is a well-known inventory problem with a closed-form solution. The # optimal policy is a simple order-up-to policy: if the inventory level is @@ -84,7 +78,7 @@ model = SDDP.LinearPolicyGraph(; lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x_0) @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) ## u_buy is a Decision-Hazard control variable. We decide u.out for use in ## the next stage @@ -129,11 +123,18 @@ plt = SDDP.publication_plot( end Plots.hline!(plt, [741]; label = "Analytic solution") +# Note that, because of our sample average approximation, we do not obtain the +# analytic solution. However, if we increased the number of sample points, our +# solution would approach the analytic solution. + # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.995$. The -# objective in this case is to minimize the discounted expected costs over an -# infinite planning horizon. +# For the infinite horizon case, assume a discount factor $\alpha=0.995$. + +α = 0.95 + +# The objective in this case is to minimize the discounted expected costs over +# an infinite planning horizon. graph = SDDP.LinearGraph(2) SDDP.add_edge(graph, 2 => 2, α) From fdd67b3be3dab2861353c511541ed93b3326542f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:53:53 +1300 Subject: [PATCH 09/15] Update --- docs/src/tutorial/inventory.jl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index d477b74a9..e3097d52c 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -52,7 +52,7 @@ p = 15 # unit order cost # Demand follows a continuous uniform distribution between 0 and 800. We # construct a sample average approximation with 10 scenarios: -Ω = rand(Distributions.Uniform(0, 800), 10); +Ω = range(0, 800; length = 10); # This is a well-known inventory problem with a closed-form solution. 
The # optimal policy is a simple order-up-to policy: if the inventory level is @@ -118,6 +118,7 @@ plt = SDDP.publication_plot( title = "x_inventory.out + u_buy.out", xlabel = "Stage", ylabel = "Quantity", + ylims = (0, 1_000), ) do data return data[:x_inventory].out + data[:u_buy].out end @@ -144,7 +145,7 @@ model = SDDP.PolicyGraph( lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x_0) @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) ## u_buy is a Decision-Hazard control variable. We decide u.out for use in ## the next stage @@ -163,13 +164,7 @@ model = SDDP.PolicyGraph( return end -SDDP.train( - model; - iteration_limit = 300, - sampling_scheme = SDDP.InSampleMonteCarlo(; - rollout_limit = i -> min(i, 20 * ceil(Int, i / 100)), - ), -) +SDDP.train(model; iteration_limit = 300) simulations = SDDP.simulate( model, 200, @@ -187,6 +182,7 @@ plt = SDDP.publication_plot( title = "x_inventory.out + u_buy.out", xlabel = "Stage", ylabel = "Quantity", + ylims = (0, 1_000), ) do data return data[:x_inventory].out + data[:u_buy].out end From c91c676ec0a7e6c63b395595a34376611457b243 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:04:28 +1300 Subject: [PATCH 10/15] Update accept.txt --- docs/styles/Vocab/SDDP-Vocab/accept.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/styles/Vocab/SDDP-Vocab/accept.txt b/docs/styles/Vocab/SDDP-Vocab/accept.txt index 8228630c0..1dcadf9f5 100644 --- a/docs/styles/Vocab/SDDP-Vocab/accept.txt +++ b/docs/styles/Vocab/SDDP-Vocab/accept.txt @@ -39,6 +39,7 @@ Markovian Pagnoncelli Papavasiliou Philpott +Porteus Putterman Wasserstein Xpress From 62722940a8d24fe03364415978da036cfc0f93ba Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:09:52 +1300 Subject: [PATCH 11/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index e3097d52c..cec4bb719 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -130,7 +130,7 @@ Plots.hline!(plt, [741]; label = "Analytic solution") # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.995$. +# For the infinite horizon case, assume a discount factor $\alpha=0.95$. α = 0.95 From f34e2c022b28c44fd6bed3c6a483fcd0abebfcf7 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:47:08 +1300 Subject: [PATCH 12/15] Apply suggestions from code review --- docs/src/tutorial/inventory.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index cec4bb719..0cab62693 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -50,9 +50,9 @@ h = 1 # unit inventory holding cost p = 15 # unit order cost # Demand follows a continuous uniform distribution between 0 and 800. We -# construct a sample average approximation with 10 scenarios: +# construct a sample average approximation with 20 scenarios: -Ω = range(0, 800; length = 10); +Ω = range(0, 800; length = 20); # This is a well-known inventory problem with a closed-form solution. 
The # optimal policy is a simple order-up-to policy: if the inventory level is From 71b956b009b19ad13af57a49c1866b202925a14e Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:25:56 +1300 Subject: [PATCH 13/15] Update inventory.jl --- docs/src/tutorial/inventory.jl | 47 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 0cab62693..fb39dfd9c 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -36,10 +36,9 @@ import Statistics # cumulative distribution function (CDF) $\Phi(\cdot)$. Any unsatisfied demand # is backlogged and carried forward to the next stage. -# At each stage, an agent must decide how many items to order or, equivalently, -# what the inventory level should be at the beginning of the next stage. The -# per-stage costs are the sum of the order costs, shortage and holding costs -# incurred at the end of the stage, after demand is realized. +# At each stage, an agent must decide how many items to order. The per-stage +# costs are the sum of the order costs, shortage and holding costs incurred at +# the end of the stage, after demand is realized. # Following Chapter 19 of Introduction to Operations Research by Hillier and # Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$. @@ -56,20 +55,18 @@ p = 15 # unit order cost # This is a well-known inventory problem with a closed-form solution. The # optimal policy is a simple order-up-to policy: if the inventory level is -# below 741 units, the decision-maker orders up to 741 units. Otherwise, no -# order is placed. For a detailed analysis, refer to Foundations of Stochastic -# Inventory Theory by Evan Porteus (Stanford Business Books, 2002). +# below a certain number of units, the decision-maker orders up to that number +# of units. Otherwise, no order is placed. For a detailed analysis, refer +# to Foundations of Stochastic Inventory Theory by Evan Porteus (Stanford +# Business Books, 2002). # ## Finite horizon # For a finite horizon of length $T$, the problem is to minimize the total -# expected cost over all stages, with a terminal value function given by: -# ```math -# v_T(x) = cx. -# ``` -# This reflects that at the end of the last stage, the decision-maker can -# recover the unit cost for each leftover item while incurring any backlog costs -# and shortage penalties from the previous period. +# expected cost over all stages. + +# In the last stage, the decision-maker can recover the unit cost `c` for each +# leftover item, or buy out any remaining backlog, also at the unit cost `c`. T = 10 # number of stages model = SDDP.LinearPolicyGraph(; @@ -122,23 +119,24 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [741]; label = "Analytic solution") -# Note that, because of our sample average approximation, we do not obtain the -# analytic solution. However, if we increased the number of sample points, our -# solution would approach the analytic solution. +# In the early stages, we indeed recover an order-up-to policy. However, +# there are end-of-horizon effects as the agent tries to optimize their +# decision making knowing that they have 10 realizations of demand. # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.95$. +# We can remove the end-of-horizonn effects by considering an infinite +# horizon model. 
We assume a discount factor $\alpha=0.95$: α = 0.95 +graph = SDDP.LinearGraph(2) +SDDP.add_edge(graph, 2 => 2, α) +graph # The objective in this case is to minimize the discounted expected costs over # an infinite planning horizon. -graph = SDDP.LinearGraph(2) -SDDP.add_edge(graph, 2 => 2, α) model = SDDP.PolicyGraph( graph; sense = :Min, @@ -186,4 +184,9 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [741]; label = "Analytic solution") +Plots.hline!(plt, [622]; label = "Analytic solution") + +# We again recover an order-up-to policy. The analytic solution is to +# order-up-to 662 units. We do not precisely recover this solution because +# we used a sample average approximation of 20 elements. If we increased the +# number of samples, our solution would approach the analytic solution. From 29c29f698c03b56eb65f9736baa78be001f3e0bc Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:50:49 +1300 Subject: [PATCH 14/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index fb39dfd9c..4fe56e2f0 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -184,7 +184,7 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [622]; label = "Analytic solution") +Plots.hline!(plt, [662]; label = "Analytic solution") # We again recover an order-up-to policy. The analytic solution is to # order-up-to 662 units. We do not precisely recover this solution because From 29a8a8b6e46dde4f8e9935ece9aada169e4b84cf Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:51:20 +1300 Subject: [PATCH 15/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 4fe56e2f0..41e79ddd2 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -162,7 +162,7 @@ model = SDDP.PolicyGraph( return end -SDDP.train(model; iteration_limit = 300) +SDDP.train(model; iteration_limit = 400) simulations = SDDP.simulate( model, 200,
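
For reference, the quadratic `@stageobjective` added in [PATCH 01/15], `p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2`, is the closed form of the expected stage cost `c * (y - x) + E[h * max(y - ξ, 0) + p * max(ξ - y, 0)]` when demand `ξ` is uniform on `[0, UD]`, matching the integral expression in the tutorial's original background section. A minimal numerical check of that identity (the test point `x = 100, y = 600` is arbitrary and not part of the tutorial):

```julia
# Check that the quadratic stage objective from [PATCH 01/15],
#   p * UD / 2 + (c - p) * y - c * x + (h + p) / (2 * UD) * y^2,
# equals the expected cost c * (y - x) + h * max(y - ξ, 0) + p * max(ξ - y, 0),
# averaged over an evenly spaced grid approximating ξ ~ Uniform(0, UD).
import Statistics

c, h, p, UD = 35, 1, 15, 800
x, y = 100, 600  # arbitrary test point
closed_form = p * UD / 2 + (c - p) * y - c * x + (h + p) / (2 * UD) * y^2
grid = range(0, UD; length = 100_000)
sampled = Statistics.mean(
    c * (y - x) + h * max(y - ξ, 0) + p * max(ξ - y, 0) for ξ in grid
)
isapprox(closed_form, sampled; rtol = 1e-3)  # true
```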
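
The patches compare the simulated policy against an analytic order-up-to level: 741 while the discount factor was 0.995, and 662 after it was changed to 0.95. A sketch of where those values come from, assuming the stationary base-stock level satisfies the critical-fractile condition `Φ(S) = (p - (1 - α) * c) / (p + h)` from Porteus (2002); here `c` is the unit purchase cost, `h` the holding cost, and `p` the shortage cost, as in the tutorial's background section:

```julia
# Order-up-to levels implied by the assumed critical-fractile condition
#   Φ(S) = (p - (1 - α) * c) / (p + h)
# with demand uniform on [0, 800]. This closed form is an assumption based on
# Porteus (2002); it reproduces the 741 and 662 levels quoted in the patches.
import Distributions

c, h, p = 35, 1, 15  # unit purchase, holding, and shortage costs
D = Distributions.Uniform(0, 800)
order_up_to(α) = Distributions.quantile(D, (p - (1 - α) * c) / (p + h))

order_up_to(0.995)  # ≈ 741.25, the level used while α = 0.995
order_up_to(0.95)   # ≈ 662.5, the "Analytic solution" plotted in the final version
```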