From 0b51f4dad54559ed924511af60e7cca7e7747dc0 Mon Sep 17 00:00:00 2001 From: odow Date: Mon, 21 Oct 2024 15:05:19 +1300 Subject: [PATCH 01/15] Add tutorial Example: inventory management --- docs/make.jl | 1 + docs/src/tutorial/inventory.jl | 172 +++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 docs/src/tutorial/inventory.jl diff --git a/docs/make.jl b/docs/make.jl index 3c663485a..01bbce04d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -157,6 +157,7 @@ Documenter.makedocs(; "tutorial/example_newsvendor.md", "tutorial/example_reservoir.md", "tutorial/example_milk_producer.md", + "tutorial/inventory.md", ], "How-to guides" => [ "guides/access_previous_variables.md", diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl new file mode 100644 index 000000000..3bc3b2c48 --- /dev/null +++ b/docs/src/tutorial/inventory.jl @@ -0,0 +1,172 @@ +# Copyright (c) 2017-24, Oscar Dowson and SDDP.jl contributors. #src +# This Source Code Form is subject to the terms of the Mozilla Public #src +# License, v. 2.0. If a copy of the MPL was not distributed with this #src +# file, You can obtain one at http://mozilla.org/MPL/2.0/. #src + +# # Example: inventory management + +# The purpose of this tutorial is to demonstrate a well-known inventory +# management problem with a finite- and infinite-horizon policy. + +# ## Required packages + +# This tutorial requires the following packages: + +using SDDP +import Distributions +import HiGHS +import Plots +import Statistics + +# ## Background + +# Consider a periodic review inventory problem involving a single product. The +# initial inventory is denoted by $x_0 \geq 0$, and a decision-maker can place +# an order at the start of each stage. The objective is to minimize expected +# costs over the planning horizon. The following parameters define the cost +# structure: +# +# * $c$ is the unit cost for purchasing each unit +# * $h$ is the holding cost per unit remaining at the end of each stage +# * $p$ is the shortage cost per unit of unsatisfied demand at the end of each +# stage +# +# There are no fixed ordering costs, and the demand at each stage is assumed to +# follow an independent and identically distributed random variable with +# cumulative distribution function (CDF) $\Phi(\cdot)$. Any unsatisfied demand +# is backlogged and carried forward to the next stage. + +# At each stage, an agent must decide how many items to order or, equivalently, +# what the inventory level should be at the beginning of the next stage. The +# per-stage costs are the sum of the order costs, shortage and holding costs +# incurred at the end of the stage, after demand is realized. If $x$ represents +# the inventory level at the beginning of a stage, and $y$ is the level of +# inventory after ordering, the stage costs are given by +# +# ```math +# c(y-x) + \int_{0}^y h(y-\xi) d\Phi(\xi) + \int_{y}^{\infty} p(\xi - y) d\Phi(\xi). +# ``` + +# Following Chapter 19 of Introduction to Operations Research by Hillier and +# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$, +# and demand follows a continuous uniform distribution between 0 and 800. + +x0 = 100 # Initial inventory +α = 0.995 # discount factor +c = 35 # unit inventory cost +h = 1 # unit inventory holding cost +p = 15 # unit order cost +UD = 800 # upper bound for demand +N = 10 # size of sample space +Ω = rand(Distributions.Uniform(0, UD), N); + +# This is a well-known inventory problem with a closed-form solution. 
The +# optimal policy is a simple order-up-to policy: if the inventory level is +# below 741 units, the decision-maker orders up to 741 units. Otherwise, no +# order is placed. For a detailed analysis, refer to Foundations of Stochastic +# Inventory Theory by Evan Porteus (Stanford Business Books, 2002). + +# ## Finite horizon + +# For a finite horizon of length $T$, the problem is to minimize the total +# expected cost over all stages, with a terminal value function given by: +# ```math +# v_T(x) = cx. +# ``` +# This reflects that at the end of the last stage, the decision-maker can +# recover the unit cost for each leftover item while incurring any backlog costs +# and shortage penalties from the previous period. + +T = 10 # number of stages + +model = SDDP.LinearPolicyGraph( + stages = T + 1, + sense = :Min, + lower_bound = 0.0, + optimizer = HiGHS.Optimizer, +) do sp, t + @variable(sp, x, SDDP.State, initial_value = x0) + @variable(sp, y, SDDP.State, initial_value = x0) + @constraint(sp, y.out >= x.out) + @constraint(sp, con_balance, x.out == y.in) + if t == 1 + @stageobjective(sp, 0) + elseif t == T + 1 + @stageobjective(sp, α^(t - 1) * (-c * x.in)) + else + @stageobjective( + sp, + α^(t - 1) * ( + p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 + ) + ) + SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + end + return +end + +# Train and simulate the policy: + +SDDP.train(model; iteration_limit = 100) +simulations = SDDP.simulate(model, 200, [:y]) +objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] +μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) +lower_bound = round(SDDP.calculate_bound(model); digits = 2) +println("Confidence interval: ", μ, " ± ", ci) +println("Lower bound: ", lower_bound) + +# Plot the optimal inventory levels: + +SDDP.publication_plot( + simulations; + title = "Inventory level", + xlabel = "Stage", + ylabel = "Inventory level", +) do data + return data[:y].in +end + +# ## Infinite horizon + +# For the infinite horizon case, assume a discount factor $\alpha=0.995$. The +# objective in this case is to minimize the discounted expected costs over an +# infinite planning horizon. 
+ +model = SDDP.PolicyGraph( + SDDP.UnicyclicGraph(α; num_nodes = 1); + sense = :Min, + lower_bound = 0.0, + optimizer = HiGHS.Optimizer, +) do sp, t + @variable(sp, x, SDDP.State, initial_value = x0) + @variable(sp, y, SDDP.State, initial_value = x0) + @constraint(sp, y.out >= x.out) + @constraint(sp, con_balance, x.out == y.in) + @stageobjective( + sp, + p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 + ) + SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + return +end + +# Train and simulate the policy: + +SDDP.train(model; iteration_limit = 100) +simulations = SDDP.simulate(model, 200, [:y]) +objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] +μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) +lower_bound = round(SDDP.calculate_bound(model); digits = 2) +println("Confidence interval: ", μ, " ± ", ci) +println("Lower bound: ", lower_bound) + +# Plot the optimal inventory levels: + +SDDP.publication_plot( + simulations; + title = "Inventory level", + xlabel = "Stage", + ylabel = "Inventory level", +) do data + return data[:y].in +end From 1ce774919cddaf66ed875f7117dcfc934d6a3953 Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:06:54 +1300 Subject: [PATCH 02/15] Update --- docs/src/tutorial/inventory.jl | 111 +++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 47 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 3bc3b2c48..a7b78e8bb 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -26,9 +26,9 @@ import Statistics # costs over the planning horizon. The following parameters define the cost # structure: # -# * $c$ is the unit cost for purchasing each unit -# * $h$ is the holding cost per unit remaining at the end of each stage -# * $p$ is the shortage cost per unit of unsatisfied demand at the end of each +# * `c` is the unit cost for purchasing each unit +# * `h` is the holding cost per unit remaining at the end of each stage +# * `p` is the shortage cost per unit of unsatisfied demand at the end of each # stage # # There are no fixed ordering costs, and the demand at each stage is assumed to @@ -78,29 +78,30 @@ N = 10 # size of sample space # and shortage penalties from the previous period. T = 10 # number of stages - model = SDDP.LinearPolicyGraph( stages = T + 1, sense = :Min, lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x, SDDP.State, initial_value = x0) - @variable(sp, y, SDDP.State, initial_value = x0) - @constraint(sp, y.out >= x.out) - @constraint(sp, con_balance, x.out == y.in) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) + ## u_buy is a Decision-Hazard control variable. 
We decide u.out for use in + ## the next stage + @variable(sp, u_buy >= 0, SDDP.State, initial_value = 0) + @variable(sp, u_sell >= 0) + @variable(sp, w_demand == 0) + @constraint(sp, x_inventory.out == x_inventory.in + u_buy.in - u_sell) + @constraint(sp, x_demand.out == x_demand.in + w_demand - u_sell) if t == 1 - @stageobjective(sp, 0) - elseif t == T + 1 - @stageobjective(sp, α^(t - 1) * (-c * x.in)) + fix(u_sell, 0; force = true) + @stageobjective(sp, c * u_buy.out) else @stageobjective( sp, - α^(t - 1) * ( - p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 - ) + c * u_buy.out + h * x_inventory.out + p * x_demand.out, ) - SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return end @@ -117,14 +118,15 @@ println("Lower bound: ", lower_bound) # Plot the optimal inventory levels: -SDDP.publication_plot( - simulations; - title = "Inventory level", +Plots.plot( + SDDP.publication_plot(simulations; title = "x_inventory.out") do data + return data[:x_inventory].out + end; + SDDP.publication_plot(simulations; title = "u_buy.out") do data + return data[:u_buy].out + end; xlabel = "Stage", - ylabel = "Inventory level", -) do data - return data[:y].in -end +) # ## Infinite horizon @@ -132,41 +134,56 @@ end # objective in this case is to minimize the discounted expected costs over an # infinite planning horizon. +graph = SDDP.LinearGraph(2) +SDDP.add_edge(graph, 2 => 1, α) model = SDDP.PolicyGraph( - SDDP.UnicyclicGraph(α; num_nodes = 1); + graph; sense = :Min, lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x, SDDP.State, initial_value = x0) - @variable(sp, y, SDDP.State, initial_value = x0) - @constraint(sp, y.out >= x.out) - @constraint(sp, con_balance, x.out == y.in) - @stageobjective( - sp, - p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2 - ) - SDDP.parameterize(ω -> JuMP.set_normalized_rhs(con_balance, -ω), sp, Ω) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) + ## u_buy is a Decision-Hazard control variable. 
We decide u.out for use in + ## the next stage + @variable(sp, u_buy >= 0, SDDP.State, initial_value = 0) + @variable(sp, u_sell >= 0) + @variable(sp, w_demand == 0) + @constraint(sp, x_inventory.out == x_inventory.in + u_buy.in - u_sell) + @constraint(sp, x_demand.out == x_demand.in + w_demand - u_sell) + if t == 1 + fix(u_sell, 0; force = true) + @stageobjective(sp, c * u_buy.out) + else + @stageobjective( + sp, + c * u_buy.out + h * x_inventory.out + p * x_demand.out, + ) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) + end return end -# Train and simulate the policy: +SDDP.train(model; iteration_limit = 100, log_every_iteration = true) -SDDP.train(model; iteration_limit = 100) -simulations = SDDP.simulate(model, 200, [:y]) -objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] -μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) -lower_bound = round(SDDP.calculate_bound(model); digits = 2) -println("Confidence interval: ", μ, " ± ", ci) -println("Lower bound: ", lower_bound) +simulations = SDDP.simulate( + model, + 200, + [:x_inventory, :u_buy]; + sampling_scheme = SDDP.InSampleMonteCarlo(; + max_depth = 50, + terminate_on_dummy_leaf = false, + ), +); # Plot the optimal inventory levels: -SDDP.publication_plot( - simulations; - title = "Inventory level", +Plots.plot( + SDDP.publication_plot(simulations; title = "x_inventory.out") do data + return data[:x_inventory].out + end; + SDDP.publication_plot(simulations; title = "u_buy.out") do data + return data[:u_buy].out + end; xlabel = "Stage", - ylabel = "Inventory level", -) do data - return data[:y].in -end +) From cf2f386ebd561bed7a9212b5b2bdc48b5b90d17b Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:21:36 +1300 Subject: [PATCH 03/15] Update --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index a7b78e8bb..ce2abb376 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -109,7 +109,7 @@ end # Train and simulate the policy: SDDP.train(model; iteration_limit = 100) -simulations = SDDP.simulate(model, 200, [:y]) +simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) lower_bound = round(SDDP.calculate_bound(model); digits = 2) From 54871d2ed5e31f09cf8c7d5ad8826832cd830dff Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 11:45:42 +1300 Subject: [PATCH 04/15] Update --- docs/src/tutorial/inventory.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index ce2abb376..fbf5c23cd 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -121,7 +121,7 @@ println("Lower bound: ", lower_bound) Plots.plot( SDDP.publication_plot(simulations; title = "x_inventory.out") do data return data[:x_inventory].out - end; + end, SDDP.publication_plot(simulations; title = "u_buy.out") do data return data[:u_buy].out end; @@ -181,7 +181,7 @@ simulations = SDDP.simulate( Plots.plot( SDDP.publication_plot(simulations; title = "x_inventory.out") do data return data[:x_inventory].out - end; + end, SDDP.publication_plot(simulations; title = "u_buy.out") do data return data[:u_buy].out end; From 3a6b6c40e4f1222bff2495f60a6b1ece02119f6f Mon Sep 17 00:00:00 2001 From: Oscar 
Dowson Date: Thu, 24 Oct 2024 13:58:06 +1300 Subject: [PATCH 05/15] Update inventory.jl --- docs/src/tutorial/inventory.jl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index fbf5c23cd..25df7bfce 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -78,7 +78,7 @@ N = 10 # size of sample space # and shortage penalties from the previous period. T = 10 # number of stages -model = SDDP.LinearPolicyGraph( +model = SDDP.LinearPolicyGraph(; stages = T + 1, sense = :Min, lower_bound = 0.0, @@ -97,10 +97,7 @@ model = SDDP.LinearPolicyGraph( fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) else - @stageobjective( - sp, - c * u_buy.out + h * x_inventory.out + p * x_demand.out, - ) + @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return @@ -108,7 +105,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 100) +SDDP.train(model; iteration_limit = 200) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -155,16 +152,13 @@ model = SDDP.PolicyGraph( fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) else - @stageobjective( - sp, - c * u_buy.out + h * x_inventory.out + p * x_demand.out, - ) + @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) end return end -SDDP.train(model; iteration_limit = 100, log_every_iteration = true) +SDDP.train(model; iteration_limit = 200) simulations = SDDP.simulate( model, From 82c48c868fd47206390cf830e229ba98b8d5c02f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 15:00:32 +1300 Subject: [PATCH 06/15] Update --- docs/src/tutorial/inventory.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 25df7bfce..1c38f8669 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -96,6 +96,10 @@ model = SDDP.LinearPolicyGraph(; if t == 1 fix(u_sell, 0; force = true) @stageobjective(sp, c * u_buy.out) + elseif t == T + 1 + fix(u_buy.out, 0; force = true) + @stageobjective(sp, -c * x_inventory.out + c * x_demand.out) + SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) else @stageobjective(sp, c * u_buy.out + h * x_inventory.out + p * x_demand.out) SDDP.parameterize(ω -> JuMP.fix(w_demand, ω), sp, Ω) @@ -105,7 +109,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 200) +SDDP.train(model; iteration_limit = 300) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -158,8 +162,11 @@ model = SDDP.PolicyGraph( return end -SDDP.train(model; iteration_limit = 200) - +SDDP.train( + model; + iteration_limit = 300, + sampling_scheme = SDDP.InSampleMonteCarlo(; rollout_limit = i -> i), +) simulations = SDDP.simulate( model, 200, From fbfc6aba9548aa13ede6b6029c7aa5a0d556dc6f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:03:49 +1300 Subject: [PATCH 07/15] Update --- docs/src/tutorial/inventory.jl | 42 ++++++++++++++++++---------------- 1 file 
changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 1c38f8669..251d2634f 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -52,7 +52,7 @@ import Statistics # and demand follows a continuous uniform distribution between 0 and 800. x0 = 100 # Initial inventory -α = 0.995 # discount factor +α = 0.95 # discount factor c = 35 # unit inventory cost h = 1 # unit inventory holding cost p = 15 # unit order cost @@ -109,7 +109,7 @@ end # Train and simulate the policy: -SDDP.train(model; iteration_limit = 300) +SDDP.train(model) simulations = SDDP.simulate(model, 200, [:x_inventory, :u_buy]) objective_values = [sum(t[:stage_objective] for t in s) for s in simulations] μ, ci = round.(SDDP.confidence_interval(objective_values, 1.96); digits = 2) @@ -119,15 +119,15 @@ println("Lower bound: ", lower_bound) # Plot the optimal inventory levels: -Plots.plot( - SDDP.publication_plot(simulations; title = "x_inventory.out") do data - return data[:x_inventory].out - end, - SDDP.publication_plot(simulations; title = "u_buy.out") do data - return data[:u_buy].out - end; +plt = SDDP.publication_plot( + simulations; + title = "x_inventory.out + u_buy.out", xlabel = "Stage", -) + ylabel = "Quantity", +) do data + return data[:x_inventory].out + data[:u_buy].out +end +Plots.hline!(plt, [741]; label = "Analytic solution") # ## Infinite horizon @@ -136,7 +136,7 @@ Plots.plot( # infinite planning horizon. graph = SDDP.LinearGraph(2) -SDDP.add_edge(graph, 2 => 1, α) +SDDP.add_edge(graph, 2 => 2, α) model = SDDP.PolicyGraph( graph; sense = :Min, @@ -165,7 +165,9 @@ end SDDP.train( model; iteration_limit = 300, - sampling_scheme = SDDP.InSampleMonteCarlo(; rollout_limit = i -> i), + sampling_scheme = SDDP.InSampleMonteCarlo(; + rollout_limit = i -> min(i, 20 * ceil(Int, i / 100)), + ), ) simulations = SDDP.simulate( model, @@ -179,12 +181,12 @@ simulations = SDDP.simulate( # Plot the optimal inventory levels: -Plots.plot( - SDDP.publication_plot(simulations; title = "x_inventory.out") do data - return data[:x_inventory].out - end, - SDDP.publication_plot(simulations; title = "u_buy.out") do data - return data[:u_buy].out - end; +plt = SDDP.publication_plot( + simulations; + title = "x_inventory.out + u_buy.out", xlabel = "Stage", -) + ylabel = "Quantity", +) do data + return data[:x_inventory].out + data[:u_buy].out +end +Plots.hline!(plt, [741]; label = "Analytic solution") From 953b4b2bae0749ea7a41a813f73dbefb0a2e4ec7 Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:08:29 +1300 Subject: [PATCH 08/15] Update --- docs/src/tutorial/inventory.jl | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 251d2634f..d477b74a9 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -39,26 +39,20 @@ import Statistics # At each stage, an agent must decide how many items to order or, equivalently, # what the inventory level should be at the beginning of the next stage. The # per-stage costs are the sum of the order costs, shortage and holding costs -# incurred at the end of the stage, after demand is realized. If $x$ represents -# the inventory level at the beginning of a stage, and $y$ is the level of -# inventory after ordering, the stage costs are given by -# -# ```math -# c(y-x) + \int_{0}^y h(y-\xi) d\Phi(\xi) + \int_{y}^{\infty} p(\xi - y) d\Phi(\xi). 
-# ``` +# incurred at the end of the stage, after demand is realized. # Following Chapter 19 of Introduction to Operations Research by Hillier and -# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$, -# and demand follows a continuous uniform distribution between 0 and 800. +# Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$. -x0 = 100 # Initial inventory -α = 0.95 # discount factor +x_0 = 10 # initial inventory c = 35 # unit inventory cost h = 1 # unit inventory holding cost p = 15 # unit order cost -UD = 800 # upper bound for demand -N = 10 # size of sample space -Ω = rand(Distributions.Uniform(0, UD), N); + +# Demand follows a continuous uniform distribution between 0 and 800. We +# construct a sample average approximation with 10 scenarios: + +Ω = rand(Distributions.Uniform(0, 800), 10); # This is a well-known inventory problem with a closed-form solution. The # optimal policy is a simple order-up-to policy: if the inventory level is @@ -84,7 +78,7 @@ model = SDDP.LinearPolicyGraph(; lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x_0) @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) ## u_buy is a Decision-Hazard control variable. We decide u.out for use in ## the next stage @@ -129,11 +123,18 @@ plt = SDDP.publication_plot( end Plots.hline!(plt, [741]; label = "Analytic solution") +# Note that, because of our sample average approximation, we do not obtain the +# analytic solution. However, if we increased the number of sample points, our +# solution would approach the analytic solution. + # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.995$. The -# objective in this case is to minimize the discounted expected costs over an -# infinite planning horizon. +# For the infinite horizon case, assume a discount factor $\alpha=0.995$. + +α = 0.95 + +# The objective in this case is to minimize the discounted expected costs over +# an infinite planning horizon. graph = SDDP.LinearGraph(2) SDDP.add_edge(graph, 2 => 2, α) From fdd67b3be3dab2861353c511541ed93b3326542f Mon Sep 17 00:00:00 2001 From: odow Date: Thu, 24 Oct 2024 17:53:53 +1300 Subject: [PATCH 09/15] Update --- docs/src/tutorial/inventory.jl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index d477b74a9..e3097d52c 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -52,7 +52,7 @@ p = 15 # unit order cost # Demand follows a continuous uniform distribution between 0 and 800. We # construct a sample average approximation with 10 scenarios: -Ω = rand(Distributions.Uniform(0, 800), 10); +Ω = range(0, 800; length = 10); # This is a well-known inventory problem with a closed-form solution. 
The # optimal policy is a simple order-up-to policy: if the inventory level is @@ -118,6 +118,7 @@ plt = SDDP.publication_plot( title = "x_inventory.out + u_buy.out", xlabel = "Stage", ylabel = "Quantity", + ylims = (0, 1_000), ) do data return data[:x_inventory].out + data[:u_buy].out end @@ -144,7 +145,7 @@ model = SDDP.PolicyGraph( lower_bound = 0.0, optimizer = HiGHS.Optimizer, ) do sp, t - @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x0) + @variable(sp, x_inventory >= 0, SDDP.State, initial_value = x_0) @variable(sp, x_demand >= 0, SDDP.State, initial_value = 0) ## u_buy is a Decision-Hazard control variable. We decide u.out for use in ## the next stage @@ -163,13 +164,7 @@ model = SDDP.PolicyGraph( return end -SDDP.train( - model; - iteration_limit = 300, - sampling_scheme = SDDP.InSampleMonteCarlo(; - rollout_limit = i -> min(i, 20 * ceil(Int, i / 100)), - ), -) +SDDP.train(model; iteration_limit = 300) simulations = SDDP.simulate( model, 200, @@ -187,6 +182,7 @@ plt = SDDP.publication_plot( title = "x_inventory.out + u_buy.out", xlabel = "Stage", ylabel = "Quantity", + ylims = (0, 1_000), ) do data return data[:x_inventory].out + data[:u_buy].out end From c91c676ec0a7e6c63b395595a34376611457b243 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:04:28 +1300 Subject: [PATCH 10/15] Update accept.txt --- docs/styles/Vocab/SDDP-Vocab/accept.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/styles/Vocab/SDDP-Vocab/accept.txt b/docs/styles/Vocab/SDDP-Vocab/accept.txt index 8228630c0..1dcadf9f5 100644 --- a/docs/styles/Vocab/SDDP-Vocab/accept.txt +++ b/docs/styles/Vocab/SDDP-Vocab/accept.txt @@ -39,6 +39,7 @@ Markovian Pagnoncelli Papavasiliou Philpott +Porteus Putterman Wasserstein Xpress From 62722940a8d24fe03364415978da036cfc0f93ba Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:09:52 +1300 Subject: [PATCH 11/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index e3097d52c..cec4bb719 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -130,7 +130,7 @@ Plots.hline!(plt, [741]; label = "Analytic solution") # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.995$. +# For the infinite horizon case, assume a discount factor $\alpha=0.95$. α = 0.95 From f34e2c022b28c44fd6bed3c6a483fcd0abebfcf7 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 24 Oct 2024 19:47:08 +1300 Subject: [PATCH 12/15] Apply suggestions from code review --- docs/src/tutorial/inventory.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index cec4bb719..0cab62693 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -50,9 +50,9 @@ h = 1 # unit inventory holding cost p = 15 # unit order cost # Demand follows a continuous uniform distribution between 0 and 800. We -# construct a sample average approximation with 10 scenarios: +# construct a sample average approximation with 20 scenarios: -Ω = range(0, 800; length = 10); +Ω = range(0, 800; length = 20); # This is a well-known inventory problem with a closed-form solution. 
The # optimal policy is a simple order-up-to policy: if the inventory level is From 71b956b009b19ad13af57a49c1866b202925a14e Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:25:56 +1300 Subject: [PATCH 13/15] Update inventory.jl --- docs/src/tutorial/inventory.jl | 47 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 0cab62693..fb39dfd9c 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -36,10 +36,9 @@ import Statistics # cumulative distribution function (CDF) $\Phi(\cdot)$. Any unsatisfied demand # is backlogged and carried forward to the next stage. -# At each stage, an agent must decide how many items to order or, equivalently, -# what the inventory level should be at the beginning of the next stage. The -# per-stage costs are the sum of the order costs, shortage and holding costs -# incurred at the end of the stage, after demand is realized. +# At each stage, an agent must decide how many items to order. The per-stage +# costs are the sum of the order costs, shortage and holding costs incurred at +# the end of the stage, after demand is realized. # Following Chapter 19 of Introduction to Operations Research by Hillier and # Lieberman (7th edition), we use the following parameters: $c=15, h=1, p=15$. @@ -56,20 +55,18 @@ p = 15 # unit order cost # This is a well-known inventory problem with a closed-form solution. The # optimal policy is a simple order-up-to policy: if the inventory level is -# below 741 units, the decision-maker orders up to 741 units. Otherwise, no -# order is placed. For a detailed analysis, refer to Foundations of Stochastic -# Inventory Theory by Evan Porteus (Stanford Business Books, 2002). +# below a certain number of units, the decision-maker orders up to that number +# of units. Otherwise, no order is placed. For a detailed analysis, refer +# to Foundations of Stochastic Inventory Theory by Evan Porteus (Stanford +# Business Books, 2002). # ## Finite horizon # For a finite horizon of length $T$, the problem is to minimize the total -# expected cost over all stages, with a terminal value function given by: -# ```math -# v_T(x) = cx. -# ``` -# This reflects that at the end of the last stage, the decision-maker can -# recover the unit cost for each leftover item while incurring any backlog costs -# and shortage penalties from the previous period. +# expected cost over all stages. + +# In the last stage, the decision-maker can recover the unit cost `c` for each +# leftover item, or buy out any remaining backlog, also at the unit cost `c`. T = 10 # number of stages model = SDDP.LinearPolicyGraph(; @@ -122,23 +119,24 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [741]; label = "Analytic solution") -# Note that, because of our sample average approximation, we do not obtain the -# analytic solution. However, if we increased the number of sample points, our -# solution would approach the analytic solution. +# In the early stages, we indeed recover an order-up-to policy. However, +# there are end-of-horizon effects as the agent tries to optimize their +# decision making knowing that they have 10 realizations of demand. # ## Infinite horizon -# For the infinite horizon case, assume a discount factor $\alpha=0.95$. +# We can remove the end-of-horizonn effects by considering an infinite +# horizon model. 
We assume a discount factor $\alpha=0.95$: α = 0.95 +graph = SDDP.LinearGraph(2) +SDDP.add_edge(graph, 2 => 2, α) +graph # The objective in this case is to minimize the discounted expected costs over # an infinite planning horizon. -graph = SDDP.LinearGraph(2) -SDDP.add_edge(graph, 2 => 2, α) model = SDDP.PolicyGraph( graph; sense = :Min, @@ -186,4 +184,9 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [741]; label = "Analytic solution") +Plots.hline!(plt, [622]; label = "Analytic solution") + +# We again recover an order-up-to policy. The analytic solution is to +# order-up-to 662 units. We do not precisely recover this solution because +# we used a sample average approximation of 20 elements. If we increased the +# number of samples, our solution would approach the analytic solution. From 29c29f698c03b56eb65f9736baa78be001f3e0bc Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:50:49 +1300 Subject: [PATCH 14/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index fb39dfd9c..4fe56e2f0 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -184,7 +184,7 @@ plt = SDDP.publication_plot( ) do data return data[:x_inventory].out + data[:u_buy].out end -Plots.hline!(plt, [622]; label = "Analytic solution") +Plots.hline!(plt, [662]; label = "Analytic solution") # We again recover an order-up-to policy. The analytic solution is to # order-up-to 662 units. We do not precisely recover this solution because From 29a8a8b6e46dde4f8e9935ece9aada169e4b84cf Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 25 Oct 2024 10:51:20 +1300 Subject: [PATCH 15/15] Update docs/src/tutorial/inventory.jl --- docs/src/tutorial/inventory.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorial/inventory.jl b/docs/src/tutorial/inventory.jl index 4fe56e2f0..41e79ddd2 100644 --- a/docs/src/tutorial/inventory.jl +++ b/docs/src/tutorial/inventory.jl @@ -162,7 +162,7 @@ model = SDDP.PolicyGraph( return end -SDDP.train(model; iteration_limit = 300) +SDDP.train(model; iteration_limit = 400) simulations = SDDP.simulate( model, 200,
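
For reference, the quadratic `@stageobjective` added in [PATCH 01/15], `p * UD / 2 + (c - p) * y.in - c * x.in + (h + p) / (2 * UD) * y.in^2`, is the closed form of the expected stage cost `c * (y - x) + E[h * max(y - ξ, 0) + p * max(ξ - y, 0)]` when demand `ξ` is uniform on `[0, UD]`, matching the integral expression in the tutorial's original background section. A minimal numerical check of that identity (the test point `x = 100, y = 600` is arbitrary and not part of the tutorial):

```julia
# Check that the quadratic stage objective from [PATCH 01/15],
#   p * UD / 2 + (c - p) * y - c * x + (h + p) / (2 * UD) * y^2,
# equals the expected cost c * (y - x) + h * max(y - ξ, 0) + p * max(ξ - y, 0),
# averaged over an evenly spaced grid approximating ξ ~ Uniform(0, UD).
import Statistics

c, h, p, UD = 35, 1, 15, 800
x, y = 100, 600  # arbitrary test point
closed_form = p * UD / 2 + (c - p) * y - c * x + (h + p) / (2 * UD) * y^2
grid = range(0, UD; length = 100_000)
sampled = Statistics.mean(
    c * (y - x) + h * max(y - ξ, 0) + p * max(ξ - y, 0) for ξ in grid
)
isapprox(closed_form, sampled; rtol = 1e-3)  # true
```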
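
The patches compare the simulated policy against an analytic order-up-to level: 741 while the discount factor was 0.995, and 662 after it was changed to 0.95. A sketch of where those values come from, assuming the stationary base-stock level satisfies the critical-fractile condition `Φ(S) = (p - (1 - α) * c) / (p + h)` from Porteus (2002); here `c` is the unit purchase cost, `h` the holding cost, and `p` the shortage cost, as in the tutorial's background section:

```julia
# Order-up-to levels implied by the assumed critical-fractile condition
#   Φ(S) = (p - (1 - α) * c) / (p + h)
# with demand uniform on [0, 800]. This closed form is an assumption based on
# Porteus (2002); it reproduces the 741 and 662 levels quoted in the patches.
import Distributions

c, h, p = 35, 1, 15  # unit purchase, holding, and shortage costs
D = Distributions.Uniform(0, 800)
order_up_to(α) = Distributions.quantile(D, (p - (1 - α) * c) / (p + h))

order_up_to(0.995)  # ≈ 741.25, the level used while α = 0.995
order_up_to(0.95)   # ≈ 662.5, the "Analytic solution" plotted in the final version
```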