Add missing legal_action_space_mask default methods (#1075)
* Fix devcontainer

* Add patch for missing legal_action_space_mask defaults, add test to StockTradingEnv
jeremiahpslewis authored May 13, 2024
1 parent cf14bf0 commit 89a46d9
Showing 41 changed files with 139 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
@@ -11,5 +11,5 @@
"--privileged"
],
"dockerFile": "Dockerfile",
"updateContentCommand": "julia -e 'using Pkg; Pkg.develop(path=\"src/ReinforcementLearningBase\"); Pkg.develop(path=\"src/ReinforcementLearningEnvironments\"); Pkg.develop(path=\"src/ReinforcementLearningCore\"); Pkg.develop(path=\"src/ReinforcementLearningFarm\"); Pkg.develop(path=\"src/ReinforcementLearning\");'"
"updateContentCommand": "julia -e 'using Pkg; Pkg.develop(path=\"src/ReinforcementLearningBase\"); Pkg.develop(path=\"src/ReinforcementLearningEnvironments\"); Pkg.develop(path=\"src/ReinforcementLearningCore\"); Pkg.develop(path=\"src/ReinforcementLearningFarm\"); Pkg.develop(path=\".\");'"
}
2 changes: 1 addition & 1 deletion .github/workflows/CompatHelper.yml
@@ -15,7 +15,7 @@ jobs:
run: which julia
continue-on-error: true
- name: Install Julia, but only if it is not already available in the PATH
uses: julia-actions/setup-julia@v1
uses: julia-actions/setup-julia@v2
with:
version: '1'
arch: ${{ runner.arch }}
18 changes: 9 additions & 9 deletions .github/workflows/ci.yml
@@ -34,11 +34,11 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 100
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v1
- uses: julia-actions/cache@v2
- name: Get changed files
id: RLBase-changed
uses: tj-actions/changed-files@v42
@@ -75,11 +75,11 @@
- uses: actions/checkout@v4
with:
fetch-depth: 100
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v1
- uses: julia-actions/cache@v2
- name: Get changed files
id: RLCore-changed
uses: tj-actions/changed-files@v42
@@ -121,11 +121,11 @@
- uses: actions/checkout@v4
with:
fetch-depth: 100
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v1
- uses: julia-actions/cache@v2
- name: Get changed files
id: RLFarm-changed
uses: tj-actions/changed-files@v42
@@ -168,11 +168,11 @@
- uses: actions/checkout@v4
with:
fetch-depth: 100
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
# - uses: julia-actions/cache@v1
# - uses: julia-actions/cache@v2
- name: Get changed files
id: RLEnvironments-changed
uses: tj-actions/changed-files@v42
@@ -205,7 +205,7 @@ jobs:
with:
fetch-depth: 0
- run: python -m pip install --user matplotlib
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: "1"
- name: Build homepage
1 change: 0 additions & 1 deletion Project.toml
@@ -4,7 +4,6 @@ authors = ["Johanni Brea <[email protected]>", "Jun Tian <tianjun.c
version = "0.11.0"

[deps]
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
@@ -15415,7 +15415,7 @@ <h2 id="Environments">Environments<a class="anchor-link" href="#Environments">&#
<div class="text_cell_render border-box-sizing rendered_html">

<pre><code>RLBase.action_space(env::MultiArmBanditsEnv) = Base.OneTo(length(env.true_values))
RLBase.state(env::MultiArmBanditsEnv) = 1
RLBase.state(env::MultiArmBanditsEnv, ::Observation, ::DefaultPlayer) = 1
RLBase.state_space(env::MultiArmBanditsEnv) = Base.OneTo(1)
RLBase.is_terminated(env::MultiArmBanditsEnv) = env.is_terminated
RLBase.reward(env::MultiArmBanditsEnv) = env.reward
4 changes: 2 additions & 2 deletions docs/src/How_to_write_a_customized_environment.md
@@ -68,7 +68,7 @@ Here `RLBase` is just an alias for `ReinforcementLearningBase`.

```@repl customized_env
RLBase.reward(env::LotteryEnv) = env.reward
RLBase.state(env::LotteryEnv) = !isnothing(env.reward)
RLBase.state(env::LotteryEnv, ::Observation, ::DefaultPlayer) = !isnothing(env.reward)
RLBase.state_space(env::LotteryEnv) = [false, true]
RLBase.is_terminated(env::LotteryEnv) = !isnothing(env.reward)
RLBase.reset!(env::LotteryEnv) = env.reward = nothing
@@ -181,7 +181,7 @@ RLCore.forward(p.learner.approximator, false)

OK, now we know where the problem is. But how to fix it?

An initial idea is to rewrite the `RLBase.state(env::LotteryEnv)` function to
An initial idea is to rewrite the `RLBase.state(env::LotteryEnv, ::Observation, ::DefaultPlayer)` function to
force it to return an `Int`. That's workable. But in some cases, we may be using
environments written by others and it's not very easy to modify the code
directly. Fortunately, some environment wrappers are provided to help us
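
As a concrete, hedged sketch of the "initial idea" above (not the tutorial's eventual wrapper-based solution): redefine the full-signature `state` method so it returns an integer index and widen `state_space` to match. The `1`/`2` encoding below is an assumption for illustration only.

```julia
# Hypothetical variant of the tutorial's LotteryEnv methods: encode the two
# observable states as integers so a neural-network approximator can index them.
RLBase.state(env::LotteryEnv, ::Observation, ::DefaultPlayer) =
    isnothing(env.reward) ? 1 : 2          # 1 = not yet played, 2 = played
RLBase.state_space(env::LotteryEnv) = Base.OneTo(2)
```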
8 changes: 8 additions & 0 deletions src/ReinforcementLearningBase/NEWS.md
@@ -1,5 +1,13 @@
### ReinforcementLearningBase.jl Release Notes

#### v0.13.1

- Don't call `legal_action_space_mask` methods when `ActionStyle` is `MinimalActionSet`

#### v0.13.0

- Breaking release compatible with RL.jl v0.11

#### v0.12.0

- Transition to `RLCore.forward`, `RLBase.act!`, `RLBase.plan!` and `Base.push!` syntax instead of functional objects for hooks, policies and environments
2 changes: 1 addition & 1 deletion src/ReinforcementLearningBase/Project.toml
@@ -1,7 +1,7 @@
name = "ReinforcementLearningBase"
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
authors = ["Johanni Brea <[email protected]>", "Jun Tian <[email protected]>"]
version = "0.13.0"
version = "0.13.1"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
5 changes: 4 additions & 1 deletion src/ReinforcementLearningBase/src/interface.jl
@@ -487,7 +487,7 @@ For environments of [`MINIMAL_ACTION_SET`](@ref), the result is the same with
@multi_agent_env_api legal_action_space(env::AbstractEnv, player=current_player(env)) =
legal_action_space(ActionStyle(env), env, player)

legal_action_space(::MinimalActionSet, env, player::AbstractPlayer) = action_space(env)
legal_action_space(::MinimalActionSet, env::AbstractEnv, player::AbstractPlayer) = action_space(env)

"""
legal_action_space_mask(env, player=current_player(env)) -> AbstractArray{Bool}
@@ -497,6 +497,9 @@ Required for environments of [`FULL_ACTION_SET`](@ref). As a default implementat
the subset [`legal_action_space`](@ref).
"""
@multi_agent_env_api legal_action_space_mask(env::AbstractEnv, player=current_player(env)) =
legal_action_space_mask(ActionStyle(env), env, player)

legal_action_space_mask(::FullActionSet, env::AbstractEnv, player=current_player(env)) =
map(action_space(env, player)) do action
action in legal_action_space(env, player)
end
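
The net effect of these new defaults, as a hedged sketch (the `GateEnv` type and the single-player method signatures below are hypothetical, not part of the package): a `FULL_ACTION_SET` environment that defines `action_space` and `legal_action_space` now gets `legal_action_space_mask` for free, while `MINIMAL_ACTION_SET` environments are simply never asked for a mask.

```julia
import ReinforcementLearningBase as RLBase
using ReinforcementLearningBase

# Hypothetical three-action environment in which only actions 1 and 3 are legal.
struct GateEnv <: AbstractEnv end

RLBase.ActionStyle(::GateEnv) = FULL_ACTION_SET
RLBase.action_space(::GateEnv, ::DefaultPlayer) = Base.OneTo(3)
RLBase.legal_action_space(::GateEnv, ::DefaultPlayer) = (1, 3)

# No legal_action_space_mask method is defined for GateEnv; the new
# FullActionSet default derives the Bool mask from legal_action_space.
legal_action_space_mask(GateEnv(), DefaultPlayer())  # => [true, false, true]
```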
2 changes: 1 addition & 1 deletion src/ReinforcementLearningBase/test/interface.jl
@@ -4,7 +4,7 @@ struct TestEnv <: RLBase.AbstractEnv
state::Int
end

function RLBase.state(env::TestEnv, ::Observation{Any}, ::DefaultPlayer)
function RLBase.state(env::TestEnv, ::Observation, ::DefaultPlayer)
return env.state
end

4 changes: 4 additions & 0 deletions src/ReinforcementLearningCore/NEWS.md
@@ -1,5 +1,9 @@
# ReinforcementLearningCore.jl Release Notes

#### v0.15.3

- Make `FluxApproximator` work with `QBasedPolicy`

#### v0.15.2

- Make `QBasedPolicy` general for `AbstractLearner`s (#1069)
2 changes: 1 addition & 1 deletion src/ReinforcementLearningCore/Project.toml
@@ -1,6 +1,6 @@
name = "ReinforcementLearningCore"
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
version = "0.15.2"
version = "0.15.3"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -25,12 +25,15 @@ function RLBase.optimise!(::AbstractLearner, ::AbstractStage, ::Trajectory) end

function RLBase.optimise!(::AbstractLearner, ::AbstractStage, ::NamedTuple) end

function RLBase.plan!(explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv)
legal_action_space_ = RLBase.legal_action_space_mask(env)
RLBase.plan!(explorer, forward(learner, env), legal_action_space_)
function RLBase.plan!(explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv, player=current_player(env))
return RLBase.plan!(ActionStyle(env), explorer, learner, env, player)
end

function RLBase.plan!(explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv, player::AbstractPlayer)
function RLBase.plan!(::FullActionSet, explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv, player=current_player(env))
legal_action_space_ = RLBase.legal_action_space_mask(env, player)
return RLBase.plan!(explorer, forward(learner, env, player), legal_action_space_)
end

function RLBase.plan!(::MinimalActionSet, explorer::AbstractExplorer, learner::AbstractLearner, env::AbstractEnv, player=current_player(env))
return RLBase.plan!(explorer, forward(learner, env, player))
end
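
The refactor above is an instance of trait dispatch: the public `plan!` consults `ActionStyle(env)` once, and only the `FullActionSet` branch ever requests a legality mask. Below is a self-contained illustration of the pattern in plain Julia; it deliberately avoids the RL.jl API, and every name in it is made up.

```julia
abstract type ActionStyleLike end
struct FullSet    <: ActionStyleLike end
struct MinimalSet <: ActionStyleLike end

struct ToyEnv{S<:ActionStyleLike}
    values::Vector{Float64}   # per-action scores from some learner
    legal::Vector{Bool}       # only meaningful when S === FullSet
end
action_style(::ToyEnv{S}) where {S} = S()

# Public entry point: dispatch on the trait, not on the environment type.
pick_action(env::ToyEnv) = pick_action(action_style(env), env)
pick_action(::FullSet, env)    = argmax(ifelse.(env.legal, env.values, -Inf))
pick_action(::MinimalSet, env) = argmax(env.values)   # mask is never touched

pick_action(ToyEnv{FullSet}([1.0, 9.0, 3.0], [true, false, true]))  # => 3
pick_action(ToyEnv{MinimalSet}([1.0, 9.0, 3.0], Bool[]))            # => 2
```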
@@ -41,7 +41,7 @@ FluxApproximator(model, optimiser::Flux.Optimise.AbstractOptimiser; use_gpu=fals
Flux.@layer FluxApproximator trainable=(model,)

forward(A::FluxApproximator, args...; kwargs...) = A.model(args...; kwargs...)
forward(A::FluxApproximator, env::E) where {E <: AbstractEnv} = env |> state |> (x -> forward(A, x))
forward(A::FluxApproximator, env::E, player::AbstractPlayer=current_player(env)) where {E <: AbstractEnv} = env |> (x -> state(x, player)) |> (x -> forward(A, x))

RLBase.optimise!(A::FluxApproximator, grad::NamedTuple) =
Flux.Optimise.update!(A.optimiser_state, A.model, grad.model)
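
For reference, a hedged construction sketch around the player-aware `forward` above, assuming a Flux version that still provides `Flux.Optimise.Adam` (matching the constructor signature shown in this hunk); the model shape is arbitrary.

```julia
using Flux
import ReinforcementLearningCore as RLCore

# Arbitrary 2-feature, 3-action Q network wrapped in a FluxApproximator.
model  = Chain(Dense(2 => 16, relu), Dense(16 => 3))
approx = RLCore.FluxApproximator(model, Flux.Optimise.Adam())

# Plain forward on a raw state still routes through the wrapped model ...
RLCore.forward(approx, rand(Float32, 2))
# ... and forward(approx, env) now defaults its player argument to
# current_player(env) and reads state(env, player), so player-aware
# environments need no extra glue code.
```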
7 changes: 7 additions & 0 deletions src/ReinforcementLearningCore/src/policies/q_based_policy.jl
@@ -36,7 +36,14 @@ function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E, player::Player) where
end

RLBase.prob(policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:AbstractLearner,Ex<:AbstractExplorer} =
prob(ActionStyle(env), policy, env)

RLBase.prob(::MinimalActionSet, policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:AbstractLearner,Ex<:AbstractExplorer} =
prob(policy.explorer, forward(policy.learner, env))

RLBase.prob(::FullActionSet, policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:AbstractLearner,Ex<:AbstractExplorer} =
prob(policy.explorer, forward(policy.learner, env), legal_action_space_mask(env))


#the internal learner defines the optimization stage.
RLBase.optimise!(policy::QBasedPolicy, stage::AbstractStage, trajectory::Trajectory) = RLBase.optimise!(policy.learner, stage, trajectory)
6 changes: 3 additions & 3 deletions src/ReinforcementLearningCore/src/policies/random_policy.jl
@@ -24,7 +24,7 @@ RandomPolicy(s = nothing; rng = Random.default_rng()) = RandomPolicy(s, rng)

RLBase.optimise!(::RandomPolicy, x::NamedTuple) = nothing

RLBase.plan!(p::RandomPolicy{S,RNG}, env::AbstractEnv) where {S,RNG<:AbstractRNG} = rand(p.rng, p.action_space)
RLBase.plan!(p::RandomPolicy{S,RNG}, ::AbstractEnv) where {S,RNG<:AbstractRNG} = rand(p.rng, p.action_space)

function RLBase.plan!(p::RandomPolicy{Nothing,RNG}, env::AbstractEnv) where {RNG<:AbstractRNG}
legal_action_space_ = RLBase.legal_action_space(env)
@@ -45,7 +45,7 @@ function RLBase.prob(p::RandomPolicy{S,RNG}, s) where {S,RNG<:AbstractRNG}
Categorical(Fill(1 / n, n); check_args = false)
end

RLBase.prob(p::RandomPolicy{Nothing,RNG}, x) where {RNG<:AbstractRNG} =
RLBase.prob(::RandomPolicy{Nothing,RNG}, x) where {RNG<:AbstractRNG} =
@error "no I really don't know how to calculate the prob from nothing"

#####
@@ -54,7 +54,7 @@ RLBase.prob(p::RandomPolicy{Nothing,RNG}, env::AbstractEnv) where {RNG<:Abstract
prob(p, env, ChanceStyle(env))

function RLBase.prob(
p::RandomPolicy{Nothing,RNG},
::RandomPolicy{Nothing,RNG},
env::AbstractEnv,
::RLBase.AbstractChanceStyle,
) where {RNG<:AbstractRNG}
@@ -14,7 +14,7 @@ struct MockLearner <: AbstractLearner end
return [1.0, 2.0]
end

RLBase.state(::MockEnv, ::Observation{Any}, ::DefaultPlayer) = 1
RLBase.state(::MockEnv, ::Observation, ::DefaultPlayer) = 1
RLBase.state(::MockEnv, ::Observation{Any}, ::Player) = 1

env = MockEnv()
10 changes: 9 additions & 1 deletion src/ReinforcementLearningEnvironments/NEWS.md
@@ -1,5 +1,13 @@
### ReinforcementLearningEnvironments.jl Release Notes

#### v0.9.1

- Update `state` calls to use full signature (so compatible with more algorithms)

#### v0.9.0

- Compatible with RL.jl v0.11

#### v0.8

- Transition to `RLCore.forward`, `RLBase.act!`, `RLBase.plan!` and `Base.push!` syntax instead of functional objects for hooks, policies and environments
@@ -63,4 +71,4 @@

#### v0.6.0

- Set `AcrobotEnv` into lazy loading to reduce the dependency of `OrdinaryDiffEq`.
- Set `AcrobotEnv` into lazy loading to reduce the dependency of `OrdinaryDiffEq`.
2 changes: 2 additions & 0 deletions src/ReinforcementLearningEnvironments/Project.toml
@@ -34,6 +34,7 @@ ArcadeLearningEnvironment = "b7f77d8d-088d-5e02-8ac0-89aab2acc977"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
OpenSpiel = "ceb70bd2-fe3f-44f0-b81f-41608acaf2f2"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
@@ -48,6 +49,7 @@ test = [
"JLD2",
"Conda",
"DomainSets",
"Flux",
"OpenSpiel",
"OrdinaryDiffEq",
"PyCall",
@@ -85,7 +85,7 @@ RLBase.state_space(env::AcrobotEnv) = ArrayProductDomain(
)

RLBase.is_terminated(env::AcrobotEnv) = env.done
RLBase.state(env::AcrobotEnv) = acrobot_observation(env.state)
RLBase.state(env::AcrobotEnv, ::Observation, ::DefaultPlayer) = acrobot_observation(env.state)
RLBase.reward(env::AcrobotEnv) = env.reward

function RLBase.reset!(env::AcrobotEnv{T}) where {T<:Number}
@@ -121,7 +121,7 @@ RLBase.nameof(env::AtariEnv) = "AtariEnv($(env.name))"
RLBase.action_space(env::AtariEnv) = env.action_space
RLBase.reward(env::AtariEnv) = env.reward
RLBase.is_terminated(env::AtariEnv) = is_terminal(env)
RLBase.state(env::AtariEnv) = env.screens[1]
RLBase.state(env::AtariEnv, ::Observation, ::DefaultPlayer) = env.screens[1]
RLBase.state_space(env::AtariEnv) = env.observation_space

function Random.seed!(env::AtariEnv, s)
@@ -86,7 +86,7 @@ function RLBase.is_terminated(env::GymEnv{T}) where {T}
end
end

function RLBase.state(env::GymEnv{T}) where {T}
function RLBase.state(env::GymEnv{T}, ::Observation, ::DefaultPlayer) where {T}
if pyisinstance(env.state, PyCall.@pyglobalobj :PyTuple_Type) && length(env.state) == 4
obs, reward, isdone, info = convert(Tuple{T,Float64,Bool,PyDict}, env.state)
obs
@@ -42,7 +42,7 @@ RLBase.act!(env::SnakeGameEnv, action::Int) = env([SNAKE_GAME_ACTIONS[action]])
RLBase.act!(env::SnakeGameEnv, actions::Vector{Int}) = env(map(a -> SNAKE_GAME_ACTIONS[a], actions))

RLBase.action_space(env::SnakeGameEnv) = Base.OneTo(4)
RLBase.state(env::SnakeGameEnv) = env.game.board
RLBase.state(env::SnakeGameEnv, ::Observation, ::DefaultPlayer) = env.game.board
RLBase.state_space(env::SnakeGameEnv) = ArrayProductDomain(fill(false:true, size(env.game.board)))
RLBase.reward(env::SnakeGameEnv{<:Any,SINGLE_AGENT}) =
length(env.game.snakes[]) - env.latest_snakes_length[]
@@ -37,7 +37,7 @@ function RLBase.act!(env::BitFlippingEnv, action::Int)
end
end

RLBase.state(env::BitFlippingEnv) = state(env::BitFlippingEnv, Observation{BitArray{1}}())
RLBase.state(env::BitFlippingEnv, ::Observation, ::DefaultPlayer) = state(env::BitFlippingEnv, Observation{BitArray{1}}())
RLBase.state(env::BitFlippingEnv, ::Observation) = env.state
RLBase.state(env::BitFlippingEnv, ::GoalState) = env.goal_state
RLBase.state_space(env::BitFlippingEnv, ::Observation) = ArrayProductDomain(fill(false:true, env.N))
@@ -83,7 +83,7 @@ CartPoleEnv{T}(; kwargs...) where {T} = CartPoleEnv(T=T, kwargs...)
Random.seed!(env::CartPoleEnv, seed) = Random.seed!(env.rng, seed)
RLBase.reward(env::CartPoleEnv{T}) where {T} = env.done ? zero(T) : one(T)
RLBase.is_terminated(env::CartPoleEnv) = env.done
RLBase.state(env::CartPoleEnv) = env.state
RLBase.state(env::CartPoleEnv, ::Observation, ::DefaultPlayer) = env.state

function RLBase.state_space(env::CartPoleEnv{T}) where {T}
((-2 * env.params.xthreshold) .. (2 * env.params.xthreshold)) ×
@@ -54,7 +54,7 @@ function RLBase.act!(env::GraphShortestPathEnv, action)
env.reward = env.pos == env.goal ? 0 : -1
end

RLBase.state(env::GraphShortestPathEnv) = env.pos
RLBase.state(env::GraphShortestPathEnv, ::Observation, ::DefaultPlayer) = env.pos
RLBase.state_space(env::GraphShortestPathEnv) = axes(env.graph, 2)
RLBase.action_space(env::GraphShortestPathEnv) = axes(env.graph, 2)
RLBase.legal_action_space(env::GraphShortestPathEnv) = (env.graph[:, env.pos]).nzind
@@ -107,7 +107,7 @@ RLBase.action_space(env::KuhnPokerEnv, ::ChancePlayer) = Base.OneTo(length(KUHN_

RLBase.legal_action_space(env::KuhnPokerEnv, p::ChancePlayer) = Tuple(x for x in action_space(env, p) if KUHN_POKER_CARDS[x] ∉ env.cards)

function RLBase.legal_action_space_mask(env::KuhnPokerEnv, p::ChancePlayer)
function RLBase.legal_action_space_mask(env::KuhnPokerEnv, ::ChancePlayer)
m = fill(true, 3)
m[env.cards] .= false
m
@@ -58,7 +58,7 @@ function RLBase.legal_action_space_mask(env::MontyHallEnv)
mask
end

function RLBase.state(env::MontyHallEnv)
function RLBase.state(env::MontyHallEnv, ::Observation, ::DefaultPlayer)
if isnothing(env.host_action)
1
else
@@ -94,7 +94,7 @@ RLBase.action_space(::MountainCarEnv{<:AbstractFloat,<:AbstractFloat}) = -1.0 ..

RLBase.reward(env::MountainCarEnv{T}) where {T} = env.done ? zero(T) : -one(T)
RLBase.is_terminated(env::MountainCarEnv) = env.done
RLBase.state(env::MountainCarEnv) = env.state
RLBase.state(env::MountainCarEnv, ::Observation, ::DefaultPlayer) = env.state

function RLBase.reset!(env::MountainCarEnv{T}) where {T}
env.state[1] = 0.2 * rand(env.rng, T) - 0.6
