From e705227427c968e8684f14ce0c81a1622ec49409 Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Sun, 2 Feb 2025 21:18:45 +0100 Subject: [PATCH] add slidingwindow (#196) * add slidingwindow * docs --- docs/src/api.md | 1 + src/MLUtils.jl | 3 ++ src/folds.jl | 4 +-- src/slidingwindow.jl | 72 +++++++++++++++++++++++++++++++++++++++++++ test/slidingwindow.jl | 31 +++++++++++++++++++ 5 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 src/slidingwindow.jl create mode 100644 test/slidingwindow.jl diff --git a/docs/src/api.md b/docs/src/api.md index f0eb597..f895427 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -35,6 +35,7 @@ DataLoader obsview ObsView randobs +slidingwindow ``` ## Partitioning diff --git a/src/MLUtils.jl b/src/MLUtils.jl index ec7bba4..ca2f746 100644 --- a/src/MLUtils.jl +++ b/src/MLUtils.jl @@ -57,6 +57,9 @@ include("resample.jl") export oversample, undersample +include("slidingwindow.jl") +export slidingwindow + include("splitobs.jl") export splitobs diff --git a/src/folds.jl b/src/folds.jl index ff33692..0f33c17 100644 --- a/src/folds.jl +++ b/src/folds.jl @@ -79,7 +79,7 @@ distributed among the parts. ```julia for (x_train, x_val) in kfolds(X, k=10) # code called 10 times - # nobs(x_val) may differ up to ±1 over iterations + # numobs(x_val) may differ up to ±1 over iterations end ``` @@ -172,7 +172,7 @@ data is copied until [`getobs`](@ref) is invoked. ```julia for (train, val) in leavepout(X, p=2) - # if nobs(X) is dividable by 2, + # if numobs(X) is dividable by 2, # then numobs(val) will be 2 for each iteraton, # otherwise it may be 3 for the first few iterations. 
end
diff --git a/src/slidingwindow.jl b/src/slidingwindow.jl
new file mode 100644
index 0000000..9851894
--- /dev/null
+++ b/src/slidingwindow.jl
@@ -0,0 +1,85 @@
+# Lazy view over `data` whose `i`-th element is the window of `size` adjacent
+# observations starting at observation `1 + (i - 1) * stride`. `count` is the
+# number of complete windows and is computed by `slidingwindow`.
+struct SlidingWindow{T}
+    data::T
+    size::Int
+    stride::Int
+    count::Int
+end
+
+Base.length(A::SlidingWindow) = A.count
+
+# Needed so that `A[begin]` and `A[end]` work, as expected from a vector-like view.
+Base.firstindex(A::SlidingWindow) = 1
+Base.lastindex(A::SlidingWindow) = length(A)
+
+function Base.getindex(A::SlidingWindow, i::Int)
+    1 <= i <= length(A) || throw(BoundsError(A, i))
+    windowrange = getrange(A, i)
+    return getobs(A.data, windowrange)
+end
+
+# Range of observation indices of `A.data` covered by the `i`-th window.
+function getrange(A::SlidingWindow, i::Int)
+    offset = 1 + (i - 1) * A.stride
+    return offset:(offset + A.size - 1)
+end
+
+function Base.show(io::IO, A::SlidingWindow)
+    return print(io, "slidingwindow($(A.data), size=$(A.size), stride=$(A.stride))")
+end
+
+Base.iterate(A::SlidingWindow, i::Int=1) = i > length(A) ? nothing : (A[i], i + 1)
+
+"""
+    slidingwindow(data; size, stride=1) -> SlidingWindow
+
+Return a vector-like view of the `data` for which each element is
+a fixed size "window" of `size` adjacent observations. Note that only complete
+windows are included in the output, which implies that it is
+possible for excess observations to be omitted from the view.
+
+Note that the windows are not materialized at construction time.
+To actually get a copy of the data at some window use indexing or [`getobs`](@ref).
+
+```jldoctest
+julia> s = slidingwindow(1:20, size=6)
+slidingwindow(1:20, size=6, stride=1)
+
+julia> s[1]
+1:6
+
+julia> s[2]
+2:7
+```
+
+The optional parameter `stride` can be used to specify the
+distance between the start elements of each adjacent window.
+By default the stride is equal to 1.
+
+```jldoctest
+julia> s = slidingwindow(1:20, size=6, stride=3)
+slidingwindow(1:20, size=6, stride=3)
+
+julia> for w in s; println(w); end
+1:6
+4:9
+7:12
+10:15
+13:18
+```
+
+An `ArgumentError` is thrown if `size` or `stride` is not strictly positive,
+or if `size` is larger than the number of observations in `data`.
+"""
+function slidingwindow(data; size::Int, stride::Int=1)
+    size > 0 || throw(ArgumentError("Specified window size must be strictly greater than 0. Actual: $size"))
+    n = numobs(data)
+    size <= n || throw(ArgumentError("Specified window size is too large for the given number of observations"))
+    stride > 0 || throw(ArgumentError("Specified stride must be strictly greater than 0. Actual: $stride"))
+    # Integer division keeps the count exact: after the first window, one more
+    # window fits for every full `stride` in the remaining `n - size` observations.
+    count = div(n - size, stride) + 1
+    return SlidingWindow(data, size, stride, count)
+end
diff --git a/test/slidingwindow.jl b/test/slidingwindow.jl
new file mode 100644
index 0000000..d0bc9c1
--- /dev/null
+++ b/test/slidingwindow.jl
@@ -0,0 +1,51 @@
+@testset "slidingwindow" begin
+    data = 1:20
+    s = slidingwindow(data, size=5)
+    @test length(s) == 16
+    @test s[1] == 1:5
+    @test s[2] == 2:6
+    @test s[begin] == 1:5
+    @test s[end] == 16:20
+    @test sprint(show, s) == "slidingwindow(1:20, size=5, stride=1)"
+
+    s = slidingwindow(data, size=5, stride=3)
+    @test length(s) == 6
+    @test s[1] == 1:5
+    @test s[2] == 4:8
+    @test s[3] == 7:11
+    @test s[4] == 10:14
+    @test s[5] == 13:17
+    @test s[6] == 16:20
+
+    # only complete windows are kept: observations 19 and 20 are left out
+    s = slidingwindow(data, size=6, stride=3)
+    @test length(s) == 5
+    @test s[end] == 13:18
+
+    # a single window spanning every observation
+    s = slidingwindow(data, size=20)
+    @test length(s) == 1
+    @test s[1] == 1:20
+    @test_throws BoundsError s[0]
+    @test_throws BoundsError s[2]
+
+    # invalid arguments are rejected at construction time
+    @test_throws ArgumentError slidingwindow(data, size=0)
+    @test_throws ArgumentError slidingwindow(data, size=21)
+    @test_throws ArgumentError slidingwindow(data, size=5, stride=0)
+
+    data = reshape(1:18, 3, 6)
+    s = slidingwindow(data, size=2)
+    @test length(s) == 5
+    @test s[1] isa Matrix{Int}
+    @test s[1] == [1 4; 2 5; 3 6]
+    @test s[2] == [4 7; 5 8; 6 9]
+
+    c = 0
+    for w in s
+        @test w isa Matrix{Int}
+        @test size(w) == (3, 2)
+        c += 1
+    end
+    @test c == 5
+end