Skip to content

Commit

Permalink
adds method for splitting data into periods
Browse files Browse the repository at this point in the history
minor

return iterator and adds test

starts to add retime

changes _split

changes LTS

overload Base.split

changes interface of _split to return timestamps
  • Loading branch information
ValentinKaisermayer committed Jan 20, 2025
1 parent f456c1e commit 8048f64
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 4 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ jobs:
fail-fast: false
matrix:
version:
- "1.6.7" # LTS
- "1.6"
- "1.10.7" # LTS
- "1" # Latest Release
os:
- ubuntu-latest
Expand Down
11 changes: 11 additions & 0 deletions docs/src/split.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,14 @@ using MarketData
tail(cl)
tail(cl, 3)
```

## Splitting by period

Split data into consecutive periods according to a given function, e.g. `Dates.day`.

```@repl
using TimeSeries
using MarketData
split(cl, Dates.day)
```
1 change: 1 addition & 0 deletions src/TimeSeries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ include("modify.jl")
include("basemisc.jl")
include("deprecated.jl")
include("plotrecipes.jl")
include("retime.jl")

end # module TimeSeries
48 changes: 48 additions & 0 deletions src/retime.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
    retime(ta, new_dt::Dates.Period; kwargs...)

Resample `ta` onto a regular grid that starts at its first timestamp, advances in
steps of `new_dt` and does not go past its last timestamp. All keyword arguments are
forwarded to the `retime` method that accepts explicit timestamps.
"""
function retime(ta, new_dt::Dates.Period; kwargs...)
    ts = timestamp(ta)
    grid = first(ts):new_dt:last(ts)
    return retime(ta, grid; kwargs...)
end

"""
    retime(ta, period::Function; kwargs...)

Resample `ta` onto the first timestamp of every group obtained by splitting its
timestamps with `period` (e.g. `Dates.day`). All keyword arguments are forwarded to the
`retime` method that accepts explicit timestamps.
"""
function retime(ta, period::Function; kwargs...)
    ts = timestamp(ta)
    # `_split` yields groups of timestamps rather than index ranges, so the first
    # timestamp is taken from each group directly; indexing `ts` with a group would
    # throw. The typed comprehension keeps the element type for empty input as well.
    new_timestamps = eltype(ts)[first(group) for group in _split(ts, period)]
    return retime(ta, new_timestamps; kwargs...)
end

"""
    retime(ta::TimeArray, new_timestamps::AbstractVector; upsample=:previous,
           downsample=:mean, extrapolate=true)

Build a new `TimeArray` on `new_timestamps` from the columns of `ta`.

Every column of `values(ta)` is passed, together with a zero-initialised output column,
to `_retime!`, which is responsible for filling in the resampled values.

# Arguments
- `ta`: source time array.
- `new_timestamps`: timestamps of the result. The timestamps of `ta` are converted to
  the element type of this vector before they are handed to `_retime!`.

# Keywords
- `upsample`, `downsample`, `extrapolate`: forwarded unchanged to `_retime!`.

# Returns
A `TimeArray` built from `new_timestamps`, the new values, and the column names and
meta of `ta`.
"""
function retime(
    ta::TimeArray{T,N,D,A},
    new_timestamps::AbstractVector{DN};
    upsample=:previous,
    downsample::Union{Symbol,Function}=:mean,
    extrapolate::Bool=true,
) where {T,N,D,A,DN}
    # NOTE: the argument type must be `TimeArray`; `TimeSeries` is the enclosing module
    # and cannot be parameterised like a type.
    old_values = values(ta)
    old_timestamps = convert(Vector{DN}, timestamp(ta))
    new_values = zeros(T, length(new_timestamps), size(old_values, 2))

    # Views avoid copying each column on the way in and let `_retime!` write in place.
    @views for col_i in axes(old_values, 2)
        _retime!(
            new_values[:, col_i],
            old_timestamps,
            old_values[:, col_i],
            new_timestamps,
            upsample,
            downsample,
            extrapolate,
        )
    end
    return TimeArray(new_timestamps, new_values, colnames(ta), meta(ta))
end

# Placeholder kernel for resampling a single column (work in progress).
#
# Both timestamp vectors are converted with `Dates.value` and the extrema of the old
# ones are computed, but nothing is written into `new_values` yet; `old_values`,
# `upsample`, `downsample` and `extrapolate` are currently unused.
# Always returns `nothing`.
function _retime!(
    new_values::AbstractVector{A},
    old_timestamps::AbstractVector{D},
    old_values::AbstractVector{A},
    new_timestamps::AbstractVector{D},
    upsample,
    downsample,
    extrapolate,
) where {D,A}
    old_ticks = map(Dates.value, old_timestamps)
    lo, hi = extrema(old_ticks)
    new_ticks = map(Dates.value, new_timestamps)

    # TODO: check each interval between i and i+1 if there is no or one sample
    # (upsample), more than one sample (downsample)
    for _ in eachindex(new_ticks)
    end
    return nothing
end

33 changes: 33 additions & 0 deletions src/split.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,36 @@ end
# `first` on a `TimeArray` delegates to `head` with a count of one.
function Base.first(ta::TimeArray)
    return head(ta, 1)
end

# `last` on a `TimeArray` delegates to `tail` with a count of one.
function Base.last(ta::TimeArray)
    return tail(ta, 1)
end

"""
    split(data::TimeSeries.TimeArray, period::Function)

Split `data` into consecutive chunks over which `period` yields the same value.

The result is a lazy iterator (built with `Iterators.map`); each element is `data`
indexed by one of the groups produced by `_split` on the timestamps of `data`.

## Arguments

- `data::TimeSeries.TimeArray`: Data to split
- `period::Function`: Function, e.g. `Dates.day`, that is used to split the `data`.
"""
function Base.split(data::TimeSeries.TimeArray, period::Function)
    groups = _split(TimeSeries.timestamp(data), period)
    return Iterators.map(group -> data[group], groups)
end

# Group consecutive timestamps for which `period` returns the same value.
#
# `ts` is scanned once from front to back; a new group starts whenever `period` of the
# next timestamp differs from `period` of the current group. Returns a lazy iterator
# whose elements are the sub-vectors `ts[j:i]` of each group. For empty `ts` the
# iterator is empty but has the same type as in the non-empty case, so callers can
# treat both cases uniformly.
function _split(ts::AbstractVector{D}, period::Function) where {D<:TimeType}
    m = length(ts)
    idx = UnitRange{Int}[]

    if !isempty(ts)
        # At most one group per timestamp.
        sizehint!(idx, m)
        t0 = period(ts[1])
        j = 1  # start index of the group currently being built
        for i in 1:(m - 1)
            t1 = period(ts[i + 1])
            t0 == t1 && continue
            push!(idx, j:i)
            j = i + 1
            t0 = t1
        end
        # Close the trailing group.
        push!(idx, j:m)
    end

    return Iterators.map(i -> ts[i], idx)
end
17 changes: 15 additions & 2 deletions test/split.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using Dates
using Test

using MarketData

using TimeSeries

@testset "split" begin
Expand Down Expand Up @@ -123,4 +121,19 @@ using TimeSeries
@test length(last(ohlc)) == 1
end
end

@testset "split period" begin
    # Every chunk must be homogeneous with respect to the splitting function.
    for period in (day, week, month, year), chunk in split(cl, period)
        @test allequal(period.(timestamp(chunk)))
    end

    # Expected number of chunks of `cl` for each period function.
    @test length(split(cl, day)) == 500
    @test length(split(cl, week)) == 105
    @test length(split(cl, month)) == 24
    @test length(split(cl, year)) == 2

    # test empty timearray
    @test length(split(to(cl, Date(2000)), week)) == 0
end
end # @testset "split"

0 comments on commit 8048f64

Please sign in to comment.