From 48d72ab2d3e4a915367f05cf33f4ec7ef79408bb Mon Sep 17 00:00:00 2001 From: Valentin Kaisermayer Date: Sat, 14 Dec 2024 22:50:21 +0100 Subject: [PATCH] adds retime adds missing deps another missing dep support symbols as well handle missing data adds docs on retime minor --- .gitignore | 1 + Project.toml | 11 --- docs/make.jl | 1 + docs/src/index.md | 1 + docs/src/plotting.md | 6 +- docs/src/retime.md | 90 ++++++++++++++++++ src/TimeSeries.jl | 19 +++- src/retime.jl | 214 +++++++++++++++++++++++++++++++++++++++---- test/Project.toml | 10 ++ test/retime.jl | 205 +++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 11 files changed, 527 insertions(+), 32 deletions(-) create mode 100644 docs/src/retime.md create mode 100644 test/Project.toml create mode 100644 test/retime.jl diff --git a/.gitignore b/.gitignore index 827b2130..f10e505d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .swp docs/build/ Manifest.toml +test/Manifest.toml diff --git a/Project.toml b/Project.toml index 9c2f0e14..107ca2a3 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,6 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] DelimitedFiles = "1" DocStringExtensions = "0.8, 0.9" -MarketData = "0.14" RecipesBase = "0.5, 0.7, 0.8, 1.0" Reexport = "1" Statistics = "1" @@ -27,13 +26,3 @@ julia = "1.6" PrettyTables = "2" IteratorInterfaceExtensions = "1" TableTraits = "1" - -[extras] -CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -MarketData = "945b72a4-3b13-509d-9b46-1525bb5c06de" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["CSV", "DataFrames", "MarketData", "Random", "Test"] diff --git a/docs/make.jl b/docs/make.jl index 96d01ecd..b5e09293 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -20,6 +20,7 @@ makedocs(; "tables.md", "dotfile.md", "plotting.md", + "retime.md", ], ) diff --git a/docs/src/index.md b/docs/src/index.md index 
4bf21b0d..dd535f0c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -19,5 +19,6 @@ Pages = [ "tables.md", "dotfile.md", "plotting.md", + "retime.md", ] ``` diff --git a/docs/src/plotting.md b/docs/src/plotting.md index 2f97e37f..cb1665d5 100644 --- a/docs/src/plotting.md +++ b/docs/src/plotting.md @@ -17,10 +17,8 @@ ta = yahoo(:GOOG, YahooOpt(; period1=now() - Month(1))) ## Plotting as multiple series -The recipe allows `TimeArray` objects to be passed as input to `plot`. The -recipe will plot each variable as an individual line, aligning all -variables to the same y axis. -backend). +The recipe allows `TimeArray` objects to be passed as input to `plot`. +The recipe will plot each variable as an individual line, aligning all variables to the same y axis. ```@example plot plot(ta[:Open, :High, :Low, :Close]) diff --git a/docs/src/retime.md b/docs/src/retime.md new file mode 100644 index 00000000..4160db86 --- /dev/null +++ b/docs/src/retime.md @@ -0,0 +1,90 @@ +# Retime + +The `retime` function allows you to retime, i.e. change the timestamps of a `TimeArray`, similar to what [Matlab's retime](https://www.mathworks.com/help/matlab/ref/timetable.retime.html) does. + +```@example retime +using Plots, Dates, TimeSeries +default(show = false) # hide +ENV["GKSwstype"] = "100" # hide +gr() +timestamps = range(DateTime(2020, 1, 1), length = 7*24, step = Hour(1)) +ta = TimeArray(timestamps, cumsum(randn(7*24)), [:a]) +``` + +## Using a new time step +```@example retime +retime(ta, Minute(15)) +``` + +## Using new timestep vector +```@example retime +new_timestamps = range(DateTime(2020, 1, 1), DateTime(2020, 1, 2), step = Minute(15)) +retime(ta, new_timestamps) +``` + +## Irregular timestamps +You can perform retime on irregularly spaced timestamps, both using a `TimeArray` with irregular timestamps or using a vector of irregular timestamps. Depending on the timestamps `upsampling` or `downsampling` is used. 
+```@example retime +new_timestamps = vcat( + range(DateTime(2020, 1, 1), DateTime(2020, 1, 2)-Minute(15), step = Minute(15)), + range(DateTime(2020, 1, 2), DateTime(2020, 1, 3), step = Hour(1)), +) +retime(ta, new_timestamps) +``` + +## Upsampling + +Interpolation is done using the `upsample` argument. If no data is directly hit, the specified `upsample` method is used. Available `upsample` methods are: +- `Linear()` or `:linear` +- `Nearest()` or `:nearest` +- `Previous()` or `:previous` +- `Next()` or `:next` + +```@example retime +ta_ = retime(ta, Minute(15), upsample=Linear()) +``` + +```@example retime +plot(ta) +plot!(ta_) +savefig("retime-upsampling.svg"); nothing # hide +``` +![](retime-upsampling.svg) + +## Downsampling + +Downsampling or aggregation is done using the `downsample` argument. This applies a function to each interval not including the right-edge of the interval. If no data is present in the interval the specified `upsample` method is used. +Available `downsample` methods are: +- `Mean()` or `:mean` +- `Min()` or `:min` +- `Max()` or `:max` +- `Count()` or `:count` +- `Sum()` or `:sum` +- `Median()` or `:median` +- `First()` or `:first` +- `Last()` or `:last` + +```@example retime +ta_ = retime(ta, Hour(6), downsample=Mean()) +``` + +```@example retime +plot(ta) +plot!(ta_) +savefig("retime-downsample.svg"); nothing # hide +``` +![](retime-downsample.svg) + +## Extrapolation + +Extrapolation at the beginning and end of the time series is done using the `extrapolate` argument. 
# --------------------------------------------------------------------------
# Method-selection types for `retime`.
#
# `retime` chooses its behaviour by dispatch on small singleton types:
# an `InterpolationMethod` for upsampling, an `AggregationMethod` for
# downsampling, and an `ExtrapolationMethod` for target timestamps that lie
# outside the original time range.  Plain `Symbol`s are accepted as well and
# are converted by the `_to*Method` helpers below.
# --------------------------------------------------------------------------
abstract type InterpolationMethod end
abstract type AggregationMethod end
abstract type ExtrapolationMethod end

# Interpolation (upsampling) methods
struct Linear <: InterpolationMethod end
struct Previous <: InterpolationMethod end
struct Next <: InterpolationMethod end
struct Nearest <: InterpolationMethod end

# Aggregation (downsampling) methods
struct Mean <: AggregationMethod end
struct Min <: AggregationMethod end
struct Max <: AggregationMethod end
struct Count <: AggregationMethod end
struct Sum <: AggregationMethod end
struct Median <: AggregationMethod end
struct First <: AggregationMethod end
struct Last <: AggregationMethod end

# Extrapolation methods
struct FillConstant{V} <: ExtrapolationMethod
    value::V  # constant value used outside the original time range
end
struct NearestExtrapolate <: ExtrapolationMethod end
struct MissingExtrapolate <: ExtrapolationMethod end
struct NaNExtrapolate <: ExtrapolationMethod end

# Symbol → method conversion.  An unsupported symbol raises a `MethodError`
# because no matching `Val` method exists for it.
_toInterpolationMethod(s::Symbol) = _toInterpolationMethod(Val(s))
_toInterpolationMethod(::Val{:linear}) = Linear()
_toInterpolationMethod(::Val{:previous}) = Previous()
_toInterpolationMethod(::Val{:next}) = Next()
_toInterpolationMethod(::Val{:nearest}) = Nearest()
_toInterpolationMethod(m::InterpolationMethod) = m

_toAggregationMethod(s::Symbol) = _toAggregationMethod(Val(s))
_toAggregationMethod(::Val{:mean}) = Mean()
_toAggregationMethod(::Val{:min}) = Min()
_toAggregationMethod(::Val{:max}) = Max()
_toAggregationMethod(::Val{:count}) = Count()
_toAggregationMethod(::Val{:sum}) = Sum()
_toAggregationMethod(::Val{:median}) = Median()
_toAggregationMethod(::Val{:first}) = First()
_toAggregationMethod(::Val{:last}) = Last()
_toAggregationMethod(m::AggregationMethod) = m

_toExtrapolationMethod(s::Symbol) = _toExtrapolationMethod(Val(s))
_toExtrapolationMethod(::Val{:fillconstant}) = FillConstant(0.0)
_toExtrapolationMethod(::Val{:nearest}) = NearestExtrapolate()
_toExtrapolationMethod(::Val{:missing}) = MissingExtrapolate()
_toExtrapolationMethod(::Val{:nan}) = NaNExtrapolate()
_toExtrapolationMethod(m::ExtrapolationMethod) = m

"""
    retime(ta, new_dt::Dates.Period; kwargs...)

Retime `ta` onto the regular grid that spans its own first and last
timestamp with step `new_dt`.  Keyword arguments are forwarded to the
vector-of-timestamps method of `retime`.
"""
function retime(ta, new_dt::Dates.Period; kwargs...)
    grid = timestamp(ta)[1]:new_dt:timestamp(ta)[end]
    return retime(ta, grid; kwargs...)
end

"""
    retime(ta, period::Function; kwargs...)

Retime `ta` onto the first timestamp of each group that `period`
(e.g. `Dates.day`) induces via `_split`.  Keyword arguments are forwarded
to the vector-of-timestamps method of `retime`.
"""
function retime(ta, period::Function; kwargs...)
    starts = map(first, _split(timestamp(ta), period))
    return retime(ta, starts; kwargs...)
end
"""
    retime(ta::TimeArray, new_timestamps::AbstractVector;
           upsample=Previous(), downsample=Mean(),
           extrapolate=NearestExtrapolate(), skip_missing=true)

Return a new `TimeArray` with the values of `ta` resampled onto
`new_timestamps` (assumed sorted ascending).

For each target timestamp the half-open interval up to the next target
timestamp is examined:

* no sample, or one sample that is not an exact hit → `upsample`
  (interpolation: `Linear()`, `Previous()`, `Next()`, `Nearest()`);
* an exact hit → the sample is copied unchanged;
* several samples → `downsample` (aggregation: `Mean()`, `Min()`, `Max()`,
  `Count()`, `Sum()`, `Median()`, `First()`, `Last()`).

Target timestamps outside the original range are filled via `extrapolate`
(`FillConstant(v)`, `NearestExtrapolate()`, `MissingExtrapolate()`,
`NaNExtrapolate()`).  All three options also accept the equivalent `Symbol`
(e.g. `upsample=:linear`).

With `skip_missing=true` (default), `missing`/`NaN` samples are dropped per
column before resampling and the result gets a missing-free element type.
"""
function retime(
    ta::TimeArray{T,N,D,A},
    new_timestamps::AbstractVector{DN};
    upsample::Union{Symbol,InterpolationMethod}=Previous(),
    downsample::Union{Symbol,AggregationMethod}=Mean(),
    extrapolate::Union{Symbol,ExtrapolationMethod}=NearestExtrapolate(),
    skip_missing::Bool=true,
) where {T,N,D,A,DN}
    up = _toInterpolationMethod(upsample)
    down = _toAggregationMethod(downsample)
    extra = _toExtrapolationMethod(extrapolate)

    new_values = __get_new_values(
        T, length(new_timestamps), size(values(ta), 2), extra, skip_missing
    )

    old_timestamps = convert(Vector{DN}, timestamp(ta))
    old_values = values(ta)
    @views for col_i in 1:size(old_values, 2)
        idx = if skip_missing
            # Keep only valid samples.  `isnan` is asked only of floats so
            # integer or other non-float eltypes cannot throw here.
            findall(
                v -> !ismissing(v) && !(v isa AbstractFloat && isnan(v)),
                old_values[:, col_i],
            )
        else
            # BUG FIX: this used to be `ones(Int, length(old_timestamps))`,
            # which selected the *first* sample over and over instead of all
            # samples whenever `skip_missing=false`.
            collect(eachindex(old_timestamps))
        end

        _retime!(
            new_values[:, col_i],
            old_timestamps[idx],
            old_values[idx, col_i],
            new_timestamps,
            up,
            down,
            extra,
            skip_missing,
        )
    end
    return TimeArray(new_timestamps, new_values, colnames(ta), meta(ta))
end

# Resample a single column in place into `new_values`.
# `old_timestamps`/`old_values` hold the (already missing-filtered) source
# samples; timestamps are compared via their integer `Dates.value`.
function _retime!(
    new_values::AbstractVector{AN},
    old_timestamps::AbstractVector{D},
    old_values::AbstractVector{A},
    new_timestamps::AbstractVector{D},
    upsample::InterpolationMethod,
    downsample::AggregationMethod,
    extrapolate::ExtrapolationMethod,
    skip_missing::Bool,
) where {D,AN,A}
    x = Dates.value.(old_timestamps)
    x_min, x_max = extrema(x)
    x_new = Dates.value.(new_timestamps)
    N = length(x_new)

    @views for i in 1:N
        if x_new[i] < x_min || x_new[i] > x_max
            # Outside the original range → extrapolate.
            new_values[i] = _extrapolate(extrapolate, x_new[i], x, old_values)
            continue
        end

        idx = if i < N
            _get_idx(x, x_new[i], x_new[i + 1])
        elseif N > 1
            # Last target: assume the final interval has the same width as
            # the previous one.
            _get_idx(x, x_new[i], x_new[i] + (x_new[i] - x_new[i - 1]))
        else
            # BUG FIX: a single target timestamp used to hit `x_new[i - 1]`
            # and throw a BoundsError; only an exact hit can match here.
            _get_idx(x, x_new[i], x_new[i] + 1)
        end

        if length(idx) == 1 && x_new[i] == x[idx[1]]
            # Direct hit: copy the sample, no interpolation needed.
            new_values[i] = old_values[idx[1]]
        elseif length(idx) <= 1
            # Zero or one (non-hit) samples in [x_new[i], x_new[i+1]) →
            # interpolate with the upsample method.
            new_values[i] = _upsample(upsample, x, old_values, x_new[i])
        else
            # Several samples in the interval → aggregate them.
            new_values[i] = _downsample(downsample, old_values[idx])
        end
    end
    return nothing
end

# Allocate the output matrix with an eltype matching the extrapolation
# method: `MissingExtrapolate` needs a `Union{Missing,T}` eltype, otherwise
# `skip_missing=true` strips `Missing` from the source eltype.
function __get_new_values(T, n_rows, n_cols, extrapolate, skip_missing)
    return zeros(skip_missing ? nonmissingtype(T) : T, n_rows, n_cols)
end
function __get_new_values(T, n_rows, n_cols, extrapolate::MissingExtrapolate, skip_missing)
    return zeros(Union{Missing,T}, n_rows, n_cols)
end

# Indices `i` with `x_left <= x[i] < x_right` for ascending `x`.
# FIX: uses exact integer binary searches instead of the previous
# `prevfloat(Float64(x_right))` trick, which loses exactness once timestamp
# values exceed Float64's exactly-representable integer range (2^53).
function _get_idx(x::AbstractVector{<:Real}, x_left::Real, x_right::Real)
    idx_left = searchsortedfirst(x, x_left)        # first index with x[i] >= x_left
    idx_right = searchsortedfirst(x, x_right) - 1  # last index with x[i] < x_right
    return idx_left:idx_right
end

# Extrapolation: value for a target time `t_new` outside [first(x), last(x)].
_extrapolate(m::FillConstant, t_new, x, y) = m.value
function _extrapolate(::NearestExtrapolate, t_new, x, y)
    # Value of the sample closest in time to `t_new`.
    return y[argmin(abs.(x .- t_new))]
end
_extrapolate(::MissingExtrapolate, t_new, x, y) = missing
_extrapolate(::NaNExtrapolate, t_new, x, y) = NaN

# Interpolation at time `x`, where `x_old` is sorted and brackets `x`
# (guaranteed by the range check in `_retime!`).
function _upsample(::Linear, x_old, old_values, x)
    idx_next = searchsortedfirst(x_old, x)  # first sample >= x
    idx_prev = searchsortedlast(x_old, x)   # last sample <= x
    idx_prev == idx_next && return old_values[idx_prev]  # exact hit
    slope =
        (old_values[idx_next] - old_values[idx_prev]) /
        (x_old[idx_next] - x_old[idx_prev])
    return old_values[idx_prev] + (x - x_old[idx_prev]) * slope
end

_upsample(::Previous, x_old, old_values, x) = old_values[searchsortedlast(x_old, x)]

_upsample(::Next, x_old, old_values, x) = old_values[searchsortedfirst(x_old, x)]

function _upsample(::Nearest, x_old, old_values, x)
    idx_next = searchsortedfirst(x_old, x)  # first sample >= x
    idx_prev = searchsortedlast(x_old, x)   # last sample <= x
    idx_prev == idx_next && return old_values[idx_prev]  # exact hit
    pos = (x - x_old[idx_prev]) / (x_old[idx_next] - x_old[idx_prev])
    return pos < 0.5 ? old_values[idx_prev] : old_values[idx_next]
end

# Aggregation over all samples falling into one target interval.
_downsample(::Mean, values_in_range) = mean(values_in_range)
_downsample(::Min, values_in_range) = minimum(values_in_range)
_downsample(::Max, values_in_range) = maximum(values_in_range)
_downsample(::Count, values_in_range) = count(!ismissing, values_in_range)
_downsample(::Sum, values_in_range) = sum(values_in_range)
_downsample(::Median, values_in_range) = median(values_in_range)
_downsample(::First, values_in_range) = first(values_in_range)
_downsample(::Last, values_in_range) = last(values_in_range)
using Test
using MarketData
using TimeSeries
using Dates
using Statistics
using Random

@testset "retime" begin
    @testset "interpolation" begin
        @test TimeSeries._toInterpolationMethod(:linear) == TimeSeries.Linear()
        @test TimeSeries._toInterpolationMethod(:nearest) == TimeSeries.Nearest()
        @test TimeSeries._toInterpolationMethod(:previous) == TimeSeries.Previous()
        @test TimeSeries._toInterpolationMethod(:next) == TimeSeries.Next()

        # unknown symbols have no matching Val method
        @test_throws MethodError TimeSeries._toInterpolationMethod(:foo)
    end

    @testset "aggregation" begin
        @test TimeSeries._toAggregationMethod(:mean) == TimeSeries.Mean()
        @test TimeSeries._toAggregationMethod(:min) == TimeSeries.Min()
        @test TimeSeries._toAggregationMethod(:max) == TimeSeries.Max()
        @test TimeSeries._toAggregationMethod(:count) == TimeSeries.Count()
        @test TimeSeries._toAggregationMethod(:sum) == TimeSeries.Sum()
        @test TimeSeries._toAggregationMethod(:median) == TimeSeries.Median()
        @test TimeSeries._toAggregationMethod(:first) == TimeSeries.First()
        @test TimeSeries._toAggregationMethod(:last) == TimeSeries.Last()

        @test_throws MethodError TimeSeries._toAggregationMethod(:foo)
    end

    @testset "extrapolation" begin
        @test TimeSeries._toExtrapolationMethod(:fillconstant) == TimeSeries.FillConstant(0.0)
        @test TimeSeries._toExtrapolationMethod(:nearest) == TimeSeries.NearestExtrapolate()
        @test TimeSeries._toExtrapolationMethod(:missing) == TimeSeries.MissingExtrapolate()
        @test TimeSeries._toExtrapolationMethod(:nan) == TimeSeries.NaNExtrapolate()

        @test_throws MethodError TimeSeries._toExtrapolationMethod(:foo)
    end

    @testset "single column" begin
        new_timestamps = collect(Dates.Date(2000):Dates.Week(1):Dates.Date(2001))

        funcs = [mean, sum, minimum, maximum, last]
        downsamples = [TimeSeries.Mean(), TimeSeries.Sum(), TimeSeries.Min(), TimeSeries.Max(), TimeSeries.Last()]
        @testset for (func, downsample) in zip(funcs, downsamples)
            cl_new = retime(cl, new_timestamps; upsample=TimeSeries.Linear(), downsample)

            @test timestamp(cl_new) == new_timestamps

            # extrapolation: first target precedes the data, nearest sample is row 1
            @test values(cl_new[1, :Close]) == values(cl[1, :Close])

            # aggregation over the half-open interval [t2, t3)
            idx = new_timestamps[2] .<= timestamp(cl) .< new_timestamps[3]
            @test func(values(cl[:Close][idx])) == values(cl_new[:Close][2])[1]
        end

        # the same using Symbols
        downsample_symbols = [:mean, :sum, :min, :max, :last]
        @testset for (func, downsample) in zip(funcs, downsample_symbols)
            cl_new = retime(cl, new_timestamps; upsample=TimeSeries.Linear(), downsample)

            @test timestamp(cl_new) == new_timestamps
            @test values(cl_new[1, :Close]) == values(cl[1, :Close])

            idx = new_timestamps[2] .<= timestamp(cl) .< new_timestamps[3]
            @test func(values(cl[:Close][idx])) == values(cl_new[:Close][2])[1]
        end
    end

    @testset "single column interpolation" begin
        new_timestamps = collect(Dates.DateTime(2000):Dates.Hour(1):Dates.DateTime(2001))

        upsamples = [TimeSeries.Linear(), TimeSeries.Previous(), TimeSeries.Next(), TimeSeries.Nearest()]
        @testset for upsample in upsamples
            cl_new = retime(cl, new_timestamps; upsample)

            @test timestamp(cl_new) == new_timestamps

            # TODO: real tests
        end

        # the same using Symbols
        upsample_symbols = [:linear, :previous, :next, :nearest]
        @testset for upsample in upsample_symbols
            cl_new = retime(cl, new_timestamps; upsample)

            @test timestamp(cl_new) == new_timestamps

            # TODO: real tests
        end
    end

    @testset "single column extrapolate" begin
        new_timestamps = collect(Dates.DateTime(2000):Dates.Hour(1):Dates.DateTime(2001))

        cl_new = retime(cl, new_timestamps; extrapolate=TimeSeries.FillConstant(0.0))
        @test timestamp(cl_new) == new_timestamps
        @test values(cl_new[:Close][1])[1] == 0.0

        cl_new = retime(cl, new_timestamps; extrapolate=TimeSeries.NearestExtrapolate())
        @test timestamp(cl_new) == new_timestamps
        @test values(cl_new[:Close][1])[1] == values(cl[:Close][1])[1]

        cl_new = retime(cl, new_timestamps; extrapolate=TimeSeries.MissingExtrapolate())
        @test timestamp(cl_new) == new_timestamps
        @test all(ismissing.(values(cl_new[:Close][1])))

        cl_new = retime(cl, new_timestamps; extrapolate=TimeSeries.NaNExtrapolate())
        @test timestamp(cl_new) == new_timestamps
        @test all(isnan.(values(cl_new[:Close][1])))
    end

    @testset "multi column" begin
        new_timestamps = collect(Dates.Date(2000):Dates.Week(1):Dates.Date(2001))

        funcs = [mean, sum, minimum, maximum, last]
        downsamples = [TimeSeries.Mean(), TimeSeries.Sum(), TimeSeries.Min(), TimeSeries.Max(), TimeSeries.Last()]
        @testset for (func, downsample) in zip(funcs, downsamples)
            # FIX: pass the looped `downsample` (was hard-coded to Mean()).
            ohlc_new = retime(ohlc, new_timestamps; upsample=TimeSeries.Linear(), downsample)

            @test timestamp(ohlc_new) == new_timestamps

            # extrapolation: FIX — compare against the original array
            # (previously compared ohlc_new to itself, a tautology)
            @test values(ohlc_new[1]) == values(ohlc[1])

            # aggregation, column-wise with the looped `func` (was hard-coded mean)
            idx = new_timestamps[2] .<= timestamp(ohlc) .< new_timestamps[3]
            expected = [func(col) for col in eachcol(values(ohlc[idx]))]
            @test vec(values(ohlc_new[2])) == expected
        end
    end

    @testset "multi column interpolation" begin
        new_timestamps = collect(Dates.DateTime(2000):Dates.Hour(1):Dates.DateTime(2001))

        upsamples = [TimeSeries.Linear(), TimeSeries.Previous(), TimeSeries.Next(), TimeSeries.Nearest()]
        @testset for upsample in upsamples
            ohlc_new = retime(ohlc, new_timestamps; upsample)

            @test timestamp(ohlc_new) == new_timestamps

            # TODO: real tests
        end
    end

    @testset "multi column extrapolate" begin
        new_timestamps = collect(Dates.DateTime(2000):Dates.Hour(1):Dates.DateTime(2001))

        ohlc_new = retime(ohlc, new_timestamps; extrapolate=TimeSeries.FillConstant(0.0))
        @test timestamp(ohlc_new) == new_timestamps
        @test values(ohlc_new[1]) == zeros(1, 4)

        ohlc_new = retime(ohlc, new_timestamps; extrapolate=TimeSeries.NearestExtrapolate())
        @test timestamp(ohlc_new) == new_timestamps
        @test values(ohlc_new[1]) == values(ohlc[1])

        ohlc_new = retime(ohlc, new_timestamps; extrapolate=TimeSeries.MissingExtrapolate())
        @test timestamp(ohlc_new) == new_timestamps
        @test all(ismissing.(values(ohlc_new[1])))

        ohlc_new = retime(ohlc, new_timestamps; extrapolate=TimeSeries.NaNExtrapolate())
        @test timestamp(ohlc_new) == new_timestamps
        @test all(isnan.(values(ohlc_new[1])))
    end

    @testset "single column with missing" begin
        new_timestamps = collect(Dates.Date(2000):Dates.Week(1):Dates.Date(2001))
        # corrupt some values; FIX: seed the RNG so the test is deterministic
        rng = MersenneTwister(42)
        cl_missing = TimeArray(
            timestamp(cl),
            let vals = convert(Vector{Union{Float64,Missing}}, copy(values(cl)))
                vals[rand(rng, 1:length(vals), 100)] .= missing
                vals
            end,
            colnames(cl),
        )

        # skip_missing=false keeps the missing-capable eltype
        cl_raw = retime(cl_missing, new_timestamps; upsample=:linear, downsample=:mean, skip_missing=false)
        @test eltype(values(cl_raw)) >: Missing

        cl_new = retime(cl_missing, new_timestamps; upsample=:linear, downsample=:mean, skip_missing=true)
        @test !any(ismissing.(values(cl_new)))
    end

    @testset "single column with NaN" begin
        new_timestamps = collect(Dates.Date(2000):Dates.Week(1):Dates.Date(2001))
        # corrupt some values; FIX: seed the RNG so the test is deterministic
        rng = MersenneTwister(42)
        cl_nan = TimeArray(
            timestamp(cl),
            let vals = copy(values(cl))
                vals[rand(rng, 1:length(vals), 100)] .= NaN
                vals
            end,
            colnames(cl),
        )

        # skip_missing=false lets NaNs flow through the aggregation
        cl_raw = retime(cl_nan, new_timestamps; upsample=:linear, downsample=:mean, skip_missing=false)
        @test any(isnan, values(cl_raw))

        cl_new = retime(cl_nan, new_timestamps; upsample=:linear, downsample=:mean, skip_missing=true)
        @test !any(isnan.(values(cl_new)))
    end
end