From b71c19748b5262e290e7b3c32dcf5b6bb04d7eae Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 7 Apr 2021 14:15:07 +0100 Subject: [PATCH 01/13] Implement initial FeatureTransforms support --- Project.toml | 4 ++- src/AxisSets.jl | 3 +++ src/featuretransforms.jl | 56 +++++++++++++++++++++++++++++++++++++++ src/impute.jl | 4 --- src/utils.jl | 4 +++ test/featuretransforms.jl | 40 ++++++++++++++++++++++++++++ test/runtests.jl | 2 ++ 7 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 src/featuretransforms.jl create mode 100644 src/utils.jl create mode 100644 test/featuretransforms.jl diff --git a/Project.toml b/Project.toml index 41b4eed..0b01a21 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "AxisSets" uuid = "a1a1544e-ba16-4f6d-8861-e833517b754e" authors = ["Invenia Technical Computing Corporation"] -version = "0.1.4" +version = "0.1.5" [deps] AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f" AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" +FeatureTransforms = "8fd68953-04b8-4117-ac19-158bf6de9782" Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575" NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -14,6 +15,7 @@ ReadOnlyArrays = "988b38a3-91fc-5605-94a2-ee2116b3bd83" [compat] AutoHashEquals = "0.2" AxisKeys = "0.1" +FeatureTransforms = "0.3" Impute = "0.6" NamedDims = "0.2" OrderedCollections = "1" diff --git a/src/AxisSets.jl b/src/AxisSets.jl index 8621849..fffe089 100644 --- a/src/AxisSets.jl +++ b/src/AxisSets.jl @@ -2,6 +2,7 @@ module AxisSets using AutoHashEquals using AxisKeys +using FeatureTransforms using Impute using NamedDims using OrderedCollections @@ -88,5 +89,7 @@ include("dataset.jl") include("indexing.jl") include("functions.jl") include("impute.jl") +include("featuretransforms.jl") +include("utils.jl") end diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl new file mode 100644 index 0000000..94cafab --- /dev/null +++ b/src/featuretransforms.jl @@ -0,0 +1,56 @@ +FeatureTransforms.is_transformable(::KeyedDataset) = true + +""" + FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims=:, kwargs...) + +Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) +with that dimension. + +# Example +```jldoctest +julia> using AxisKeys, FeatureTransforms; using AxisSets: KeyedDataset, Pattern, flatten; + +julia> ds = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]), + ] + ])... + ); + +julia> p = Power(2); + +julia> [k => parent(parent(v)) for (k, v) in FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)).data] +4-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: + (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9] + (:train, :price) => [4.0 16.0; 9.0 4.0; 1.0 1.0] + (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9] + (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0] +``` +""" +function FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs...) + pattern = _pattern(dims) + dim = pattern.segments[end] + + # Get paths to components + apply_paths = dimpaths(ds, pattern) + apply_paths = [p[1:end-1] for p in apply_paths] + + if dim in (:_, :__) + # Corresponds to element-wise apply in FeatureTransforms + dim = Colon() + apply_paths = unique(apply_paths) + end + + for path in apply_paths + component = ds.data[path] + ds.data[path] = FeatureTransforms.apply(component, t; dims=dim, kwargs...) + end + + return ds +end diff --git a/src/impute.jl b/src/impute.jl index 9bd548d..04cdc9c 100644 --- a/src/impute.jl +++ b/src/impute.jl @@ -126,10 +126,6 @@ julia> [k => parent(parent(v)) for (k, v) in Impute.filter(ds; dims=:loc).data] """ Impute.apply(ds::KeyedDataset, f::Filter; dims) = Impute.apply!(deepcopy(ds), f; dims=dims) -_pattern(dims::Pattern) = dims -_pattern(dims::Tuple) = Pattern(dims) -_pattern(dims) = Pattern(:__, dims) - function Impute.apply!(ds::KeyedDataset, f::Filter; dims) pattern = _pattern(dims) dim = pattern.segments[end] diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..a9f39f5 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,4 @@ +# Convert a dims argument to a Pattern +_pattern(dims::Pattern) = dims +_pattern(dims::Tuple) = Pattern(dims) +_pattern(dims) = Pattern(:__, dims) diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl new file mode 100644 index 0000000..e51c55a --- /dev/null +++ b/test/featuretransforms.jl @@ -0,0 +1,40 @@ +@testset "FeatureTransforms" begin + ds = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]), + ] + ])... + ) + + @testset "transform" begin + @test is_transformable(ds) + end + + @testset "apply" begin + p = Power(2) + + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([4.0 16.0; 9.0 4.0; 1.0 1.0]; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([0.25 1.0; 25.0 4.0; 0.0 1.0]; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)) + + @test r isa KeyedDataset + @test isequal(r, expected) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index a4be95b..3b582f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using AxisKeys using AxisSets using Dates using Documenter +using FeatureTransforms using Impute using Missings using OrderedCollections @@ -20,6 +21,7 @@ using Impute: ThresholdError include("indexing.jl") include("functions.jl") include("impute.jl") + include("featuretransforms.jl") # The doctests fail on x86, so only run them on 64-bit hardware & Julia 1.6 Sys.WORD_SIZE == 64 && v"1.6" <= VERSION < v"1.7" && doctest(AxisSets) From f3bd0362cf172bb072a0ac2d35aceb3e54f2000a Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 7 Apr 2021 14:18:35 +0100 Subject: [PATCH 02/13] Only return transformed components in new dataset --- src/featuretransforms.jl | 14 +++++++------- test/featuretransforms.jl | 18 ++++++++++++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index 94cafab..d0c1109 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -25,11 +25,11 @@ julia> ds = KeyedDataset( julia> p = Power(2); -julia> [k => parent(parent(v)) for (k, v) in FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)).data] -4-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: - (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9] +julia> r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)); + +julia> [k => parent(parent(v)) for (k, v) in r.data] +2-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: (:train, :price) => [4.0 16.0; 9.0 4.0; 1.0 1.0] - (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9] (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0] ``` """ @@ -47,10 +47,10 @@ function FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs... apply_paths = unique(apply_paths) end - for path in apply_paths + pairs = map(apply_paths) do path component = ds.data[path] - ds.data[path] = FeatureTransforms.apply(component, t; dims=dim, kwargs...) + path => FeatureTransforms.apply(component, t; dims=dim, kwargs...) end - return ds + return KeyedDataset(pairs...) end diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl index e51c55a..7412d52 100644 --- a/test/featuretransforms.jl +++ b/test/featuretransforms.jl @@ -22,11 +22,9 @@ expected = KeyedDataset( flatten([ :train => [ - :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), :price => KeyedArray([4.0 16.0; 9.0 4.0; 1.0 1.0]; time=1:3, id=[:a, :b]), ], :predict => [ - :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), :price => KeyedArray([0.25 1.0; 25.0 4.0; 0.0 1.0]; time=1:3, id=[:a, :b]), ] ])... @@ -36,5 +34,21 @@ @test r isa KeyedDataset @test isequal(r, expected) + + expected = KeyedDataset( + flatten([ + :train => [ + :price => KeyedArray(hcat([16.0; 4.0; 1.0]); time=1:3, id=[:b]), + ], + :predict => [ + :price => KeyedArray(hcat([1.0; 4.0; 1.0]); time=1:3, id=[:b]), + ] + ])... + ) + + r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :id), inds=[2]) + + @test r isa KeyedDataset + @test isequal(r, expected) end end From 80bd4b328b7e5f46604cef7ad3ba35c2de7708d7 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 7 Apr 2021 14:53:54 +0100 Subject: [PATCH 03/13] Bump julia compat to 1.5 For FeatureTransforms compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0b01a21..aa8020b 100644 --- a/Project.toml +++ b/Project.toml @@ -20,7 +20,7 @@ Impute = "0.6" NamedDims = "0.2" OrderedCollections = "1" ReadOnlyArrays = "0.1" -julia = "1.3" +julia = "1.5" [extras] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" From de3304f7e51d32208be6e8996010ca7f5ec0b016 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Fri, 9 Apr 2021 17:59:25 +0100 Subject: [PATCH 04/13] Move constructing paths to separate function --- src/featuretransforms.jl | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index d0c1109..4445299 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -1,5 +1,29 @@ FeatureTransforms.is_transformable(::KeyedDataset) = true +""" + _apply_paths(ds::KeyedDataset, dims) + +Based on the pattern specified by `dims`, returns a Tuple of +1. paths to components of `ds` that a `FeatureTransforms.Transform` should apply to, +2. the dimension of the components to apply along. +""" +function _apply_paths(ds::KeyedDataset, dims) + pattern = _pattern(dims) + + # Get paths to components + apply_paths = dimpaths(ds, pattern) + apply_paths = [p[1:end-1] for p in apply_paths] + + dim = pattern.segments[end] + if dim in (:_, :__) + # Corresponds to element-wise apply in FeatureTransforms + dim = Colon() + apply_paths = unique(apply_paths) + end + + return apply_paths, dim +end + """ FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims=:, kwargs...) @@ -34,22 +58,10 @@ julia> [k => parent(parent(v)) for (k, v) in r.data] ``` """ function FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs...) - pattern = _pattern(dims) - dim = pattern.segments[end] - - # Get paths to components - apply_paths = dimpaths(ds, pattern) - apply_paths = [p[1:end-1] for p in apply_paths] - - if dim in (:_, :__) - # Corresponds to element-wise apply in FeatureTransforms - dim = Colon() - apply_paths = unique(apply_paths) - end + apply_paths, dim = _apply_paths(ds, dims) pairs = map(apply_paths) do path - component = ds.data[path] - path => FeatureTransforms.apply(component, t; dims=dim, kwargs...) + path => FeatureTransforms.apply(ds.data[path], t; dims=dim, kwargs...) end return KeyedDataset(pairs...) From 525ccc178649c980a7345f5d7ca5dd0689d30bbb Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 14 Apr 2021 10:27:59 +0100 Subject: [PATCH 05/13] Implement apply! and apply_append Also update docstrings of apply and _apply_paths --- src/featuretransforms.jl | 54 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index 4445299..d59fda4 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -3,7 +3,7 @@ FeatureTransforms.is_transformable(::KeyedDataset) = true """ _apply_paths(ds::KeyedDataset, dims) -Based on the pattern specified by `dims`, returns a Tuple of +Based on the pattern specified by `dims`, returns a `Tuple` of 1. paths to components of `ds` that a `FeatureTransforms.Transform` should apply to, 2. the dimension of the components to apply along. """ @@ -25,10 +25,15 @@ function _apply_paths(ds::KeyedDataset, dims) end """ - FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims=:, kwargs...) + FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs...) Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) -with that dimension. +with that dimension, and return a new [`KeyedDataset`](@ref) of the transformed components. + +If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. +Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. + +Keyword arguments are passed to the equivalent `FeatureTransforms` method. # Example ```jldoctest @@ -66,3 +71,46 @@ function FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs... return KeyedDataset(pairs...) end + +""" + FeatureTransforms.apply!(ds::KeyedDataset, t::Transform; dims, kwargs...) + +Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) +with that dimension, and return the mutated [`KeyedDataset`](@ref). + +If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. +Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. + +Keyword arguments are passed to the equivalent `FeatureTransforms` method. +""" +function FeatureTransforms.apply!(ds::KeyedDataset, t::Transform; dims, kwargs...) + apply_paths, dim = _apply_paths(ds, dims) + + for path in apply_paths + FeatureTransforms.apply!(ds.data[path], t; dims=dim, kwargs...) + end + + return ds +end + +""" + FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform; dims, kwargs...) + +Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) +with that dimension, and return a new [`KeyedDataset`](@ref) with the result of each +transform appended to the original component. + +If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. +Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. + +Keyword arguments are passed to the equivalent `FeatureTransforms` method. +""" +function FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform; dims, kwargs...) + apply_paths, dim = _apply_paths(ds, dims) + + pairs = map(apply_paths) do path + path => FeatureTransforms.apply_append(ds.data[path], t; dims=dim, kwargs...) + end + + return KeyedDataset(pairs...) +end From c8408731784caa51e57712762f6665740b89df9b Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 14 Apr 2021 12:07:42 +0100 Subject: [PATCH 06/13] Allow colon for single dims --- src/utils.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils.jl b/src/utils.jl index a9f39f5..d0f94ec 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,5 @@ # Convert a dims argument to a Pattern _pattern(dims::Pattern) = dims _pattern(dims::Tuple) = Pattern(dims) +_pattern(::Colon) = Pattern(:__) _pattern(dims) = Pattern(:__, dims) From b73a80d5da9a5783bf489b8f1e6ae8e1d4c576e0 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 14 Apr 2021 12:07:54 +0100 Subject: [PATCH 07/13] Update tests for all apply methods --- test/featuretransforms.jl | 144 ++++++++++++++++++++++++++++---------- 1 file changed, 108 insertions(+), 36 deletions(-) diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl index 7412d52..740e2d9 100644 --- a/test/featuretransforms.jl +++ b/test/featuretransforms.jl @@ -1,13 +1,17 @@ @testset "FeatureTransforms" begin + M1 = [0.0 1.0; 1.0 2.0; -0.5 0.0] + M2 = [-2.0 4.0; 3.0 2.0; -1.0 -1.0] + M3 = [0.0 1.0; -1.0 0.5; -0.5 0.0] + M4 = [0.5 -1.0; -5.0 -2.0; 0.0 1.0] ds = KeyedDataset( flatten([ :train => [ - :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), - :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]), + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2; time=1:3, id=[:a, :b]), ], :predict => [ - :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), - :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]), + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4; time=1:3, id=[:a, :b]), ] ])... ) @@ -16,39 +20,107 @@ @test is_transformable(ds) end - @testset "apply" begin + # TODO: use fake Transforms + @testset "apply OneToOne" begin p = Power(2) - expected = KeyedDataset( - flatten([ - :train => [ - :price => KeyedArray([4.0 16.0; 9.0 4.0; 1.0 1.0]; time=1:3, id=[:a, :b]), - ], - :predict => [ - :price => KeyedArray([0.25 1.0; 25.0 4.0; 0.0 1.0]; time=1:3, id=[:a, :b]), - ] - ])... - ) - - r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)) - - @test r isa KeyedDataset - @test isequal(r, expected) - - expected = KeyedDataset( - flatten([ - :train => [ - :price => KeyedArray(hcat([16.0; 4.0; 1.0]); time=1:3, id=[:b]), - ], - :predict => [ - :price => KeyedArray(hcat([1.0; 4.0; 1.0]); time=1:3, id=[:b]), - ] - ])... - ) - - r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :id), inds=[2]) - - @test r isa KeyedDataset - @test isequal(r, expected) + @testset "apply" begin + @testset "one feature" begin + expected = KeyedDataset( + flatten([ + :train => [ + :price => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), + ], + :predict => [ + :price => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)) + + @test r isa KeyedDataset + @test isequal(r, expected) + end + + @testset "all features" begin + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray(M1.^2; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray(M3.^2; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply(ds, p; dims=:) + + @test r isa KeyedDataset + @test isequal(r, expected) + end + + @testset "inds" begin + expected = KeyedDataset( + flatten([ + :train => [ + :price => KeyedArray(hcat((M2.^2)[:, 2]); time=1:3, id=[:b]), + ], + :predict => [ + :price => KeyedArray(hcat((M4.^2)[:, 2]); time=1:3, id=[:b]), + ] + ])... + ) + + r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :id), inds=[2]) + + @test r isa KeyedDataset + @test isequal(r, expected) + end + end + + @testset "apply!" begin + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply!(ds, p; dims=(:_, :price, :_)) + + @test isequal(ds, expected) + @test r isa KeyedDataset + @test isequal(r, expected) + end + + @testset "apply_append" begin + M2_cat = cat(M2, M2.^2, dims=2) + M4_cat = cat(M4, M4.^2, dims=2) + expected = KeyedDataset( + flatten([ + :train => [ + :price => KeyedArray(M2_cat; time=1:3, id=[:a, :b, :a, :b]), + ], + :predict => [ + :price => KeyedArray(M4_cat; time=1:3, id=[:a, :b, :a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply_append(ds, p; dims=(:_, :price, :_), append_dim=2) + + @test r isa KeyedDataset + @test isequal(r, expected) + end end end From f85a135dcb1a3a3633f09af6a23214fbc6dfbe55 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 14 Apr 2021 18:40:00 +0100 Subject: [PATCH 08/13] Put _pattern back into impute.jl --- src/AxisSets.jl | 1 - src/impute.jl | 4 ++++ src/utils.jl | 5 ----- 3 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 src/utils.jl diff --git a/src/AxisSets.jl b/src/AxisSets.jl index fffe089..aa8d5cc 100644 --- a/src/AxisSets.jl +++ b/src/AxisSets.jl @@ -90,6 +90,5 @@ include("indexing.jl") include("functions.jl") include("impute.jl") include("featuretransforms.jl") -include("utils.jl") end diff --git a/src/impute.jl b/src/impute.jl index 04cdc9c..9bd548d 100644 --- a/src/impute.jl +++ b/src/impute.jl @@ -126,6 +126,10 @@ julia> [k => parent(parent(v)) for (k, v) in Impute.filter(ds; dims=:loc).data] """ Impute.apply(ds::KeyedDataset, f::Filter; dims) = Impute.apply!(deepcopy(ds), f; dims=dims) +_pattern(dims::Pattern) = dims +_pattern(dims::Tuple) = Pattern(dims) +_pattern(dims) = Pattern(:__, dims) + function Impute.apply!(ds::KeyedDataset, f::Filter; dims) pattern = _pattern(dims) dim = pattern.segments[end] diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index d0f94ec..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,5 +0,0 @@ -# Convert a dims argument to a Pattern -_pattern(dims::Pattern) = dims -_pattern(dims::Tuple) = Pattern(dims) -_pattern(::Colon) = Pattern(:__) -_pattern(dims) = Pattern(:__, dims) From 8af6a70881bbb910f62142681b06671279e2a3c5 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Wed, 14 Apr 2021 18:41:46 +0100 Subject: [PATCH 09/13] Make apply methods return full dataset and use map - Map simplifies things - don't need the custom _apply_paths() anymore - Returning full dataset seems more appropriate after talking to Rory --- src/featuretransforms.jl | 102 +++++++++++++------------------------- test/featuretransforms.jl | 52 ++++++++++++------- 2 files changed, 67 insertions(+), 87 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index d59fda4..a3c0683 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -1,39 +1,16 @@ FeatureTransforms.is_transformable(::KeyedDataset) = true -""" - _apply_paths(ds::KeyedDataset, dims) - -Based on the pattern specified by `dims`, returns a `Tuple` of -1. paths to components of `ds` that a `FeatureTransforms.Transform` should apply to, -2. the dimension of the components to apply along. -""" -function _apply_paths(ds::KeyedDataset, dims) - pattern = _pattern(dims) - - # Get paths to components - apply_paths = dimpaths(ds, pattern) - apply_paths = [p[1:end-1] for p in apply_paths] - - dim = pattern.segments[end] - if dim in (:_, :__) - # Corresponds to element-wise apply in FeatureTransforms - dim = Colon() - apply_paths = unique(apply_paths) - end - - return apply_paths, dim -end +_pattern(::Colon) = Pattern[(:__,)] +_pattern(dims::Symbol) = Pattern[(:__, dims)] +_pattern(dims) = Pattern[(:__, d) for d in dims] """ - FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs...) + FeatureTransforms.apply(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) -Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) -with that dimension, and return a new [`KeyedDataset`](@ref) of the transformed components. +Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`. +The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. -If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. -Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. - -Keyword arguments are passed to the equivalent `FeatureTransforms` method. +Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. # Example ```jldoctest @@ -62,55 +39,44 @@ julia> [k => parent(parent(v)) for (k, v) in r.data] (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0] ``` """ -function FeatureTransforms.apply(ds::KeyedDataset, t::Transform; dims, kwargs...) - apply_paths, dim = _apply_paths(ds, dims) - - pairs = map(apply_paths) do path - path => FeatureTransforms.apply(ds.data[path], t; dims=dim, kwargs...) - end - - return KeyedDataset(pairs...) +function FeatureTransforms.apply(ds::KeyedDataset, t::Transform, keys...; dims=:, kwargs...) + patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] + return map(a -> FeatureTransforms.apply(a, t; dims=dims, kwargs...), ds, patterns...) end """ - FeatureTransforms.apply!(ds::KeyedDataset, t::Transform; dims, kwargs...) - -Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) -with that dimension, and return the mutated [`KeyedDataset`](@ref). + FeatureTransforms.apply!(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) -If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. -Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. +Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`, +mutating the components in-place. +The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. -Keyword arguments are passed to the equivalent `FeatureTransforms` method. +Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. """ -function FeatureTransforms.apply!(ds::KeyedDataset, t::Transform; dims, kwargs...) - apply_paths, dim = _apply_paths(ds, dims) - - for path in apply_paths - FeatureTransforms.apply!(ds.data[path], t; dims=dim, kwargs...) - end - - return ds +function FeatureTransforms.apply!( + ds::KeyedDataset, t::Transform, keys...; + dims=:, kwargs... +) + patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] + return map(a -> FeatureTransforms.apply!(a, t; dims=dims, kwargs...), ds, patterns...) end """ - FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform; dims, kwargs...) - -Apply the `Transform` along the `dims` for each component in the [`KeyedDataset`](@ref) -with that dimension, and return a new [`KeyedDataset`](@ref) with the result of each -transform appended to the original component. + FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) -If `dims` is a path (`Pattern` or `Tuple`), transform the components that match the path. -Otherwise, transform every component in the `KeyedDataset` that has a `dims` dimension. +Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`. +The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. -Keyword arguments are passed to the equivalent `FeatureTransforms` method. +Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. """ -function FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform; dims, kwargs...) - apply_paths, dim = _apply_paths(ds, dims) - - pairs = map(apply_paths) do path - path => FeatureTransforms.apply_append(ds.data[path], t; dims=dim, kwargs...) +function FeatureTransforms.apply_append( + ds::KeyedDataset, t::Transform, keys...; + inner=false, dims=:, kwargs... +) + patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] + if inner + return map(ds, patterns...) do a + FeatureTransforms.apply_append(a, t; dims=dims, kwargs...) + end end - - return KeyedDataset(pairs...) end diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl index 740e2d9..2183b06 100644 --- a/test/featuretransforms.jl +++ b/test/featuretransforms.jl @@ -29,18 +29,21 @@ expected = KeyedDataset( flatten([ :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), :price => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), ], :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), :price => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), ] ])... ) - r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)) + r = FeatureTransforms.apply(ds, p, (:_, :price, :_)) @test r isa KeyedDataset @test isequal(r, expected) + @test !isequal(ds, expected) end @testset "all features" begin @@ -61,24 +64,28 @@ @test r isa KeyedDataset @test isequal(r, expected) + @test !isequal(ds, expected) end @testset "inds" begin expected = KeyedDataset( flatten([ :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), :price => KeyedArray(hcat((M2.^2)[:, 2]); time=1:3, id=[:b]), ], :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), :price => KeyedArray(hcat((M4.^2)[:, 2]); time=1:3, id=[:b]), ] ])... ) - r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :id), inds=[2]) + r = FeatureTransforms.apply(ds, p, (:_, :price, :_); dims=:id, inds=[2]) @test r isa KeyedDataset @test isequal(r, expected) + @test !isequal(ds, expected) end end @@ -96,31 +103,38 @@ ])... ) - r = FeatureTransforms.apply!(ds, p; dims=(:_, :price, :_)) + r = FeatureTransforms.apply!(ds, p, (:_, :price, :_)) - @test isequal(ds, expected) @test r isa KeyedDataset @test isequal(r, expected) + @test isequal(ds, expected) end @testset "apply_append" begin - M2_cat = cat(M2, M2.^2, dims=2) - M4_cat = cat(M4, M4.^2, dims=2) - expected = KeyedDataset( - flatten([ - :train => [ - :price => KeyedArray(M2_cat; time=1:3, id=[:a, :b, :a, :b]), - ], - :predict => [ - :price => KeyedArray(M4_cat; time=1:3, id=[:a, :b, :a, :b]), - ] - ])... - ) + @testset "inner" begin + M2_cat = cat(M2, M2.^2, dims=2) + M4_cat = cat(M4, M4.^2, dims=2) + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2_cat; time=1:3, id=[:a, :b, :a, :b]), + ], + :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4_cat; time=1:3, id=[:a, :b, :a, :b]), + ] + ])... + ) - r = FeatureTransforms.apply_append(ds, p; dims=(:_, :price, :_), append_dim=2) + r = FeatureTransforms.apply_append( + ds, p, (:_, :price, :_); + inner=true, append_dim=2 + ) - @test r isa KeyedDataset - @test isequal(r, expected) + @test r isa KeyedDataset + @test isequal(r, expected) + end end end end From 5c711931ffa30bb977d7756c99f4f6c527b7f2ec Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Thu, 15 Apr 2021 18:32:26 +0100 Subject: [PATCH 10/13] Use common function for pattern in apply methods --- src/featuretransforms.jl | 27 +++++++++++++++++---------- src/impute.jl | 8 ++++---- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index a3c0683..7416039 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -1,8 +1,9 @@ FeatureTransforms.is_transformable(::KeyedDataset) = true -_pattern(::Colon) = Pattern[(:__,)] -_pattern(dims::Symbol) = Pattern[(:__, dims)] -_pattern(dims) = Pattern[(:__, d) for d in dims] +_transform_pattern(keys, dims) = isempty(keys) ? _transform_pattern(dims) : Pattern[keys...] +_transform_pattern(::Colon) = Pattern[(:__,)] +_transform_pattern(dims::Symbol) = Pattern[(:__, dims)] +_transform_pattern(dims) = Pattern[(:__, d) for d in dims] """ FeatureTransforms.apply(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) @@ -39,9 +40,13 @@ julia> [k => parent(parent(v)) for (k, v) in r.data] (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0] ``` """ -function FeatureTransforms.apply(ds::KeyedDataset, t::Transform, keys...; dims=:, kwargs...) - patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] - return map(a -> FeatureTransforms.apply(a, t; dims=dims, kwargs...), ds, patterns...) +function FeatureTransforms.apply( + ds::KeyedDataset, t::Transform, keys...; + dims=:, kwargs... +) + return map(ds, _transform_pattern(keys, dims)...) do a + FeatureTransforms.apply(a, t; dims=dims, kwargs...) + end end """ @@ -57,8 +62,9 @@ function FeatureTransforms.apply!( ds::KeyedDataset, t::Transform, keys...; dims=:, kwargs... ) - patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] - return map(a -> FeatureTransforms.apply!(a, t; dims=dims, kwargs...), ds, patterns...) + return map(ds, _transform_pattern(keys, dims)...) do a + FeatureTransforms.apply!(a, t; dims=dims, kwargs...) + end end """ @@ -73,8 +79,9 @@ function FeatureTransforms.apply_append( ds::KeyedDataset, t::Transform, keys...; inner=false, dims=:, kwargs... ) - patterns = isempty(keys) ? _pattern(dims) : Pattern[keys...] - if inner + patterns = _transform_pattern(keys, dims) + + if inner # batched apply_append on each component return map(ds, patterns...) do a FeatureTransforms.apply_append(a, t; dims=dims, kwargs...) end diff --git a/src/impute.jl b/src/impute.jl index 9bd548d..fe8b782 100644 --- a/src/impute.jl +++ b/src/impute.jl @@ -126,12 +126,12 @@ julia> [k => parent(parent(v)) for (k, v) in Impute.filter(ds; dims=:loc).data] """ Impute.apply(ds::KeyedDataset, f::Filter; dims) = Impute.apply!(deepcopy(ds), f; dims=dims) -_pattern(dims::Pattern) = dims -_pattern(dims::Tuple) = Pattern(dims) -_pattern(dims) = Pattern(:__, dims) +_impute_pattern(dims::Pattern) = dims +_impute_pattern(dims::Tuple) = Pattern(dims) +_impute_pattern(dims) = Pattern(:__, dims) function Impute.apply!(ds::KeyedDataset, f::Filter; dims) - pattern = _pattern(dims) + pattern = _impute_pattern(dims) dim = pattern.segments[end] dim in (:_, :__) && throw(ArgumentError( From 230ea65522910bff44683554d631d39ee444859c Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Thu, 15 Apr 2021 18:32:51 +0100 Subject: [PATCH 11/13] Implement outer apply_append method --- src/featuretransforms.jl | 18 +++++++++++++- test/featuretransforms.jl | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index 7416039..e06f47b 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -77,7 +77,7 @@ Keyword arguments are passed to the equivalent `FeatureTransforms` method for a """ function FeatureTransforms.apply_append( ds::KeyedDataset, t::Transform, keys...; - inner=false, dims=:, kwargs... + inner=false, component_name=nothing, dims=:, kwargs... ) patterns = _transform_pattern(keys, dims) @@ -85,5 +85,21 @@ function FeatureTransforms.apply_append( return map(ds, patterns...) do a FeatureTransforms.apply_append(a, t; dims=dims, kwargs...) end + else # merge transformed components as new components of dataset + # select any components the keys match + selected = unique(x[1:end-1] for x in dimpaths(ds) if any(p -> x in p, patterns)) + + # construct keys of new transformed components + new_keys = map(selected) do k + component_name = isnothing(component_name) ? :component : component_name + (k[1:end-1]..., component_name) + end + + # pair new keys with transformed components + pairs = map(new_keys, selected) do new_k, k + new_k => FeatureTransforms.apply(ds.data[k], t; dims=dims, kwargs...) + end + + return merge(ds, KeyedDataset(pairs...)) end end diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl index 2183b06..2870e06 100644 --- a/test/featuretransforms.jl +++ b/test/featuretransforms.jl @@ -134,6 +134,57 @@ @test r isa KeyedDataset @test isequal(r, expected) + @test !isequal(ds, expected) + end + + @testset "outer" begin + M2_cat = cat(M2, M2.^2, dims=2) + M4_cat = cat(M4, M4.^2, dims=2) + + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2; time=1:3, id=[:a, :b]), + :component => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4; time=1:3, id=[:a, :b]), + :component => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply_append(ds, p, (:_, :price, :_); append_dim=2) + + @test r isa KeyedDataset + @test isequal(r, expected) + @test !isequal(ds, expected) + + expected = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray(M1; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M2; time=1:3, id=[:a, :b]), + :price2 => KeyedArray(M2.^2; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray(M3; time=1:3, loc=[:x, :y]), + :price => KeyedArray(M4; time=1:3, id=[:a, :b]), + :price2 => KeyedArray(M4.^2; time=1:3, id=[:a, :b]), + ] + ])... + ) + + r = FeatureTransforms.apply_append( + ds, p, (:_, :price, :_); + component_name=:price2, append_dim=2 + ) + + @test r isa KeyedDataset + @test isequal(r, expected) + @test !isequal(ds, expected) end end end From c7423b4724416884e772a73bf0026ca637ec0342 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Thu, 15 Apr 2021 19:03:09 +0100 Subject: [PATCH 12/13] Simplify component_name --- src/featuretransforms.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index e06f47b..3212e44 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -77,7 +77,7 @@ Keyword arguments are passed to the equivalent `FeatureTransforms` method for a """ function FeatureTransforms.apply_append( ds::KeyedDataset, t::Transform, keys...; - inner=false, component_name=nothing, dims=:, kwargs... + dims=:, inner=false, component_name=:component, kwargs... ) patterns = _transform_pattern(keys, dims) @@ -90,10 +90,7 @@ function FeatureTransforms.apply_append( selected = unique(x[1:end-1] for x in dimpaths(ds) if any(p -> x in p, patterns)) # construct keys of new transformed components - new_keys = map(selected) do k - component_name = isnothing(component_name) ? :component : component_name - (k[1:end-1]..., component_name) - end + new_keys = [(k[1:end-1]..., component_name) for k in selected] # pair new keys with transformed components pairs = map(new_keys, selected) do new_k, k From 6a26438eaaaffefa40224bd1daf36f9fb5bfd848 Mon Sep 17 00:00:00 2001 From: Ben Cottier Date: Thu, 15 Apr 2021 19:03:23 +0100 Subject: [PATCH 13/13] Update docstrings and doctests --- src/featuretransforms.jl | 72 +++++++++++++++++++++++++++++++++------ test/featuretransforms.jl | 2 +- 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl index 3212e44..f6861f6 100644 --- a/src/featuretransforms.jl +++ b/src/featuretransforms.jl @@ -8,10 +8,14 @@ _transform_pattern(dims) = Pattern[(:__, d) for d in dims] """ FeatureTransforms.apply(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) -Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`. +Apply the `Transform` to each component of the [`KeyedDataset`](@ref). +Returns a new dataset with the same constraints, but transformed components. + The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. +Otherwise, components are selected by the desired `dims`. -Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. +Keyword arguments including `dims` are passed to the appropriate `FeatureTransforms` method +for a component. # Example ```jldoctest @@ -32,11 +36,13 @@ julia> ds = KeyedDataset( julia> p = Power(2); -julia> r = FeatureTransforms.apply(ds, p; dims=(:_, :price, :_)); +julia> r = FeatureTransforms.apply(ds, p, (:_, :price, :_)); julia> [k => parent(parent(v)) for (k, v) in r.data] -2-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: +4-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: + (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9] (:train, :price) => [4.0 16.0; 9.0 4.0; 1.0 1.0] + (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9] (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0] ``` """ @@ -52,11 +58,14 @@ end """ FeatureTransforms.apply!(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) -Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`, -mutating the components in-place. +Apply the `Transform` to each component of the [`KeyedDataset`](@ref). +Returns a new dataset with the same constraints, but transformed components. + The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. +Otherwise, components are selected by the desired `dims`. -Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. +Keyword arguments including `dims` are passed to the appropriate `FeatureTransforms` method +for a component. """ function FeatureTransforms.apply!( ds::KeyedDataset, t::Transform, keys...; @@ -68,12 +77,55 @@ function FeatureTransforms.apply!( end """ - FeatureTransforms.apply_append(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...) + FeatureTransforms.apply_append( + ds::KeyedDataset, t::Transform, [key]; + dims=:, inner=false, component_name=:component, kwargs... + ) + +Apply the `Transform` to each component of the [`KeyedDataset`](@ref). -Apply the `Transform` to components of the [`KeyedDataset`](@ref) along dimension `dims`. The transform can be applied to a subselection of components via a [`Pattern`](@ref) `key`. +Otherwise, components are selected by the desired `dims`. + +If `inner=true`, perform `FeatureTransforms.apply_append` on each component, +returning a new dataset with the same constraints, but transformed components. + +Otherwise, transform each component using `FeatureTransforms.apply`, and append +to a copy of the dataset as a new component called `component_name`. + +Keyword arguments including `dims` are passed to the appropriate `FeatureTransforms` method +for a component. + +# Example +```jldoctest +julia> using AxisKeys, FeatureTransforms; using AxisSets: KeyedDataset, Pattern, flatten; + +julia> ds = KeyedDataset( + flatten([ + :train => [ + :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]), + ], + :predict => [ + :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]), + :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]), + ] + ])... + ); + +julia> p = Power(2); -Keyword arguments are passed to the equivalent `FeatureTransforms` method for a component. +julia> r = FeatureTransforms.apply_append(ds, p, (:_, :price, :_); component_name=:price2); + +julia> [k => parent(parent(v)) for (k, v) in r.data] +6-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}: + (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9] + (:train, :price) => [-2.0 4.0; 3.0 2.0; -1.0 -1.0] + (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9] + (:predict, :price) => [0.5 -1.0; -5.0 -2.0; 0.0 1.0] + (:train, :price2) => [4.0 16.0; 9.0 4.0; 1.0 1.0] + (:predict, :price2) => [0.25 1.0; 25.0 4.0; 0.0 1.0] +``` """ function FeatureTransforms.apply_append( ds::KeyedDataset, t::Transform, keys...; diff --git a/test/featuretransforms.jl b/test/featuretransforms.jl index 2870e06..b6ea906 100644 --- a/test/featuretransforms.jl +++ b/test/featuretransforms.jl @@ -179,7 +179,7 @@ r = FeatureTransforms.apply_append( ds, p, (:_, :price, :_); - component_name=:price2, append_dim=2 + component_name=:price2 ) @test r isa KeyedDataset