From 5356e6c3ad86d4ba62555e70b2f71de14b757e94 Mon Sep 17 00:00:00 2001 From: Elias Carvalho <73039601+eliascarv@users.noreply.github.com> Date: Wed, 27 Mar 2024 18:14:10 -0300 Subject: [PATCH] Refactor name generation of the `Map` transform (#272) * Refactor name generation of the 'Map' transform * Update docstring --- src/transforms/map.jl | 24 ++++++++++++++--- test/transforms/map.jl | 61 +++++++++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/src/transforms/map.jl b/src/transforms/map.jl index 2ed317e8..b06b9a08 100644 --- a/src/transforms/map.jl +++ b/src/transforms/map.jl @@ -12,7 +12,7 @@ The column selection can be a single column identifier (index or name), a collection of identifiers or a regular expression (regex). Passing a target column name is optional and when omitted a new name -is generated by joining the selected column names with the function name. +is generated by joining the function name with the selected column names. If the target column already exists in the table, the original column will be replaced. @@ -20,7 +20,7 @@ column will be replaced. ```julia Map(1 => sin) -Map(:a => sin, "b" => cos => :b_cos) +Map(:a => sin, "b" => cos => :cos_b) Map([2, 3] => ((b, c) -> 2b + c)) Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1) Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan) @@ -29,7 +29,12 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") ## Notes -* Anonymous functions must be passed with parentheses as in the examples above. +* Anonymous functions must be passed with parentheses as in the examples above; +* Some function names are treated in a special way, they are: + * Anonymous functions: `#1` -> `f1`; + * Composed functions: `outer ∘ inner` -> `outer_inner`; + * `Base.Fix1` functions: `Base.Fix1(f, x)` -> `fix1_f`; + * `Base.Fix2` functions: `Base.Fix2(f, x)` -> `fix2_f`; """ struct Map <: StatelessFeatureTransform selectors::Vector{ColumnSelector} @@ -59,7 +64,18 @@ end isrevertible(::Type{Map}) = false -_makename(snames, fun) = Symbol(join([snames; nameof(fun)], "_")) +_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f) +_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f) +_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner) +_funname(fun) = string(fun) + +function _makename(snames, fun) + funname = _funname(fun) + if contains(funname, "#") # anonymous functions + funname = replace(funname, "#" => "f") + end + Symbol(funname, :_, join(snames, "_")) +end function applyfeat(transform::Map, feat, prep) cols = Tables.columns(feat) diff --git a/test/transforms/map.jl b/test/transforms/map.jl index c0f464b4..b19ec36c 100644 --- a/test/transforms/map.jl +++ b/test/transforms/map.jl @@ -9,18 +9,18 @@ T = Map(1 => sin) n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :a_sin) - @test n.a_sin == sin.(t.a) + @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_a) + @test n.sin_a == sin.(t.a) T = Map(:b => cos) n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :b_cos) - @test n.b_cos == cos.(t.b) + @test Tables.schema(n).names == (:a, :b, :c, :d, :cos_b) + @test n.cos_b == cos.(t.b) T = Map("c" => tan) n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :c_tan) - @test n.c_tan == tan.(t.c) + @test Tables.schema(n).names == (:a, :b, :c, :d, :tan_c) + @test n.tan_c == tan.(t.c) T = Map(:a => sin => :a) n, c = apply(T, t) @@ -44,15 +44,58 @@ T = Map(["c", "a"] => ((c, a) -> 3c / a) => :op1, "c" => tan) n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :c_tan) + @test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :tan_c) @test n.op1 == @. 3 * t.c / t.a - @test n.c_tan == tan.(t.c) + @test n.tan_c == tan.(t.c) T = Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "op1") n, c = apply(T, t) @test Tables.schema(n).names == (:a, :b, :c, :d, :op1) @test n.op1 == @. t.a^2 - 2 * t.b + t.c - # throws + # generated names + # normal function + T = Map([:c, :d] => hypot) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, :hypot_c_d) + @test n.hypot_c_d == hypot.(t.c, t.d) + + # anonymous function + f = a -> a^2 + 3 + fname = replace(string(f), "#" => "f") + colname = Symbol(fname, :_a) + T = Map(:a => f) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, colname) + @test Tables.getcolumn(n, colname) == f.(t.a) + + # composed function + f = sin ∘ cos + T = Map(:b => f) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_b) + @test n.sin_cos_b == f.(t.b) + + f = sin ∘ cos ∘ tan + T = Map(:c => sin ∘ cos ∘ tan) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_tan_c) + @test n.sin_cos_tan_c == f.(t.c) + + # Base.Fix1 + f = Base.Fix1(hypot, 2) + T = Map(:d => f) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, :fix1_hypot_d) + @test n.fix1_hypot_d == f.(t.d) + + # Base.Fix2 + f = Base.Fix2(hypot, 2) + T = Map(:a => f) + n, c = apply(T, t) + @test Tables.schema(n).names == (:a, :b, :c, :d, :fix2_hypot_a) + @test n.fix2_hypot_a == f.(t.a) + + # error: cannot create Map transform without arguments @test_throws ArgumentError Map() end