From 0d13f40456dd8f75731bb358c26eca496e7f7deb Mon Sep 17 00:00:00 2001 From: Davi Sales Barreira Date: Wed, 18 Sep 2024 12:06:31 -0300 Subject: [PATCH 1/2] :sparkles: Map refactor using rows. --- src/transforms/map.jl | 86 ++++++++++++------------------------------- 1 file changed, 23 insertions(+), 63 deletions(-) diff --git a/src/transforms/map.jl b/src/transforms/map.jl index b06b9a08..d8ca3383 100644 --- a/src/transforms/map.jl +++ b/src/transforms/map.jl @@ -3,28 +3,22 @@ # ------------------------------------------------------------------ """ - Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ) + Map(fun₁ => target₁, fun₂, ..., funₙ => targetₙ) -Applies the `funᵢ` function to the columns selected by `colsᵢ` using -the `map` function and saves the result in a new column named `targetᵢ`. - -The column selection can be a single column identifier (index or name), -a collection of identifiers or a regular expression (regex). +Applies the `funᵢ` function to each row of the table +and saves the result in a new column named `targetᵢ`. Passing a target column name is optional and when omitted a new name -is generated by joining the function name with the selected column names. +is generated from the function name. If the target column already exists in the table, the original column will be replaced. 
# Examples ```julia -Map(1 => sin) -Map(:a => sin, "b" => cos => :cos_b) -Map([2, 3] => ((b, c) -> 2b + c)) -Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1) -Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan) -Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") +Map(row->sin(row.a)) +Map((row->sum(row)) => :col1) +Map((row->row.a^2) => :col1, (row->row.b + row.c) => :col2) ``` ## Notes @@ -37,7 +31,6 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") * `Base.Fix2` functions: `Base.Fix2(f, x)` -> `fix2_f`; """ struct Map <: StatelessFeatureTransform - selectors::Vector{ColumnSelector} funs::Vector{Function} targets::Vector{Union{Nothing,Symbol}} end @@ -46,69 +39,36 @@ Map() = throw(ArgumentError("cannot create Map transform without arguments")) # utility types const TargetName = Union{Symbol,AbstractString} -const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}} -const PairWithoutTarget = Pair{<:Any,<:Function} -const MapPair = Union{PairWithTarget,PairWithoutTarget} +const PairFunctionTarget = Pair{<:Function,<:TargetName} +const MapPair = Union{PairFunctionTarget,Function} # utility functions -_extract(p::PairWithTarget) = selector(first(p)), first(last(p)), Symbol(last(last(p))) -_extract(p::PairWithoutTarget) = selector(first(p)), last(p), nothing +_extract(p::PairFunctionTarget) = first(p), Symbol(last(p)) +_extract(p::Function) = p, nothing function Map(pairs::MapPair...) 
tuples = map(_extract, pairs) - selectors = [t[1] for t in tuples] - funs = [t[2] for t in tuples] - targets = [t[3] for t in tuples] - Map(selectors, funs, targets) + funs = [t[1] for t in tuples] + targets = [t[2] for t in tuples] + Map(funs, targets) end isrevertible(::Type{Map}) = false -_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f) -_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f) -_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner) +# _funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f) +# _funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f) +# _funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner) _funname(fun) = string(fun) -function _makename(snames, fun) - funname = _funname(fun) - if contains(funname, "#") # anonymous functions - funname = replace(funname, "#" => "f") - end - Symbol(funname, :_, join(snames, "_")) -end - function applyfeat(transform::Map, feat, prep) - cols = Tables.columns(feat) - onames = Tables.columnnames(cols) - - selectors = transform.selectors + rows = Tables.rows(feat) funs = transform.funs targets = transform.targets - - # new names and columns - names = collect(onames) - columns = Any[Tables.getcolumn(cols, nm) for nm in onames] - - # mapped columns - mapped = map(selectors, funs, targets) do selector, fun, target - snames = selector(names) - newname = isnothing(target) ? _makename(snames, fun) : target - scolumns = (Tables.getcolumn(cols, nm) for nm in snames) - newcolumn = map(fun, scolumns...) + mapped = map(funs, targets) do fun, target + newname = isnothing(target) ? Symbol(_funname(fun)) : target + newcolumn = map(row->fun(Tables.Row(row)),rows) newname => newcolumn end - - for (name, column) in mapped - if name ∈ onames - i = findfirst(==(name), onames) - columns[i] = column - else - push!(names, name) - push!(columns, column) - end - end - - 𝒯 = (; zip(names, columns)...) 
- newfeat = 𝒯 |> Tables.materializer(feat) + newfeat = merge(Tables.columns(feat),mapped) |> Tables.materializer(feat) newfeat, nothing -end +end \ No newline at end of file From 618a68a625414ca978bbc5af4788eae2045a7270 Mon Sep 17 00:00:00 2001 From: Davi Sales Barreira Date: Wed, 18 Sep 2024 12:06:44 -0300 Subject: [PATCH 2/2] Tests for refactor. --- test/transforms/map.jl | 93 ++---------------------------------------- 1 file changed, 4 insertions(+), 89 deletions(-) diff --git a/test/transforms/map.jl b/test/transforms/map.jl index b19ec36c..ccc15673 100644 --- a/test/transforms/map.jl +++ b/test/transforms/map.jl @@ -1,100 +1,15 @@ @testset "Map" begin - @test !isrevertible(Map(:a => sin)) - + @test !isrevertible(Map(row->row)) a = [4, 7, 8, 5, 8, 1] b = [1, 9, 1, 7, 9, 4] c = [2, 8, 6, 3, 2, 2] d = [7, 5, 9, 5, 3, 4] t = Table(; a, b, c, d) - T = Map(1 => sin) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_a) - @test n.sin_a == sin.(t.a) - - T = Map(:b => cos) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :cos_b) - @test n.cos_b == cos.(t.b) - - T = Map("c" => tan) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :tan_c) - @test n.tan_c == tan.(t.c) - - T = Map(:a => sin => :a) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d) - @test n.a == sin.(t.a) - - T = Map(:a => sin => "a") - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d) - @test n.a == sin.(t.a) - - T = Map([2, 3] => ((b, c) -> 2b + c) => :op1) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :op1) - @test n.op1 == @. 2 * t.b + t.c - - T = Map([:a, :c] => ((a, c) -> 2a * 3c) => :op1) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :op1) - @test n.op1 == @. 
2 * t.a * 3 * t.c - - T = Map(["c", "a"] => ((c, a) -> 3c / a) => :op1, "c" => tan) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :tan_c) - @test n.op1 == @. 3 * t.c / t.a - @test n.tan_c == tan.(t.c) - - T = Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "op1") - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :op1) - @test n.op1 == @. t.a^2 - 2 * t.b + t.c - - # generated names - # normal function - T = Map([:c, :d] => hypot) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :hypot_c_d) - @test n.hypot_c_d == hypot.(t.c, t.d) - - # anonymous function - f = a -> a^2 + 3 - fname = replace(string(f), "#" => "f") - colname = Symbol(fname, :_a) - T = Map(:a => f) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, colname) - @test Tables.getcolumn(n, colname) == f.(t.a) - - # composed function - f = sin ∘ cos - T = Map(:b => f) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_b) - @test n.sin_cos_b == f.(t.b) - - f = sin ∘ cos ∘ tan - T = Map(:c => sin ∘ cos ∘ tan) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_tan_c) - @test n.sin_cos_tan_c == f.(t.c) - - # Base.Fix1 - f = Base.Fix1(hypot, 2) - T = Map(:d => f) - n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :fix1_hypot_d) - @test n.fix1_hypot_d == f.(t.d) - - # Base.Fix2 - f = Base.Fix2(hypot, 2) - T = Map(:a => f) + T = Map((row -> row.a) => :e) n, c = apply(T, t) - @test Tables.schema(n).names == (:a, :b, :c, :d, :fix2_hypot_a) - @test n.fix2_hypot_a == f.(t.a) + @test Tables.schema(n).names == (:a, :b, :c, :d, :e) + @test n.e == t.a # error: cannot create Map transform without arguments @test_throws ArgumentError Map()