Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Map with row function syntax #291 #292

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 23 additions & 63 deletions src/transforms/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,22 @@
# ------------------------------------------------------------------

"""
Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ)
Map(fun₁ => target₁, fun₂, ..., funₙ => targetₙ)

Applies the `funᵢ` function to the columns selected by `colsᵢ` using
the `map` function and saves the result in a new column named `targetᵢ`.

The column selection can be a single column identifier (index or name),
a collection of identifiers or a regular expression (regex).
Applies the `funᵢ` function to each row of the table
and saves the result in a new column named `targetᵢ`.

Passing a target column name is optional and when omitted a new name
is generated by joining the function name with the selected column names.
is generated from the function name.
If the target column already exists in the table, the original
column will be replaced.

# Examples

```julia
Map(1 => sin)
Map(:a => sin, "b" => cos => :cos_b)
Map([2, 3] => ((b, c) -> 2b + c))
Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1)
Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan)
Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
Map(row->sin(row.a))
Map((row->sum(row)) => :col1)
Map((row->row.a^2) => :col1, (row->row.b + row.c) => :col2)
```

## Notes
Expand All @@ -37,7 +31,6 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
* `Base.Fix2` functions: `Base.Fix2(f, x)` -> `fix2_f`;
"""
struct Map <: StatelessFeatureTransform
selectors::Vector{ColumnSelector}
funs::Vector{Function}
targets::Vector{Union{Nothing,Symbol}}
end
Expand All @@ -46,69 +39,36 @@ Map() = throw(ArgumentError("cannot create Map transform without arguments"))

# utility types
const TargetName = Union{Symbol,AbstractString}
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}
const PairWithoutTarget = Pair{<:Any,<:Function}
const MapPair = Union{PairWithTarget,PairWithoutTarget}
const PairFunctionTarget = Pair{<:Function,<:TargetName}
const MapPair = Union{PairFunctionTarget,Function}

# utility functions
_extract(p::PairWithTarget) = selector(first(p)), first(last(p)), Symbol(last(last(p)))
_extract(p::PairWithoutTarget) = selector(first(p)), last(p), nothing
_extract(p::PairFunctionTarget) = first(p), Symbol(last(p))
_extract(p::Function) = p, nothing

function Map(pairs::MapPair...)
tuples = map(_extract, pairs)
selectors = [t[1] for t in tuples]
funs = [t[2] for t in tuples]
targets = [t[3] for t in tuples]
Map(selectors, funs, targets)
funs = [t[1] for t in tuples]
targets = [t[2] for t in tuples]
Map(funs, targets)
end

isrevertible(::Type{Map}) = false

_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f)
_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f)
_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner)
# _funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f)
# _funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f)
# _funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner)
_funname(fun) = string(fun)

# Build the default target column name for `fun` applied to the columns `snames`.
# The result is the symbol `<function name>_<name₁>_<name₂>_...`, where the
# function name comes from `_funname(fun)`. Any "#" in that name (anonymous
# functions) is replaced by "f".
function _makename(snames, fun)
    rawname = _funname(fun)
    # anonymous functions
    label = contains(rawname, "#") ? replace(rawname, "#" => "f") : rawname
    suffix = join(snames, "_")
    Symbol(label, :_, suffix)
end

function applyfeat(transform::Map, feat, prep)
cols = Tables.columns(feat)
onames = Tables.columnnames(cols)

selectors = transform.selectors
rows = Tables.rows(feat)
funs = transform.funs
targets = transform.targets

# new names and columns
names = collect(onames)
columns = Any[Tables.getcolumn(cols, nm) for nm in onames]

# mapped columns
mapped = map(selectors, funs, targets) do selector, fun, target
snames = selector(names)
newname = isnothing(target) ? _makename(snames, fun) : target
scolumns = (Tables.getcolumn(cols, nm) for nm in snames)
newcolumn = map(fun, scolumns...)
mapped = map(funs, targets) do fun, target
newname = isnothing(target) ? Symbol(_funname(fun)) : target
newcolumn = map(row->fun(Tables.Row(row)),rows)
newname => newcolumn
end

for (name, column) in mapped
if name ∈ onames
i = findfirst(==(name), onames)
columns[i] = column
else
push!(names, name)
push!(columns, column)
end
end

𝒯 = (; zip(names, columns)...)
newfeat = 𝒯 |> Tables.materializer(feat)
newfeat = merge(Tables.columns(feat),mapped) |> Tables.materializer(feat)
newfeat, nothing
end
end
93 changes: 4 additions & 89 deletions test/transforms/map.jl
Original file line number Diff line number Diff line change
@@ -1,100 +1,15 @@
@testset "Map" begin
@test !isrevertible(Map(:a => sin))

@test !isrevertible(Map(row->row))
a = [4, 7, 8, 5, 8, 1]
b = [1, 9, 1, 7, 9, 4]
c = [2, 8, 6, 3, 2, 2]
d = [7, 5, 9, 5, 3, 4]
t = Table(; a, b, c, d)

T = Map(1 => sin)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_a)
@test n.sin_a == sin.(t.a)

T = Map(:b => cos)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :cos_b)
@test n.cos_b == cos.(t.b)

T = Map("c" => tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :tan_c)
@test n.tan_c == tan.(t.c)

T = Map(:a => sin => :a)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d)
@test n.a == sin.(t.a)

T = Map(:a => sin => "a")
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d)
@test n.a == sin.(t.a)

T = Map([2, 3] => ((b, c) -> 2b + c) => :op1)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test n.op1 == @. 2 * t.b + t.c

T = Map([:a, :c] => ((a, c) -> 2a * 3c) => :op1)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test n.op1 == @. 2 * t.a * 3 * t.c

T = Map(["c", "a"] => ((c, a) -> 3c / a) => :op1, "c" => tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :tan_c)
@test n.op1 == @. 3 * t.c / t.a
@test n.tan_c == tan.(t.c)

T = Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "op1")
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test n.op1 == @. t.a^2 - 2 * t.b + t.c

# generated names
# normal function
T = Map([:c, :d] => hypot)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :hypot_c_d)
@test n.hypot_c_d == hypot.(t.c, t.d)

# anonymous function
f = a -> a^2 + 3
fname = replace(string(f), "#" => "f")
colname = Symbol(fname, :_a)
T = Map(:a => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, colname)
@test Tables.getcolumn(n, colname) == f.(t.a)

# composed function
f = sin ∘ cos
T = Map(:b => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_b)
@test n.sin_cos_b == f.(t.b)

f = sin ∘ cos ∘ tan
T = Map(:c => sin ∘ cos ∘ tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_tan_c)
@test n.sin_cos_tan_c == f.(t.c)

# Base.Fix1
f = Base.Fix1(hypot, 2)
T = Map(:d => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :fix1_hypot_d)
@test n.fix1_hypot_d == f.(t.d)

# Base.Fix2
f = Base.Fix2(hypot, 2)
T = Map(:a => f)
T = Map((row -> row.a) => :e)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :fix2_hypot_a)
@test n.fix2_hypot_a == f.(t.a)
@test Tables.schema(n).names == (:a, :b, :c, :d, :e)
@test n.e == t.a

# error: cannot create Map transform without arguments
@test_throws ArgumentError Map()
Expand Down
Loading