From f1fc948e2cacf7c8b58f071c915b3fa8350b4666 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 30 Jun 2021 06:05:38 -0400 Subject: [PATCH 01/16] :x on lhs --- src/parsing.jl | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/parsing.jl b/src/parsing.jl index e5a4427d..335c0f34 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -245,20 +245,28 @@ function fun_to_vec(ex::Expr; throw(ArgumentError(s)) end - # y = :x - if lhs isa Symbol && rhs isa QuoteNode + # y = ... + if lhs isa Symbol + msg = "Using an un-quoted Symbol on the LHS is deprecated. " * + "Write $(QuoteNode(lhs)) = ... instead." + Base.depwarn(msg, ""; force=true) + lhs = QuoteNode(lhs) + end + + # :y = :x + if lhs isa QuoteNode && rhs isa QuoteNode source = rhs - dest = QuoteNode(lhs) + dest = lhs return quote $source => $dest end end - # y = cols(:x) - if lhs isa Symbol && onearg(rhs, :cols) + # :y = cols(:x) + if lhs isa QuoteNode && onearg(rhs, :cols) source = rhs.args[2] - dest = QuoteNode(lhs) + dest = lhs return quote $source => $dest @@ -284,14 +292,13 @@ function fun_to_vec(ex::Expr; end end - # y = f(:x) - # y = f(cols(:x)) - # y = :x + 1 - # y = cols(:x) + 1 + # :y = f(:x) + # :y = f(cols(:x)) + # :y = :x + 1 + # :y = cols(:x) + 1 source, fun = get_source_fun(rhs; wrap_byrow = wrap_byrow) - if lhs isa Symbol - dest = QuoteNode(lhs) - + if lhs isa QuoteNode + dest = lhs return quote $source => $fun => $dest end From 06c974e9225a7573cd10bea4a871a4df083526e2 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 30 Jun 2021 06:12:20 -0400 Subject: [PATCH 02/16] depwarn fix --- src/parsing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parsing.jl b/src/parsing.jl index 335c0f34..3b817f39 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -249,7 +249,7 @@ function fun_to_vec(ex::Expr; if lhs isa Symbol msg = "Using an un-quoted Symbol on the LHS is deprecated. " * "Write $(QuoteNode(lhs)) = ... instead." 
- Base.depwarn(msg, ""; force=true) + Base.depwarn(msg, "") lhs = QuoteNode(lhs) end From 796d6e34077dbf882e4046a8a639ea376cd42e90 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 30 Jun 2021 09:31:02 -0400 Subject: [PATCH 03/16] update byrow.jl --- test/byrow.jl | 228 +++++++++++++++++++++++++------------------------- 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/test/byrow.jl b/test/byrow.jl index 49ac4616..d48624b8 100644 --- a/test/byrow.jl +++ b/test/byrow.jl @@ -15,43 +15,43 @@ const ≅ = isequal y = [:v, :w, :x, :y, :z], c = [:g, :quote, :body, :transform, missing]) - @test @transform(df, @byrow n = :i + :g) ≅ @transform(df, n = :i + :g) - @test @transform(df, @byrow n = :t * string(:y)) ≅ @transform(df, n = :t .* string.(:y)) - @test @transform(df, @byrow n = :g == 1 ? 100 : 500) ≅ @transform(df, n = ifelse.(:g .== 1, 100, 500)) - @test @transform(df, @byrow n = :g == 1 && :t == "a") ≅ @transform(df, n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @transform(df, @byrow n = first(:g)) ≅ @transform(df, n = first.(:g)) + @test @transform(df, @byrow :n = :i + :g) ≅ @transform(df, :n = :i + :g) + @test @transform(df, @byrow :n = :t * string(:y)) ≅ @transform(df, :n = :t .* string.(:y)) + @test @transform(df, @byrow :n = :g == 1 ? 
100 : 500) ≅ @transform(df, :n = ifelse.(:g .== 1, 100, 500)) + @test @transform(df, @byrow :n = :g == 1 && :t == "a") ≅ @transform(df, :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @transform(df, @byrow :n = first(:g)) ≅ @transform(df, :n = first.(:g)) d = @transform df @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .* :g) - @test d ≅ @transform(df, @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .* :g) + @test d ≅ @transform(df, @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @transform df @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .* :g) d = @transform df @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .* :g) d = @transform df @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = :i * :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .* :g) d = @transform df @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @transform(df, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @transform(df, :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@transform! with @byrow" begin @@ -62,43 +62,43 @@ end y = [:v, :w, :x, :y, :z], c = [:g, :quote, :body, :transform!, missing]) - @test @transform!(copy(df), @byrow n = :i + :g) ≅ @transform!(copy(df), n = :i + :g) - @test @transform!(copy(df), @byrow n = :t * string(:y)) ≅ @transform!(copy(df), n = :t .* string.(:y)) - @test @transform!(copy(df), @byrow n = :g == 1 ? 
100 : 500) ≅ @transform!(copy(df), n = ifelse.(:g .== 1, 100, 500)) - @test @transform!(copy(df), @byrow n = :g == 1 && :t == "a") ≅ @transform!(copy(df), n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @transform!(copy(df), @byrow n = first(:g)) ≅ @transform!(copy(df), n = first.(:g)) + @test @transform!(copy(df), @byrow :n = :i + :g) ≅ @transform!(copy(df), :n = :i + :g) + @test @transform!(copy(df), @byrow :n = :t * string(:y)) ≅ @transform!(copy(df), :n = :t .* string.(:y)) + @test @transform!(copy(df), @byrow :n = :g == 1 ? 100 : 500) ≅ @transform!(copy(df), :n = ifelse.(:g .== 1, 100, 500)) + @test @transform!(copy(df), @byrow :n = :g == 1 && :t == "a") ≅ @transform!(copy(df), :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @transform!(copy(df), @byrow :n = first(:g)) ≅ @transform!(copy(df), :n = first.(:g)) d = @transform! df @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @transform!(copy(df), n1 = :i, n2 = :i .* :g) - @test d ≅ @transform!(copy(df), @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @transform!(copy(df), :n1 = :i, :n2 = :i .* :g) + @test d ≅ @transform!(copy(df), @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @transform! df @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @transform!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @transform!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @transform! df @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @transform!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @transform!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @transform! df @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = :i * :g end - @test d ≅ @transform!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @transform!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @transform! 
df @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @transform!(copy(df), n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @transform!(copy(df), :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@select with @byrow" begin @@ -109,43 +109,43 @@ end y = [:v, :w, :x, :y, :z], c = [:g, :quote, :body, :select, missing]) - @test @select(df, @byrow n = :i + :g) ≅ @select(df, n = :i + :g) - @test @select(df, @byrow n = :t * string(:y)) ≅ @select(df, n = :t .* string.(:y)) - @test @select(df, @byrow n = :g == 1 ? 100 : 500) ≅ @select(df, n = ifelse.(:g .== 1, 100, 500)) - @test @select(df, @byrow n = :g == 1 && :t == "a") ≅ @select(df, n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @select(df, @byrow n = first(:g)) ≅ @select(df, n = first.(:g)) + @test @select(df, @byrow :n = :i + :g) ≅ @select(df, :n = :i + :g) + @test @select(df, @byrow :n = :t * string(:y)) ≅ @select(df, :n = :t .* string.(:y)) + @test @select(df, @byrow :n = :g == 1 ? 100 : 500) ≅ @select(df, :n = ifelse.(:g .== 1, 100, 500)) + @test @select(df, @byrow :n = :g == 1 && :t == "a") ≅ @select(df, :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @select(df, @byrow :n = first(:g)) ≅ @select(df, :n = first.(:g)) d = @select df @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .* :g) - @test d ≅ @select(df, @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .* :g) + @test d ≅ @select(df, @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @select df @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .* :g) d = @select df @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .* :g) d = @select df @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = 
:i * :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .* :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .* :g) d = @select df @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @select(df, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @select(df, :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@select! with @byrow" begin @@ -156,43 +156,43 @@ end y = [:v, :w, :x, :y, :z], c = [:g, :quote, :body, :select!, missing]) - @test @select!(copy(df), @byrow n = :i + :g) ≅ @select!(copy(df), n = :i + :g) - @test @select!(copy(df), @byrow n = :t * string(:y)) ≅ @select!(copy(df), n = :t .* string.(:y)) - @test @select!(copy(df), @byrow n = :g == 1 ? 100 : 500) ≅ @select!(copy(df), n = ifelse.(:g .== 1, 100, 500)) - @test @select!(copy(df), @byrow n = :g == 1 && :t == "a") ≅ @select!(copy(df), n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @select!(copy(df), @byrow n = first(:g)) ≅ @select!(copy(df), n = first.(:g)) + @test @select!(copy(df), @byrow :n = :i + :g) ≅ @select!(copy(df), :n = :i + :g) + @test @select!(copy(df), @byrow :n = :t * string(:y)) ≅ @select!(copy(df), :n = :t .* string.(:y)) + @test @select!(copy(df), @byrow :n = :g == 1 ? 100 : 500) ≅ @select!(copy(df), :n = ifelse.(:g .== 1, 100, 500)) + @test @select!(copy(df), @byrow :n = :g == 1 && :t == "a") ≅ @select!(copy(df), :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @select!(copy(df), @byrow :n = first(:g)) ≅ @select!(copy(df), :n = first.(:g)) d = @select! copy(df) @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .* :g) - @test d ≅ @select!(copy(df), @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .* :g) + @test d ≅ @select!(copy(df), @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @select! 
copy(df) @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @select! copy(df) @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @select! copy(df) @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = :i * :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .* :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .* :g) d = @select! copy(df) @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @select!(copy(df), n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @select!(copy(df), :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@with with @byrow" begin @@ -260,43 +260,43 @@ end gd = groupby(df, :g) - @test @combine(gd, @byrow n = :i + :g) ≅ @combine(gd, n = :i + :g) - @test @combine(gd, @byrow n = :t * string(:y)) ≅ @combine(gd, n = :t .* string.(:y)) - @test @combine(gd, @byrow n = :g == 1 ? 100 : 500) ≅ @combine(gd, n = ifelse.(:g .== 1, 100, 500)) - @test @combine(gd, @byrow n = :g == 1 && :t == "a") ≅ @combine(gd, n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @combine(gd, @byrow n = first(:g)) ≅ @combine(gd, n = first.(:g)) + @test @combine(gd, @byrow :n = :i + :g) ≅ @combine(gd, :n = :i + :g) + @test @combine(gd, @byrow :n = :t * string(:y)) ≅ @combine(gd, :n = :t .* string.(:y)) + @test @combine(gd, @byrow :n = :g == 1 ? 
100 : 500) ≅ @combine(gd, :n = ifelse.(:g .== 1, 100, 500)) + @test @combine(gd, @byrow :n = :g == 1 && :t == "a") ≅ @combine(gd, :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @combine(gd, @byrow :n = first(:g)) ≅ @combine(gd, :n = first.(:g)) d = @combine gd @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @combine(gd, n1 = :i, n2 = :i .* :g) - @test d ≅ @combine(gd, @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @combine(gd, :n1 = :i, :n2 = :i .* :g) + @test d ≅ @combine(gd, @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @combine gd @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @combine(gd, n1 = :i, n2 = :i .* :g) + @test d ≅ @combine(gd, :n1 = :i, :n2 = :i .* :g) d = @combine gd @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @combine(gd, n1 = :i, n2 = :i .* :g) + @test d ≅ @combine(gd, :n1 = :i, :n2 = :i .* :g) d = @combine gd @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = :i * :g end - @test d ≅ @combine(gd, n1 = :i, n2 = :i .* :g) + @test d ≅ @combine(gd, :n1 = :i, :n2 = :i .* :g) d = @combine gd @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @combine(gd, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @combine(gd, :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@by with @byrow" begin @@ -307,44 +307,44 @@ end y = [:v, :w, :x, :y, :z], c = [:g, :quote, :body, :combine, missing]) - @test @by(df, :g, @byrow n = :i + :g) ≅ @by(df, :g, n = :i + :g) - @test @by(df, :g, @byrow n = :t * string(:y)) ≅ @by(df, :g, n = :t .* string.(:y)) - @test @by(df, :g, @byrow n = :g == 1 ? 
100 : 500) ≅ @by(df, :g, n = ifelse.(:g .== 1, 100, 500)) - @test @by(df, :g, @byrow n = :g == 1 && :t == "a") ≅ @by(df, :g, n = map((g, t) -> g == 1 && t == "a", :g, :t)) - @test @by(df, :g, @byrow n = first(:g)) ≅ @by(df, :g, n = first.(:g)) + @test @by(df, :g, @byrow :n = :i + :g) ≅ @by(df, :g, :n = :i + :g) + @test @by(df, :g, @byrow :n = :t * string(:y)) ≅ @by(df, :g, :n = :t .* string.(:y)) + @test @by(df, :g, @byrow :n = :g == 1 ? 100 : 500) ≅ @by(df, :g, :n = ifelse.(:g .== 1, 100, 500)) + @test @by(df, :g, @byrow :n = :g == 1 && :t == "a") ≅ @by(df, :g, :n = map((g, t) -> g == 1 && t == "a", :g, :t)) + @test @by(df, :g, @byrow :n = first(:g)) ≅ @by(df, :g, :n = first.(:g)) d = @by df :g @byrow begin - n1 = :i - n2 = :i * :g + :n1 = :i + :n2 = :i * :g end - @test d ≅ @by(df, :g, n1 = :i, n2 = :i .* :g) - @test d ≅ @by(df, :g, @byrow(n1 = :i), @byrow(n2 = :i * :g)) + @test d ≅ @by(df, :g, :n1 = :i, :n2 = :i .* :g) + @test d ≅ @by(df, :g, @byrow(:n1 = :i), @byrow(:n2 = :i * :g)) d = @by df :g @byrow begin cols(:n1) = :i - n2 = cols(:i) * :g + :n2 = cols(:i) * :g end - @test d ≅ @by(df, :g, n1 = :i, n2 = :i .* :g) + @test d ≅ @by(df, :g, :n1 = :i, :n2 = :i .* :g) d = @by df :g @byrow begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i * :g end - @test d ≅ @by(df, :g, n1 = :i, n2 = :i .* :g) + @test d ≅ @by(df, :g, :n1 = :i, :n2 = :i .* :g) d = @by df :g @byrow begin - n1 = begin + :n1 = begin :i end - n2 = :i * :g + :n2 = :i * :g end - @test d ≅ @by(df, :g, n1 = :i, n2 = :i .* :g) + @test d ≅ @by(df, :g, :n1 = :i, :n2 = :i .* :g) d = @by df :g @byrow begin - n1 = :i * :g - n2 = :i * :g + :n1 = :i * :g + :n2 = :i * :g end - @test d ≅ @by(df, :g, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @by(df, :g, :n1 = :i .* :g, :n2 = :i .* :g) end end \ No newline at end of file From 0d05ef34b2586740ead76626eb3e246602cd58bb Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 1 Jul 2021 05:31:38 -0400 Subject: [PATCH 04/16] update test/chaining.jl --- test/chaining.jl 
| 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/test/chaining.jl b/test/chaining.jl index 2c01b339..ee82280e 100644 --- a/test/chaining.jl +++ b/test/chaining.jl @@ -3,7 +3,8 @@ module TestChaining using Test, Random using DataFrames using DataFramesMeta -using Lazy, Chain +using Lazy: @>, @as +using Chain using Statistics using Random @@ -14,47 +15,47 @@ df = DataFrame(a = rand(1:3, n), x = rand(n)) x = @where(df, :a .> 2) -x = @transform(x, y = 10 * :x) -x = @by(x, :b, meanX = mean(:x), meanY = mean(:y)) +x = @transform(x, :y = 10 * :x) +x = @by(x, :b, :meanX = mean(:x), :meanY = mean(:y)) x = @orderby(x, :b, -:meanX) -x = @select(x, var = :b, :meanX, :meanY) +x = @select(x, :var = :b, :meanX, :meanY) x_as = @as _x_ begin df @where(_x_, :a .> 2) - @transform(_x_, y = 10 * :x) - @by(_x_, :b, meanX = mean(:x), meanY = mean(:y)) + @transform(_x_, :y = 10 * :x) + @by(_x_, :b, :meanX = mean(:x), :meanY = mean(:y)) @orderby(_x_, :b, -:meanX) - @select(_x_, var = :b, :meanX, :meanY) + @select(_x_, :var = :b, :meanX, :meanY) end x_thread = @> begin df @where(:a .> 2) - @transform(y = 10 * :x) - @by(:b, meanX = mean(:x), meanY = mean(:y)) + @transform(:y = 10 * :x) + @by(:b, :meanX = mean(:x), :meanY = mean(:y)) @orderby(:b, -:meanX) - @select(var = :b, :meanX, :meanY) + @select(:var = :b, :meanX, :meanY) end x_chain = @chain df begin @where(:a .> 2) - @transform(y = 10 * :x) - @by(:b, meanX = mean(:x), meanY = mean(:y)) + @transform(:y = 10 * :x) + @by(:b, :meanX = mean(:x), :meanY = mean(:y)) @orderby(:b, -:meanX) - @select(var = :b, :meanX, :meanY) + @select(:var = :b, :meanX, :meanY) end x_chain_2 = @chain df begin @where :a .> 2 - @transform @byrow y = 10 * :x + @transform @byrow :y = 10 * :x @by :b begin - meanX = mean(:x) - meanY = mean(:y) + :meanX = mean(:x) + :meanY = mean(:y) end @orderby (:b) (-:meanX) @select begin - var = :b + :var = :b :meanX :meanY end From a5eb7feb0b8769453c3bdfc9145c274f1fbf89a6 Mon 
Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 1 Jul 2021 06:41:54 -0400 Subject: [PATCH 05/16] test/dataframes.jl --- test/dataframes.jl | 356 ++++++++++++++++++++++----------------------- 1 file changed, 178 insertions(+), 178 deletions(-) diff --git a/test/dataframes.jl b/test/dataframes.jl index 84e3c770..a4e119eb 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -35,35 +35,35 @@ const ≅ = isequal n_sym = :new_column n_space = "new column" - @test @transform(df, n = :i).n == df.i - @test @transform(df, n = :i .+ :g).n == df.i .+ df.g - @test @transform(df, n = :t .* string.(:y)).n == df.t .* string.(df.y) - @test @transform(df, n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) - @test @transform(df, n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) - @test @transform(df, body = :i).body == df.i - @test @transform(df, transform = :i).transform == df.i - - @test @transform(df, n = cols(iq)).n == df.i - @test @transform(df, n = cols(iq) .+ cols(gq)).n == df.i .+ df.g - @test @transform(df, n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) - @test @transform(df, n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) - @test @transform(df, n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) - @test @transform(df, body = cols(iq)).body == df.i - @test @transform(df, transform = cols(iq)).transform == df.i - - @test @transform(df, n = cols(ir)).n == df.i - @test @transform(df, n = cols(ir) .+ cols(gr)).n == df.i .+ df.g - @test @transform(df, n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) - @test @transform(df, n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) - @test @transform(df, n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) - @test @transform(df, body = cols(ir)).body == df.i - @test @transform(df, transform = cols(ir)).transform == df.i - @test @transform(df, n = cols("g") + cols(:i)).n == df.g + df.i - @test @transform(df, n = cols(1) + cols(2)).n == df.g + df.i - - @test @transform(df, n = :i).g 
!== df.g - - newdf = @transform(df, n = :i) + @test @transform(df, :n = :i).n == df.i + @test @transform(df, :n = :i .+ :g).n == df.i .+ df.g + @test @transform(df, :n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @transform(df, :n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @transform(df, :n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @transform(df, :body = :i).body == df.i + @test @transform(df, :transform = :i).transform == df.i + + @test @transform(df, :n = cols(iq)).n == df.i + @test @transform(df, :n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @transform(df, :n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) + @test @transform(df, :n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @transform(df, :n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @transform(df, :body = cols(iq)).body == df.i + @test @transform(df, :transform = cols(iq)).transform == df.i + + @test @transform(df, :n = cols(ir)).n == df.i + @test @transform(df, :n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @transform(df, :n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @transform(df, :n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @transform(df, :n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @transform(df, :body = cols(ir)).body == df.i + @test @transform(df, :transform = cols(ir)).transform == df.i + @test @transform(df, :n = cols("g") + cols(:i)).n == df.g + df.i + @test @transform(df, :n = cols(1) + cols(2)).n == df.g + df.i + + @test @transform(df, :n = :i).g !== df.g + + newdf = @transform(df, :n = :i) @test newdf[:, Not(:n)] ≅ df @test @transform(df, :i) ≅ df @@ -76,9 +76,9 @@ const ≅ = isequal @test @transform(df, cols(n_space) = :i)."new column" == df.i @test @transform(df, cols("new" * "_" * "column") = :i).new_column == df.i - @test @transform(df, n = 1).n == fill(1, nrow(df)) + @test @transform(df, :n = 1).n == fill(1, nrow(df)) - @test 
@transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + @test @transform(df, :n = :i .* :g).n == [1, 2, 3, 8, 10] end @testset "@transform with :block" begin @@ -91,36 +91,36 @@ end ) d = @transform df begin - n1 = :i - n2 = :i .+ :g + :n1 = :i + :n2 = :i .+ :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .+ :g) d = @transform df begin cols(:n1) = :i - n2 = cols(:i) .+ :g + :n2 = cols(:i) .+ :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .+ :g) d = @transform df begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i .+ :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .+ :g) d = @transform df begin - n1 = begin + :n1 = begin :i end - n2 = :i .+ :g + :n2 = :i .+ :g end - @test d ≅ @transform(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform(df, :n1 = :i, :n2 = :i .+ :g) d = @transform df begin - n1 = @. :i * :g - n2 = @. :i * :g + :n1 = @. :i * :g + :n2 = @. :i * :g end - @test d ≅ @transform(df, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @transform(df, :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@transform!" 
begin @@ -150,31 +150,31 @@ end n_sym = :new_column n_space = "new column" - @test @transform!(df, n = :i).n == df.i - @test @transform!(df, n = :i .+ :g).n == df.i .+ df.g - @test @transform!(df, n = :t .* string.(:y)).n == df.t .* string.(df.y) - @test @transform!(df, n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) - @test @transform!(df, n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) - @test @transform!(df, body = :i).body == df.i - @test @transform!(df, transform = :i).transform == df.i - - @test @transform!(df, n = cols(iq)).n == df.i - @test @transform!(df, n = cols(iq) .+ cols(gq)).n == df.i .+ df.g - @test @transform!(df, n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) - @test @transform!(df, n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) - @test @transform!(df, n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) - @test @transform!(df, body = cols(iq)).body == df.i - @test @transform!(df, transform = cols(iq)).transform == df.i - - @test @transform!(df, n = cols(ir)).n == df.i - @test @transform!(df, n = cols(ir) .+ cols(gr)).n == df.i .+ df.g - @test @transform!(df, n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) - @test @transform!(df, n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) - @test @transform!(df, n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) - @test @transform!(df, body = cols(ir)).body == df.i - @test @transform!(df, transform = cols(ir)).transform == df.i - @test @transform!(df, n = cols("g") + cols(:i)).n == df.g + df.i - @test @transform!(df, n = cols(1) + cols(2)).n == df.g + df.i + @test @transform!(df, :n = :i).n == df.i + @test @transform!(df, :n = :i .+ :g).n == df.i .+ df.g + @test @transform!(df, :n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @transform!(df, :n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, :n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, :body = :i).body == df.i + @test @transform!(df, 
:transform = :i).transform == df.i + + @test @transform!(df, :n = cols(iq)).n == df.i + @test @transform!(df, :n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @transform!(df, :n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) + @test @transform!(df, :n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, :n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, :body = cols(iq)).body == df.i + @test @transform!(df, :transform = cols(iq)).transform == df.i + + @test @transform!(df, :n = cols(ir)).n == df.i + @test @transform!(df, :n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @transform!(df, :n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @transform!(df, :n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @transform!(df, :n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @transform!(df, :body = cols(ir)).body == df.i + @test @transform!(df, :transform = cols(ir)).transform == df.i + @test @transform!(df, :n = cols("g") + cols(:i)).n == df.g + df.i + @test @transform!(df, :n = cols(1) + cols(2)).n == df.g + df.i @test @transform!(df, cols("new_column") = :i).new_column == df.i @test @transform!(df, cols(n_str) = :i).new_column == df.i @@ -183,17 +183,17 @@ end @test @transform!(df, cols(n_space) = :i)."new column" == df.i @test @transform!(df, cols("new" * "_" * "column") = :i).new_column == df.i - @test @transform!(df, n = 1).n == fill(1, nrow(df)) - @test @transform!(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + @test @transform!(df, :n = 1).n == fill(1, nrow(df)) + @test @transform!(df, :n = :i .* :g).n == [1, 2, 3, 8, 10] # non-copying - @test @transform!(df, n = :i).g === df.g + @test @transform!(df, :n = :i).g === df.g # mutating df2 = copy(df) @test @transform!(df, :i) === df @test df ≅ df2 @test @transform!(df, :i, :g) ≅ df2 - @transform!(df, n2 = :i) + @transform!(df, :n2 = :i) @test df[:, Not(:n2)] ≅ df2 end @@ -207,36 +207,36 @@ end ) d = 
@transform! df begin - n1 = :i - n2 = :i .+ :g + :n1 = :i + :n2 = :i .+ :g end - @test d ≅ @transform!(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform!(df, :n1 = :i, :n2 = :i .+ :g) d = @transform! df begin cols(:n1) = :i - n2 = cols(:i) .+ :g + :n2 = cols(:i) .+ :g end - @test d ≅ @transform!(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform!(df, :n1 = :i, :n2 = :i .+ :g) d = @transform df begin - n1 = cols(:i) - cols(:n2) = :i .+ :g + :n1 = cols(:i) + :n1 = cols(:n2) = :i .+ :g end - @test d ≅ @transform!(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform!(df, :n1 = :i, :n2 = :i .+ :g) d = @transform! df begin - n1 = begin + :n1 = begin :i end - n2 = :i .+ :g + :n2 = :i .+ :g end - @test d ≅ @transform!(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @transform!(df, :n1 = :i, :n2 = :i .+ :g) d = @transform! df begin - n1 = @. :i * :g - n2 = @. :i * :g + :n1 = @. :i * :g + :n2 = @. :i * :g end - @test d ≅ @transform!(df, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @transform!(df, :n1 = :i .* :g, :n2 = :i .* :g) end # Defined outside of `@testset` due to use of `@eval` @@ -278,8 +278,8 @@ s = [:i, :g] @test_throws LoadError @eval @transform(df, Between(:i, :t)).Between == df.i @test_throws LoadError @eval @transform(df, Not(:i)).Not == df.i @test_throws LoadError @eval @transform(df, Not([:i, :g])) - @test_throws MethodError @eval @transform(df, n = sum(Between(:i, :t))) - @test_throws ArgumentError @eval @transform(df, n = sum(cols(s))) + @test_throws MethodError @eval @transform(df, :n = sum(Between(:i, :t))) + @test_throws ArgumentError @eval @transform(df, :n = sum(cols(s))) @test_throws ArgumentError @eval @transform(df, y = :i + cols(1)) end @@ -315,35 +315,35 @@ end @test @select(df, :i, :g) == df[!, [:i, :g]] df2 = copy(df) df2.n = df2.i .+ df2.g - @test @select(df, :i, :g, n = :i .+ :g) == df2[!, [:i, :g, :n]] - - @test @select(df, n = :i).n == df.i - @test @select(df, n = :i .+ :g).n == df.i .+ df.g - @test @select(df, n = :t .* string.(:y)).n == df.t .* 
string.(df.y) - @test @select(df, n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) - @test @select(df, n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) - @test @select(df, body = :i).body == df.i - @test @select(df, transform = :i).transform == df.i - - @test @select(df, n = cols(iq)).n == df.i - @test @select(df, n = cols(iq) .+ cols(gq)).n == df.i .+ df.g - @test @select(df, n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) - @test @select(df, n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) - @test @select(df, n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) - @test @select(df, body = cols(iq)).body == df.i - @test @select(df, transform = cols(iq)).transform == df.i - - @test @select(df, n = cols(ir)).n == df.i - @test @select(df, n = cols(ir) .+ cols(gr)).n == df.i .+ df.g - @test @select(df, n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) - @test @select(df, n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) - @test @select(df, n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) - @test @select(df, body = cols(ir)).body == df.i - @test @select(df, transform = cols(ir)).transform == df.i - @test @select(df, n = cols("g") + cols(:i)).n == df.g + df.i - @test @select(df, n = cols(1) + cols(2)).n == df.g + df.i - - @test @select(df, n = 1).n == fill(1, nrow(df)) + @test @select(df, :i, :g, :n = :i .+ :g) == df2[!, [:i, :g, :n]] + + @test @select(df, :n = :i).n == df.i + @test @select(df, :n = :i .+ :g).n == df.i .+ df.g + @test @select(df, :n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @select(df, :n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @select(df, :n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @select(df, :body = :i).body == df.i + @test @select(df, :transform = :i).transform == df.i + + @test @select(df, :n = cols(iq)).n == df.i + @test @select(df, :n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @select(df, :n = cols(tq) .* string.(cols(yq))).n == df.t .* 
string.(df.y) + @test @select(df, :n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @select(df, :n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @select(df, :body = cols(iq)).body == df.i + @test @select(df, :transform = cols(iq)).transform == df.i + + @test @select(df, :n = cols(ir)).n == df.i + @test @select(df, :n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @select(df, :n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @select(df, :n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @select(df, :n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @select(df, :body = cols(ir)).body == df.i + @test @select(df, :transform = cols(ir)).transform == df.i + @test @select(df, :n = cols("g") + cols(:i)).n == df.g + df.i + @test @select(df, :n = cols(1) + cols(2)).n == df.g + df.i + + @test @select(df, :n = 1).n == fill(1, nrow(df)) @test @select(df, cols("new_column") = :i).new_column == df.i @test @select(df, cols(n_str) = :i).new_column == df.i @@ -352,7 +352,7 @@ end @test @select(df, cols(n_space) = :i)."new column" == df.i @test @select(df, cols("new" * "_" * "column") = :i).new_column == df.i - @test @transform(df, n = :i .* :g).n == [1, 2, 3, 8, 10] + @test @transform(df, :n = :i .* :g).n == [1, 2, 3, 8, 10] end @testset "select with :block" begin @@ -365,36 +365,36 @@ end ) d = @select df begin - n1 = :i - n2 = :i .+ :g + :n1 = :i + :n2 = :i .+ :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .+ :g) d = @select df begin cols(:n1) = :i - n2 = cols(:i) .+ :g + :n2 = cols(:i) .+ :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .+ :g) d = @select df begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i .+ :g end - @test d ≅ @select(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .+ :g) d = @select df begin - n1 = begin + :n1 = begin :i end - n2 = :i .+ :g + :n2 = :i .+ 
:g end - @test d ≅ @select(df, n1 = :i, n2 = :i .+ :g) + @test d ≅ @select(df, :n1 = :i, :n2 = :i .+ :g) d = @select df begin - n1 = @. :i * :g - n2 = @. :i * :g + :n1 = @. :i * :g + :n2 = @. :i * :g end - @test d ≅ @select(df, n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @select(df, :n1 = :i .* :g, :n2 = :i .* :g) end @testset "@select!" begin @@ -428,38 +428,38 @@ end df2 = copy(df) df2.n = df2.i .+ df2.g - @test @select!(copy(df), :i, :g, n = :i .+ :g) == df2[!, [:i, :g, :n]] + @test @select!(copy(df), :i, :g, :n = :i .+ :g) == df2[!, [:i, :g, :n]] @test @select!(copy(df), :i, :g) == df2[!, [:i, :g]] @test @select!(copy(df), :i) == df2[!, [:i]] - @test @select!(copy(df), n = :i .+ :g).n == df.i .+ df.g - @test @select!(copy(df), n = :i).n == df.i - @test @select!(copy(df), n = :t .* string.(:y)).n == df.t .* string.(df.y) - @test @select!(copy(df), n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) - @test @select!(copy(df), n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) - @test @select!(copy(df), body = :i).body == df.i - @test @select!(copy(df), transform = :i).transform == df.i - - @test @select!(copy(df), n = cols(iq)).n == df.i - @test @select!(copy(df), n = cols(iq) .+ cols(gq)).n == df.i .+ df.g - @test @select!(copy(df), n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) - @test @select!(copy(df), n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) - @test @select!(copy(df), n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) - @test @select!(copy(df), body = cols(iq)).body == df.i - @test @select!(copy(df), transform = cols(iq)).transform == df.i - - @test @select!(copy(df), n = cols(ir)).n == df.i - @test @select!(copy(df), n = cols(ir) .+ cols(gr)).n == df.i .+ df.g - @test @select!(copy(df), n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) - @test @select!(copy(df), n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) - @test @select!(copy(df), n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) - @test 
@select!(copy(df), body = cols(ir)).body == df.i - @test @select!(copy(df), transform = cols(ir)).transform == df.i - @test @select!(copy(df), n = cols("g") + cols(:i)).n == df.g + df.i - @test @select!(copy(df), n = cols(1) + cols(2)).n == df.g + df.i - - - @test @select!(copy(df), n = 1).n == fill(1, nrow(df)) + @test @select!(copy(df), :n = :i .+ :g).n == df.i .+ df.g + @test @select!(copy(df), :n = :i).n == df.i + @test @select!(copy(df), :n = :t .* string.(:y)).n == df.t .* string.(df.y) + @test @select!(copy(df), :n = Symbol.(:y, ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), :n = Symbol.(:y, ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), :body = :i).body == df.i + @test @select!(copy(df), :transform = :i).transform == df.i + + @test @select!(copy(df), :n = cols(iq)).n == df.i + @test @select!(copy(df), :n = cols(iq) .+ cols(gq)).n == df.i .+ df.g + @test @select!(copy(df), :n = cols(tq) .* string.(cols(yq))).n == df.t .* string.(df.y) + @test @select!(copy(df), :n = Symbol.(cols(yq), ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), :n = Symbol.(cols(yq), ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), :body = cols(iq)).body == df.i + @test @select!(copy(df), :transform = cols(iq)).transform == df.i + + @test @select!(copy(df), :n = cols(ir)).n == df.i + @test @select!(copy(df), :n = cols(ir) .+ cols(gr)).n == df.i .+ df.g + @test @select!(copy(df), :n = cols(tr) .* string.(cols(yr))).n == df.t .* string.(df.y) + @test @select!(copy(df), :n = Symbol.(cols(yr), ^(:t))).n == Symbol.(df.y, :t) + @test @select!(copy(df), :n = Symbol.(cols(yr), ^(:body))).n == Symbol.(df.y, :body) + @test @select!(copy(df), :body = cols(ir)).body == df.i + @test @select!(copy(df), :transform = cols(ir)).transform == df.i + @test @select!(copy(df), :n = cols("g") + cols(:i)).n == df.g + df.i + @test @select!(copy(df), :n = cols(1) + cols(2)).n == df.g + df.i + + + @test @select!(copy(df), :n = 1).n == fill(1, nrow(df)) @test 
@select!(copy(df), cols("new_column") = :i).new_column == df.i @test @select!(copy(df), cols(n_str) = :i).new_column == df.i @@ -490,36 +490,36 @@ end ) d = @select! copy(df) begin - n1 = :i - n2 = :i .+ :g + :n1 = :i + :n2 = :i .+ :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .+ :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .+ :g) d = @select! copy(df) begin cols(:n1) = :i - n2 = cols(:i) .+ :g + :n2 = cols(:i) .+ :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .+ :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .+ :g) d = @select! copy(df) begin - n1 = cols(:i) + :n1 = cols(:i) cols(:n2) = :i .+ :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .+ :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .+ :g) d = @select! copy(df) begin - n1 = begin + :n1 = begin :i end - n2 = :i .+ :g + :n2 = :i .+ :g end - @test d ≅ @select!(copy(df), n1 = :i, n2 = :i .+ :g) + @test d ≅ @select!(copy(df), :n1 = :i, :n2 = :i .+ :g) d = @select! copy(df) begin - n1 = @. :i * :g - n2 = @. :i * :g + :n1 = @. :i * :g + :n2 = @. 
:i * :g end - @test d ≅ @select!(copy(df), n1 = :i .* :g, n2 = :i .* :g) + @test d ≅ @select!(copy(df), :n1 = :i .* :g, :n2 = :i .* :g) end # Defined outside of `@testset` due to use of `@eval` @@ -552,14 +552,14 @@ cr = "c" @test_throws LoadError @eval @select(df, Between(:i, :t)).Between == df.i @test_throws LoadError @eval @select(df, Not(:i)).Not == df.i @test_throws LoadError @eval @select(df, Not([:i, :g])) - @test_throws MethodError @eval @select(df, n = sum(Between(:i, :t))) - @test_throws ArgumentError @eval @select(df, n = sum(cols(s))) + @test_throws MethodError @eval @select(df, :n = sum(Between(:i, :t))) + @test_throws ArgumentError @eval @select(df, :n = sum(cols(s))) @test_throws ArgumentError @eval @select(df, y = :i + cols(1)) end @testset "Keyword arguments failure" begin - @test_throws LoadError @eval @transform(df; n = :i) - @test_throws LoadError @eval @select(df; n = :i) + @test_throws LoadError @eval @transform(df; :n = :i) + @test_throws LoadError @eval @select(df; :n = :i) end @testset "with" begin From ee1f6c7802e4d6dce05e2f9b87367f250c4b8e61 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 1 Jul 2021 09:24:37 -0400 Subject: [PATCH 06/16] update timings tests --- test/data.table.timings.jl | 14 +++++++------- test/dataframes.jl | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/data.table.timings.jl b/test/data.table.timings.jl index 427452d6..688ad592 100644 --- a/test/data.table.timings.jl +++ b/test/data.table.timings.jl @@ -51,12 +51,12 @@ DMA = DataFrame( ); function dt_timings(D) - @time @by(D, :id1, sv =sum(:v1)); - @time @by(D, :id1, sv =sum(:v1)); - @time @by(D, [:id1, :id2], sv =sum(:v1)); - @time @by(D, [:id1, :id2], sv =sum(:v1)); - @time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3)); - @time @by(D, :id3, sv = sum(:v1), mv3 = mean(:v3)); + @time @by(D, :id1, :sv =sum(:v1)); + @time @by(D, :id1, :sv =sum(:v1)); + @time @by(D, [:id1, :id2], :sv =sum(:v1)); + @time @by(D, [:id1, :id2], :sv 
=sum(:v1)); + @time @by(D, :id3, :sv = sum(:v1), :mv3 = mean(:v3)); + @time @by(D, :id3, :sv = sum(:v1), :mv3 = mean(:v3)); @time aggregate(D[!, [4;7:9]], :id4, mean); @time aggregate(D[!, [4;7:9]], :id4, mean); @time aggregate(D[!, [6;7:9]], :id6, sum); @@ -68,4 +68,4 @@ dt_timings(DA) dt_timings(DCA) dt_timings(DMA) -@profile @by(DA, :id1, sv =sum(:v1)); +@profile @by(DA, :id1, :sv =sum(:v1)); diff --git a/test/dataframes.jl b/test/dataframes.jl index a4e119eb..16104a8f 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -220,7 +220,7 @@ end d = @transform df begin :n1 = cols(:i) - :n1 = cols(:n2) = :i .+ :g + cols(:n2) = :i .+ :g end @test d ≅ @transform!(df, :n1 = :i, :n2 = :i .+ :g) From 1ee5636955117ee71b4e931e0cd4bd5522d75b1e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 1 Jul 2021 09:28:08 -0400 Subject: [PATCH 07/16] update test/eachrow --- test/eachrow.jl | 8 +++---- test/function_compilation.jl | 46 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/test/eachrow.jl b/test/eachrow.jl index 5ee89ca9..2053aa0c 100644 --- a/test/eachrow.jl +++ b/test/eachrow.jl @@ -22,8 +22,8 @@ y = 0 df = DataFrame(A = 1:3, B = [2, 1, 2]) df2 = @eachrow df begin - @newcol colX::Array{Float64} - @newcol colY::Array{Float64} + @newcol :colX::Array{Float64} + @newcol :colY::Array{Float64} :colX = :B == 2 ? pi * :A : :B if :A > 1 :colY = :A * :B @@ -138,8 +138,8 @@ y = 0 df = DataFrame(A = 1:3, B = [2, 1, 2]) df2 = @eachrow! df begin - @newcol colX::Array{Float64} - @newcol colY::Array{Float64} + @newcol :colX::Array{Float64} + @newcol :colY::Array{Float64} :colX = :B == 2 ? 
pi * :A : :B if :A > 1 :colY = :A * :B diff --git a/test/function_compilation.jl b/test/function_compilation.jl index 392c9b45..34ebb66a 100644 --- a/test/function_compilation.jl +++ b/test/function_compilation.jl @@ -14,15 +14,15 @@ using DataFramesMeta testdotfun(x, y) = x * y testnt(x) = (c = x,) - @test @select(df, c = :a + :b) == DataFrame(c = [3]) + @test @select(df, :c = :a + :b) == DataFrame(c = [3]) - fasttime = @timed @select(df, c = :a + :b) + fasttime = @timed @select(df, :c = :a + :b) slowtime = @timed select(df, [:a, :b] => ((a, b) -> a + b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = begin :a + :b end) == DataFrame(c = [3]) + @test @select(df, :c = begin :a + :b end) == DataFrame(c = [3]) - fasttime = @timed @select(df, c = begin :a + :b end) + fasttime = @timed @select(df, :c = begin :a + :b end) slowtime = @timed select(df, [:a, :b] => ((a, b) -> a + b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -32,9 +32,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> a + b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = :a .+ :b) == DataFrame(c = [3]) + @test @select(df, :c = :a .+ :b) == DataFrame(c = [3]) - fasttime = @timed @select(df, c = :a .+ :b) + fasttime = @timed @select(df, :c = :a .+ :b) slowtime = @timed select(df, [:a, :b] => ((a, b) -> a .+ b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -44,9 +44,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> a .+ b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = cols(:a) + cols(:b)) == DataFrame(c = [3]) + @test @select(df, :c = cols(:a) + cols(:b)) == DataFrame(c = [3]) - fasttime = @timed @select(df, c = cols(:a) + cols(:b)) + fasttime = @timed @select(df, :c = cols(:a) + cols(:b)) slowtime = @timed select(df, [:a, :b] => ((a, b) -> a + b) => :c) (slowtime[2] > fasttime[2]) || 
@warn("Slow compilation") @@ -56,9 +56,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> a + b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = cols(:a) .+ cols(:b)) == DataFrame(c = [3]) + @test @select(df, :c = cols(:a) .+ cols(:b)) == DataFrame(c = [3]) - fasttime = @timed @select(df, c = cols(:a) .+ cols(:b)) + fasttime = @timed @select(df, :c = cols(:a) .+ cols(:b)) slowtime = @timed select(df, [:a, :b] => ((a, b) -> a .+ b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -68,9 +68,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> a .+ b) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = :a) == DataFrame(c = [1]) + @test @select(df, :c = :a) == DataFrame(c = [1]) - fasttime = @timed @select(df, c = :a) + fasttime = @timed @select(df, :c = :a) slowtime = @timed select(df, [:a] => (a -> identity(a)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -80,9 +80,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a] => (a -> identity(a)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = cols(:a)) == DataFrame(c = [1]) + @test @select(df, :c = cols(:a)) == DataFrame(c = [1]) - fasttime = @timed @select(df, c = cols(:a)) + fasttime = @timed @select(df, :c = cols(:a)) slowtime = @timed select(df, [:a] => (a -> identity(a)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -104,9 +104,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a] => (a -> identity(a)) => :a) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = testfun(:a, :b)) == DataFrame(c = [2]) + @test @select(df, :c = testfun(:a, :b)) == DataFrame(c = [2]) - fasttime = @timed @select(df, c = testfun(:a, :b)) + fasttime = @timed @select(df, :c = testfun(:a, :b)) slowtime = @timed select(df, [:a, :b] => ((a, b) -> testfun(a, b)) => :c) (slowtime[2] > 
fasttime[2]) || @warn("Slow compilation") @@ -116,9 +116,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> testfun(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = testfun(cols("a"), cols("b"))) == DataFrame(c = [2]) + @test @select(df, :c = testfun(cols("a"), cols("b"))) == DataFrame(c = [2]) - fasttime = @timed @select(df, c = testfun(cols("a"), cols("b"))) + fasttime = @timed @select(df, :c = testfun(cols("a"), cols("b"))) slowtime = @timed select(df, [:a, :b] => ((a, b) -> testfun(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -128,9 +128,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> testfun(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = testdotfun.(:a, :b)) == DataFrame(c = [2]) + @test @select(df, :c = testdotfun.(:a, :b)) == DataFrame(c = [2]) - fasttime = @timed @select(df, c = testdotfun.(:a, :b)) + fasttime = @timed @select(df, :c = testdotfun.(:a, :b)) slowtime = @timed select(df, [:a, :b] => ((a, b) -> testdotfun.(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -140,9 +140,9 @@ using DataFramesMeta slowtime = @timed select(df, [:a, :b] => ((a, b) -> testdotfun.(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") - @test @select(df, c = testdotfun.(cols("a"), cols("b"))) == DataFrame(c = [2]) + @test @select(df, :c = testdotfun.(cols("a"), cols("b"))) == DataFrame(c = [2]) - fasttime = @timed @select(df, c = testdotfun.(cols("a"), cols("b"))) + fasttime = @timed @select(df, :c = testdotfun.(cols("a"), cols("b"))) slowtime = @timed select(df, [:a, :b] => ((a, b) -> testdotfun.(a, b)) => :c) (slowtime[2] > fasttime[2]) || @warn("Slow compilation") @@ -154,7 +154,7 @@ using DataFramesMeta gd = groupby(df, :a) - @test @combine(gd, testnt(:b)) == DataFrame(a = [1], c = [2]) + @test @combine(gd, testnt(:b)) == DataFrame(a = [1], c = [2]) 
fasttime = @timed @combine(gd, testnt(:b)) slowtime = @timed combine(gd, :b => (b -> testnt(b)) => AsTable) From 150564e23aa24c6ec277bc3d71381838a096bdc2 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 1 Jul 2021 09:56:37 -0400 Subject: [PATCH 08/16] update test/grouping --- test/grouping.jl | 187 +++++++++++++++++++++++------------------------ 1 file changed, 93 insertions(+), 94 deletions(-) diff --git a/test/grouping.jl b/test/grouping.jl index c603c2a5..a07e33e6 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -42,41 +42,41 @@ g = groupby(d, :x, sort=true) n_sym = :new_column n_space = "new column" - @test @combine(gd, n = mean(:i)).n == [2.0, 4.5] - @test @combine(gd, n = mean(:i) + mean(:g)).n == [3.0, 6.5] - @test @combine(gd, n = first(:t .* string.(:y))).n == ["av", "cy"] - @test @combine(gd, n = first(Symbol.(:y, ^(:t)))).n == [:vt, :yt] - @test @combine(gd, n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] - @test @combine(gd, body = :i).body == df.i - @test @combine(gd, transform = :i).transform == df.i + @test @combine(gd, :n = mean(:i)).n == [2.0, 4.5] + @test @combine(gd, :n = mean(:i) + mean(:g)).n == [3.0, 6.5] + @test @combine(gd, :n = first(:t .* string.(:y))).n == ["av", "cy"] + @test @combine(gd, :n = first(Symbol.(:y, ^(:t)))).n == [:vt, :yt] + @test @combine(gd, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] + @test @combine(gd, :body = :i).body == df.i + @test @combine(gd, :transform = :i).transform == df.i @test @combine(gd, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4] - @test @combine(gd, n = mean(cols(iq))).n == [2.0, 4.5] - @test @combine(gd, n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5] - @test @combine(gd, n = first(cols(tq) .* string.(cols(yq)))).n == ["av", "cy"] - @test @combine(gd, n = first(Symbol.(cols(yq), ^(:t)))).n == [:vt, :yt] - @test @combine(gd, n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody] + @test @combine(gd, :n = mean(cols(iq))).n == [2.0, 4.5] + @test 
@combine(gd, :n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5] + @test @combine(gd, :n = first(cols(tq) .* string.(cols(yq)))).n == ["av", "cy"] + @test @combine(gd, :n = first(Symbol.(cols(yq), ^(:t)))).n == [:vt, :yt] + @test @combine(gd, :n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody] @test @combine(gd, cols(:n) = mean(cols(:i))).n == [2.0, 4.5] - @test @combine(gd, body = cols(iq)).body == df.i - @test @combine(gd, transform = cols(iq)).transform == df.i + @test @combine(gd, :body = cols(iq)).body == df.i + @test @combine(gd, :transform = cols(iq)).transform == df.i @test @combine(gd, (n1 = [first(cols(iq))], n2 = [first(cols(yq))])).n1 == [1, 4] - @test @combine(gd, n = mean(cols(ir))).n == [2.0, 4.5] - @test @combine(gd, n = mean(cols(ir)) + mean(cols(gr))).n == [3.0, 6.5] - @test @combine(gd, n = first(cols(tr) .* string.(cols(yr)))).n == ["av", "cy"] - @test @combine(gd, n = first(Symbol.(cols(yr), ^(:t)))).n == [:vt, :yt] - @test @combine(gd, n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody] - @test @combine(gd, body = cols(ir)).body == df.i - @test @combine(gd, transform = cols(ir)).transform == df.i + @test @combine(gd, :n = mean(cols(ir))).n == [2.0, 4.5] + @test @combine(gd, :n = mean(cols(ir)) + mean(cols(gr))).n == [3.0, 6.5] + @test @combine(gd, :n = first(cols(tr) .* string.(cols(yr)))).n == ["av", "cy"] + @test @combine(gd, :n = first(Symbol.(cols(yr), ^(:t)))).n == [:vt, :yt] + @test @combine(gd, :n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody] + @test @combine(gd, :body = cols(ir)).body == df.i + @test @combine(gd, :transform = cols(ir)).transform == df.i @test @combine(gd, (n1 = [first(cols(ir))], n2 = [first(cols(yr))])).n1 == [1, 4] - @test @combine(gd, n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5] - @test @combine(gd, n = mean(cols(2)) + first(cols(1))).n == [3.0, 6.5] + @test @combine(gd, :n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5] + @test @combine(gd, :n = 
mean(cols(2)) + first(cols(1))).n == [3.0, 6.5] @test @combine(gd, :i) == select(df, :g, :i) @test @combine(gd, :i, :g) ≅ select(df, :g, :i) - @test @combine(gd, :i, n = 1).n == fill(1, nrow(df)) + @test @combine(gd, :i, :n = 1).n == fill(1, nrow(df)) @test @combine(gd, cols("new_column") = 2).new_column == [2, 2] @test @combine(gd, cols(n_str) = 2).new_column == [2, 2] @@ -97,30 +97,30 @@ end g = groupby(df, :g) d = @combine g begin - im = mean(:i) - tf = first(:t) + :im = mean(:i) + :tf = first(:t) end - @test d ≅ @combine(g, im = mean(:i), tf = first(:t)) + @test d ≅ @combine(g, :im = mean(:i), :tf = first(:t)) d = @combine g begin cols(:im) = mean(:i) - tf = first(:t) + :tf = first(:t) end - @test d ≅ @combine(g, im = mean(:i), tf = first(:t)) + @test d ≅ @combine(g, :im = mean(:i), :tf = first(:t)) d = @combine g begin - im = mean(:i) - tf = first(cols(:t)) + :im = mean(:i) + :tf = first(cols(:t)) end - @test d ≅ @combine(g, im = mean(:i), tf = first(:t)) + @test d ≅ @combine(g, :im = mean(:i), :tf = first(:t)) d = @combine g begin - im = begin + :im = begin mean(:i) end - tf = first(:t) + :tf = first(:t) end - @test d ≅ @combine(g, im = mean(:i), tf = first(:t)) + @test d ≅ @combine(g, :im = mean(:i), :tf = first(:t)) end # Defined outside of `@testset` due to use of `@eval` @@ -151,9 +151,9 @@ gd = groupby(df, :g) newvar = :n @testset "Limits of @combine" begin - @test_throws MethodError @eval @combine(gd, n = sum(Between(:i, :t))) - @test_throws LoadError @eval @combine(gd; n = mean(:i)) - @test_throws ArgumentError @eval @combine(gd, n = mean(:i) + mean(cols(1))) + @test_throws MethodError @eval @combine(gd, :n = sum(Between(:i, :t))) + @test_throws LoadError @eval @combine(gd; :n = mean(:i)) + @test_throws ArgumentError @eval @combine(gd, :n = mean(:i) + mean(cols(1))) end @testset "@by" begin @@ -185,41 +185,41 @@ end n_sym = :new_column n_space = "new column" - @test @by(df, :g, n = mean(:i)).n == [2.0, 4.5] - @test @by(df, :g, n = mean(:i) + 
mean(:g)).n == [3.0, 6.5] - @test @by(df, :g, n = first(:t .* string.(:y))).n == ["av", "cy"] - @test @by(df, :g, n = first(Symbol.(:y, ^(:t)))).n == [:vt, :yt] - @test @by(df, :g, n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] - @test @by(df, :g, body = :i).body == df.i - @test @by(df, :g, transform = :i).transform == df.i + @test @by(df, :g, :n = mean(:i)).n == [2.0, 4.5] + @test @by(df, :g, :n = mean(:i) + mean(:g)).n == [3.0, 6.5] + @test @by(df, :g, :n = first(:t .* string.(:y))).n == ["av", "cy"] + @test @by(df, :g, :n = first(Symbol.(:y, ^(:t)))).n == [:vt, :yt] + @test @by(df, :g, :n = first(Symbol.(:y, ^(:body)))).n == [:vbody, :ybody] + @test @by(df, :g, :body = :i).body == df.i + @test @by(df, :g, :transform = :i).transform == df.i @test @by(df, :g, (n1 = [first(:i)], n2 = [first(:y)])).n1 == [1, 4] - @test @by(df, :g, n = mean(cols(iq))).n == [2.0, 4.5] - @test @by(df, :g, n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5] - @test @by(df, :g, n = first(cols(tq) .* string.(cols(yq)))).n == ["av", "cy"] - @test @by(df, :g, n = first(Symbol.(cols(yq), ^(:t)))).n == [:vt, :yt] - @test @by(df, :g, n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody] + @test @by(df, :g, :n = mean(cols(iq))).n == [2.0, 4.5] + @test @by(df, :g, :n = mean(cols(iq)) + mean(cols(gq))).n == [3.0, 6.5] + @test @by(df, :g, :n = first(cols(tq) .* string.(cols(yq)))).n == ["av", "cy"] + @test @by(df, :g, :n = first(Symbol.(cols(yq), ^(:t)))).n == [:vt, :yt] + @test @by(df, :g, :n = first(Symbol.(cols(yq), ^(:body)))).n == [:vbody, :ybody] @test @by(df, :g, cols(:n) = mean(cols(:i))).n == [2.0, 4.5] - @test @by(df, :g, body = cols(iq)).body == df.i - @test @by(df, :g, transform = cols(iq)).transform == df.i + @test @by(df, :g, :body = cols(iq)).body == df.i + @test @by(df, :g, :transform = cols(iq)).transform == df.i @test @by(df, :g, (n1 = [first(cols(iq))], n2 = [first(cols(yq))])).n1 == [1, 4] - @test @by(df, "g", n = mean(cols(ir))).n == [2.0, 4.5] - @test 
@by(df, "g", n = mean(cols(ir)) + mean(cols(gr))).n == [3.0, 6.5] - @test @by(df, "g", n = first(cols(tr) .* string.(cols(yr)))).n == ["av", "cy"] - @test @by(df, "g", n = first(Symbol.(cols(yr), ^(:t)))).n == [:vt, :yt] - @test @by(df, "g", n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody] - @test @by(df, "g", body = cols(ir)).body == df.i - @test @by(df, "g", transform = cols(ir)).transform == df.i + @test @by(df, "g", :n = mean(cols(ir))).n == [2.0, 4.5] + @test @by(df, "g", :n = mean(cols(ir)) + mean(cols(gr))).n == [3.0, 6.5] + @test @by(df, "g", :n = first(cols(tr) .* string.(cols(yr)))).n == ["av", "cy"] + @test @by(df, "g", :n = first(Symbol.(cols(yr), ^(:t)))).n == [:vt, :yt] + @test @by(df, "g", :n = first(Symbol.(cols(yr), ^(:body)))).n == [:vbody, :ybody] + @test @by(df, "g", :body = cols(ir)).body == df.i + @test @by(df, "g", :transform = cols(ir)).transform == df.i @test @by(df, "g", (n1 = [first(cols(ir))], n2 = [first(cols(yr))])).n1 == [1, 4] - @test @by(df, "g", n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5] - @test @by(df, "g", n = mean(cols(2)) + first(cols(1))).n == [3.0, 6.5] + @test @by(df, "g", :n = mean(cols("i")) + 0 * first(cols(:g))).n == [2.0, 4.5] + @test @by(df, "g", :n = mean(cols(2)) + first(cols(1))).n == [3.0, 6.5] @test @by(df, :g, :i) == select(df, :g, :i) @test @by(df, :g, :i, :g) ≅ select(df, :g, :i) - @test @by(df, :g, :i, n = 1).n == fill(1, nrow(df)) + @test @by(df, :g, :i, :n = 1).n == fill(1, nrow(df)) @test @by(df, :g, cols("new_column") = 2).new_column == [2, 2] @test @by(df, :g, cols(n_str) = 2).new_column == [2, 2] @@ -240,30 +240,30 @@ end g = groupby(df, :g) d = @by df :g begin - im = mean(:i) - tf = first(:t) + :im = mean(:i) + :tf = first(:t) end - @test d ≅ @by(df, :g, im = mean(:i), tf = first(:t)) + @test d ≅ @by(df, :g, :im = mean(:i), :tf = first(:t)) d = @by df :g begin cols(:im) = mean(:i) - tf = first(:t) + :tf = first(:t) end - @test d ≅ @by(df, :g, im = mean(:i), tf = first(:t)) + 
@test d ≅ @by(df, :g, :im = mean(:i), :tf = first(:t)) d = @by df :g begin - im = mean(:i) - tf = first(cols(:t)) + :im = mean(:i) + :tf = first(cols(:t)) end - @test d ≅ @by(df, :g, im = mean(:i), tf = first(:t)) + @test d ≅ @by(df, :g, :im = mean(:i), :tf = first(:t)) d = @by df :g begin - im = begin + :im = begin mean(:i) end - tf = first(:t) + :tf = first(:t) end - @test d ≅ @by(df, :g, im = mean(:i), tf = first(:t)) + @test d ≅ @by(df, :g, :im = mean(:i), :tf = first(:t)) end # Defined outside of `@testset` due to use of `@eval` @@ -294,9 +294,8 @@ gd = groupby(df, :g) newvar = :n @testset "limits of @by" begin - @test_throws MethodError @eval @by(df, :g, n = sum(Between(:i, :t))) - @test_throws MethodError @eval @by(df, :g; n = mean(:i)) - @test_throws ArgumentError @eval @by(df, :g, n = mean(:i) + mean(cols(1))) + @test_throws MethodError @eval @by(df, :g, :n = sum(Between(:i, :t))) + @test_throws ArgumentError @eval @by(df, :g, :n = mean(:i) + mean(cols(1))) end @testset "@transform with grouped data frame" begin @@ -312,49 +311,49 @@ end ## Scalar output # Type promotion Int -> Float - t = @transform(g, t = :b[1]).t - s = @select(g, t = :b[1]).t + t = @transform(g, :t = :b[1]).t + s = @select(g, :t = :b[1]).t @test t ≅ s ≅ [1.0, 1.0, 1.0, missing, missing, 6.0, 6.0, 1.0] && t isa Vector{Union{Float64, Missing}} # Type promotion Number -> Any - t = @transform(g, t = isequal(:b[1], 1) ? :b[1] : "a").t - s = @select(g, t = isequal(:b[1], 1) ? :b[1] : "a").t + t = @transform(g, :t = isequal(:b[1], 1) ? :b[1] : "a").t + s = @select(g, :t = isequal(:b[1], 1) ? 
:b[1] : "a").t @test t ≅ s ≅ [1, 1, 1, "a", "a", "a", "a", 1] && t isa Vector{Any} ## Vector output # Normal use - t = @transform(g, t = :b .- mean(:b)).t - s = @select(g, t = :b .- mean(:b)).t + t = @transform(g, :t = :b .- mean(:b)).t + s = @select(g, :t = :b .- mean(:b)).t @test t ≅ s ≅ [-1.5, -0.5, 0.5, missing, missing, 0.5, -0.5, 1.5] && t isa Vector{Union{Float64, Missing}} # Type promotion - t = @transform(g, t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b))).t - s = @transform(g, t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b))).t + t = @transform(g, :t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b))).t + s = @transform(g, :t = isequal(:b[1], 1) ? fill(1, length(:b)) : fill(2.0, length(:b))).t @test t ≅ s ≅ [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 1.0] && t isa Vector{Float64} # Vectors whose eltypes promote to any - t = @transform(g, t = isequal(:b[1], 1) ? :b : fill("a", length(:b))).t - s = @transform(g, t = isequal(:b[1], 1) ? :b : fill("a", length(:b))).t + t = @transform(g, :t = isequal(:b[1], 1) ? :b : fill("a", length(:b))).t + s = @transform(g, :t = isequal(:b[1], 1) ? 
:b : fill("a", length(:b))).t @test s ≅ t ≅ [1, 2, 3, "a", "a", "a", "a", 4] && t isa Vector{Any} # Categorical Array # Scalar - t = @transform(g, t = :c[1]).t - s = @transform(g, t = :c[1]).t + t = @transform(g, :t = :c[1]).t + s = @transform(g, :t = :c[1]).t @test t ≅ s ≅ [1, 1, 1, 1, 1, 3, 3, 1] && t isa CategoricalVector{Int} # Vector - t = @transform(g, t = :c).t - s = @transform(g, t = :c).t + t = @transform(g, :t = :c).t + s = @transform(g, :t = :c).t @test t ≅ s ≅ [1, 2, 3, 1, 2, 3, 1, 2] && t isa CategoricalVector{Int} - @test @transform(g, t = :c).a ≅ d.a - @test @select(g, :a, t = :c).a ≅ d.a + @test @transform(g, :t = :c).a ≅ d.a + @test @select(g, :a, :t = :c).a ≅ d.a - @test @transform(g, @byrow t = :a ^ 2).t ≅ d.a .^ 2 - @test @select(g, :a, @byrow t = :a ^ 2).t ≅ d.a .^ 2 + @test @transform(g, @byrow :t = :a ^ 2).t ≅ d.a .^ 2 + @test @select(g, :a, @byrow :t = :a ^ 2).t ≅ d.a .^ 2 end end # module From 7819602b3873bb4788cc5970e7cb3d1189f2f6c5 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 2 Jul 2021 05:50:10 -0400 Subject: [PATCH 09/16] update test/linqmacro --- test/linqmacro.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/linqmacro.jl b/test/linqmacro.jl index 084f0e8d..bd8b4b5b 100644 --- a/test/linqmacro.jl +++ b/test/linqmacro.jl @@ -11,41 +11,41 @@ df = DataFrame(a = repeat(1:5, outer = 20), x = repeat(1:20, inner = 5)) x = @where(df, :a .> 2, :b .!= "c") -x = @transform(x, y = 10 * :x) +x = @transform(x, :y = 10 * :x) x = @orderby(x, :x .- mean(:x)) -x = @by(x, :b, meanX = mean(:x), meanY = mean(:y)) +x = @by(x, :b, :meanX = mean(:x), :meanY = mean(:y)) x = @select(x, var = :b, :meanX, :meanY) -x1 = @linq transform(where(df, :a .> 2, :b .!= "c"), y = 10 * :x) -x1 = @linq by(orderby(x1, :x .- mean(:x)), :b, meanX = mean(:x), meanY = mean(:y)) -x1 = @linq select(x1, var = :b, :meanX, :meanY) +x1 = @linq transform(where(df, :a .> 2, :b .!= "c"), :y = 10 * :x) +x1 = @linq 
by(orderby(x1, :x .- mean(:x)), :b, :meanX = mean(:x), :meanY = mean(:y)) +x1 = @linq select(x1, :var = :b, :meanX, :meanY) ## chaining xlinq = @linq df |> where(:a .> 2, :b .!= "c") |> - transform(y = 10 * :x) |> + transform(:y = 10 * :x) |> orderby(:x .- mean(:x)) |> - by(:b, meanX = mean(:x), meanY = mean(:y)) |> - select(var = :b, :meanX, :meanY) + by(:b, :meanX = mean(:x), :meanY = mean(:y)) |> + select(:var = :b, :meanX, :meanY) @test x == x1 @test x == xlinq xlinq2 = @linq df |> where(:a .> 2, :b .!= "c") |> - transform(y = 10 * :x) |> + transform(:y = 10 * :x) |> orderby(:x .- mean(:x)) |> groupby(:b) |> - combine(meanX = mean(:x), meanY = mean(:y)) + combine(:meanX = mean(:x), :meanY = mean(:y)) @test xlinq2[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]] xlinq3 = @linq df |> where(:a .> 2, :b .!= "c") |> - transform(y = 10 * :x) |> + transform(:y = 10 * :x) |> orderby(:x .- mean(:x)) |> DataFrames.groupby(:b) |> - combine(meanX = mean(:x), meanY = mean(:y)) + combine(:meanX = mean(:x), :meanY = mean(:y)) @test xlinq3[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]] @@ -68,7 +68,7 @@ xlinq3 = @linq df |> transform(cols(y_str) = 10 * cols(x_sym)) |> orderby(cols(x_sym) .- mean(cols(x_sym))) |> groupby(b_str) |> - combine(cols("meanX") = mean(:x), meanY = mean(:y)) + combine(cols("meanX") = mean(:x), :meanY = mean(:y)) @test isequal(xlinq3, DataFrame(b = "d", meanX = 40.0, meanY = 400.0)) end From 192ec85d6c85ffd4712b8f6c88ea3e09dd1a21d9 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 2 Jul 2021 05:51:11 -0400 Subject: [PATCH 10/16] update test/performance --- test/performance.jl | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/test/performance.jl b/test/performance.jl index 0d07a9df..c4229394 100644 --- a/test/performance.jl +++ b/test/performance.jl @@ -74,13 +74,13 @@ end function DataFramesMeta_timings(df, gd) df_res = @transform(df, - res1 = :v1 .- mean(:v1), - res2 = 
demean(:v2), - res3 = :v1 + :v2, - res4 = string(:id), - res5 = complicated_vec(:v1, :v2, :v3), - res6a = @.(:v1 + :v2 + :v3 * :v3 + :v1), - res6b = begin + :res1 = :v1 .- mean(:v1), + :res2 = demean(:v2), + :res3 = :v1 + :v2, + :res4 = string(:id), + :res5 = complicated_vec(:v1, :v2, :v3), + :res6a = @.(:v1 + :v2 + :v3 * :v3 + :v1), + :res6b = begin d = Vector{Float64}(undef, length(:v1)) for i in eachindex(d) d[i] = :v1[i] + :v2[i] * :v3[i] * :v3[i] + :v1[i] @@ -90,11 +90,11 @@ function DataFramesMeta_timings(df, gd) ) gd_res = @combine(gd, - res7 = mean(:v1), - res8 = (t -> mean(t))(:v2), - res9 = std(:v1) + std(:v2), - res10 = complicated_scalar(:v1, :v2, :v3), - res11 = first(:v1) + mean(:v2) * std(:v1) + last(:v3) + :res7 = mean(:v1), + :res8 = (t -> mean(t))(:v2), + :res9 = std(:v1) + std(:v2), + :res10 = complicated_scalar(:v1, :v2, :v3), + :res11 = first(:v1) + mean(:v2) * std(:v1) + last(:v3) ) return(df_res, gd_res) @@ -109,20 +109,20 @@ N = 10 K = 10 df2 = DataFrame( - id = rand([Symbol("id", i) for i=1:K], N), - v1 = rand(1:5, N), - v2 = rand(1:5, N), - v3 = rand(N) + id = rand([Symbol("id", i) for i=1:K], N), + v1 = rand(1:5, N), + v2 = rand(1:5, N), + v3 = rand(N) ); println("DataFramesMeta raw timing") -@time @select(df2, res1 = :v1 .- mean(:v1)); -@time @select(df2, res2 = demean(:v2)); -@time @select(df2, res3 = :v1 + :v2); -@time @select(df2, res4 = string(:id)); -@time @select(df2, res5 = complicated_vec(:v1, :v2, :v3)); -@time @select(df2, res6a = @.(:v1 + :v2 + :v3 * :v3 + :v1)); -@time @select(df2, res6b = begin +@time @select(df2, :res1 = :v1 .- mean(:v1)); +@time @select(df2, :res2 = demean(:v2)); +@time @select(df2, :res3 = :v1 + :v2); +@time @select(df2, :res4 = string(:id)); +@time @select(df2, :res5 = complicated_vec(:v1, :v2, :v3)); +@time @select(df2, :res6a = @.(:v1 + :v2 + :v3 * :v3 + :v1)); +@time @select(df2, :res6b = begin d = Vector{Float64}(undef, length(:v1)) for i in eachindex(d) d[i] = :v1[i] + :v2[i] * :v3[i] * :v3[i] + 
:v1[i] From eb71a8299abf1257aa2b8e553b00e0e65d83db58 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 2 Jul 2021 05:56:41 -0400 Subject: [PATCH 11/16] update @col docstring --- src/macros.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index 6326c287..f14e6e7d 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -7,7 +7,7 @@ """ @col(kw) -`@col` transforms an expression of the form `z = :x + :y` into it's equivalent in +`@col` transforms an expression of the form `:z = :x + :y` into its equivalent in DataFrames's `source => fun => destination` syntax. ### Details @@ -24,7 +24,7 @@ a `source => fun => destination` pair that is suitable for the `select`, `transf ### Examples ```julia -julia> @col z = :x + :y +julia> @col :z = :x + :y [:x, :y] => (##595 => :z) ``` @@ -41,7 +41,7 @@ julia> df = DataFrame(x = [1, 2], y = [3, 4]); julia> import DataFramesMeta: @col; -julia> DataFrames.transform(df, @col z = :x .* :y) +julia> DataFrames.transform(df, @col :z = :x .* :y) 2×3 DataFrame │ Row │ x │ y │ z │ │ │ Int64 │ Int64 │ Int64 │ From ad78e8964c5b3fba37e00bf7e588e785c6a34991 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 7 Jul 2021 09:57:28 -0400 Subject: [PATCH 12/16] update src/macros.jl --- src/macros.jl | 73 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/macros.jl b/src/macros.jl index f14e6e7d..947f53ed 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -69,7 +69,7 @@ Broadcast operations within DataFramesMeta.jl macros. to indicate that the anonymous function created by DataFramesMeta to represent an operation should be applied "by-row". 
-If an expression starts with `@byrow`, either of the form `@byrow = f(:x)` +If an expression starts with `@byrow`, either of the form `@byrow :y = f(:x)` in transformations or `@byrow f(:x)` in `@orderby`, `@where`, and `@with`, then the anonymous function created by DataFramesMeta is wrapped in the `DataFrames.ByRow` function wrapper, which broadcasts the function so that it run on each row. @@ -79,7 +79,7 @@ then the anonymous function created by DataFramesMeta is wrapped in the ```julia julia> df = DataFrame(a = [1, 2, 3, 4], b = [5, 6, 7, 8]); -julia> @transform(df, @byrow c = :a * :b) +julia> @transform(df, @byrow :c = :a * :b) 4×3 DataFrame Row │ a b c │ Int64 Int64 Int64 @@ -931,21 +931,21 @@ Add additional columns or keys based on keyword arguments. Inputs to `@transform` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate -transformation, (`y = f(:x)`), or as a series of +transformation, (`:y = f(:x)`), or as a series of keyword arguments. For example, the following are equivalent: ```julia @transform df begin - a = :x - b = :y + :a = :x + :b = :y end ``` and ``` -@transform(df, a = :x, b = :y) +@transform(df, :a = :x, :b = :y) ``` `@transform` uses the syntax `@byrow` to wrap transformations in @@ -953,7 +953,7 @@ the `ByRow` function wrapper from DataFrames, apply a function row-wise, similar to broadcasting. For example, the call ``` -@transform(df, @byrow y = :x == 1 ? true : false) +@transform(df, @byrow :y = :x == 1 ? 
true : false) ``` becomes @@ -978,8 +978,8 @@ julia> using DataFramesMeta julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); julia> @transform df begin - a = 2 * :A - x = :A .+ :B + :a = 2 * :A + :x = :A .+ :B end 3×4 DataFrame Row │ A B a x @@ -989,7 +989,7 @@ julia> @transform df begin 2 │ 2 1 4 3 3 │ 3 2 6 5 -julia> @transform df @byrow z = :A * :B +julia> @transform df @byrow :z = :A * :B 3×3 DataFrame Row │ A B z │ Int64 Int64 Int64 @@ -999,8 +999,8 @@ julia> @transform df @byrow z = :A * :B 3 │ 3 2 6 julia> @transform df @byrow begin - x = :A * :B - y = :A == 1 ? 100 : 200 + :x = :A * :B + :y = :A == 1 ? 100 : 200 end 3×4 DataFrame @@ -1050,21 +1050,21 @@ No copies of existing columns are made. Inputs to `@transform!` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate -transformation, (`y = f(:x)`), or as a series of +transformation, (`:y = f(:x)`), or as a series of keyword arguments. For example, the following are equivalent: ```julia @transform! df begin - a = :x - b = :y + :a = :x + :b = :y end ``` and ``` -@transform!(df, a = :x, b = :y) +@transform!(df, :a = :x, :b = :y) ``` `@transform!` uses the syntax `@byrow` to wrap transform!ations in @@ -1072,7 +1072,7 @@ the `ByRow` function wrapper from DataFrames, apply a function row-wise, similar to broadcasting. For example, the call ``` -@transform!(df, @byrow y = :x == 1 ? true : false) +@transform!(df, @byrow :y = :x == 1 ? true : false) ``` becomes @@ -1096,7 +1096,7 @@ julia> using DataFramesMeta julia> df = DataFrame(A = 1:3, B = [2, 1, 2]); -julia> df2 = @transform!(df, a = 2 * :A, x = :A .+ :B) +julia> df2 = @transform!(df, :a = 2 * :A, :x = :A .+ :B) 3×4 DataFrame │ Row │ A │ B │ a │ x │ │ │ Int64 │ Int64 │ Int64 │ Int64 │ @@ -1152,7 +1152,7 @@ equivalent: ```julia @select df begin :x - y = :a .+ :b + :y = :a .+ :b end ``` @@ -1167,7 +1167,7 @@ the `ByRow` function wrapper from DataFrames, apply a function row-wise, similar to broadcasting. 
For example, the call ``` -@select(df, @byrow y = :x == 1 ? true : false) +@select(df, @byrow :y = :x == 1 ? true : false) ``` becomes @@ -1207,7 +1207,7 @@ julia> @select(df, :c, :a) julia> @select df begin :c - x = :b + :c + :x = :b + :c end 8×2 DataFrame Row │ c x @@ -1268,7 +1268,7 @@ the `ByRow` function wrapper from DataFrames, apply a function row-wise, similar to broadcasting. For example, the call ``` -@select!(df, @byrow y = :x == 1 ? true : false) +@select!(df, @byrow :y = :x == 1 ? true : false) ``` becomes @@ -1313,7 +1313,7 @@ julia> df = DataFrame(a = repeat(1:4, outer = 2), b = repeat(2:-1:1, outer = 4), julia> df2 = @select! df begin :c - x = :b + :c + :x = :b + :c end 8×2 DataFrame Row │ c x @@ -1382,15 +1382,15 @@ For example, the following are equivalent: ``` @combine df begin - mx = mean(:x) - sx = std(:x) + :mx = mean(:x) + :sx = std(:x) end ``` and ``` -@combine(df, mx = mean(:x), sx = std(:x)) +@combine(df, :mx = mean(:x), :sx = std(:x)) ``` ### Examples @@ -1404,7 +1404,7 @@ julia> d = DataFrame( julia> g = groupby(d, :x); -julia> @combine(g, nsum = sum(:n)) +julia> @combine(g, :nsum = sum(:n)) 3×2 DataFrame Row │ x nsum │ Int64 Int64 @@ -1414,8 +1414,8 @@ julia> @combine(g, nsum = sum(:n)) 3 │ 3 27 julia> @combine g begin - x2 = 2 * :x - nsum = sum(:n) + :x2 = 2 * :x + :nsum = sum(:n) end 20×3 DataFrame Row │ x x2 nsum @@ -1465,7 +1465,6 @@ end ############################################################################## function by_helper(x, what, args...) - # Only allow one argument when returning a Table object # Only allow one argument when returning a Table object exprs, outer_flags = create_args_vector(args...) 
fe = first(exprs) @@ -1507,8 +1506,8 @@ For example, the following are equivalent: ``` @by df :g begin - mx = mean(:x) - sx = std(:x) + :mx = mean(:x) + :sx = std(:x) end ``` @@ -1528,7 +1527,7 @@ julia> df = DataFrame( b = repeat(2:-1:1, outer = 4), c = 1:8); -julia> @by(df, :a, d = sum(:c)) +julia> @by(df, :a, :d = sum(:c)) 4×2 DataFrame Row │ a d │ Int64 Int64 @@ -1539,7 +1538,7 @@ julia> @by(df, :a, d = sum(:c)) 4 │ 4 12 julia> @by df :a begin - d = 2 * :c + :d = 2 * :c end 8×2 DataFrame Row │ a d @@ -1554,7 +1553,7 @@ julia> @by df :a begin 7 │ 4 8 8 │ 4 16 -julia> @by(df, :a, c_sum = sum(:c), c_mean = mean(:c)) +julia> @by(df, :a, :c_sum = sum(:c), :c_mean = mean(:c)) 4×3 DataFrame Row │ a c_sum c_mean │ Int64 Int64 Float64 @@ -1565,8 +1564,8 @@ julia> @by(df, :a, c_sum = sum(:c), c_mean = mean(:c)) 4 │ 4 12 6.0 julia> @by df :a begin - c = :c - c_mean = mean(:c) + :c = :c + :c_mean = mean(:c) end 8×3 DataFrame Row │ a c c_mean From 9210b057fb0b3160176d56cb4b57176090fa21bc Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 7 Jul 2021 09:58:41 -0400 Subject: [PATCH 13/16] update src/eachrow.jl --- src/eachrow.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eachrow.jl b/src/eachrow.jl index fbe6ee09..496edb82 100644 --- a/src/eachrow.jl +++ b/src/eachrow.jl @@ -143,7 +143,7 @@ julia> @eachrow df begin 3 │ 0 2 julia> df2 = @eachrow df begin - @newcol colX::Vector{Float64} + @newcol :colX::Vector{Float64} :colX = :B == 2 ? pi * :A : :B end 3×3 DataFrame @@ -157,7 +157,7 @@ julia> df2 = @eachrow df begin julia> varA = :A; varB = :B; julia> df2 = @eachrow df begin - @newcol colX::Vector{Float64} + @newcol :colX::Vector{Float64} :colX = cols(varB) == 2 ? 
pi * cols(varA) : cols(varB) end 3×3 DataFrame @@ -181,7 +181,7 @@ julia> x 3 julia> @eachrow df begin - @newcol m::Vector{Float64} + @newcol :m::Vector{Float64} :m = mean(_DF[:, row]) end 3×3 DataFrame @@ -290,7 +290,7 @@ julia> df2 julia> df2 = copy(df); julia> @eachrow! df2 begin - @newcol colX::Vector{Float64} + @newcol :colX::Vector{Float64} :colX = :B == 2 ? pi * :A : :B end 3×3 DataFrame @@ -306,7 +306,7 @@ julia> varA = :A; varB = :B; julia> df2 = copy(df); julia> @eachrow! df2 begin - @newcol colX::Vector{Float64} + @newcol :colX::Vector{Float64} :colX = cols(varB) == 2 ? pi * cols(varA) : cols(varB) end 3×3 DataFrame @@ -330,7 +330,7 @@ julia> x 3 julia> @eachrow! df begin - @newcol m::Vector{Float64} + @newcol :m::Vector{Float64} :m = mean(_DF[:, row]) end 3×3 DataFrame From e8a21ef394e3890afd0cf501589284f8fac246bd Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 7 Jul 2021 10:00:43 -0400 Subject: [PATCH 14/16] update src/linq.jl --- src/linqmacro.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/linqmacro.jl b/src/linqmacro.jl index c8e3a736..2d38b106 100644 --- a/src/linqmacro.jl +++ b/src/linqmacro.jl @@ -9,6 +9,10 @@ export @linq, linq """ @linq df ... + +!!! note + `@linq` is deprecated. Use `@chain` instead. See `? @chain` for details. + General macro that creates a mini DSL for chaining and macro calls. 
### Details @@ -34,11 +38,11 @@ julia> df = DataFrame( b = repeat(2:-1:1, outer = 4), x = 1:8); -julia> x1 = @linq transform(where(df, :a .> 2, :b .!= "c"), y = 10 .* :x); +julia> x1 = @linq transform(where(df, :a .> 2, :b .!= "c"), :y = 10 .* :x); -julia> x1 = @linq by(x1, :b, meanX = mean(:x), meanY = mean(:y)); +julia> x1 = @linq by(x1, :b, :meanX = mean(:x), :meanY = mean(:y)); -julia> @linq select(orderby(x1, :b, -:meanX), var = :b, :meanX, :meanY) +julia> @linq select(orderby(x1, :b, -:meanX), :var = :b, :meanX, :meanY) 2×3 DataFrame │ Row │ var │ meanX │ meanY │ │ │ Int64 │ Float64 │ Float64 │ @@ -49,7 +53,7 @@ julia> @linq select(orderby(x1, :b, -:meanX), var = :b, :meanX, :meanY) julia> @linq df |> transform(y = 10 .* :x) |> where(:a .> 2) |> - by(:b, meanX = mean(:x), meanY = mean(:y)) |> + by(:b, :meanX = mean(:x), :meanY = mean(:y)) |> orderby(:meanX) |> select(:meanX, :meanY, var = :b) 2×3 DataFrame From 012fcfdadd5f664ad11b502025d2412bf3fe90b2 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 7 Jul 2021 10:05:58 -0400 Subject: [PATCH 15/16] update index.md --- docs/src/index.md | 46 ++++++++++++++++++++++------------------------ src/linqmacro.jl | 1 - 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 6ff7edb1..947f6c6a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -31,7 +31,7 @@ df = DataFrame(a = [1, 2], b = [3, 4]); transform(df, [:a, :b] => ((a, b) -> a .* b .+ first(a) .- sum(b)) => :c); # With DataFramesMeta -@transform(df, c = :a .* :b .+ first(:a) .- sum(:b)) +@transform(df, :c = :a .* :b .+ first(:a) .- sum(:b)) ``` To reference columns inside DataFramesMeta macros, use `Symbol`s. For example, use `:x` @@ -66,11 +66,11 @@ data frame. 
df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); gd = groupby(df, :x); @select(df, :x, :y) -@select(df, x2 = 2 * :x, :y) -@select(gd, x2 = 2 .* :y .* first(:y)) +@select(df, :x2 = 2 * :x, :y) +@select(gd, :x2 = 2 .* :y .* first(:y)) @select!(df, :x, :y) -@select!(df, x = 2 * :x, :y) -@select!(gd, y = 2 .* :y .* first(:y)) +@select!(df, :x = 2 * :x, :y) +@select!(gd, :y = 2 .* :y .* first(:y)) ``` ## `@transform` and `@transform!` @@ -89,11 +89,11 @@ data frame. df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); gd = groupby(df, :x); @transform(df, :x, :y) -@transform(df, x2 = 2 * :x, :y) -@transform(gd, x2 = 2 .* :y .* first(:y)) +@transform(df, :x2 = 2 * :x, :y) +@transform(gd, :x2 = 2 .* :y .* first(:y)) @transform!(df, :x, :y) -@transform!(df, x = 2 * :x, :y) -@transform!(gd, y = 2 .* :y .* first(:y)) +@transform!(df, :x = 2 * :x, :y) +@transform!(gd, :y = 2 .* :y .* first(:y)) ``` ## `@subset` and `@subset!` @@ -124,8 +124,8 @@ Examples: ```julia df = DataFrame(x = [1, 1, 2, 2], y = [1, 2, 101, 102]); gd = groupby(df, :x); -@combine(gd, x2 = sum(:y)) -@combine(gd, x2 = :y .- sum(:y)) +@combine(gd, :x2 = sum(:y)) +@combine(gd, :x2 = :y .- sum(:y)) @combine(gd, (n1 = sum(:y), n2 = first(:y))) ``` @@ -255,14 +255,14 @@ end `@eachrow` also supports special syntax for allocating new columns to make `@eachrow` more useful for data transformations. The syntax `@newcol -x::Vector{Int}` allocates a new column `:x` with an `Vector` container with eltype +:x::Vector{Int}` allocates a new column `:x` with a `Vector` container with eltype `Int`. Here is an example where two new columns are added: ```julia df = DataFrame(A = 1:3, B = [2, 1, 2]) df2 = @eachrow df begin - @newcol colX::Vector{Float64} - @newcol colY::Vector{Union{Int,Missing}} + @newcol :colX::Vector{Float64} + @newcol :colY::Vector{Union{Int,Missing}} :colX = :B == 2 ?
pi * :A : :B if :A > 1 :colY = :A * :B @@ -289,7 +289,7 @@ Thought of as a macro `@byrow` accepts a single argument and creates an anonymous function wrapped in `ByRow`. For example, ```julia -@transform(df, @byrow y = :x == 1 ? true : false) +@transform(df, @byrow :y = :x == 1 ? true : false) ``` is equivalent to @@ -327,7 +327,7 @@ julia> @where df @byrow begin however, like with `ByRow` in DataFrames.jl, when `@byrow` is used, functions do not take into account the grouping, so for example the result of `@transform(df, @byrow y = f(:x))` and -`@transform(groupby(df, :g), @byrow y = f(:x))` is the same. +`@transform(groupby(df, :g), @byrow :y = f(:x))` is the same. ## Working with column names programmatically with `cols` @@ -469,11 +469,11 @@ df = DataFrame(a = repeat(1:5, outer = 20), x = repeat(1:20, inner = 5)) x_thread = @chain df begin - @transform(y = 10 * :x) + @transform(:y = 10 * :x) @where(:a .> 2) - @by(:b, meanX = mean(:x), meanY = mean(:y)) + @by(:b, :meanX = mean(:x), :meanY = mean(:y)) @orderby(:meanX) - @select(:meanX, :meanY, var = :b) + @select(:meanX, :meanY, :var = :b) end ``` @@ -487,7 +487,7 @@ expression. # Get the sum of all columns after # a few transformations @chain df begin - @transform(y = 10 .* :x) + @transform(:y = 10 .* :x) @where(:a .> 2) @select(:a, :y, :x) reduce(+, eachcol(_)) @@ -499,14 +499,12 @@ in the middle of a `@chain` block. ```julia @chain df begin - @transform y = 10 .* :x + @transform :y = 10 .* :x @aside y_mean = mean(_.y) # From Chain.jl, not DataFramesMeta.jl - @select y_standardize = :y .- y_mean + @select :y_standardize = :y .- y_mean end ``` - - ```@contents Pages = ["api/api.md"] Depth = 3 diff --git a/src/linqmacro.jl b/src/linqmacro.jl index 2d38b106..d1d0c69e 100644 --- a/src/linqmacro.jl +++ b/src/linqmacro.jl @@ -9,7 +9,6 @@ export @linq, linq """ @linq df ... - !!! note `@linq` is deprecated. Use `@chain` instead. See `? @chain` for details. 
From 227a261795f909d553868240287b096cfb70f503 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Wed, 7 Jul 2021 10:22:42 -0400 Subject: [PATCH 16/16] tests pass --- test/dataframes.jl | 4 ++-- test/deprecated.jl | 42 ++++++++++++++++++++++++++++++++++++ test/function_compilation.jl | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/test/dataframes.jl b/test/dataframes.jl index 16104a8f..64360d5e 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -280,7 +280,7 @@ s = [:i, :g] @test_throws LoadError @eval @transform(df, Not([:i, :g])) @test_throws MethodError @eval @transform(df, :n = sum(Between(:i, :t))) @test_throws ArgumentError @eval @transform(df, :n = sum(cols(s))) - @test_throws ArgumentError @eval @transform(df, y = :i + cols(1)) + @test_throws ArgumentError @eval @transform(df, :y = :i + cols(1)) end @testset "@select" begin @@ -554,7 +554,7 @@ cr = "c" @test_throws LoadError @eval @select(df, Not([:i, :g])) @test_throws MethodError @eval @select(df, :n = sum(Between(:i, :t))) @test_throws ArgumentError @eval @select(df, :n = sum(cols(s))) - @test_throws ArgumentError @eval @select(df, y = :i + cols(1)) + @test_throws ArgumentError @eval @select(df, :y = :i + cols(1)) end @testset "Keyword arguments failure" begin diff --git a/test/deprecated.jl b/test/deprecated.jl index 0412d57a..b76c8cbc 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -159,4 +159,46 @@ end @test @where(gd, :c .== :g) ≅ df[[], :] end +@testset "Unquoted symbols on LHS" begin + df = DataFrame( + g = [1, 1, 1, 2, 2], + i = 1:5, + t = ["a", "b", "c", "c", "e"], + y = [:v, :w, :x, :y, :z], + c = [:g, :quote, :body, :transform, missing] + ) + + gd = groupby(df, :g) + + newdf = @transform df :n = :i + + @test (@transform df n = :i) ≅ newdf + @test (@transform(df, n = identity(:i))) ≅ newdf + @test (@transform df @byrow n = :i) ≅ newdf + d = @transform df begin + n = identity(:i) + end + @test d ≅ newdf + + d = @eachrow df begin + @newcol n::Vector{Int} 
+ :n = :i + end + @test d ≅ newdf + + newdf = @select df :n = :i + + @test (@select df n = :i) ≅ newdf + @test (@select(df, n = identity(:i))) ≅ newdf + d = @select df begin + n = identity(:i) + end + @test (@select df @byrow n = :i) ≅ newdf + @test d ≅ newdf + + newdf = @combine gd :n = first(:i) + @test (@combine gd n = first(:i)) ≅ newdf + @test (@combine(gd, n = first(:i))) ≅ newdf +end + end # module \ No newline at end of file diff --git a/test/function_compilation.jl b/test/function_compilation.jl index 34ebb66a..4c411f61 100644 --- a/test/function_compilation.jl +++ b/test/function_compilation.jl @@ -154,7 +154,7 @@ using DataFramesMeta gd = groupby(df, :a) - @test @combine(gd, testnt(:b)) == DataFrame(a = [1], :c = [2]) + @test @combine(gd, testnt(:b)) == DataFrame(a = [1], c = [2]) fasttime = @timed @combine(gd, testnt(:b)) slowtime = @timed combine(gd, :b => (b -> testnt(b)) => AsTable)