Skip to content

Commit

Permalink
Polishing
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Mar 4, 2024
1 parent e3dc35e commit 60938d0
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 37 deletions.
7 changes: 4 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dbplyr (development version)

* Refined the `select()` inlining criteria to keep computed columns used to
`arrange()` subqueries that are eliminated by a subsequent select (@ejneer,
#1437).

* dbplyr now exports some tools to work with the internal `table_path` class
which is useful for certain backends that need to work with this
data structure (#1300).
Expand Down Expand Up @@ -38,9 +42,6 @@
* The databricks backend now supports creating non-temporary tables too (#1418).

* `x$name` never attempts to evaluate `name` (#1368).
* Refined the `select()` inlining criteria to keep computed columns used to
`arrange()` subqueries that are eliminated by a subsequent select (@ejneer,
#1437).

* `rows_patch(in_place = FALSE)` now works when more than one column should be
patched (@gorcha, #1443).
Expand Down
17 changes: 7 additions & 10 deletions R/verb-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,22 +132,19 @@ add_select <- function(lazy_query, vars) {
}

select_can_be_inlined <- function(lazy_query, vars) {
vars_data <- op_vars(lazy_query)

# all computed columns used for ordering (if any) must be present
computed_flag <- purrr::map_lgl(lazy_query$select$expr, is_quosure)
computed_columns <- lazy_query$select$name[computed_flag]

order_vars <- purrr::map_chr(lazy_query$order_by, as_label)
ordered_present <- all(intersect(computed_columns, order_vars) %in% vars)

# computed columns used for ordering (if any) must also be
# selected to inline the query
all(intersect(computed_columns, order_vars) %in% vars) &&
(is_bijective_projection(vars, vars_data) || !is_true(lazy_query$distinct))
}
# if the projection is distinct, it must be bijective
is_distinct <- is_true(lazy_query$distinct)
is_bijective_projection <- identical(sort(unname(vars)), op_vars(lazy_query))
distinct_is_bijective <- !is_distinct || is_bijective_projection

is_bijective_projection <- function(vars, names_prev) {
vars <- unname(vars)
identical(sort(vars), names_prev)
ordered_present && distinct_is_bijective
}

rename_groups <- function(lazy_query, vars) {
Expand Down
49 changes: 25 additions & 24 deletions tests/testthat/test-verb-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,31 @@ test_that("select after distinct produces subquery", {
expect_true(lq$x$distinct)
})

test_that("select after arrange produces subquery, if needed", {
lf <- lazy_frame(x = 1)

# shouldn't inline
out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x)
# should inline
out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z)

inner_query <- out$lazy_query$x
expect_s3_class(inner_query, "lazy_select_query")
expect_equal(inner_query$order_by, list(quo(x), quo(z)), ignore_formula_env = TRUE)
expect_equal(
op_vars(inner_query),
c("x", "z")
)
expect_equal(op_vars(out$lazy_query), "x")

# order vars in a subquery are dropped
expect_equal(
inner_query[setdiff(names(inner_query), "order_vars")],
out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")]
)
})


test_that("rename/relocate after distinct is inlined #1141", {
lf <- lazy_frame(x = 1, y = 1:2)
expect_snapshot({
Expand Down Expand Up @@ -284,30 +309,6 @@ test_that("where() isn't suppored", {
})
})

test_that("arranged computed columns are not inlined away", {
lf <- lazy_frame(x = 1)

# shouldn't inline
out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x)
# should inline
out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z)

inner_query <- out$lazy_query$x
expect_s3_class(inner_query, "lazy_select_query")
expect_equal(
inner_query$order_by,
list(quo(x), quo(z)),
ignore_formula_env = TRUE
)
expect_equal(op_vars(inner_query), c("x", "z"))
expect_equal(op_vars(out$lazy_query), "x")
expect_equal(
# order vars in a subquery are dropped
inner_query[setdiff(names(inner_query), "order_vars")],
out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")]
)
})

# sql_render --------------------------------------------------------------

test_that("multiple selects are collapsed", {
Expand Down

0 comments on commit 60938d0

Please sign in to comment.