From 4005748f9c87ed7f65e14fcae39c59b9935a0ceb Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 28 Oct 2024 13:57:10 +0100 Subject: [PATCH] Check for duplicate names when supplying character literals (#367) Closes #346 --- NEWS.md | 2 ++ R/eval-walk.R | 55 +++++++++++++----------------- R/vars.R | 1 - tests/testthat/_snaps/eval-walk.md | 31 +++++++++++++++++ tests/testthat/test-eval-walk.R | 19 +++++++++++ 5 files changed, 76 insertions(+), 32 deletions(-) diff --git a/NEWS.md b/NEWS.md index 66b6cc4f..16fc26d1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * `matches()` now uses `perl = TRUE` by default. This makes it more consitent with regular expressions in stringr (#330). +* `eval_select()` now fails when data has duplicate names and a character vector is provided as input (#346). + * `eval_select()` and `eval_relocate()` gain a new `error_arg` argument that can be specified to throw a better error message when `allow_empty = FALSE` or `allow_rename = FALSE` (@olivroy, #327). * `vars_pull()` now also warns when using `.data` (#335). Please diff --git a/R/eval-walk.R b/R/eval-walk.R index d7a8eb31..bed5ff1a 100644 --- a/R/eval-walk.R +++ b/R/eval-walk.R @@ -18,36 +18,12 @@ vars_select_eval <- function(vars, return(pos) } - uniquely_named <- uniquely_named %||% is.data.frame(data) - - if (!is_symbolic(wrapped)) { - pos <- as_indices_sel_impl( - wrapped, - vars = vars, - strict = strict, - data = data, - allow_predicates = allow_predicates, - call = error_call - ) - pos <- loc_validate(pos, vars, call = error_call) - pos <- ensure_named( - pos, - vars, - uniquely_named = uniquely_named, - allow_rename = allow_rename, - allow_empty = allow_empty, - error_arg = error_arg, - call = error_call - ) - return(pos) - } - vars <- peek_vars() - vars_split <- vctrs::vec_split(seq_along(vars), vars) # Mark data duplicates so we can fail instead of disambiguating them # when renaming + uniquely_named <- uniquely_named %||% is.data.frame(data) if (uniquely_named) { vars_split$val <- map(vars_split$val, mark_data_dups) } @@ -177,7 +153,7 @@ walk_data_tree <- function(expr, data_mask, context_mask, colon = FALSE) { out <- switch( expr_kind(expr, context_mask, error_call), - literal = expr, + literal = eval_literal(expr, data_mask, context_mask), symbol = eval_sym(expr, data_mask, context_mask), `(` = walk_data_tree(expr[[2]], data_mask, context_mask, colon = colon), `!` = eval_bang(expr, data_mask, context_mask), @@ -301,11 +277,6 @@ chr_as_locations <- function(x, vars, call = caller_env(), arg = NULL) { set_names(out, names(x)) } -as_indices <- function(x, vars, strict = TRUE, call) { - inds <- with_subscript_errors(as_indices_impl(x, vars, strict, call)) - vctrs::vec_as_location(inds, length(vars), vars, convert_values = NULL) -} - expr_kind <- function(expr, context_mask, error_call) { switch( typeof(expr), @@ -347,6 +318,28 @@ call_kind <- function(expr, context_mask, error_call) { ) } +eval_literal <- function(expr, data_mask, context_mask) { + internal <- data_mask$.__tidyselect__.$internal + + if (internal$uniquely_named && is_character(expr)) { + # Since tidyselect allows repairing data frames with duplicate names by + # renaming or selecting positions, we can't check the input for duplicates. + # Instead, we check the output. But in case of character literals, checking + # the output doesn't work because we use `vctrs::vec_as_location()` to + # transform the strings to locations and it ignores duplicate names. So we + # instead check the input here, since it's not possible to repair duplicate + # names by matching them by name. This avoids an inconsistency with the + # symbolic path (#346). + vctrs::vec_as_names( + internal$vars, + repair = "check_unique", + call = internal$error_call + ) + } + + expr +} + eval_colon <- function(expr, data_mask, context_mask) { if (is_negated_colon(expr)) { # Compatibility syntax for `-1:-2`. We interpret it as `-(1:2)`. diff --git a/R/vars.R b/R/vars.R index d7851931..3dbc65b2 100644 --- a/R/vars.R +++ b/R/vars.R @@ -1,4 +1,3 @@ - peeker <- function(what) { function(..., fn = NULL) { if (!missing(...)) { diff --git a/tests/testthat/_snaps/eval-walk.md b/tests/testthat/_snaps/eval-walk.md index 41ca59b4..397f4f43 100644 --- a/tests/testthat/_snaps/eval-walk.md +++ b/tests/testthat/_snaps/eval-walk.md @@ -169,3 +169,34 @@ Error: ! Must select at least one item. +# duplicate names are checked when literals are supplied (#346) + + Code + select_loc(df, "x") + Condition + Error in `select_loc()`: + ! Names must be unique. + x These names are duplicated: + * "x" at locations 1 and 2. + Code + select_loc(df, c("x")) + Condition + Error in `select_loc()`: + ! Names must be unique. + x These names are duplicated: + * "x" at locations 1 and 2. + Code + select_loc(df, c(!!1:2)) + Condition + Error in `select_loc()`: + ! Names must be unique. + x These names are duplicated: + * "x" at locations 1 and 2. + Code + select_loc(df, !!(1:2)) + Condition + Error in `select_loc()`: + ! Names must be unique. + x These names are duplicated: + * "x" at locations 1 and 2. + diff --git a/tests/testthat/test-eval-walk.R b/tests/testthat/test-eval-walk.R index 9a79242e..4bd4e4bb 100644 --- a/tests/testthat/test-eval-walk.R +++ b/tests/testthat/test-eval-walk.R @@ -337,3 +337,22 @@ test_that("can make empty selection with allow_rename = FALSE", { c(cyl = 2L, am = 9L) ) }) + +test_that("duplicate names are checked when literals are supplied (#346)", { + df <- set_names(data.frame(x = 1, x = 1), c("x", "x")) + expect_snapshot( + error = TRUE, + cnd_class = TRUE, + { + select_loc(df, "x") + select_loc(df, c("x")) + + select_loc(df, c(!!1:2)) + select_loc(df, !!(1:2)) + } + ) + + # In these cases the selection manages to repair the data frame + expect_equal(select_loc(df, 1), c(x = 1L)) + expect_equal(select_loc(df, 2), c(x = 2L)) +})