From 4005748f9c87ed7f65e14fcae39c59b9935a0ceb Mon Sep 17 00:00:00 2001
From: Lionel Henry <lionel.hry@gmail.com>
Date: Mon, 28 Oct 2024 13:57:10 +0100
Subject: [PATCH] Check for duplicate names when supplying character literals
 (#367)

Closes #346
---
 NEWS.md                            |  2 ++
 R/eval-walk.R                      | 55 +++++++++++++-----------------
 R/vars.R                           |  1 -
 tests/testthat/_snaps/eval-walk.md | 31 +++++++++++++++++
 tests/testthat/test-eval-walk.R    | 19 +++++++++++
 5 files changed, 76 insertions(+), 32 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 66b6cc4f..16fc26d1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,8 @@
 
 * `matches()` now uses `perl = TRUE` by default. This makes it more consitent with regular expressions in stringr (#330).
 
+* `eval_select()` now fails when data has duplicate names and a character vector is provided as input (#346).
+
 * `eval_select()` and `eval_relocate()` gain a new `error_arg` argument that can be specified to throw a better error message when `allow_empty = FALSE`  or `allow_rename = FALSE` (@olivroy, #327).
 
 * `vars_pull()` now also warns when using `.data` (#335). Please
diff --git a/R/eval-walk.R b/R/eval-walk.R
index d7a8eb31..bed5ff1a 100644
--- a/R/eval-walk.R
+++ b/R/eval-walk.R
@@ -18,36 +18,12 @@ vars_select_eval <- function(vars,
     return(pos)
   }
 
-  uniquely_named <- uniquely_named %||% is.data.frame(data)
-
-  if (!is_symbolic(wrapped)) {
-    pos <- as_indices_sel_impl(
-      wrapped,
-      vars = vars,
-      strict = strict,
-      data = data,
-      allow_predicates = allow_predicates,
-      call = error_call
-    )
-    pos <- loc_validate(pos, vars, call = error_call)
-    pos <- ensure_named(
-      pos,
-      vars,
-      uniquely_named = uniquely_named,
-      allow_rename = allow_rename,
-      allow_empty = allow_empty,
-      error_arg = error_arg,
-      call = error_call
-    )
-    return(pos)
-  }
-
   vars <- peek_vars()
-
   vars_split <- vctrs::vec_split(seq_along(vars), vars)
 
   # Mark data duplicates so we can fail instead of disambiguating them
   # when renaming
+  uniquely_named <- uniquely_named %||% is.data.frame(data)
   if (uniquely_named) {
     vars_split$val <- map(vars_split$val, mark_data_dups)
   }
@@ -177,7 +153,7 @@ walk_data_tree <- function(expr, data_mask, context_mask, colon = FALSE) {
 
   out <- switch(
     expr_kind(expr, context_mask, error_call),
-    literal = expr,
+    literal = eval_literal(expr, data_mask, context_mask),
     symbol = eval_sym(expr, data_mask, context_mask),
     `(` = walk_data_tree(expr[[2]], data_mask, context_mask, colon = colon),
     `!` = eval_bang(expr, data_mask, context_mask),
@@ -301,11 +277,6 @@ chr_as_locations <- function(x, vars, call = caller_env(), arg = NULL) {
   set_names(out, names(x))
 }
 
-as_indices <- function(x, vars, strict = TRUE, call) {
-  inds <- with_subscript_errors(as_indices_impl(x, vars, strict, call))
-  vctrs::vec_as_location(inds, length(vars), vars, convert_values = NULL)
-}
-
 expr_kind <- function(expr, context_mask, error_call) {
   switch(
     typeof(expr),
@@ -347,6 +318,28 @@ call_kind <- function(expr, context_mask, error_call) {
   )
 }
 
+eval_literal <- function(expr, data_mask, context_mask) {
+  internal <- data_mask$.__tidyselect__.$internal
+
+  if (internal$uniquely_named && is_character(expr)) {
+    # Since tidyselect allows repairing data frames with duplicate names by
+    # renaming or selecting positions, we can't check the input for duplicates.
+    # Instead, we check the output. But in case of character literals, checking
+    # the output doesn't work because we use `vctrs::vec_as_location()` to
+    # transform the strings to locations and it ignores duplicate names. So we
+    # instead check the input here, since it's not possible to repair duplicate
+    # names by matching them by name. This avoids an inconsistency with the
+    # symbolic path (#346).
+    vctrs::vec_as_names(
+      internal$vars,
+      repair = "check_unique",
+      call = internal$error_call
+    )
+ }
+
+  expr
+}
+
 eval_colon <- function(expr, data_mask, context_mask) {
   if (is_negated_colon(expr)) {
     # Compatibility syntax for `-1:-2`. We interpret it as `-(1:2)`.
diff --git a/R/vars.R b/R/vars.R
index d7851931..3dbc65b2 100644
--- a/R/vars.R
+++ b/R/vars.R
@@ -1,4 +1,3 @@
-
 peeker <- function(what) {
   function(..., fn = NULL) {
     if (!missing(...)) {
diff --git a/tests/testthat/_snaps/eval-walk.md b/tests/testthat/_snaps/eval-walk.md
index 41ca59b4..397f4f43 100644
--- a/tests/testthat/_snaps/eval-walk.md
+++ b/tests/testthat/_snaps/eval-walk.md
@@ -169,3 +169,34 @@
       Error:
       ! Must select at least one item.
 
+# duplicate names are checked when literals are supplied (#346)
+
+    Code
+      select_loc(df, "x")
+    Condition <vctrs_error_names_must_be_unique>
+      Error in `select_loc()`:
+      ! Names must be unique.
+      x These names are duplicated:
+        * "x" at locations 1 and 2.
+    Code
+      select_loc(df, c("x"))
+    Condition <vctrs_error_names_must_be_unique>
+      Error in `select_loc()`:
+      ! Names must be unique.
+      x These names are duplicated:
+        * "x" at locations 1 and 2.
+    Code
+      select_loc(df, c(!!1:2))
+    Condition <vctrs_error_names_must_be_unique>
+      Error in `select_loc()`:
+      ! Names must be unique.
+      x These names are duplicated:
+        * "x" at locations 1 and 2.
+    Code
+      select_loc(df, !!(1:2))
+    Condition <vctrs_error_names_must_be_unique>
+      Error in `select_loc()`:
+      ! Names must be unique.
+      x These names are duplicated:
+        * "x" at locations 1 and 2.
+
diff --git a/tests/testthat/test-eval-walk.R b/tests/testthat/test-eval-walk.R
index 9a79242e..4bd4e4bb 100644
--- a/tests/testthat/test-eval-walk.R
+++ b/tests/testthat/test-eval-walk.R
@@ -337,3 +337,22 @@ test_that("can make empty selection with allow_rename = FALSE", {
     c(cyl = 2L, am = 9L)
   )
 })
+
+test_that("duplicate names are checked when literals are supplied (#346)", {
+  df <- set_names(data.frame(x = 1, x = 1), c("x", "x"))
+  expect_snapshot(
+    error = TRUE,
+    cnd_class = TRUE,
+    {
+      select_loc(df, "x")
+      select_loc(df, c("x"))
+
+      select_loc(df, c(!!1:2))
+      select_loc(df, !!(1:2))
+    }
+  )
+
+  # In these cases the selection manages to repair the data frame
+  expect_equal(select_loc(df, 1), c(x = 1L))
+  expect_equal(select_loc(df, 2), c(x = 2L))
+})