diff --git a/DESCRIPTION b/DESCRIPTION index d880dba2..a1cae5b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: stringr, tibble, tidygraph, + tidyselect, WikidataQueryServiceR, WikidataR, xml2, diff --git a/R/spq_label.R b/R/spq_label.R index f9d8bc62..11429833 100644 --- a/R/spq_label.R +++ b/R/spq_label.R @@ -7,6 +7,10 @@ #' If you write "en" you #' can get labels for regional variants such as "en-GB". If you want results for #' "en" only, write "en$". +#' @param .overwrite whether to replace variables with their labels. +#' `spq_select(blop)` means you get both `blop` and `blop_label`. +#' `spq_select(blop, .overwrite = TRUE)` means you get the label as `blop`, +#' the "original" blop variable isn't returned. #' #' @return A query object #' @export @@ -26,7 +30,11 @@ #' spq_label(mayor, place, .languages = c("fr", "en", "de")) %>% #' spq_perform() #' ``` -spq_label <- function(.query, ..., .required = FALSE,.languages = getOption("glitter.lang", "en$")) { +spq_label <- function(.query, + ..., + .required = FALSE, + .languages = getOption("glitter.lang", "en$"), + .overwrite = FALSE) { vars = purrr::map_chr(rlang::enquos(...), spq_treat_argument) if (!is.null(.languages)) .languages = tolower(.languages) @@ -63,18 +71,18 @@ spq_label <- function(.query, ..., .required = FALSE,.languages = getOption("gli mutate_left <- sprintf("%s_label", sub("\\?", "", x)) - mutate_right <- sprintf("coalesce(%s_labell, '')", sub("\\?", "", x)) + mutate_right <- sprintf("coalesce(%s_labell, '')", un_question_mark(x)) args_list <- list(.query = q, m = mutate_right) names(args_list)[2] <- mutate_left q = do.call(spq_mutate, args_list) - q = spq_select(q, sprintf("-%s_labell", sub("\\?", "", x))) + q = spq_select(q, sprintf("-%s_labell", un_question_mark(x))) # we add the language of the label # because of regional variants if (!is.null(.languages)) { if (length(.languages) > 1 || !grepl("\\$$", .languages)) { - mutate_left <- sprintf("%s_label_lang", sub("\\?", "", x)) - mutate_right <- sprintf("lang(%s_labell)", sub("\\?", "", x)) + mutate_left <- sprintf("%s_label_lang", un_question_mark(x)) + mutate_right <- sprintf("lang(%s_labell)", un_question_mark(x)) args_list <- list(.query = q, m = mutate_right) names(args_list)[2] <- mutate_left q = do.call(spq_mutate, args_list) @@ -84,7 +92,15 @@ spq_label <- function(.query, ..., .required = FALSE,.languages = getOption("gli }, .init = .query ) -# TODO add .overwrite + + if (.overwrite) { + .query <- purrr::reduce( + vars, + \(.query, x) overwrite_with_label(.query, x), + .init = .query + ) + } + .query } @@ -97,3 +113,20 @@ create_lang_filter = function(language, x) { sprintf("langMatches(lang(%s_labell), '%s')", x, language) } } + +overwrite_with_label <- function(.query, x) { + remove_x <- sprintf("-%s", un_question_mark(x)) + .query <- spq_select(.query, remove_x) + .query <- spq_rename_var( + .query, + old = un_question_mark(x), + new = sprintf("%s0", un_question_mark(x)) + ) + .query <- spq_rename_var( + .query, + old = sprintf("%s_label", un_question_mark(x)), + new = un_question_mark(x) + ) + + .query +} diff --git a/R/spq_rename_var.R b/R/spq_rename_var.R new file mode 100644 index 00000000..b67d95cd --- /dev/null +++ b/R/spq_rename_var.R @@ -0,0 +1,32 @@ +spq_rename_var <- function(.query, old, new) { + + if (!(question_mark(old) %in% .query[["vars"]][["name"]])) { + cli::cli_abort("Can't rename {.field {old}} as it's not present in the query object.") + } + + if (question_mark(new) %in% .query[["vars"]][["name"]]) { + cli::cli_abort("Can't rename {.field {old}} to {.field {new}} as {.field {new}} already exists.") + } + + .query[["vars"]] <- spq_rename_var_in_df(.query[["vars"]], old, new) + + .query[["structure"]] <- spq_rename_var_in_df(.query[["structure"]], old, new) + + .query[["triples"]] <- spq_rename_var_in_df(.query[["triples"]], old, new) + + if (!is.null(.query[["filters"]])) { + .query[["filters"]] <- spq_rename_var_in_df(.query[["filters"]], old, new) + } + + .query +} + +spq_rename_var_in_df <- function(df, old, new) { + columns_to_transform <- names(df)[unlist(lapply(df, class)) == "character"] + dplyr::mutate( + df, + dplyr::across( + tidyselect::all_of(columns_to_transform), + \(x) sub(question_mark_escape(old), question_mark(new), x) + )) +} diff --git a/R/utils-str.R b/R/utils-str.R index 904d2eba..e62f2362 100644 --- a/R/utils-str.R +++ b/R/utils-str.R @@ -186,3 +186,15 @@ get_varformula = function(selected) { args = args ) } + +question_mark <- function(x) { + sprintf("?%s", x) +} + +question_mark_escape <- function(x) { + sprintf("\\?%s(:blank:]|[:punct:]|\\b)", x) +} + +un_question_mark <- function(x) { + sub("\\?", "", x) +} diff --git a/man/spq_label.Rd b/man/spq_label.Rd index c5b3d074..7ab98a40 100644 --- a/man/spq_label.Rd +++ b/man/spq_label.Rd @@ -8,7 +8,8 @@ spq_label( .query, ..., .required = FALSE, - .languages = getOption("glitter.lang", "en$") + .languages = getOption("glitter.lang", "en$"), + .overwrite = FALSE ) } \arguments{ @@ -25,6 +26,11 @@ restrictions on language (defined or not), \code{"*"} for any defined language. If you write "en" you can get labels for regional variants such as "en-GB". If you want results for "en" only, write "en$".} + +\item{.overwrite}{whether to replace variables with their labels. +\code{spq_select(blop)} means you get both \code{blop} and \code{blop_label}. +\code{spq_select(blop, .overwrite = TRUE)} means you get the label as \code{blop}, +the "original" blop variable isn't returned.} } \value{ A query object diff --git a/tests/testthat/_snaps/spq_label.md b/tests/testthat/_snaps/spq_label.md index c6c30ebf..44d5625a 100644 --- a/tests/testthat/_snaps/spq_label.md +++ b/tests/testthat/_snaps/spq_label.md @@ -60,3 +60,34 @@ } +# spq_label() .overwrite + + Code + spq_init() %>% spq_add("?mayor wdt:P31 ?species") %>% spq_set(species = c( + "wd:Q144", "wd:Q146", "wd:Q780")) %>% spq_add("?mayor p:P39 ?node") %>% + spq_add("?node ps:P39 wd:Q30185") %>% spq_add("?node pq:P642 ?place") %>% + spq_label(mayor, place, .languages = "en$", .overwrite = TRUE) + Output + PREFIX rdfs: + SELECT ?node ?species (COALESCE(?mayor_labell,'') AS ?mayor) (COALESCE(?place_labell,'') AS ?place) + WHERE { + + ?mayor0 wdt:P31 ?species. + ?mayor0 p:P39 ?node. + ?node ps:P39 wd:Q30185. + ?node pq:P642 ?place0. + OPTIONAL { + ?mayor0 rdfs:label ?mayor_labell. + FILTER(lang(?mayor_labell) IN ('en')) + } + + OPTIONAL { + ?place0 rdfs:label ?place_labell. + FILTER(lang(?place_labell) IN ('en')) + } + + VALUES ?species {wd:Q144 wd:Q146 wd:Q780} + + } + + diff --git a/tests/testthat/test-spq_label.R b/tests/testthat/test-spq_label.R index 48a2c04e..8a2758a7 100644 --- a/tests/testthat/test-spq_label.R +++ b/tests/testthat/test-spq_label.R @@ -34,3 +34,16 @@ test_that("spq_label() works", { "place_label", "place_label_lang") ) }) + +test_that("spq_label() .overwrite", { + + expect_snapshot( + spq_init() %>% + spq_add("?mayor wdt:P31 ?species") %>% + spq_set(species = c('wd:Q144','wd:Q146', 'wd:Q780')) %>% + spq_add("?mayor p:P39 ?node") %>% + spq_add("?node ps:P39 wd:Q30185") %>% + spq_add("?node pq:P642 ?place") %>% + spq_label(mayor, place, .languages = "en$", .overwrite = TRUE) + ) +})