Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

better handling of single/multiple choice variables, function to split data by form #26

Merged
merged 22 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/render-readme.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-pandoc@v2
- name: Install rmarkdown, remotes, and the local package
run: |
install.packages(c("devtools", "badger"))
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: redcaptools
Type: Package
Title: Tools for exporting and working with REDCap data
Version: 0.3.2
Version: 0.4.0
Authors@R:
c(
person(given = "Alan G.", family = "Haynes", role = "cre",
Expand All @@ -13,7 +13,7 @@ Description: Tools for exporting and working with REDCap data (e.g. adding label
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
Suggests:
knitr,
rmarkdown,
Expand Down
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ export(redcap_export_meta)
export(redcap_export_tbl)
export(redcap_import_recode)
export(redcap_import_select)
export(redcap_prep)
export(redcap_prep_dates)
export(redcap_prep_datetimes)
export(redcap_toform)
export(remove_empty_rows)
export(singlechoice_factor)
export(singlechoice_opts)
Expand All @@ -23,10 +27,16 @@ importFrom(crayon,bold)
importFrom(crayon,italic)
importFrom(crayon,red)
importFrom(crayon,underline)
importFrom(dplyr,across)
importFrom(dplyr,bind_rows)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,if_else)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,where)
importFrom(httr2,req_body_form)
importFrom(httr2,req_headers)
importFrom(httr2,req_perform)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# redcaptools 0.4.0

* some data preparation functions have been renamed to improve consistency and clarity. The old functions remain to work, but issue a warning about deprecation.
* `rc_prep` is now `redcap_prep`
* `rc_dates` is now `redcap_prep_dates`
* `rc_datetimes` is now `redcap_prep_datetimes`
* improved support for when no single or multiple choice variables exist in a database
* addition of `redcap_toform` which splits a dataframe into a list of dataframes, one for each form.
* addition of a new vignette covering some details on the API endpoints offered by REDCap.

# redcaptools 0.3.2

* finalizing `redcap_import_select` with user-friendly introduction, more automated options and tests
Expand Down
69 changes: 69 additions & 0 deletions R/deprecated.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Deprecated functions
#' These functions have been renamed to be more consistent with the rest of the package. They may be removed in a future version.
#' @name deprecated
#' @param data dataframe
#' @param metadata data dictionary from REDCap
#' @param rep replace variables. If FALSE, encoded versions of the variable will be created
#' @param rep_date,rep_datetime,rep_singlechoice,rep_multichoice replace the indicated variable type
#' @param app_date,app_datetime,app_singlechoice,app_multichoice text to append to the newly generated variables name (if \code{rep_*} is FALSE)
#' @param append text to append to the newly generated variables name (if \code{replace} is TRUE)
#' @param replace indicator of whether to replace original variables or not
#' @param ... options passed to/from other methods
NULL

#' @describeIn deprecated original function name for \code{redcap_prep}
#' @export
rc_prep <- function(data, metadata,
rep = FALSE,
rep_date = rep, rep_datetime = rep,
rep_singlechoice = rep, rep_multichoice = rep,
app_date = "_date", app_datetime = "_datetime",
app_singlechoice = "_factor", app_multichoice = "_factor",
...){

warning("rc_prep is deprecated, please use redcap_prep")
redcap_prep(data, metadata,
rep = rep,
rep_date = rep_date, rep_datetime = rep_datetime,
rep_singlechoice = rep_singlechoice, rep_multichoice = rep_multichoice,
app_date = app_date, app_datetime = app_datetime,
app_singlechoice = app_singlechoice, app_multichoice = app_multichoice,
...)

}

#' @describeIn deprecated original function name for \code{redcap_dates}
#' @export
rc_dates <- function(data, metadata, replace = FALSE, append = "_date"){
warning("rc_dates is deprecated, use redcap_prep_dates instead")
redcap_prep_dates(data, metadata, replace, append)
}

#' @describeIn deprecated original function name for \code{redcap_datetimes}
#' @export
rc_datetimes <- function(data, metadata, replace = FALSE, append = "_datetime", ...){
warning("rc_datetimes is deprecated, use redcap_prep_datetimes instead")
redcap_prep_datetimes(data, metadata, replace, append, ...)
}

#' @describeIn deprecated deprecated in favour of \code{redcap_toform}
#' Split a manually exported REDCap dataset into forms
#'
#' @param data dataframe
#' @param metadata datadictionary as exported from REDCap or downloaded from the API
#'
#' @return list of dataframes
#' @export
split_by_form <- function(data, metadata){
warning("split_by_form is deprecated, use redcap_toform instead")
metadata$regex <- ifelse(metadata$field_type == "checkbox",
paste0(metadata$field_name, "___"),
metadata$field_name)
sapply(unique(metadata$form_name), function(x){
regex <- paste(metadata$field_name[1], "^redcap",
paste0("^", metadata$regex[metadata$form_name == x],
collapse = "|"), sep = "|")
dd <- data[, grepl(regex, names(data))]
remove_empty_rows(dd)
})
}
104 changes: 104 additions & 0 deletions R/redcap_toform.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' Convert manually downloaded REDCap data into a list of forms
#'
#' Similar to \code{redcap_export_byform}, this function tries to split a
#' manually downloaded dataset into it's constituent forms. While use of the API
#' allows individual forms to be downloaded, with a manual download, only the data
#' dictionary is available as auxillary information.
#'
#' @param data imported REDCap data
#' @param datadict data dictionary downloaded manually from REDCap
#' @param metadata metadata downloaded from REDCap API
aghaynes marked this conversation as resolved.
Show resolved Hide resolved
#' @param guess_events restrict forms to events (rows) where data exists (see details)
#' @param ... additional arguments passed to other functions (currently unused)
#'
#' @details
#' In a longitudinal data collection with many forms, a REDCap dataset will have
#' a large degree of empty cells. The \code{guess_events} argument uses missingness
#' as an indicator of a row not being part of the form in question. If all user
#' variables (i.e. those that do not start with \code{redcap}) are empty, the row
#' will be removed from the dataset.
#'
#'
#' @importFrom dplyr if_else pull filter mutate select everything across where slice
#' @export
#' @examples
#' data <- readRDS(system.file("extdata/test.rda", package = "redcaptools"))
#' metadata <- readRDS(system.file("extdata/meta.rda", package = "redcaptools"))
#' dd <- read.csv(system.file("extdata/DataDictionary.csv", package = "redcaptools"))
#' redcap_toform(data, dd)
#' redcap_toform(data, metadata = metadata)
redcap_toform <- function(data,
datadict = NULL,
metadata = NULL,
guess_events = TRUE,
...){
if(is.null(datadict) & is.null(metadata))
stop("one of \"datadict\" or \"metadata\" must be provided")
if(!is.null(datadict)){
# a manually downloaded data dictionary has different variable names to the
# API version
metadata <- harmonize_datadict(datadict)
}

forms <- metadata$form_name |> unique()
names(forms) <- forms
idvar <- metadata$field_name[1]
redcap_vars <- names(data)[grepl("^redcap", names(data))]

generic_regex <- regex_many(c(idvar, redcap_vars))

lapply(forms,
function(form){
formmeta <- metadata |> slice(-1) |> filter(form_name == form)
# construct regex to select variables
form_regex <- formmeta |>
mutate(regex = if_else(field_type == "checkbox",
paste0("^", field_name, "___"),
regex_single(field_name))) |>
pull(regex) |> paste(collapse = "|")

# extract forms
tmp <- data |>
select(matches(generic_regex), matches(form_regex))

if(guess_events){
tmp2 <- tmp |>
select(matches(form_regex)) |>
mutate(across(
everything(),
~ !is.na(.x) & .x != ""),

) |> rowSums(na.rm = TRUE)
tmp <- tmp |> filter(tmp2 > 0)
}
tmp
})

}

regex_single <- function(x){
paste0("^", x, "$")
}
regex_many <- function(x){
paste0(regex_single(x), collapse = "|")
}

# regex_single("redcap_event_name")
# regex_many(c("redcap_event_name", "redcap_repeat_instance"))

harmonize_datadict <- function(datadict){
if(!ncol(datadict) == 18)
stop("data dictionary must have 18 columns")
# convert factors to character
datadict <- datadict |>
mutate(across(where(is.factor), as.character))
# set names
names(datadict) <- c("field_name", "form_name", "section_header", "field_type",
"field_label", "select_choices_or_calculations", "field_note",
"text_validation_type_or_show_slider_number",
"text_validation_min", "text_validation_max", "identifier",
"branching_logic", "required_field", "custom_alignment",
"question_number", "matrix_group_name", "matrix_ranking",
"field_annotation")
return(datadict)
}
90 changes: 52 additions & 38 deletions R/reformat.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,26 @@
singlechoice_opts <- function(metadata){
radio <- metadata[metadata$field_type %in% c("radio", "dropdown", "yesno"), ]
radio$select_choices_or_calculations[radio$field_type == "yesno"] <- "0, No | 1, Yes"
fn <- function(var, choices, label){
opts <- choices
opts <- trimws(unlist(strsplit(opts, "|", fixed = TRUE)))
n <- length(opts)
opts2 <- strsplit(opts, ",")
if(nrow(radio) > 0){
fn <- function(var, choices, label){
opts <- choices
opts <- trimws(unlist(strsplit(opts, "|", fixed = TRUE)))
n <- length(opts)
opts2 <- strsplit(opts, ",")

vals <- trimws(sapply(opts2, function(x) x[1], simplify = TRUE))
labs <- trimws(sapply(opts2, function(x) paste(x[-1], collapse = ","), simplify = TRUE))
labvals <- data.frame(var = rep(var, n), label = rep(label, n), val = vals, lab = labs)
labvals
vals <- trimws(sapply(opts2, function(x) x[1], simplify = TRUE))
labs <- trimws(sapply(opts2, function(x) paste(x[-1], collapse = ","), simplify = TRUE))
labvals <- data.frame(var = rep(var, n), label = rep(label, n), val = vals, lab = labs)
labvals
}
radio_labs <- do.call("rbind", apply(radio, 1, function(x) fn(x["field_name"], x["select_choices_or_calculations"], x["field_label"])))
row.names(radio_labs) <- NULL
} else {
radio_labs <- data.frame(var = character(0),
label = character(0),
val = character(0),
lab = character(0))
}
radio_labs <- do.call("rbind", apply(radio, 1, function(x) fn(x["field_name"], x["select_choices_or_calculations"], x["field_label"])))
row.names(radio_labs) <- NULL
return(radio_labs)
}

Expand All @@ -43,20 +50,28 @@ singlechoice_opts <- function(metadata){
#' @export
multichoice_opts <- function(metadata){
tmp <- metadata[metadata$field_type == "checkbox", ]
fn <- function(var, choices, label){
opts <- choices
opts <- trimws(unlist(strsplit(opts, "|", fixed = TRUE)))
n <- length(opts)
opts2 <- strsplit(opts, ",")
if(nrow(tmp) > 0){
fn <- function(var, choices, label){
opts <- choices
opts <- trimws(unlist(strsplit(opts, "|", fixed = TRUE)))
n <- length(opts)
opts2 <- strsplit(opts, ",")

vals <- trimws(sapply(opts2, function(x) x[1], simplify = TRUE))
labs <- trimws(sapply(opts2, function(x) paste(x[-1], collapse = ","), simplify = TRUE))
labvals <- data.frame(ovar = rep(var, n), var = rep(var, n), vlabel = rep(label, n), val = vals, label = labs)
labvals
vals <- trimws(sapply(opts2, function(x) x[1], simplify = TRUE))
labs <- trimws(sapply(opts2, function(x) paste(x[-1], collapse = ","), simplify = TRUE))
labvals <- data.frame(ovar = rep(var, n), var = rep(var, n), vlabel = rep(label, n), val = vals, label = labs)
labvals
}
tmp_labs <- do.call("rbind", apply(tmp, 1, function(x) fn(x["field_name"], x["select_choices_or_calculations"], x["field_label"])))
row.names(tmp_labs) <- NULL
tmp_labs$var <- paste0(tmp_labs$var, "___", tmp_labs$val)
} else {
tmp_labs <- data.frame(ovar = character(0),
var = character(0),
vlabel = character(0),
val = character(0),
label = character(0))
}
tmp_labs <- do.call("rbind", apply(tmp, 1, function(x) fn(x["field_name"], x["select_choices_or_calculations"], x["field_label"])))
row.names(tmp_labs) <- NULL
tmp_labs$var <- paste0(tmp_labs$var, "___", tmp_labs$val)
return(tmp_labs)
}

Expand Down Expand Up @@ -126,7 +141,7 @@ multichoice_factor <- function(data, metadata, replace = FALSE, append = "_facto
#' Converts the string values returned from REDCap to Dates.
#' This function also applies labels to the variable itself, based on the option label.
#'
#' @rdname rc_date
#' @rdname redcap_prep_dates
#' @param data the data.frame to modify
#' @param metadata metadata/datadictionary
#' @param replace whether to overwrite the existing data .
Expand All @@ -136,7 +151,7 @@ multichoice_factor <- function(data, metadata, replace = FALSE, append = "_facto
#' @importFrom labelled var_label var_label<-
#' @importFrom lubridate as_date
#' @export
rc_dates <- function(data, metadata, replace = FALSE, append = "_date"){
redcap_prep_dates <- function(data, metadata, replace = FALSE, append = "_date"){
tmp <- subset(metadata, metadata$text_validation_type_or_show_slider_number %in% c("date_dmy", "date_ymd"))
tmp <- tmp[tmp$field_name %in% names(data), ]
if(nrow(tmp) > 0){
Expand All @@ -154,12 +169,13 @@ rc_dates <- function(data, metadata, replace = FALSE, append = "_date"){
return(data)
}

#' @describeIn rc_date input data.frame with date-time variables reformated to POSIX

#' @describeIn redcap_prep_dates input data.frame with date-time variables reformated to POSIX
#' @param ... options passed to/from other methods
#' @importFrom labelled var_label var_label<-
#' @importFrom lubridate ymd_hm
#' @export
rc_datetimes <- function(data, metadata, replace = FALSE, append = "_datetime", ...){
redcap_prep_datetimes <- function(data, metadata, replace = FALSE, append = "_datetime", ...){
tmp <- subset(metadata, metadata$text_validation_type_or_show_slider_number %in% c("datetime_dmy", "datetime_ymd"))
tmp <- tmp[tmp$field_name %in% names(data), ]
if(nrow(tmp) > 0){
Expand All @@ -175,7 +191,6 @@ rc_datetimes <- function(data, metadata, replace = FALSE, append = "_datetime",
return(data)
}


#' Label non-single/multiple choice/date(time) fields
#' \code{singlechoice_factor}, \code{multichoice_factor}, \code{rc_date} and \code{rc_datetime}
#'
Expand Down Expand Up @@ -207,15 +222,13 @@ label_others <- function(data, metadata){
#' @return dataframe with converted factors, dates, POSIX, ...
#' @export
#'
rc_prep <- function(data, metadata,
rep = FALSE,
rep_date = rep, rep_datetime = rep,
rep_singlechoice = rep, rep_multichoice = rep,
app_date = "_date", app_datetime = "_datetime",
app_singlechoice = "_factor", app_multichoice = "_factor",
...
){

redcap_prep <- function(data, metadata,
rep = FALSE,
rep_date = rep, rep_datetime = rep,
rep_singlechoice = rep, rep_multichoice = rep,
app_date = "_date", app_datetime = "_datetime",
app_singlechoice = "_factor", app_multichoice = "_factor",
...){
tmp <- singlechoice_factor(data, metadata,
replace = rep_singlechoice,
append = app_singlechoice)
Expand All @@ -230,5 +243,6 @@ rc_prep <- function(data, metadata,
append = app_datetime, ...)
tmp <- label_others(tmp, metadata)
return(tmp)

}


Loading
Loading