Skip to content

Commit

Permalink
Merge pull request #26 from aghaynes/main
Browse files Browse the repository at this point in the history
better handling of single/multiple choice variables, function to split data by form
  • Loading branch information
DominikGuentensperger authored Apr 5, 2024
2 parents 562cd2d + 1d3814b commit f7607ac
Show file tree
Hide file tree
Showing 21 changed files with 662 additions and 112 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/render-readme.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-pandoc@v2
- name: Install rmarkdown, remotes, and the local package
run: |
install.packages(c("devtools", "badger"))
Expand Down
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: redcaptools
Type: Package
Title: Tools for exporting and working with REDCap data
Version: 0.3.2
Version: 0.4.0
Authors@R:
c(
person(given = "Alan G.", family = "Haynes", role = "cre",
Expand All @@ -13,7 +13,7 @@ Description: Tools for exporting and working with REDCap data (e.g. adding label
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
Suggests:
knitr,
rmarkdown,
Expand All @@ -26,5 +26,6 @@ Imports:
labelled,
lubridate,
magrittr,
stringr
stringr,
tidyr
Config/testthat/edition: 3
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ export(redcap_export_meta)
export(redcap_export_tbl)
export(redcap_import_recode)
export(redcap_import_select)
export(redcap_prep)
export(redcap_prep_dates)
export(redcap_prep_datetimes)
export(redcap_toform)
export(remove_empty_rows)
export(singlechoice_factor)
export(singlechoice_opts)
Expand All @@ -23,10 +27,16 @@ importFrom(crayon,bold)
importFrom(crayon,italic)
importFrom(crayon,red)
importFrom(crayon,underline)
importFrom(dplyr,across)
importFrom(dplyr,bind_rows)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,if_else)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,where)
importFrom(httr2,req_body_form)
importFrom(httr2,req_headers)
importFrom(httr2,req_perform)
Expand All @@ -39,7 +49,9 @@ importFrom(lubridate,as_date)
importFrom(lubridate,ymd_hm)
importFrom(magrittr,"%>%")
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_split)
importFrom(tidyr,fill)
importFrom(utils,read.csv)
importFrom(utils,str)
importFrom(utils,write.table)
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# redcaptools 0.4.0

* some data preparation functions have been renamed to improve consistency and clarity. The old functions remain to work, but issue a warning about deprecation.
* `rc_prep` is now `redcap_prep`
* `rc_dates` is now `redcap_prep_dates`
* `rc_datetimes` is now `redcap_prep_datetimes`
* improved support for when no single or multiple choice variables exist in a database
* addition of `redcap_toform` which splits a dataframe into a list of dataframes, one for each form.
* addition of a new vignette covering some details on the API endpoints offered by REDCap.

# redcaptools 0.3.2

* finalizing `redcap_import_select` with user-friendly introduction, more automated options and tests
Expand Down
69 changes: 69 additions & 0 deletions R/deprecated.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Deprecated functions
#' These functions have been renamed to be more consistent with the rest of the package. They may be removed in a future version.
#' @name deprecated
#' @param data dataframe
#' @param metadata data dictionary from REDCap
#' @param rep replace variables. If FALSE, encoded versions of the variable will be created
#' @param rep_date,rep_datetime,rep_singlechoice,rep_multichoice replace the indicated variable type
#' @param app_date,app_datetime,app_singlechoice,app_multichoice text to append to the newly generated variables name (if \code{rep_*} is FALSE)
#' @param append text to append to the newly generated variables name (if \code{replace} is TRUE)
#' @param replace indicator of whether to replace original variables or not
#' @param ... options passed to/from other methods
NULL

#' @describeIn deprecated original function name for \code{redcap_prep}
#' @export
rc_prep <- function(data, metadata,
rep = FALSE,
rep_date = rep, rep_datetime = rep,
rep_singlechoice = rep, rep_multichoice = rep,
app_date = "_date", app_datetime = "_datetime",
app_singlechoice = "_factor", app_multichoice = "_factor",
...){

warning("rc_prep is deprecated, please use redcap_prep")
redcap_prep(data, metadata,
rep = rep,
rep_date = rep_date, rep_datetime = rep_datetime,
rep_singlechoice = rep_singlechoice, rep_multichoice = rep_multichoice,
app_date = app_date, app_datetime = app_datetime,
app_singlechoice = app_singlechoice, app_multichoice = app_multichoice,
...)

}

#' @describeIn deprecated original function name for \code{redcap_dates}
#' @export
rc_dates <- function(data, metadata, replace = FALSE, append = "_date"){
warning("rc_dates is deprecated, use redcap_prep_dates instead")
redcap_prep_dates(data, metadata, replace, append)
}

#' @describeIn deprecated original function name for \code{redcap_datetimes}
#' @export
rc_datetimes <- function(data, metadata, replace = FALSE, append = "_datetime", ...){
warning("rc_datetimes is deprecated, use redcap_prep_datetimes instead")
redcap_prep_datetimes(data, metadata, replace, append, ...)
}

#' @describeIn deprecated deprecated in favour of \code{redcap_toform}
#' Split a manually exported REDCap dataset into forms
#'
#' @param data dataframe
#' @param metadata datadictionary as exported from REDCap or downloaded from the API
#'
#' @return list of dataframes
#' @export
split_by_form <- function(data, metadata){
warning("split_by_form is deprecated, use redcap_toform instead")
metadata$regex <- ifelse(metadata$field_type == "checkbox",
paste0(metadata$field_name, "___"),
metadata$field_name)
sapply(unique(metadata$form_name), function(x){
regex <- paste(metadata$field_name[1], "^redcap",
paste0("^", metadata$regex[metadata$form_name == x],
collapse = "|"), sep = "|")
dd <- data[, grepl(regex, names(data))]
remove_empty_rows(dd)
})
}
125 changes: 125 additions & 0 deletions R/redcap_toform.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#' Convert manually downloaded REDCap data into a list of forms
#'
#' Similar to \code{redcap_export_byform}, this function tries to split a
#' manually downloaded dataset into it's constituent forms. While use of the API
#' allows individual forms to be downloaded, with a manual download, only the data
#' dictionary is available as auxillary information. If no data dictionary is
#' available, the function will use the variable names to guess the forms (see
#' details).
#'
#' @param data imported REDCap data
#' @param datadict data dictionary downloaded manually from REDCap
#' @param metadata metadata downloaded from REDCap API
#' @param guess_events restrict forms to events (rows) where data exists (see details)
#' @param ... additional arguments passed to other functions (currently unused)
#'
#' @details
#' In a longitudinal data collection with many forms, a REDCap dataset will have
#' a large degree of empty cells. The \code{guess_events} argument uses missingness
#' as an indicator of a row not being part of the form in question. If all user
#' variables (i.e. those that do not start with \code{redcap}) are empty, the row
#' will be removed from the dataset.
#'
#' If neither \code{datadict} nor \code{metadata} are provided, the function will
#' attempt to guess the forms based on the variable names, specifically the
#' \code{form_complete} variables which denote the state of the form. This is
#' not a foolproof method: there may be other variables in the data that end with
#' \code{_complete}.
#'
#' @importFrom dplyr if_else pull filter mutate select everything across where slice
#' @importFrom tidyr fill
#' @importFrom stringr str_detect str_extract
#' @export
#' @examples
#' data <- readRDS(system.file("extdata/test.rda", package = "redcaptools"))
#' metadata <- readRDS(system.file("extdata/meta.rda", package = "redcaptools"))
#' dd <- read.csv(system.file("extdata/DataDictionary.csv", package = "redcaptools"))
#' redcap_toform(data, dd)
#' redcap_toform(data, metadata = metadata)
#' redcap_toform(data)
redcap_toform <- function(data,
datadict = NULL,
metadata = NULL,
guess_events = TRUE,
...){
if(!is.null(datadict)){
# a manually downloaded data dictionary has different variable names to the
# API version
metadata <- harmonize_datadict(datadict)
}
if(is.null(datadict) & is.null(metadata)){
warning("No metadata provided, guessing forms based on variable names")
metadata <- data.frame(field_name = names(data)) |>
mutate(complete = str_detect(field_name, "_complete$"),
form_name = str_extract(field_name, ".*(?=_complete)"),
field_type = "") |>
fill(form_name, .direction = "up")

metadata <- metadata |>
filter(!grepl("^redcap", field_name))
}



forms <- metadata$form_name |> unique()
names(forms) <- forms
idvar <- metadata$field_name[1]
redcap_vars <- names(data)[grepl("^redcap", names(data))]

generic_regex <- regex_many(c(idvar, redcap_vars))

lapply(forms,
function(form){
formmeta <- metadata |> slice(-1) |> filter(form_name == form)
# construct regex to select variables
form_regex <- formmeta |>
mutate(regex = if_else(field_type == "checkbox",
paste0("^", field_name, "___"),
regex_single(field_name))) |>
pull(regex) |> paste(collapse = "|")

# extract forms
tmp <- data |>
select(matches(generic_regex), matches(form_regex))

if(guess_events){
tmp2 <- tmp |>
select(matches(form_regex)) |>
mutate(across(
everything(),
~ !is.na(.x) & .x != ""),

) |> rowSums(na.rm = TRUE)
tmp <- tmp |> filter(tmp2 > 0)
}
tmp
})

}

regex_single <- function(x){
paste0("^", x, "$")
}
regex_many <- function(x){
paste0(regex_single(x), collapse = "|")
}

# regex_single("redcap_event_name")
# regex_many(c("redcap_event_name", "redcap_repeat_instance"))

harmonize_datadict <- function(datadict){
if(!ncol(datadict) == 18)
stop("data dictionary must have 18 columns")
# convert factors to character
datadict <- datadict |>
mutate(across(where(is.factor), as.character))
# set names
names(datadict) <- c("field_name", "form_name", "section_header", "field_type",
"field_label", "select_choices_or_calculations", "field_note",
"text_validation_type_or_show_slider_number",
"text_validation_min", "text_validation_max", "identifier",
"branching_logic", "required_field", "custom_alignment",
"question_number", "matrix_group_name", "matrix_ranking",
"field_annotation")
return(datadict)
}
Loading

0 comments on commit f7607ac

Please sign in to comment.