Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create cleaning logbook #40

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(check_outliers)
export(create_audit_list)
export(create_clean_data)
export(create_cleaning_log)
export(create_cleaning_logbook)
export(create_duration_from_audit_sum_all)
export(create_duration_from_audit_with_start_end)
export(create_logic_for_other)
Expand Down
111 changes: 111 additions & 0 deletions R/create_cleaning_logbook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@


#' Create the data extract, logbook and deletion log tables and write them to
#' the cleaning logbook template
#'
#' Builds three tables from the raw data and the cleaning log, writes them to
#' the sheets "01_TEMPLATE_data_extract", "02_TEMPLATE_Logbook" and
#' "03_TEMPLATE_deletion log" of the logbook template shipped with the
#' package, and saves the resulting workbook to `filename`.
#'
#' @param raw_data Raw dataset. Must contain the `uuid` and `col_enum` columns
#'   and a column named `deviceid`.
#' @param cleaning_log Cleaning log. Must contain the `uuid` and `col_enum`
#'   columns as well as `change_type`, `questions`, `issue`, `reason`,
#'   `old_value` and `new_value`.
#' @param uuid Name of the uuid column, identical in `raw_data` and
#'   `cleaning_log`.
#' @param col_enum Name of the enumerator column, identical in `raw_data` and
#'   `cleaning_log`. Required: the default `NA` raises an error.
#' @param filename Path of the xlsx file to write. When `NA` (the default) or
#'   `NULL`, the workbook is saved as "data_cleaning_workbook.xlsx" in the
#'   current working directory. An existing file is overwritten.
#'
#' @return A named list of 3 data frames:
#'  - "template_data_extract": uuid and enumerator ID of every row of
#'    `raw_data`
#'  - "template_logbook": cleaning log entries whose `change_type` is not
#'    "remove_survey"
#'  - "template_deletion_log": cleaning log entries whose `change_type` is
#'    "remove_survey"
#' As a side effect the three tables are written to the logbook template and
#' the workbook is saved to `filename`.
#' @export
#' @examples
#' \dontrun{
#' create_cleaning_logbook(
#'   raw_data = cleaningtools::cleaningtools_raw_data,
#'   cleaning_log = cleaningtools::cleaningtools_cleaning_log,
#'   uuid = "X_uuid",
#'   col_enum = "enumerator_num",
#'   filename = "new_cleaning_logbook_draft.xlsx"
#' )}
create_cleaning_logbook <- function(raw_data,
                                    cleaning_log,
                                    uuid = "X_uuid",
                                    col_enum = NA,
                                    filename = NA) {
  ## col_enum has no usable default: fail with an explicit message instead of
  ## the misleading "column not found" that an NA lookup would produce
  if (length(col_enum) != 1 || is.na(col_enum)) {
    stop("col_enum must be the name of the enumerator column", call. = FALSE)
  }

  ## check if col_enum in raw_data and cleaning_log
  if (!col_enum %in% colnames(raw_data)) {
    stop("Enumerator column not found in raw_data", call. = FALSE)
  }
  if (!col_enum %in% colnames(cleaning_log)) {
    stop("Enumerator column not found in cleaning_log", call. = FALSE)
  }

  ## check if uuid in raw_data and cleaning_log
  if (!uuid %in% colnames(raw_data)) {
    stop("uuid column not found in raw_data", call. = FALSE)
  }
  if (!uuid %in% colnames(cleaning_log)) {
    stop("uuid column not found in cleaning_log", call. = FALSE)
  }

  ## the device id column name is fixed, so check it up front as well
  if (!"deviceid" %in% colnames(raw_data)) {
    stop("deviceid column not found in raw_data", call. = FALSE)
  }

  ## columns of the cleaning log used to build the logbook / deletion log
  required_log_cols <- c(
    "change_type", "questions", "issue", "reason", "old_value", "new_value"
  )
  missing_log_cols <- setdiff(required_log_cols, colnames(cleaning_log))
  if (length(missing_log_cols) > 0) {
    stop(
      "Column(s) not found in cleaning_log: ",
      paste(missing_log_cols, collapse = ", "),
      call. = FALSE
    )
  }

  ## Create the template_data_extract table
  template_data_extract <- raw_data %>%
    dplyr::select(dplyr::all_of(uuid), dplyr::all_of(col_enum)) %>%
    dplyr::rename(
      "enumerator ID" = dplyr::all_of(col_enum),
      "uuid" = dplyr::all_of(uuid)
    ) %>%
    as.data.frame()

  ## create a device id df, joined to the cleaning log entries by uuid
  deviceiddf <- raw_data %>%
    dplyr::select(dplyr::all_of(uuid), deviceid)

  ## Create the template_logbook table (everything except survey removals)
  template_logbook <- cleaning_log %>%
    dplyr::filter(!change_type %in% c("remove_survey")) %>%
    dplyr::left_join(deviceiddf, by = uuid) %>%
    dplyr::mutate(
      `Type of Issue (Select from dropdown list)` = NA,
      changed = ifelse(
        change_type %in% c("change_response", "blank_response"),
        "Yes",
        "No"
      )
    ) %>%
    dplyr::select(
      dplyr::all_of(uuid), dplyr::all_of(col_enum), deviceid, questions,
      issue, `Type of Issue (Select from dropdown list)`,
      reason, changed, old_value, new_value
    ) %>%
    dplyr::rename(
      "uuid" = dplyr::all_of(uuid),
      "Enumerator ID" = dplyr::all_of(col_enum),
      "device ID" = deviceid,
      "question.name" = questions,
      "feedback" = reason,
      "old.value" = old_value,
      "new.value" = new_value
    ) %>%
    as.data.frame()

  ## Create the template_deletion_log table (survey removals only)
  template_deletion_log <- cleaning_log %>%
    dplyr::filter(change_type %in% c("remove_survey")) %>%
    dplyr::left_join(deviceiddf, by = uuid) %>%
    dplyr::mutate(`Type of Issue (Select from dropdown list)` = NA) %>%
    dplyr::select(
      dplyr::all_of(uuid), dplyr::all_of(col_enum), deviceid, issue,
      `Type of Issue (Select from dropdown list)`, reason
    ) %>%
    dplyr::rename(
      "uuid" = dplyr::all_of(uuid),
      "Enumerator ID" = dplyr::all_of(col_enum),
      "device ID" = deviceid,
      "Issue" = issue,
      "feedback" = reason
    ) %>%
    as.data.frame()

  named_table <- list(
    "template_data_extract" = template_data_extract,
    "template_logbook" = template_logbook,
    "template_deletion_log" = template_deletion_log
  )

  ## Writing tables to cleaning_logbook
  template_path <- system.file(
    "extdata",
    "Research_cycle_data_cleaning_logbook_template_v4_202331_AA.xlsx",
    package = "cleaningtools"
  )
  ## system.file() returns "" when the file is missing
  if (!nzchar(template_path)) {
    stop(
      "Cleaning logbook template not found in the cleaningtools package",
      call. = FALSE
    )
  }
  wb <- openxlsx::loadWorkbook(template_path)

  ## destination sheet of the template for each table
  sheet_names <- c(
    "template_data_extract" = "01_TEMPLATE_data_extract",
    "template_logbook" = "02_TEMPLATE_Logbook",
    "template_deletion_log" = "03_TEMPLATE_deletion log"
  )
  for (table_name in names(sheet_names)) {
    openxlsx::writeData(
      wb = wb,
      x = named_table[[table_name]],
      sheet = sheet_names[[table_name]],
      startRow = 1,
      startCol = 1
    )
  }

  if (is.null(filename) || is.na(filename)) {
    message(
      "No filename provided: the new cleaning_logbook will be saved as ",
      "data_cleaning_workbook.xlsx in the current working directory."
    )
    filename <- "data_cleaning_workbook.xlsx"
  }

  openxlsx::saveWorkbook(wb, filename, overwrite = TRUE)

  return(named_table)
}
Binary file not shown.
45 changes: 45 additions & 0 deletions man/create_cleaning_logbook.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions tests/testthat/test-create_cleaning_logbook.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
testthat::test_that("Create Cleaning Logbook", {
  raw_data <- cleaningtools_raw_data
  cleaning_log <- cleaningtools_cleaning_log

  ## Every call writes a workbook: send it to a temporary file so the test
  ## suite does not leave data_cleaning_workbook.xlsx in the test directory
  out_file <- tempfile(fileext = ".xlsx")

  ## An enumerator column that does not exist must be rejected
  testthat::expect_error(
    create_cleaning_logbook(raw_data, cleaning_log,
      uuid = "X_uuid", col_enum = "absd", filename = out_file
    )
  )

  ## A valid call returns the three tables and writes the workbook
  testthat::expect_length(
    create_cleaning_logbook(raw_data, cleaning_log,
      uuid = "X_uuid", col_enum = "enumerator_num", filename = out_file
    ),
    3
  )
  testthat::expect_true(file.exists(out_file))

  ## TESTING First Table
  test_data_raw <- data.frame(
    X_uuid = c("abcd", "eeds", "eedt", "sdks"),
    deviceid = c(1, 2, 3, 4),
    enumerator_num = c(1, 2, 4, 4)
  )

  ## check.names = FALSE keeps the column names containing spaces as they are
  expected_output <- data.frame(
    uuid = c("abcd", "eeds", "eedt", "sdks"),
    `enumerator ID` = c(1, 2, 4, 4),
    check.names = FALSE
  )
  actual_output <- create_cleaning_logbook(test_data_raw, cleaning_log,
    uuid = "X_uuid", col_enum = "enumerator_num", filename = out_file
  )

  testthat::expect_equal(actual_output[[1]], expected_output)

  ## TESTING Second Table
  test_cleaning_log <- data.frame(
    X_uuid = c("abcd", "eeds", "eedt", "sdks"),
    enumerator_num = c(1, 2, 4, 4),
    questions = c("q1", "q2", "q3", "q3"),
    reason = c("f1", "f2", "f4", "f3"),
    issue = c("duplicate", "duplicate", "outlier", "other_responses"),
    change_type = c(
      "remove_survey", "remove_survey", "no_action", "change_response"
    ),
    old_value = c(1, 1, 1, 1),
    new_value = c(2, 2, 2, 2)
  )

  expected_output <- data.frame(
    uuid = c("eedt", "sdks"),
    `Enumerator ID` = c(4, 4),
    `device ID` = c(3, 4),
    question.name = c("q3", "q3"),
    issue = c("outlier", "other_responses"),
    `Type of Issue (Select from dropdown list)` = c(NA, NA),
    feedback = c("f4", "f3"),
    changed = c("No", "Yes"),
    old.value = c(1, 1),
    new.value = c(2, 2),
    check.names = FALSE
  )

  actual_output <- create_cleaning_logbook(test_data_raw, test_cleaning_log,
    uuid = "X_uuid", col_enum = "enumerator_num", filename = out_file
  )

  testthat::expect_equal(actual_output[[2]], expected_output)

  ## TESTING Third Table
  expected_output <- data.frame(
    uuid = c("abcd", "eeds"),
    `Enumerator ID` = c(1, 2),
    `device ID` = c(1, 2),
    Issue = c("duplicate", "duplicate"),
    `Type of Issue (Select from dropdown list)` = c(NA, NA),
    feedback = c("f1", "f2"),
    check.names = FALSE
  )
  ## uuid is deliberately omitted here to exercise its default value
  actual_output <- create_cleaning_logbook(test_data_raw, test_cleaning_log,
    col_enum = "enumerator_num", filename = out_file
  )

  testthat::expect_equal(actual_output[[3]], expected_output)

  unlink(out_file)
})