diff --git a/NAMESPACE b/NAMESPACE index 2b7d1412..d2f10378 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ importFrom(cli,cli_alert_success) importFrom(cli,cli_div) importFrom(cli,cli_h2) importFrom(cli,cli_text) +importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) @@ -61,6 +62,7 @@ importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) +importFrom(tidyselect,where) importFrom(tm,stemDocument) importFrom(utils,capture.output) importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md index dbbab276..56a921e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ * `xportr_write()` now accepts `metadata` argument which can be used to set the dataset label to stay consistent with the other `xportr_*` functions. It is noteworthy that the dataset label set using the `xportr_df_label()` function will be retained during the `xportr_write()`. * Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. (#179) +* Added a check for character variable lengths up to 200 bytes in `xpt_validate()` (#91, #189). + ## Deprecation and Breaking Changes * The `label` argument from the `xportr_write()` function is deprecated in favor of the `metadata` argument. 
(#179) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 06e1684f..feb31195 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -302,6 +302,18 @@ xpt_validate <- function(data) { glue("{fmt_fmts(names(chk_formats))} must have a valid format.") ) } + + # 4.0 max length of Character variables <= 200 bytes + max_nchar <- data %>% + summarize(across(where(is.character), ~ max(nchar(., type = "bytes")))) + nchar_gt_200 <- max_nchar[which(max_nchar > 200)] + if (length(nchar_gt_200) > 0) { + err_cnd <- c( + err_cnd, + glue("Length of {names(nchar_gt_200)} must be 200 bytes or less.") + ) + } + return(err_cnd) } diff --git a/R/xportr-package.R b/R/xportr-package.R index 701c4a52..197ad5be 100644 --- a/R/xportr-package.R +++ b/R/xportr-package.R @@ -95,11 +95,11 @@ #' @import rlang haven #' @importFrom dplyr left_join bind_cols filter select rename rename_with n #' everything arrange group_by summarize mutate ungroup case_when distinct -#' tribble if_else +#' tribble if_else across #' @importFrom glue glue glue_collapse #' @importFrom cli cli_alert_info cli_h2 cli_alert_success cli_div cli_text #' cli_alert_danger -#' @importFrom tidyselect all_of any_of +#' @importFrom tidyselect all_of any_of where #' @importFrom utils capture.output str tail packageVersion #' @importFrom stringr str_detect str_extract str_replace str_replace_all #' @importFrom readr parse_number diff --git a/tests/testthat/test-utils-xportr.R b/tests/testthat/test-utils-xportr.R index 4167b698..41f6adb8 100644 --- a/tests/testthat/test-utils-xportr.R +++ b/tests/testthat/test-utils-xportr.R @@ -111,3 +111,19 @@ test_that("xpt_validate: Get error message when the label contains non-ASCII, sy "Label 'A=fooçbar' cannot contain any non-ASCII, symbol or special characters." 
) }) + +test_that("xpt_validate: Get error message when the length of a character variable is > 200 bytes ", { + df <- data.frame(A = paste(rep("A", 201), collapse = "")) + expect_equal( + xpt_validate(df), + "Length of A must be 200 bytes or less." + ) +}) + +test_that("xpt_validate: Get error message when the length of a non-ASCII character variable is > 200 bytes", { + df <- data.frame(A = paste(rep("一", 67), collapse = "")) + expect_equal( + xpt_validate(df), + "Length of A must be 200 bytes or less." + ) +})