Skip to content

Commit

Permalink
Check if we can simplify clean_names()
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Oct 24, 2024
1 parent 83be942 commit d8fbd5f
Showing 1 changed file with 13 additions and 89 deletions.
102 changes: 13 additions & 89 deletions R/clean_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,100 +130,24 @@ clean_names.character <- function(x, include_names = FALSE, ...) {
if (length(lag_pattern)) pattern <- pattern[-lag_pattern]
}

if (ignore_asis) {
asis_pattern <- which(pattern %in% c("I", "asis"))
if (length(asis_pattern)) pattern <- pattern[-asis_pattern]
}

# do we have a "log()" pattern here? if yes, get capture region
# which matches the "cleaned" variable name
cleaned <- unlist(lapply(seq_along(x), function(i) {
# check if we have special patterns like 100 * log(xy), and remove it
if (isFALSE(is_emmeans) && grepl("^([0-9]+)", x[i])) {
x[i] <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", x[i])
}
# for brms multimembership, multiple elements might be returned
# need extra handling
multimembership <- NULL
for (j in seq_along(pattern)) {
# check if we find pattern at all
if (grepl(pattern[j], x[i], fixed = TRUE)) {
# remove possible namespace
if (grepl("::", x[i], fixed = TRUE)) {
x[i] <- sub("(.*)::(.*)", "\\2", x[i])
}
if (pattern[j] == "offset") { # nolint
x[i] <- trim_ws(unique(sub("^offset\\(([^-+ )]*).*", "\\1", x[i])))
} else if (pattern[j] == "I") {
if (!ignore_asis && grepl("I\\((.*)\\)", x[i])) {
# x[i] <- trim_ws(unique(sub("I\\(((\\w|\\.)*).*", "\\1", x[i])))
x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
}
} else if (pattern[j] == "asis") {
if (!ignore_asis && grepl("asis\\((.*)\\)", x[i])) {
# x[i] <- trim_ws(unique(sub("asis\\(((\\w|\\.)*).*", "\\1", x[i])))
x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
}
} else if (pattern[j] == "log(log") {
x[i] <- trim_ws(unique(sub("^log\\(log\\(((\\w|\\.)*).*", "\\1", x[i])))
} else if (pattern[j] == "relevel(as.factor") {
x[i] <- trim_ws(unique(sub("^relevel\\(as.factor\\(((\\w|\\.)*).*", "\\1", x[i])))
} else if (pattern[j] == "scale(log") {
x[i] <- trim_ws(unique(sub("^scale\\(log\\(((\\w|\\.)*).*", "\\1", x[i])))
x[i] <- trim_ws(unique(sub("^scale\\(log1p\\(((\\w|\\.)*).*", "\\1", x[i])))
x[i] <- trim_ws(unique(sub("^scale\\(log2\\(((\\w|\\.)*).*", "\\1", x[i])))
x[i] <- trim_ws(unique(sub("^scale\\(log10\\(((\\w|\\.)*).*", "\\1", x[i])))
} else if (pattern[j] == "scale(poly") {
x[i] <- trim_ws(unique(sub("^scale\\(poly\\(((\\w|\\.)*).*", "\\1", x[i])))
} else if (pattern[j] %in% c("mmc", "mm")) {
# # detect mm-pattern
# p <- paste0("^", pattern[j], "\\((.*)\\).*")
# # extract terms from mm() / mmc() functions
# g <- trim_ws(sub(p, "\\1", x[i]))
# # split terms, but not if comma inside parentheses
# g <- trim_ws(unlist(strsplit(g, ",(?![^()]*\\))", perl = TRUE), use.names = FALSE))
# # we might have additional arguments, like scale or weights. handle these here
# g <- g[!startsWith(g, "scale")]
# # clean weights
# gweights <- g[startsWith(g, "weights")]
# if (length(gweights)) {
# g <- g[!startsWith(g, "weights")]
# # this regular pattern finds "weights=" or "weights =", possibly followed
# # by "cbind()", e.g. "weights = cbind(w, w)". We extract the variable names,
# # create a formula, so "all.vars()" will only extract variable names if
# # we really have "cbind()" in the weights argument
# g <- c(g, .safe(all.vars(as.formula(paste0("~", trim_ws(gsub("weights\\s?=(.*)", "\\1", "weights = cbind(w, w)"))))))) # nolint
# }
# multimembership <- as.vector(trim_ws(g))
if (grepl(paste0("^", pattern[j], "\\((.*)\\).*"), x[i])) {
multimembership <- all.vars(stats::as.formula(paste("~", x[i])))
}
} else if (pattern[j] == "s" && startsWith(x[i], "s(")) {
x[i] <- gsub("^s\\(", "", x[i])
x[i] <- gsub("\\)$", "", x[i])
if (grepl("=|[[:digit:]]", x[i])) {
new_x <- trim_ws(unlist(strsplit(x[i], ",", fixed = TRUE), use.names = FALSE))
to_remove <- which(!grepl("\\D", new_x))
to_remove <- c(to_remove, grep("=", new_x, fixed = TRUE))
if (length(to_remove) == 0) {
x[i] <- toString(new_x)
} else {
x[i] <- toString(new_x[-to_remove])
}
}
} else {
# p <- paste0("^", pattern[j], "\\(([^,/)]*).*")
# this one should be more generic...
p <- paste0("^", pattern[j], "\\(((\\w|\\.)*).*")
x[i] <- unique(sub(p, "\\1", x[i]))
}
if (!grepl("(Intercept)", x[i])) {
# check if we have special patterns like 100 * log(xy), and remove it
if (isFALSE(is_emmeans) && grepl("^([0-9]+)", x[i])) {
x[i] <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", x[i])
}
if (any(startsWith(x[i], pattern))) {
x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
}
}
# for coxme-models, remove random-effect things...
if (grepl("|", x[i], fixed = TRUE)) {
x[i] <- sub("^(.*)\\|(.*)", "\\2", x[i])
}
# either return regular term, or mm term for brms
if (is.null(multimembership)) {
trim_ws(x[i])
} else {
multimembership
}
trim_ws(x[i])
}), use.names = FALSE)

# remove for random intercept only models
Expand Down

0 comments on commit d8fbd5f

Please sign in to comment.