Check if we can simplify clean_names()

easystats · Oct 24, 2024 · d8fbd5f · d8fbd5f
1 parent 83be942
commit d8fbd5f
Showing 1 changed file with 13 additions and 89 deletions.
diff --git a/R/clean_names.R b/R/clean_names.R
@@ -130,100 +130,24 @@ clean_names.character <- function(x, include_names = FALSE, ...) {
     if (length(lag_pattern)) pattern <- pattern[-lag_pattern]
   }
 
+  if (ignore_asis) {
+    asis_pattern <- which(pattern %in% c("I", "asis"))
+    if (length(asis_pattern)) pattern <- pattern[-asis_pattern]
+  }
+
   # do we have a "log()" pattern here? if yes, get capture region
   # which matches the "cleaned" variable name
   cleaned <- unlist(lapply(seq_along(x), function(i) {
-    # check if we have special patterns like 100 * log(xy), and remove it
-    if (isFALSE(is_emmeans) && grepl("^([0-9]+)", x[i])) {
-      x[i] <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", x[i])
-    }
-    # for brms multimembership, multiple elements might be returned
-    # need extra handling
-    multimembership <- NULL
-    for (j in seq_along(pattern)) {
-      # check if we find pattern at all
-      if (grepl(pattern[j], x[i], fixed = TRUE)) {
-        # remove possible namespace
-        if (grepl("::", x[i], fixed = TRUE)) {
-          x[i] <- sub("(.*)::(.*)", "\\2", x[i])
-        }
-        if (pattern[j] == "offset") { # nolint
-          x[i] <- trim_ws(unique(sub("^offset\\(([^-+ )]*).*", "\\1", x[i])))
-        } else if (pattern[j] == "I") {
-          if (!ignore_asis && grepl("I\\((.*)\\)", x[i])) {
-            # x[i] <- trim_ws(unique(sub("I\\(((\\w|\\.)*).*", "\\1", x[i])))
-            x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
-          }
-        } else if (pattern[j] == "asis") {
-          if (!ignore_asis && grepl("asis\\((.*)\\)", x[i])) {
-            # x[i] <- trim_ws(unique(sub("asis\\(((\\w|\\.)*).*", "\\1", x[i])))
-            x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
-          }
-        } else if (pattern[j] == "log(log") {
-          x[i] <- trim_ws(unique(sub("^log\\(log\\(((\\w|\\.)*).*", "\\1", x[i])))
-        } else if (pattern[j] == "relevel(as.factor") {
-          x[i] <- trim_ws(unique(sub("^relevel\\(as.factor\\(((\\w|\\.)*).*", "\\1", x[i])))
-        } else if (pattern[j] == "scale(log") {
-          x[i] <- trim_ws(unique(sub("^scale\\(log\\(((\\w|\\.)*).*", "\\1", x[i])))
-          x[i] <- trim_ws(unique(sub("^scale\\(log1p\\(((\\w|\\.)*).*", "\\1", x[i])))
-          x[i] <- trim_ws(unique(sub("^scale\\(log2\\(((\\w|\\.)*).*", "\\1", x[i])))
-          x[i] <- trim_ws(unique(sub("^scale\\(log10\\(((\\w|\\.)*).*", "\\1", x[i])))
-        } else if (pattern[j] == "scale(poly") {
-          x[i] <- trim_ws(unique(sub("^scale\\(poly\\(((\\w|\\.)*).*", "\\1", x[i])))
-        } else if (pattern[j] %in% c("mmc", "mm")) {
-          # # detect mm-pattern
-          # p <- paste0("^", pattern[j], "\\((.*)\\).*")
-          # # extract terms from mm() / mmc() functions
-          # g <- trim_ws(sub(p, "\\1", x[i]))
-          # # split terms, but not if comma inside parentheses
-          # g <- trim_ws(unlist(strsplit(g, ",(?![^()]*\\))", perl = TRUE), use.names = FALSE))
-          # # we might have additional arguments, like scale or weights. handle these here
-          # g <- g[!startsWith(g, "scale")]
-          # # clean weights
-          # gweights <- g[startsWith(g, "weights")]
-          # if (length(gweights)) {
-          #   g <- g[!startsWith(g, "weights")]
-          #   # this regular pattern finds "weights=" or "weights =", possibly followed
-          #   # by "cbind()", e.g. "weights = cbind(w, w)". We extract the variable names,
-          #   # create a formula, so "all.vars()" will only extract variable names if
-          #   # we really have "cbind()" in the weights argument
-          #   g <- c(g, .safe(all.vars(as.formula(paste0("~", trim_ws(gsub("weights\\s?=(.*)", "\\1", "weights = cbind(w, w)"))))))) # nolint
-          # }
-          # multimembership <- as.vector(trim_ws(g))
-          if (grepl(paste0("^", pattern[j], "\\((.*)\\).*"), x[i])) {
-            multimembership <- all.vars(stats::as.formula(paste("~", x[i])))
-          }
-        } else if (pattern[j] == "s" && startsWith(x[i], "s(")) {
-          x[i] <- gsub("^s\\(", "", x[i])
-          x[i] <- gsub("\\)$", "", x[i])
-          if (grepl("=|[[:digit:]]", x[i])) {
-            new_x <- trim_ws(unlist(strsplit(x[i], ",", fixed = TRUE), use.names = FALSE))
-            to_remove <- which(!grepl("\\D", new_x))
-            to_remove <- c(to_remove, grep("=", new_x, fixed = TRUE))
-            if (length(to_remove) == 0) {
-              x[i] <- toString(new_x)
-            } else {
-              x[i] <- toString(new_x[-to_remove])
-            }
-          }
-        } else {
-          # p <- paste0("^", pattern[j], "\\(([^,/)]*).*")
-          # this one should be more generic...
-          p <- paste0("^", pattern[j], "\\(((\\w|\\.)*).*")
-          x[i] <- unique(sub(p, "\\1", x[i]))
-        }
+    if (!grepl("(Intercept)", x[i])) {
+      # check if we have special patterns like 100 * log(xy), and remove it
+      if (isFALSE(is_emmeans) && grepl("^([0-9]+)", x[i])) {
+        x[i] <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", x[i])
+      }
+      if (any(startsWith(x[i], pattern))) {
+        x[i] <- all.vars(stats::as.formula(paste("~", x[i])))
       }
     }
-    # for coxme-models, remove random-effect things...
-    if (grepl("|", x[i], fixed = TRUE)) {
-      x[i] <- sub("^(.*)\\|(.*)", "\\2", x[i])
-    }
-    # either return regular term, or mm term for brms
-    if (is.null(multimembership)) {
-      trim_ws(x[i])
-    } else {
-      multimembership
-    }
+    trim_ws(x[i])
   }), use.names = FALSE)
 
   # remove for random intercept only models