From 3d23d6305006691515056e08fdb261fe8e72a21c Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 2 Jul 2024 08:38:06 -0500 Subject: [PATCH] Add a new line to every line of source code (#188) I don't think this should affect downstream code (since some inputs would have previously added it and some did not) and it makes the code simpler and the output more consistent. --- NEWS.md | 1 + R/parse.R | 35 ++++++++++++++++---------------- man/parse_all.Rd | 28 ++++++++++++++++--------- tests/testthat/test-conditions.R | 2 +- tests/testthat/test-eval.R | 6 +++--- tests/testthat/test-parse.R | 35 ++++++++++++++------------------ 6 files changed, 56 insertions(+), 51 deletions(-) diff --git a/NEWS.md b/NEWS.md index d0113c5..b027674 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # evaluate (development version) +* `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input. * Setting `ACTIONS_STEP_DEBUG=1` (as in a failing GHA workflow) will automatically set `log_echo` and `log_warning` to `TRUE` (#175). * New `local_reproducible_output()` helper that sets various options and env vars to help ensure consistency of output across environments. * The `source` output handler is now passed the entire top-level expression, not just the first component. diff --git a/R/parse.R b/R/parse.R index ee198ee..b9d85a9 100644 --- a/R/parse.R +++ b/R/parse.R @@ -8,17 +8,24 @@ #' @param filename string overriding the file name #' @param allow_error whether to allow syntax errors in `x` #' @return -#' A data frame with columns `src`, a character vector of source code, and -#' `expr`, a list-column of parsed expressions. There will be one row for each -#' top-level expression in `x`. +#' A data frame two columns, `src` and `expr`, and one row for each top-level +#' expression in `x`. #' -#' A top-level expression is a complete expression -#' which would trigger execution if typed at the console. The `expression` -#' object in `expr` can be of any length: it will be 0 if the top-level -#' expression contains only whitespace and/or comments; 1 if the top-level -#' expression is a single scalar (like `TRUE`, `1`, or `"x"`), name, or call; -#' or 2 if the top-level expression uses `;` to put multiple expressions on -#' one line. The expressions have their srcrefs removed. +#' `src` is a character vector of source code. Each element represents a +#' complete line (or multi-line) expression, i.e. it always has a terminal `\n`. +#' +#' `expr`, a list-column of top-level expressions. A top-level expression +#' is a complete expression which would trigger execution if typed at the +#' console. Each element is an [expression()] object, which can be of any +#' length. It will be length: +#' +#' * 0 if the top-level expression contains only whitespace and/or comments. +#' * 1 if the top-level expression is a single scalar ( +#' like `TRUE`, `1`, or `"x"`), name, or call +#' * 2 or more if the top-level expression uses `;` to put multiple expressions +#' on one line. +#' +#' The expressions have their srcrefs removed. #' #' If there are syntax errors in `x` and `allow_error = TRUE`, the data #' frame will have an attribute `PARSE_ERROR` that stores the error object. @@ -38,16 +45,11 @@ parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_ #' @export parse_all.character <- function(x, filename = NULL, allow_error = FALSE) { if (any(grepl("\n", x))) { - # Track whether or not last element has a newline - trailing_nl <- grepl("\n$", x[length(x)]) # Ensure that empty lines are not dropped by strsplit() x[x == ""] <- "\n" # Standardise to a character vector with one line per element; # this is the input that parse() is documented to accept x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE) - } else { - lines <- x - trailing_nl <- FALSE } n <- length(x) @@ -101,8 +103,7 @@ parse_all.character <- function(x, filename = NULL, allow_error = FALSE) { res <- res[order(res$line), c("src", "expr")] # Restore newlines stripped while converting to vector of lines - nl <- c(rep("\n", nrow(res) - 1), if (trailing_nl) "\n" else "") - res$src <- paste0(res$src, nl) + res$src <- paste0(res$src, "\n") res$expr <- lapply(res$expr, removeSource) diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 768ea8b..5e0966f 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -15,17 +15,25 @@ If a connection, will be opened and closed only if it was closed initially.} \item{allow_error}{whether to allow syntax errors in \code{x}} } \value{ -A data frame with columns \code{src}, a character vector of source code, and -\code{expr}, a list-column of parsed expressions. There will be one row for each -top-level expression in \code{x}. +A data frame two columns, \code{src} and \code{expr}, and one row for each top-level +expression in \code{x}. -A top-level expression is a complete expression -which would trigger execution if typed at the console. The \code{expression} -object in \code{expr} can be of any length: it will be 0 if the top-level -expression contains only whitespace and/or comments; 1 if the top-level -expression is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or call; -or 2 if the top-level expression uses \verb{;} to put multiple expressions on -one line. The expressions have their srcrefs removed. +\code{src} is a character vector of source code. Each element represents a +complete line (or multi-line) expression, i.e. it always has a terminal \verb{\\n}. + +\code{expr}, a list-column of top-level expressions. A top-level expression +is a complete expression which would trigger execution if typed at the +console. Each element is an \code{\link[=expression]{expression()}} object, which can be of any +length. It will be length: +\itemize{ +\item 0 if the top-level expression contains only whitespace and/or comments. +\item 1 if the top-level expression is a single scalar ( +like \code{TRUE}, \code{1}, or \code{"x"}), name, or call +\item 2 or more if the top-level expression uses \verb{;} to put multiple expressions +on one line. +} + +The expressions have their srcrefs removed. If there are syntax errors in \code{x} and \code{allow_error = TRUE}, the data frame will have an attribute \code{PARSE_ERROR} that stores the error object. diff --git a/tests/testthat/test-conditions.R b/tests/testthat/test-conditions.R index f450345..9c5805a 100644 --- a/tests/testthat/test-conditions.R +++ b/tests/testthat/test-conditions.R @@ -98,7 +98,7 @@ test_that("log_warning causes warnings to be emitted", { # And still recorded in eval result expect_output_types(ev, c("source", "warning")) - expect_equal(ev[[1]]$src, "f()") + expect_equal(ev[[1]]$src, "f()\n") expect_equal(ev[[2]], simpleWarning("Hi!", quote(f()))) }) diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R index 7d43190..8a559e9 100644 --- a/tests/testthat/test-eval.R +++ b/tests/testthat/test-eval.R @@ -24,11 +24,11 @@ test_that("log_echo causes output to be immediately written to stderr()", { res <- evaluate("f()", log_echo = TRUE), type = "message" ) - expect_equal(out, "f()") + expect_equal(out, c("f()", "")) # But still recorded in eval result expect_output_types(res, c("source", "text")) - expect_equal(res[[1]]$src, "f()") + expect_equal(res[[1]]$src, "f()\n") }) test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", { @@ -40,7 +40,7 @@ test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", { withr::local_envvar(ACTIONS_STEP_DEBUG = "true") capture.output(expect_warning(evaluate("f()"), "abc"), type = "message") }) - expect_equal(out, "f()") + expect_equal(out, c("f()", "")) }) test_that("data sets loaded", { diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R index b8afc86..ede288d 100644 --- a/tests/testthat/test-parse.R +++ b/tests/testthat/test-parse.R @@ -1,21 +1,16 @@ test_that("can parse even if no expressions", { - expect_equal(parse_all("")$src, "") - expect_equal(parse_all("#")$src, "#") + expect_equal(parse_all("")$src, "\n") + expect_equal(parse_all("#")$src, "#\n") expect_equal(parse_all("#\n\n")$src, c("#\n", "\n")) }) -test_that("preserves trailing nl", { - expect_equal(parse_all("x")$src, "x") - expect_equal(parse_all("x\n")$src, "x\n") - - expect_equal(parse_all("")$src, "") +test_that("every line gets nl", { + expect_equal(parse_all("x")$src, "x\n") + expect_equal(parse_all("")$src, "\n") expect_equal(parse_all("\n")$src, "\n") - expect_equal(parse_all("{\n1\n}")$src, "{\n1\n}") - expect_equal(parse_all("{\n1\n}\n")$src, "{\n1\n}\n") - # even empty lines - expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b")) + expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b\n")) expect_equal(parse_all("a\n\nb\n")$src, c("a\n", "\n", "b\n")) expect_equal(parse_all("\n\n")$src, c("\n", "\n")) @@ -31,7 +26,7 @@ test_that("empty lines are never silently dropped", { # # 1 # ``` - expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1")) + expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1\n")) }) test_that("a character vector is equivalent to a multi-line string", { @@ -48,8 +43,8 @@ test_that("recombines multi-expression TLEs", { }) test_that("re-integrates lines without expressions", { - expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2")) - expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2")) + expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2\n")) + expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2\n")) }) test_that("expr is always an expression", { @@ -74,7 +69,7 @@ test_that("double quotes in Chinese characters not destroyed", { skip_if_not(l10n_info()[['UTF-8']]) out <- parse_all(c('1+1', '"你好"')) - expect_equal(out$src[[2]], '"你好"') + expect_equal(out$src[[2]], '"你好"\n') expect_equal(out$expr[[2]], expression("你好")) }) @@ -83,14 +78,14 @@ test_that("multibyte characters are parsed correctly", { code <- c("ϱ <- 1# g / ml", "äöüßÄÖÜπ <- 7 + 3# nonsense") out <- parse_all(code) - expect_equal(out$src, paste0(code, c("\n", ""))) + expect_equal(out$src, paste0(code, "\n")) }) # input types ------------------------------------------------------------------ test_that("can parse a call", { out <- parse_all(quote(f(a, b, c))) - expect_equal(out$src, "f(a, b, c)") + expect_equal(out$src, "f(a, b, c)\n") expect_equal(out$expr, list(expression(f(a, b, c)))) }) @@ -101,7 +96,7 @@ test_that("can parse a connection", { con <- file(path) out <- parse_all(con) - expect_equal(out$src, c("# 1\n", "1 + 1")) + expect_equal(out$src, c("# 1\n", "1 + 1\n")) expect_equal(out$expr, list(expression(), expression(1 + 1))) # Doesn't leave any connections around @@ -113,7 +108,7 @@ test_that("can parse a function", { # Hi 1 + 1 }) - expect_equal(out$src, c("# Hi\n", "1 + 1")) + expect_equal(out$src, c("# Hi\n", "1 + 1\n")) expect_equal(out$expr, list(expression(), expression(1 + 1))) }) @@ -124,7 +119,7 @@ test_that("parsing a function parses its body", { # Hi 1 + 1 }) - expect_equal(out$src, c("# Hi\n", "1 + 1")) + expect_equal(out$src, c("# Hi\n", "1 + 1\n")) }) test_that("dedents function body", {