From 3d23d6305006691515056e08fdb261fe8e72a21c Mon Sep 17 00:00:00 2001
From: Hadley Wickham <h.wickham@gmail.com>
Date: Tue, 2 Jul 2024 08:38:06 -0500
Subject: [PATCH] Add a new line to every line of source code (#188)

I don't think this should affect downstream code (since some inputs would have previously added it and some did not) and it makes the code simpler and the output more consistent.
---
 NEWS.md                          |  1 +
 R/parse.R                        | 35 ++++++++++++++++----------------
 man/parse_all.Rd                 | 28 ++++++++++++++++---------
 tests/testthat/test-conditions.R |  2 +-
 tests/testthat/test-eval.R       |  6 +++---
 tests/testthat/test-parse.R      | 35 ++++++++++++++------------------
 6 files changed, 56 insertions(+), 51 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index d0113c5..b027674 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,6 @@
 # evaluate (development version)
 
+* `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input.
 * Setting `ACTIONS_STEP_DEBUG=1` (as in a failing GHA workflow) will automatically set `log_echo` and `log_warning` to `TRUE` (#175).
 * New `local_reproducible_output()` helper that sets various options and env vars to help ensure consistency of output across environments.
 * The `source` output handler is now passed the entire top-level expression, not just the first component.
diff --git a/R/parse.R b/R/parse.R
index ee198ee..b9d85a9 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -8,17 +8,24 @@
 #' @param filename string overriding the file name
 #' @param allow_error whether to allow syntax errors in `x`
 #' @return 
-#' A data frame with columns `src`, a character vector of source code, and 
-#' `expr`, a list-column of parsed expressions. There will be one row for each 
-#' top-level expression in `x`. 
+#' A data frame two columns, `src` and `expr`, and one row for each top-level 
+#' expression in `x`.
 #' 
-#' A top-level expression is a complete expression 
-#' which would trigger execution if typed at the console. The `expression`
-#' object in `expr` can be of any length: it will be 0 if the top-level 
-#' expression contains only whitespace and/or comments; 1 if the top-level 
-#' expression is a single scalar (like `TRUE`, `1`, or `"x"`), name, or call; 
-#' or 2 if the top-level expression uses `;` to put multiple expressions on 
-#' one line. The expressions have their srcrefs removed.
+#' `src` is a character vector of source code. Each element represents a 
+#' complete line (or multi-line) expression, i.e. it always has a terminal `\n`.
+#' 
+#' `expr`, a list-column of top-level expressions. A top-level expression 
+#' is a complete expression which would trigger execution if typed at the 
+#' console. Each element is an [expression()] object, which can be of any
+#' length. It will be length:
+#' 
+#' * 0 if the top-level expression contains only whitespace and/or comments.
+#' * 1 if the top-level expression is a single scalar (
+#'   like `TRUE`, `1`, or `"x"`), name, or call
+#' * 2 or more if the top-level expression uses `;` to put multiple expressions
+#'   on one line.
+#' 
+#' The expressions have their srcrefs removed.
 #' 
 #' If there are syntax errors in `x` and `allow_error = TRUE`, the data 
 #' frame will have an attribute `PARSE_ERROR` that stores the error object.
@@ -38,16 +45,11 @@ parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_
 #' @export
 parse_all.character <- function(x, filename = NULL, allow_error = FALSE) {
   if (any(grepl("\n", x))) {
-    # Track whether or not last element has a newline
-    trailing_nl <- grepl("\n$", x[length(x)])
     # Ensure that empty lines are not dropped by strsplit()
     x[x == ""] <- "\n"
     # Standardise to a character vector with one line per element;
     # this is the input that parse() is documented to accept
     x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE)
-  } else {
-    lines <- x
-    trailing_nl <- FALSE
   }
   n <- length(x)
 
@@ -101,8 +103,7 @@ parse_all.character <- function(x, filename = NULL, allow_error = FALSE) {
   res <- res[order(res$line), c("src", "expr")]
   
   # Restore newlines stripped while converting to vector of lines
-  nl <- c(rep("\n", nrow(res) - 1), if (trailing_nl) "\n" else "")
-  res$src <- paste0(res$src, nl)
+  res$src <- paste0(res$src, "\n")
   
   res$expr <- lapply(res$expr, removeSource)
 
diff --git a/man/parse_all.Rd b/man/parse_all.Rd
index 768ea8b..5e0966f 100644
--- a/man/parse_all.Rd
+++ b/man/parse_all.Rd
@@ -15,17 +15,25 @@ If a connection, will be opened and closed only if it was closed initially.}
 \item{allow_error}{whether to allow syntax errors in \code{x}}
 }
 \value{
-A data frame with columns \code{src}, a character vector of source code, and
-\code{expr}, a list-column of parsed expressions. There will be one row for each
-top-level expression in \code{x}.
+A data frame two columns, \code{src} and \code{expr}, and one row for each top-level
+expression in \code{x}.
 
-A top-level expression is a complete expression
-which would trigger execution if typed at the console. The \code{expression}
-object in \code{expr} can be of any length: it will be 0 if the top-level
-expression contains only whitespace and/or comments; 1 if the top-level
-expression is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or call;
-or 2 if the top-level expression uses \verb{;} to put multiple expressions on
-one line. The expressions have their srcrefs removed.
+\code{src} is a character vector of source code. Each element represents a
+complete line (or multi-line) expression, i.e. it always has a terminal \verb{\\n}.
+
+\code{expr}, a list-column of top-level expressions. A top-level expression
+is a complete expression which would trigger execution if typed at the
+console. Each element is an \code{\link[=expression]{expression()}} object, which can be of any
+length. It will be length:
+\itemize{
+\item 0 if the top-level expression contains only whitespace and/or comments.
+\item 1 if the top-level expression is a single scalar (
+like \code{TRUE}, \code{1}, or \code{"x"}), name, or call
+\item 2 or more if the top-level expression uses \verb{;} to put multiple expressions
+on one line.
+}
+
+The expressions have their srcrefs removed.
 
 If there are syntax errors in \code{x} and \code{allow_error = TRUE}, the data
 frame will have an attribute \code{PARSE_ERROR} that stores the error object.
diff --git a/tests/testthat/test-conditions.R b/tests/testthat/test-conditions.R
index f450345..9c5805a 100644
--- a/tests/testthat/test-conditions.R
+++ b/tests/testthat/test-conditions.R
@@ -98,7 +98,7 @@ test_that("log_warning causes warnings to be emitted", {
 
   # And still recorded in eval result
   expect_output_types(ev, c("source", "warning"))
-  expect_equal(ev[[1]]$src, "f()")
+  expect_equal(ev[[1]]$src, "f()\n")
   expect_equal(ev[[2]], simpleWarning("Hi!", quote(f())))
 })
 
diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R
index 7d43190..8a559e9 100644
--- a/tests/testthat/test-eval.R
+++ b/tests/testthat/test-eval.R
@@ -24,11 +24,11 @@ test_that("log_echo causes output to be immediately written to stderr()", {
     res <- evaluate("f()", log_echo = TRUE),
     type = "message"
   )
-  expect_equal(out, "f()")
+  expect_equal(out, c("f()", ""))
 
   # But still recorded in eval result
   expect_output_types(res, c("source", "text"))
-  expect_equal(res[[1]]$src, "f()")
+  expect_equal(res[[1]]$src, "f()\n")
 })
 
 test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", {
@@ -40,7 +40,7 @@ test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", {
     withr::local_envvar(ACTIONS_STEP_DEBUG = "true")
     capture.output(expect_warning(evaluate("f()"), "abc"), type = "message")
   })
-  expect_equal(out, "f()")
+  expect_equal(out, c("f()", ""))
 })
 
 test_that("data sets loaded", {
diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R
index b8afc86..ede288d 100644
--- a/tests/testthat/test-parse.R
+++ b/tests/testthat/test-parse.R
@@ -1,21 +1,16 @@
 test_that("can parse even if no expressions", {
-  expect_equal(parse_all("")$src, "")
-  expect_equal(parse_all("#")$src, "#")
+  expect_equal(parse_all("")$src, "\n")
+  expect_equal(parse_all("#")$src, "#\n")
   expect_equal(parse_all("#\n\n")$src, c("#\n", "\n"))
 })
 
-test_that("preserves trailing nl", {
-  expect_equal(parse_all("x")$src, "x")
-  expect_equal(parse_all("x\n")$src, "x\n")
-
-  expect_equal(parse_all("")$src, "")
+test_that("every line gets nl", {
+  expect_equal(parse_all("x")$src, "x\n")
+  expect_equal(parse_all("")$src, "\n")
   expect_equal(parse_all("\n")$src, "\n")
 
-  expect_equal(parse_all("{\n1\n}")$src, "{\n1\n}")
-  expect_equal(parse_all("{\n1\n}\n")$src, "{\n1\n}\n")
-
   # even empty lines
-  expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b"))
+  expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b\n"))
   expect_equal(parse_all("a\n\nb\n")$src, c("a\n", "\n", "b\n"))
   
   expect_equal(parse_all("\n\n")$src, c("\n", "\n"))
@@ -31,7 +26,7 @@ test_that("empty lines are never silently dropped", {
   # 
   # 1
   # ```
-  expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1"))
+  expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1\n"))
 })
 
 test_that("a character vector is equivalent to a multi-line string", {
@@ -48,8 +43,8 @@ test_that("recombines multi-expression TLEs", {
 })
 
 test_that("re-integrates lines without expressions", {
-  expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2"))
-  expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2"))
+  expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2\n"))
+  expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2\n"))
 })
 
 test_that("expr is always an expression", {
@@ -74,7 +69,7 @@ test_that("double quotes in Chinese characters not destroyed", {
   skip_if_not(l10n_info()[['UTF-8']])
 
   out <- parse_all(c('1+1', '"你好"'))
-  expect_equal(out$src[[2]], '"你好"')
+  expect_equal(out$src[[2]], '"你好"\n')
   expect_equal(out$expr[[2]], expression("你好"))
 })
 
@@ -83,14 +78,14 @@ test_that("multibyte characters are parsed correctly", {
   
   code <- c("ϱ <- 1# g / ml", "äöüßÄÖÜπ <- 7 + 3# nonsense")
   out <- parse_all(code)
-  expect_equal(out$src, paste0(code, c("\n", "")))
+  expect_equal(out$src, paste0(code, "\n"))
 })
 
 # input types ------------------------------------------------------------------
 
 test_that("can parse a call", {
   out <- parse_all(quote(f(a, b, c)))
-  expect_equal(out$src, "f(a, b, c)")
+  expect_equal(out$src, "f(a, b, c)\n")
   expect_equal(out$expr, list(expression(f(a, b, c))))
 })
 
@@ -101,7 +96,7 @@ test_that("can parse a connection", {
   con <- file(path)
   out <- parse_all(con)
 
-  expect_equal(out$src, c("# 1\n", "1 + 1"))
+  expect_equal(out$src, c("# 1\n", "1 + 1\n"))
   expect_equal(out$expr, list(expression(), expression(1 + 1)))
 
   # Doesn't leave any connections around
@@ -113,7 +108,7 @@ test_that("can parse a function", {
     # Hi
     1 + 1
   })
-  expect_equal(out$src, c("# Hi\n", "1 + 1"))
+  expect_equal(out$src, c("# Hi\n", "1 + 1\n"))
   expect_equal(out$expr, list(expression(), expression(1 + 1)))
 })
 
@@ -124,7 +119,7 @@ test_that("parsing a function parses its body", {
     # Hi
     1 + 1
   })
-  expect_equal(out$src, c("# Hi\n", "1 + 1"))
+  expect_equal(out$src, c("# Hi\n", "1 + 1\n"))
 })
 
 test_that("dedents function body", {