From 212acdbfa7c93d13e342a25e0766de6638fe8b71 Mon Sep 17 00:00:00 2001
From: Hadley Wickham <h.wickham@gmail.com>
Date: Wed, 3 Jul 2024 07:26:54 -0500
Subject: [PATCH] Use "complete source" instead of top-level expression (#199)

Fixes #194
---
 NEWS.md                          |  4 +--
 R/evaluate.R                     | 15 +++++++++
 R/output-handler.R               | 13 ++++----
 R/parse_all.R                    | 55 ++++++++++++++++++++++----------
 man/evaluate.Rd                  | 16 ++++++++++
 man/new_output_handler.Rd        |  7 ++--
 man/parse_all.Rd                 | 55 ++++++++++++++++++++++----------
 tests/testthat/test-conditions.R |  2 +-
 8 files changed, 121 insertions(+), 46 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b027674..2a52938 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,8 +3,8 @@
 * `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input.
 * Setting `ACTIONS_STEP_DEBUG=1` (as in a failing GHA workflow) will automatically set `log_echo` and `log_warning` to `TRUE` (#175).
 * New `local_reproducible_output()` helper that sets various options and env vars to help ensure consistency of output across environments.
-* The `source` output handler is now passed the entire top-level expression, not just the first component.
-* `evaluate()` will now terminate on the first error in a top-level expression. This matches R's own behaviour more closely.
+* The `source` output handler is now passed the entire complete input expression, not just the first component.
+* `evaluate()` now terminates on the first error in a multi-expression input, i.e. `1;stop('2');3` will no longer evaluate the third component. This matches console behaviour more closely.
 * `is.value()` has been removed since it tests for an object that evaluate never creates.
 * `parse_all()` no longer has a default method, which will generate better errors if you pass in something unexpectected.
 * The package now depends on R 4.0.0 in order to decrease our maintenance burden.
diff --git a/R/evaluate.R b/R/evaluate.R
index 37a6882..f541e8a 100644
--- a/R/evaluate.R
+++ b/R/evaluate.R
@@ -50,6 +50,21 @@
 #' @param filename string overrriding the [base::srcfile()] filename.
 #' @param include_timing Deprecated. 
 #' @import graphics grDevices utils
+#' @examples
+#' evaluate(c(
+#'   "1 + 1", 
+#'   "2 + 2"
+#' ))
+#' 
+#' # Note that there's a difference in output between putting multiple
+#' # expressions on one line vs spreading them across multiple lines
+#' evaluate("1;2;3")
+#' evaluate(c("1", "2", "3"))
+#' 
+#' # This also affects how errors propagate, matching the behaviour
+#' # of the R console
+#' evaluate("1;stop(2);3")
+#' evaluate(c("1", "stop(2)", "3"))
 evaluate <- function(input,
                      envir = parent.frame(),
                      enclos = NULL,
diff --git a/R/output-handler.R b/R/output-handler.R
index 3e1fb70..59d099a 100644
--- a/R/output-handler.R
+++ b/R/output-handler.R
@@ -16,13 +16,14 @@
 #' printing, then the `text` or `graphics` handlers may be called.
 #'
 #' @param source Function to handle the echoed source code under evaluation.
-#'  This function should take two arguments (`src` and `tle`), and return
-#'  an object that will be inserted into the evaluate outputs. `src` is the
-#'  unparsed text of the source code, and `tle` is the parsed top-level 
-#'  expression. If `src` is unparsable, `tle` will be `expression()`.
+#'   This function should take two arguments (`src` and `expr`), and return
+#'   an object that will be inserted into the evaluate outputs. `src` is the
+#'   unparsed text of the source code, and `expr` is the complete input 
+#'   expression (which may have 0, 1, 2, or more components; see [parse_all()]
+#'   for details).
 #' 
-#'  Return `src` for the default evaluate behaviour. Return `NULL` to 
-#'  drop the source from the output.
+#'   Return `src` for the default evaluate behaviour. Return `NULL` to 
+#'   drop the source from the output.
 #' @param text Function to handle any textual console output.
 #' @param graphics Function to handle graphics, as returned by
 #'   [recordPlot()].
diff --git a/R/parse_all.R b/R/parse_all.R
index b9d85a9..d95b5b4 100644
--- a/R/parse_all.R
+++ b/R/parse_all.R
@@ -8,22 +8,25 @@
 #' @param filename string overriding the file name
 #' @param allow_error whether to allow syntax errors in `x`
 #' @return 
-#' A data frame two columns, `src` and `expr`, and one row for each top-level 
-#' expression in `x`.
+#' A data frame with two columns, `src` and `expr`, and one row for each complete
+#' input in `x`. A complete input is R code that would trigger execution when 
+#' typed at the console. This might consist of multiple expressions separated 
+#' by `;` or one expression spread over multiple lines (like a function 
+#' definition).
 #' 
 #' `src` is a character vector of source code. Each element represents a 
-#' complete line (or multi-line) expression, i.e. it always has a terminal `\n`.
+#' complete input expression (which might span multiple lines) and always has a
+#' terminal `\n`.
 #' 
-#' `expr`, a list-column of top-level expressions. A top-level expression 
-#' is a complete expression which would trigger execution if typed at the 
-#' console. Each element is an [expression()] object, which can be of any
-#' length. It will be length:
+#' `expr` is a list-column of [expression]s. The expressions can be of any 
+#' length, depending on the structure of the complete input source:
 #' 
-#' * 0 if the top-level expression contains only whitespace and/or comments.
-#' * 1 if the top-level expression is a single scalar (
-#'   like `TRUE`, `1`, or `"x"`), name, or call
-#' * 2 or more if the top-level expression uses `;` to put multiple expressions
-#'   on one line.
+#' * If `src` consists of only whitespace and/or comments, `expr` will
+#'   be length 0.
+#' * If `src` is a single scalar (like `TRUE`, `1`, or `"x"`), name, or
+#'   function call, `expr` will be length 1.
+#' * If `src` contains multiple expressions separated by `;`, `expr` will 
+#'   have length two or more.
 #' 
 #' The expressions have their srcrefs removed.
 #' 
@@ -31,15 +34,33 @@
 #' frame will have an attribute `PARSE_ERROR` that stores the error object.
 #' @export
 #' @examples
-#' source <- "
-#'   # a comment
-#'   x
-#'   x;y
-#' "
+#' # These inputs are each a single line, but generate different numbers of
+#' # expressions
+#' source <- c(
+#'   "# a comment",
+#'   "x",
+#'   "x;y",
+#'   "x;y;z"
+#' )
 #' parsed <- parse_all(source)
 #' lengths(parsed$expr)
 #' str(parsed$expr)
 #' 
+#' # These inputs are each a single expression, but span different numbers
+#' # of lines
+#' source <- c(
+#'   "function() {}",
+#'   "function() {",
+#'   "  # Hello!",
+#'   "}",
+#'   "function() {",
+#'   "  # Hello!",
+#'   "  # Goodbye!",
+#'   "}"
+#' )
+#' parsed <- parse_all(source)
+#' lengths(parsed$expr)
+#' parsed$src
 parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_all")
 
 #' @export
diff --git a/man/evaluate.Rd b/man/evaluate.Rd
index 4589298..ebb7d62 100644
--- a/man/evaluate.Rd
+++ b/man/evaluate.Rd
@@ -83,3 +83,19 @@ output, all correctly interleaved in the order in which they occured. It
 stores the final result, whether or not it should be visible, and the
 contents of the current graphics device.
 }
+\examples{
+evaluate(c(
+  "1 + 1", 
+  "2 + 2"
+))
+
+# Note that there's a difference in output between putting multiple
+# expressions on one line vs spreading them across multiple lines
+evaluate("1;2;3")
+evaluate(c("1", "2", "3"))
+
+# This also affects how errors propagate, matching the behaviour
+# of the R console
+evaluate("1;stop(2);3")
+evaluate(c("1", "stop(2)", "3"))
+}
diff --git a/man/new_output_handler.Rd b/man/new_output_handler.Rd
index 0f2c5eb..de20549 100644
--- a/man/new_output_handler.Rd
+++ b/man/new_output_handler.Rd
@@ -18,10 +18,11 @@ new_output_handler(
 }
 \arguments{
 \item{source}{Function to handle the echoed source code under evaluation.
-This function should take two arguments (\code{src} and \code{tle}), and return
+This function should take two arguments (\code{src} and \code{expr}), and return
 an object that will be inserted into the evaluate outputs. \code{src} is the
-unparsed text of the source code, and \code{tle} is the parsed top-level
-expression. If \code{src} is unparsable, \code{tle} will be \code{expression()}.
+unparsed text of the source code, and \code{expr} is the complete input
+expression (which may have 0, 1, 2, or more components; see \code{\link[=parse_all]{parse_all()}}
+for details).
 
 Return \code{src} for the default evaluate behaviour. Return \code{NULL} to
 drop the source from the output.}
diff --git a/man/parse_all.Rd b/man/parse_all.Rd
index 55e0c9e..cb904ed 100644
--- a/man/parse_all.Rd
+++ b/man/parse_all.Rd
@@ -15,22 +15,25 @@ If a connection, will be opened and closed only if it was closed initially.}
 \item{allow_error}{whether to allow syntax errors in \code{x}}
 }
 \value{
-A data frame two columns, \code{src} and \code{expr}, and one row for each top-level
-expression in \code{x}.
+A data frame with two columns, \code{src} and \code{expr}, and one row for each complete
+input in \code{x}. A complete input is R code that would trigger execution when
+typed at the console. This might consist of multiple expressions separated
+by \verb{;} or one expression spread over multiple lines (like a function
+definition).
 
 \code{src} is a character vector of source code. Each element represents a
-complete line (or multi-line) expression, i.e. it always has a terminal \verb{\\n}.
+complete input expression (which might span multiple lines) and always has a
+terminal \verb{\\n}.
 
-\code{expr}, a list-column of top-level expressions. A top-level expression
-is a complete expression which would trigger execution if typed at the
-console. Each element is an \code{\link[=expression]{expression()}} object, which can be of any
-length. It will be length:
+\code{expr} is a list-column of \link{expression}s. The expressions can be of any
+length, depending on the structure of the complete input source:
 \itemize{
-\item 0 if the top-level expression contains only whitespace and/or comments.
-\item 1 if the top-level expression is a single scalar (
-like \code{TRUE}, \code{1}, or \code{"x"}), name, or call
-\item 2 or more if the top-level expression uses \verb{;} to put multiple expressions
-on one line.
+\item If \code{src} consists of only whitespace and/or comments, \code{expr} will
+be length 0.
+\item If \code{src} is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or
+function call, \code{expr} will be length 1.
+\item If \code{src} contains multiple expressions separated by \verb{;}, \code{expr} will
+have length two or more.
 }
 
 The expressions have their srcrefs removed.
@@ -43,13 +46,31 @@ Works very similarly to parse, but also keeps original formatting and
 comments.
 }
 \examples{
-source <- "
-  # a comment
-  x
-  x;y
-"
+# These inputs are each a single line, but generate different numbers of
+# expressions
+source <- c(
+  "# a comment",
+  "x",
+  "x;y",
+  "x;y;z"
+)
 parsed <- parse_all(source)
 lengths(parsed$expr)
 str(parsed$expr)
 
+# These inputs are each a single expression, but span different numbers
+# of lines
+source <- c(
+  "function() {}",
+  "function() {",
+  "  # Hello!",
+  "}",
+  "function() {",
+  "  # Hello!",
+  "  # Goodbye!",
+  "}"
+)
+parsed <- parse_all(source)
+lengths(parsed$expr)
+parsed$src
 }
diff --git a/tests/testthat/test-conditions.R b/tests/testthat/test-conditions.R
index 9c5805a..f760d83 100644
--- a/tests/testthat/test-conditions.R
+++ b/tests/testthat/test-conditions.R
@@ -104,7 +104,7 @@ test_that("log_warning causes warnings to be emitted", {
 
 # errors ----------------------------------------------------------------------
 
-test_that("an error terminates evaluation of top-level expression", {
+test_that("an error terminates evaluation of multi-expression input", {
   ev <- evaluate("stop('1');2\n3")
   expect_output_types(ev, c("source", "error", "source", "text"))
   expect_equal(ev[[1]]$src, "stop('1');2\n")