From 212acdbfa7c93d13e342a25e0766de6638fe8b71 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 3 Jul 2024 07:26:54 -0500 Subject: [PATCH] Use "complete source" instead of top-level expression (#199) Fixes #194 --- NEWS.md | 4 +-- R/evaluate.R | 15 +++++++++ R/output-handler.R | 13 ++++---- R/parse_all.R | 55 ++++++++++++++++++++++---------- man/evaluate.Rd | 16 ++++++++++ man/new_output_handler.Rd | 7 ++-- man/parse_all.Rd | 55 ++++++++++++++++++++++---------- tests/testthat/test-conditions.R | 2 +- 8 files changed, 121 insertions(+), 46 deletions(-) diff --git a/NEWS.md b/NEWS.md index b027674..2a52938 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,8 +3,8 @@ * `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input. * Setting `ACTIONS_STEP_DEBUG=1` (as in a failing GHA workflow) will automatically set `log_echo` and `log_warning` to `TRUE` (#175). * New `local_reproducible_output()` helper that sets various options and env vars to help ensure consistency of output across environments. -* The `source` output handler is now passed the entire top-level expression, not just the first component. -* `evaluate()` will now terminate on the first error in a top-level expression. This matches R's own behaviour more closely. +* The `source` output handler is now passed the entire complete input expression, not just the first component. +* `evaluate()` now terminates on the first error in a multi-expression input, i.e. `1;stop('2');3` will no longer evaluate the third component. This matches console behaviour more closely. * `is.value()` has been removed since it tests for an object that evaluate never creates. * `parse_all()` no longer has a default method, which will generate better errors if you pass in something unexpectected. * The package now depends on R 4.0.0 in order to decrease our maintenance burden. 
diff --git a/R/evaluate.R b/R/evaluate.R index 37a6882..f541e8a 100644 --- a/R/evaluate.R +++ b/R/evaluate.R @@ -50,6 +50,21 @@ #' @param filename string overrriding the [base::srcfile()] filename. #' @param include_timing Deprecated. #' @import graphics grDevices utils +#' @examples +#' evaluate(c( +#' "1 + 1", +#' "2 + 2" +#' )) +#' +#' # Note that there's a difference in output between putting multiple +#' # expressions on one line vs spreading them across multiple lines +#' evaluate("1;2;3") +#' evaluate(c("1", "2", "3")) +#' +#' # This also affects how errors propagate, matching the behaviour +#' # of the R console +#' evaluate("1;stop(2);3") +#' evaluate(c("1", "stop(2)", "3")) evaluate <- function(input, envir = parent.frame(), enclos = NULL, diff --git a/R/output-handler.R b/R/output-handler.R index 3e1fb70..59d099a 100644 --- a/R/output-handler.R +++ b/R/output-handler.R @@ -16,13 +16,14 @@ #' printing, then the `text` or `graphics` handlers may be called. #' #' @param source Function to handle the echoed source code under evaluation. -#' This function should take two arguments (`src` and `tle`), and return -#' an object that will be inserted into the evaluate outputs. `src` is the -#' unparsed text of the source code, and `tle` is the parsed top-level -#' expression. If `src` is unparsable, `tle` will be `expression()`. +#' This function should take two arguments (`src` and `expr`), and return +#' an object that will be inserted into the evaluate outputs. `src` is the +#' unparsed text of the source code, and `expr` is the complete input +#' expression (which may have 0, 1, 2, or more components; see [parse_all()] +#' for details). #' -#' Return `src` for the default evaluate behaviour. Return `NULL` to -#' drop the source from the output. +#' Return `src` for the default evaluate behaviour. Return `NULL` to +#' drop the source from the output. #' @param text Function to handle any textual console output. 
#' @param graphics Function to handle graphics, as returned by #' [recordPlot()]. diff --git a/R/parse_all.R b/R/parse_all.R index b9d85a9..d95b5b4 100644 --- a/R/parse_all.R +++ b/R/parse_all.R @@ -8,22 +8,25 @@ #' @param filename string overriding the file name #' @param allow_error whether to allow syntax errors in `x` #' @return -#' A data frame two columns, `src` and `expr`, and one row for each top-level -#' expression in `x`. +#' A data frame with two columns, `src` and `expr`, and one row for each complete +#' input in `x`. A complete input is R code that would trigger execution when +#' typed at the console. This might consist of multiple expressions separated +#' by `;` or one expression spread over multiple lines (like a function +#' definition). #' #' `src` is a character vector of source code. Each element represents a -#' complete line (or multi-line) expression, i.e. it always has a terminal `\n`. +#' complete input expression (which might span multiple lines) and always has a +#' terminal `\n`. #' -#' `expr`, a list-column of top-level expressions. A top-level expression -#' is a complete expression which would trigger execution if typed at the -#' console. Each element is an [expression()] object, which can be of any -#' length. It will be length: +#' `expr` is a list-column of [expression]s. The expressions can be of any +#' length, depending on the structure of the complete input source: #' -#' * 0 if the top-level expression contains only whitespace and/or comments. -#' * 1 if the top-level expression is a single scalar ( -#' like `TRUE`, `1`, or `"x"`), name, or call -#' * 2 or more if the top-level expression uses `;` to put multiple expressions -#' on one line. +#' * If `src` consists of only whitespace and/or comments, `expr` will +#' be length 0. +#' * If `src` is a single scalar (like `TRUE`, `1`, or `"x"`), name, or +#' function call, `expr` will be length 1. 
+#' * If `src` contains multiple expressions separated by `;`, `expr` will +#' have length two or more. #' #' The expressions have their srcrefs removed. #' @@ -31,15 +34,33 @@ #' frame will have an attribute `PARSE_ERROR` that stores the error object. #' @export #' @examples -#' source <- " -#' # a comment -#' x -#' x;y -#' " +#' # Each of these inputs is a single line, but generates a different number of +#' # expressions +#' source <- c( +#' "# a comment", +#' "x", +#' "x;y", +#' "x;y;z" +#' ) #' parsed <- parse_all(source) #' lengths(parsed$expr) #' str(parsed$expr) #' +#' # Each of these inputs is a single expression, but spans a different number +#' # of lines +#' source <- c( +#' "function() {}", +#' "function() {", +#' " # Hello!", +#' "}", +#' "function() {", +#' " # Hello!", +#' " # Goodbye!", +#' "}" +#' ) +#' parsed <- parse_all(source) +#' lengths(parsed$expr) +#' parsed$src parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_all") #' @export diff --git a/man/evaluate.Rd b/man/evaluate.Rd index 4589298..ebb7d62 100644 --- a/man/evaluate.Rd +++ b/man/evaluate.Rd @@ -83,3 +83,19 @@ output, all correctly interleaved in the order in which they occured. It stores the final result, whether or not it should be visible, and the contents of the current graphics device. } +\examples{ +evaluate(c( + "1 + 1", + "2 + 2" +)) + +# Note that there's a difference in output between putting multiple +# expressions on one line vs spreading them across multiple lines +evaluate("1;2;3") +evaluate(c("1", "2", "3")) + +# This also affects how errors propagate, matching the behaviour +# of the R console +evaluate("1;stop(2);3") +evaluate(c("1", "stop(2)", "3")) +} diff --git a/man/new_output_handler.Rd b/man/new_output_handler.Rd index 0f2c5eb..de20549 100644 --- a/man/new_output_handler.Rd +++ b/man/new_output_handler.Rd @@ -18,10 +18,11 @@ new_output_handler( } \arguments{ \item{source}{Function to handle the echoed source code under evaluation. 
-This function should take two arguments (\code{src} and \code{tle}), and return +This function should take two arguments (\code{src} and \code{expr}), and return an object that will be inserted into the evaluate outputs. \code{src} is the -unparsed text of the source code, and \code{tle} is the parsed top-level -expression. If \code{src} is unparsable, \code{tle} will be \code{expression()}. +unparsed text of the source code, and \code{expr} is the complete input +expression (which may have 0, 1, 2, or more components; see \code{\link[=parse_all]{parse_all()}} +for details). Return \code{src} for the default evaluate behaviour. Return \code{NULL} to drop the source from the output.} diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 55e0c9e..cb904ed 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -15,22 +15,25 @@ If a connection, will be opened and closed only if it was closed initially.} \item{allow_error}{whether to allow syntax errors in \code{x}} } \value{ -A data frame two columns, \code{src} and \code{expr}, and one row for each top-level -expression in \code{x}. +A data frame with two columns, \code{src} and \code{expr}, and one row for each complete +input in \code{x}. A complete input is R code that would trigger execution when +typed at the console. This might consist of multiple expressions separated +by \verb{;} or one expression spread over multiple lines (like a function +definition). \code{src} is a character vector of source code. Each element represents a -complete line (or multi-line) expression, i.e. it always has a terminal \verb{\\n}. +complete input expression (which might span multiple lines) and always has a +terminal \verb{\\n}. -\code{expr}, a list-column of top-level expressions. A top-level expression -is a complete expression which would trigger execution if typed at the -console. Each element is an \code{\link[=expression]{expression()}} object, which can be of any -length. 
It will be length: +\code{expr} is a list-column of \link{expression}s. The expressions can be of any +length, depending on the structure of the complete input source: \itemize{ -\item 0 if the top-level expression contains only whitespace and/or comments. -\item 1 if the top-level expression is a single scalar ( -like \code{TRUE}, \code{1}, or \code{"x"}), name, or call -\item 2 or more if the top-level expression uses \verb{;} to put multiple expressions -on one line. +\item If \code{src} consists of only whitespace and/or comments, \code{expr} will +be length 0. +\item If \code{src} is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or +function call, \code{expr} will be length 1. +\item If \code{src} contains multiple expressions separated by \verb{;}, \code{expr} will +have length two or more. } The expressions have their srcrefs removed. @@ -43,13 +46,31 @@ Works very similarly to parse, but also keeps original formatting and comments. } \examples{ -source <- " - # a comment - x - x;y -" +# Each of these inputs is a single line, but generates a different number of +# expressions +source <- c( + "# a comment", + "x", + "x;y", + "x;y;z" +) parsed <- parse_all(source) lengths(parsed$expr) str(parsed$expr) +# Each of these inputs is a single expression, but spans a different number +# of lines +source <- c( + "function() {}", + "function() {", + " # Hello!", + "}", + "function() {", + " # Hello!", + " # Goodbye!", + "}" +) +parsed <- parse_all(source) +lengths(parsed$expr) +parsed$src } diff --git a/tests/testthat/test-conditions.R b/tests/testthat/test-conditions.R index 9c5805a..f760d83 100644 --- a/tests/testthat/test-conditions.R +++ b/tests/testthat/test-conditions.R @@ -104,7 +104,7 @@ test_that("log_warning causes warnings to be emitted", { # errors ---------------------------------------------------------------------- -test_that("an error terminates evaluation of top-level expression", { +test_that("an error terminates evaluation of 
multi-expression input", { ev <- evaluate("stop('1');2\n3") expect_output_types(ev, c("source", "error", "source", "text")) expect_equal(ev[[1]]$src, "stop('1');2\n")