From 4df7e52cf5c6412a3af6eb3fe925a3c8d01dc88f Mon Sep 17 00:00:00 2001 From: Duncan Murdoch Date: Thu, 15 Jun 2023 10:15:01 -0400 Subject: [PATCH 1/3] Fix URL handling in write_bib(): - add `packageURL` argument. If TRUE (the default), use the DESCRIPTION file URL in preference to what `citation()` would use. - if multiple URLs are present, drop all but the first --- NEWS.md | 5 +++++ R/citation.R | 35 +++++++++++++++++++++++++++++------ man/write_bib.Rd | 16 +++++++++++++++- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index acd2ececed..6e08d26ce8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # CHANGES IN knitr VERSION 1.44 +## MINOR CHANGES + +- `write_bib()` has a new argument `packageURL` to control whether +to use a URL from the `DESCRIPTION` file or the one generated by +`utils::citation()`. # CHANGES IN knitr VERSION 1.43 diff --git a/R/citation.R b/R/citation.R index e59d2e93e4..0a5402ee5b 100644 --- a/R/citation.R +++ b/R/citation.R @@ -13,6 +13,16 @@ #' information because it often changes (e.g. author, year, package version, #' ...). #' +#' There are at least two different uses for the URL in a reference list. You +#' might want to tell users where to go for more information; in that case, use +#' the default \code{packageURL = TRUE}, and the first URL listed in the +#' \file{DESCRIPTION} file will be used. Be careful: some authors don't put the +#' most relevant URL first. Alternatively, you might want to identify exactly +#' which version of the package was used in the document. If it was installed +#' from CRAN or some other repositories, the version number identifies it, and +#' \code{packageURL = FALSE} will use the repository URL (as used by +#' \code{utils::\link{citation}()}). +#' #' @param x Package names. Packages which are not installed are ignored. #' @param file The (\file{.bib}) file to write. By default, or if \code{NULL}, #' output is written to the R console. @@ -24,6 +34,8 @@ #' \samp{R-} unless \code{\link{option}('knitr.bib.prefix')} has been set to #' another string. #' @param lib.loc A vector of path names of R libraries. +#' @param packageURL Use the \code{URL} field from the \file{DESCRIPTION} file. +#' See Details below. #' @return A list containing the citations. Citations are also written to the #' \code{file} as a side effect. #' @note Some packages on CRAN do not have standard bib entries, which was once @@ -59,7 +71,8 @@ #' str(knitr:::.tweak.bib) write_bib = function( x = .packages(), file = '', tweak = TRUE, width = NULL, - prefix = getOption('knitr.bib.prefix', 'R-'), lib.loc = NULL + prefix = getOption('knitr.bib.prefix', 'R-'), lib.loc = NULL, + packageURL = TRUE ) { system.file = function(...) base::system.file(..., lib.loc = lib.loc) citation = function(...) utils::citation(..., lib.loc = lib.loc) @@ -75,11 +88,21 @@ write_bib = function( bib = sapply(x, function(pkg) { cite = citation(pkg, auto = if (pkg != 'base') { meta = packageDescription(pkg, lib.loc = lib.loc) - # don't use the CRAN URL if the package has provided its own URL - if (identical(meta$Repository, 'CRAN') && !is.null(meta$URL)) { - # however, the package may have provided multiple URLs, in which case we - # still use the CRAN URL - if (!grepl('[, ]', meta$URL)) meta$Repository = NULL + # don't use the citation() URL if the package has provided its own URL + if (!is.null(meta$URL)) { + if (packageURL) + meta$Repository = NULL + + # the package may have provided multiple URLs, in which case we + # use the first. We also work around a bug in citation() up to + # R 4.3.1. The grepl pattern here is problematic, but it's what + # citation() was using. + + if (getRversion() < '4.3.2' && grepl('[, ]', meta$URL)) + meta$URL = sub('[, ].*', '', meta$URL) + + # Always remove URLs after the first one + meta$URL = sub(',? .*', '', meta$URL) } meta }) diff --git a/man/write_bib.Rd b/man/write_bib.Rd index a31417600f..659a7c729e 100644 --- a/man/write_bib.Rd +++ b/man/write_bib.Rd @@ -10,7 +10,8 @@ write_bib( tweak = TRUE, width = NULL, prefix = getOption("knitr.bib.prefix", "R-"), - lib.loc = NULL + lib.loc = NULL, + packageURL = TRUE ) } \arguments{ @@ -30,6 +31,9 @@ will not be wrapped.} another string.} \item{lib.loc}{A vector of path names of R libraries.} + +\item{packageURL}{Use the \code{URL} field from the \file{DESCRIPTION} file. +See Details below.} } \value{ A list containing the citations. Citations are also written to the @@ -49,6 +53,16 @@ For a package, the keyword \samp{R-pkgname} is used for its bib item, where this function is to automate the generation of the package citation information because it often changes (e.g. author, year, package version, ...). + +There are at least two different uses for the URL in a reference list. You +might want to tell users where to go for more information; in that case, use +the default \code{packageURL = TRUE}, and the first URL listed in the +\file{DESCRIPTION} file will be used. Be careful: some authors don't put the +most relevant URL first. Alternatively, you might want to identify exactly +which version of the package was used in the document. If it was installed +from CRAN or some other repositories, the version number identifies it, and +\code{packageURL = FALSE} will use the repository URL (as used by +\code{utils::\link{citation}()}). } \note{ Some packages on CRAN do not have standard bib entries, which was once From 6bc26ee1348f4fabbc01473818aee32159d4523b Mon Sep 17 00:00:00 2001 From: Duncan Murdoch Date: Tue, 20 Jun 2023 07:53:59 -0400 Subject: [PATCH 2/3] Handle new Github et al handling --- R/citation.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/citation.R b/R/citation.R index 0a5402ee5b..0e39ff241a 100644 --- a/R/citation.R +++ b/R/citation.R @@ -90,8 +90,10 @@ write_bib = function( meta = packageDescription(pkg, lib.loc = lib.loc) # don't use the citation() URL if the package has provided its own URL if (!is.null(meta$URL)) { - if (packageURL) + if (packageURL) { meta$Repository = NULL + meta$RemoteType = NULL + } # the package may have provided multiple URLs, in which case we # use the first. We also work around a bug in citation() up to From 3a3acd4effa639aa0f4c6931c474230b5ccde0e1 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Wed, 16 Aug 2023 21:12:58 -0500 Subject: [PATCH 3/3] cosmetic changes --- R/citation.R | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/R/citation.R b/R/citation.R index 0e39ff241a..d7588304bf 100644 --- a/R/citation.R +++ b/R/citation.R @@ -89,23 +89,15 @@ write_bib = function( cite = citation(pkg, auto = if (pkg != 'base') { meta = packageDescription(pkg, lib.loc = lib.loc) # don't use the citation() URL if the package has provided its own URL - if (!is.null(meta$URL)) { - if (packageURL) { - meta$Repository = NULL - meta$RemoteType = NULL - } - - # the package may have provided multiple URLs, in which case we - # use the first. We also work around a bug in citation() up to - # R 4.3.1. The grepl pattern here is problematic, but it's what - # citation() was using. - - if (getRversion() < '4.3.2' && grepl('[, ]', meta$URL)) - meta$URL = sub('[, ].*', '', meta$URL) - - # Always remove URLs after the first one - meta$URL = sub(',? .*', '', meta$URL) - } + if (is.null(meta$URL)) return(meta) + if (packageURL) meta$Repository = meta$RemoteType = NULL + # the package may have provided multiple URLs, in which case we use the + # first. We also work around a bug in citation() up to R 4.3.1. The grep + # pattern here is problematic, but it's what citation() was using. + if (getRversion() < '4.3.2' && grepl('[, ]', meta$URL)) + meta$URL = sub('[, ].*', '', meta$URL) + # always remove URLs after the first one + meta$URL = sub(',? .*', '', meta$URL) meta }) if (tweak) {