Skip to content

Commit

Permalink
Refactor and optimization (#132)
Browse files Browse the repository at this point in the history
* remove simple_rapply

* options as new argument in oa_snowball

* truncate url display, closes #127

* hot fix

* use openalexR.print env var for #127
replaces openalexR.debug

* remove simple_rapply again

* update message

* refine oa_snowball

* Update R/oa_fetch.R

Co-authored-by: June Choe <[email protected]>

* update author ids

---------

Co-authored-by: June Choe <[email protected]>
  • Loading branch information
trangdata and yjunechoe authored Oct 26, 2023
1 parent b0a2770 commit 056e55c
Show file tree
Hide file tree
Showing 14 changed files with 99 additions and 59 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Type: Package
Package: openalexR
Title: Getting Bibliographic Records from 'OpenAlex' Database Using 'DSL'
API
Version: 1.2.2
Version: 1.2.2.9999
Authors@R: c(
person(given = "Massimo",
family = "Aria",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# openalexR (development version)
* Breaking change: two arguments in `oa_snowball` are renamed:
`citing_filter` is now `citing_params`,
and `cited_by_filter` is now `cited_by_params`.
* improve `oa_snowball` performance
* allowed the use of `options$sample` with `search`

# openalexR 1.2.2
Expand Down
5 changes: 0 additions & 5 deletions R/oa2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ oa2df <- function(data, entity, options = NULL, count_only = FALSE, group_by = N
}

if (entity != "snowball"){
# replace NULL with NA
data <- simple_rapply(data, `%||%`, y = NA)
ch <- ifelse(is.null(options$select), "id", options$select[[1]])
if (!is.null(data[[ch]])) {
data <- list(data)
Expand Down Expand Up @@ -190,7 +188,6 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE) {
if (verbose) pb$tick()

paper <- data[[i]]
paper <- simple_rapply(paper, `%||%`, y = NA)

fields <- works_process[works_process$field %in% names(paper), ]
sim_fields <- mapply(
Expand Down Expand Up @@ -310,7 +307,6 @@ abstract_build <- function(ab) {
#'
#' @export
authors2df <- function(data, verbose = TRUE) {

n <- length(data)
pb <- oa_progress(n)
list_df <- vector(mode = "list", length = n)
Expand Down Expand Up @@ -408,7 +404,6 @@ authors2df <- function(data, verbose = TRUE) {
#'
#' @export
institutions2df <- function(data, verbose = TRUE) {

n <- length(data)
pb <- oa_progress(n)
list_df <- vector(mode = "list", length = n)
Expand Down
24 changes: 17 additions & 7 deletions R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ oa_entities <- function() {
#' )
#'
#' oa_fetch(
#' identifier = c("A923435168", "A2208157607"),
#' identifier = c("A5069892096", "A5023888391"),
#' verbose = TRUE
#' )
#' }
Expand Down Expand Up @@ -213,11 +213,11 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
#' verbose = FALSE
#' )
#'
#' # The author Massimo Aria is associated to the OpenAlex-id A923435168.
#' # The author Massimo Aria is associated with the OpenAlex-id A5069892096.
#'
#'
#' query_author <- oa_query(
#' identifier = "A923435168",
#' identifier = "A5069892096",
#' entity = "authors",
#' endpoint = "https://api.openalex.org"
#' )
Expand Down Expand Up @@ -458,6 +458,8 @@ get_next_page <- function(paging, i, res = NULL) {
#' Defaults to endpoint = "https://api.openalex.org".
#' @param verbose Logical. If TRUE, print information on querying process.
#' Default to \code{verbose = FALSE}.
#' To shorten the printed query URL, set the environment variable openalexR.print
#' to the number of characters to print: \code{Sys.setenv(openalexR.print = 70)}.
#' @param \dots Additional filter arguments.
#'
#' @return a character containing the query in OpenAlex format.
Expand All @@ -469,7 +471,7 @@ get_next_page <- function(paging, i, res = NULL) {
#' @examples
#' \dontrun{
#'
#' query_auth <- oa_query(identifier = "A923435168", verbose = TRUE)
#' query_auth <- oa_query(identifier = "A5069892096", verbose = TRUE)
#'
#' ### EXAMPLE 1: Full record about an entity.
#'
Expand All @@ -487,9 +489,9 @@ get_next_page <- function(paging, i, res = NULL) {
#' )
#'
#'
#' # The author Massimo Aria is associated to the OpenAlex-id A923435168:
#' # The author Massimo Aria is associated with the OpenAlex-id A5069892096:
#'
#' query_auth <- oa_query(identifier = "A923435168", verbose = TRUE)
#' query_auth <- oa_query(identifier = "A5069892096", verbose = TRUE)
#'
#'
#' ### EXAMPLE 2: all works citing a particular work.
Expand Down Expand Up @@ -586,7 +588,15 @@ oa_query <- function(filter = NULL,
query = query
)

if (verbose) message("Requesting url: ", query_url)
if (is.null(oa_print())){
url_display <- query_url
} else {
query_url <- utils::URLdecode(query_url)
query_url_more <- if (oa_print() < nchar(query_url)) "..."
url_display <- paste0(substr(query_url, 1, oa_print()), query_url_more)
}

if (verbose) message("Requesting url: ", url_display)

query_url
}
Expand Down
39 changes: 30 additions & 9 deletions R/oa_snowball.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#' for example, W2755950973.
#' If "original", the OpenAlex IDs are kept as are,
#' for example, https://openalex.org/W2755950973
#' @param citing_filter filters used in the search of works citing the input works.
#' @param cited_by_filter filters used in the search of works cited by the input works.
#' @param citing_params parameters used in the search of works citing the input works.
#' @param cited_by_params parameters used in the search of works cited by the input works.
#' @inheritParams oa_fetch
#'
#'
Expand All @@ -28,8 +28,8 @@
#'
#' snowball_docs <- oa_snowball(
#' identifier = c("W2741809807", "W2755950973"),
#' citing_filter = list(from_publication_date = "2022-01-01"),
#' cited_by_filter = list(),
#' citing_params = list(from_publication_date = "2022-01-01"),
#' cited_by_params = list(),
#' verbose = TRUE
#' )
#' }
Expand All @@ -39,8 +39,8 @@ oa_snowball <- function(identifier = NULL,
mailto = oa_email(),
endpoint = "https://api.openalex.org",
verbose = FALSE,
citing_filter = list(),
cited_by_filter = list()) {
citing_params = list(),
cited_by_params = list()) {
id_type <- match.arg(id_type)
base_args <- list(
entity = "works",
Expand All @@ -51,20 +51,30 @@ oa_snowball <- function(identifier = NULL,
)

# collecting records about the target papers
paper <- do.call(oa_fetch, c(base_args, list(identifier = identifier), list(...)))
paper <- fetch_snow(
c(base_args, list(identifier = identifier)),
list(...)
)

# fetching documents citing the target papers
identifier <- shorten_oaid(paper$id)
if (verbose) message("Collecting all documents citing the target papers...")
citing <- suppressWarnings(
do.call(oa_fetch, c(base_args, list(cites = identifier), citing_filter))
fetch_snow(
c(base_args, list(cites = identifier)),
citing_params
)
)

# fetching documents cited by the target papers
if (verbose) message("Collecting all documents cited by the target papers...")
cited <- suppressWarnings(
do.call(oa_fetch, c(base_args, list(cited_by = identifier), cited_by_filter))
fetch_snow(
c(base_args, list(cited_by = identifier)),
cited_by_params
)
)

# merging all documents in a single data frame
if (is.null(citing)) {
citing <- paper[0, TRUE]
Expand Down Expand Up @@ -110,3 +120,14 @@ oa_snowball <- function(identifier = NULL,

list(nodes = nodes, edges = edges)
}


fetch_snow <- function(args, filt){
if (!is.null(filt$options$select)){
# id and referenced_works is needed to find citing papers
filt$options$select <- union(filt$options$select, c("id", "referenced_works"))
}

# collecting records about the target papers
do.call(oa_fetch, c(args, filt))
}
13 changes: 9 additions & 4 deletions R/openalexR-internal.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
.onAttach <- function(libname, pkgname) {
packageStartupMessage("Thank you for using openalexR!\n",
"To acknowledge our work, please cite the package by calling\n",
"`citation(\"openalexR\")`."
)
if (!grepl("suppress", Sys.getenv("openalexR.message"), ignore.case = TRUE)){
packageStartupMessage(
"Thank you for using openalexR!\n",
"To acknowledge our work, please cite the package by calling ",
"`citation(\"openalexR\")`.\n",
"To suppress this message, add `openalexR.message = suppressed` ",
"to your .Renviron file."
)
}
}
23 changes: 10 additions & 13 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
# apply a function to all elements of a list

simple_rapply <- function(x, fn, ...) {
if (is.list(x)) {
lapply(x, simple_rapply, fn, ...)
} else {
fn(x, ...)
}
}

`%||%` <- function(x, y) if (is.null(x)) y else x

replace_w_na <- function(x){
lapply(x, `%||%`, y = NA)
}

subs_na <- function(x, type = c("row_df", "col_df", "flat", "rbind_df", "identical"), prefix = NULL) {
type <- match.arg(type)
subs_na <- function(x, type, prefix = NULL) {
if (length(x) == 0) {
return(NA)
}
Expand All @@ -27,8 +17,7 @@ subs_na <- function(x, type = c("row_df", "col_df", "flat", "rbind_df", "identic
out <- switch(type,
row_df = as.data.frame(replace_w_na(x)),
flat = unlist(x),
rbind_df = do.call(rbind.data.frame, lapply(x, replace_w_na)
)
rbind_df = do.call(rbind.data.frame, lapply(x, replace_w_na))
)

if (!is.null(prefix)) {
Expand Down Expand Up @@ -82,6 +71,14 @@ oa_email <- function() {
email
}

oa_print <- function() {
p <- as.integer(Sys.getenv("openalexR.print"))
if (is.na(p)){
return(NULL)
}
p
}

oa_apikey <- function() {
apikey <- Sys.getenv("openalexR.apikey")
if (apikey == "") {
Expand Down
6 changes: 4 additions & 2 deletions man/oa_fetch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/oa_ngrams.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions man/oa_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/oa_request.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 9 additions & 7 deletions man/oa_snowball.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 056e55c

Please sign in to comment.