Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor and optimization #132

Merged
merged 11 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Type: Package
Package: openalexR
Title: Getting Bibliographic Records from 'OpenAlex' Database Using 'DSL'
API
Version: 1.2.2
Version: 1.2.2.9999
Authors@R: c(
person(given = "Massimo",
family = "Aria",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# openalexR (development version)
* Breaking change: two arguments in `oa_snowball` are renamed:
`citing_filter` is now `citing_params`,
and `cited_by_filter` is now `cited_by_params`.
* Improved `oa_snowball` performance.
* Allowed the use of `options$sample` with `search`.

# openalexR 1.2.2
Expand Down
5 changes: 0 additions & 5 deletions R/oa2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ oa2df <- function(data, entity, options = NULL, count_only = FALSE, group_by = N
}

if (entity != "snowball"){
# replace NULL with NA
data <- simple_rapply(data, `%||%`, y = NA)
ch <- ifelse(is.null(options$select), "id", options$select[[1]])
if (!is.null(data[[ch]])) {
data <- list(data)
Expand Down Expand Up @@ -190,7 +188,6 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE) {
if (verbose) pb$tick()

paper <- data[[i]]
paper <- simple_rapply(paper, `%||%`, y = NA)

fields <- works_process[works_process$field %in% names(paper), ]
sim_fields <- mapply(
Expand Down Expand Up @@ -310,7 +307,6 @@ abstract_build <- function(ab) {
#'
#' @export
authors2df <- function(data, verbose = TRUE) {

n <- length(data)
pb <- oa_progress(n)
list_df <- vector(mode = "list", length = n)
Expand Down Expand Up @@ -408,7 +404,6 @@ authors2df <- function(data, verbose = TRUE) {
#'
#' @export
institutions2df <- function(data, verbose = TRUE) {

n <- length(data)
pb <- oa_progress(n)
list_df <- vector(mode = "list", length = n)
Expand Down
24 changes: 17 additions & 7 deletions R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
#' )
#'
#' oa_fetch(
#' identifier = c("A923435168", "A2208157607"),
#' identifier = c("A5069892096", "A5023888391"),
#' verbose = TRUE
#' )
#' }
Expand Down Expand Up @@ -213,11 +213,11 @@
#' verbose = FALSE
#' )
#'
#' # The author Massimo Aria is associated to the OpenAlex-id A923435168.
#' # The author Massimo Aria is associated to the OpenAlex-id A5069892096.
#'
#'
#' query_author <- oa_query(
#' identifier = "A923435168",
#' identifier = "A5069892096",
#' entity = "authors",
#' endpoint = "https://api.openalex.org"
#' )
Expand Down Expand Up @@ -458,6 +458,8 @@
#' Defaults to endpoint = "https://api.openalex.org".
#' @param verbose Logical. If TRUE, print information on querying process.
#' Defaults to \code{verbose = FALSE}.
#' To shorten the printed query URL, set the environment variable
#' \code{openalexR.print} to the number of characters to print:
#' \code{Sys.setenv(openalexR.print = 70)}.
#' @param \dots Additional filter arguments.
#'
#' @return a character containing the query in OpenAlex format.
Expand All @@ -469,7 +471,7 @@
#' @examples
#' \dontrun{
#'
#' query_auth <- oa_query(identifier = "A923435168", verbose = TRUE)
#' query_auth <- oa_query(identifier = "A5069892096", verbose = TRUE)
#'
#' ### EXAMPLE 1: Full record about an entity.
#'
Expand All @@ -487,9 +489,9 @@
#' )
#'
#'
#' # The author Massimo Aria is associated to the OpenAlex-id A923435168:
#' # The author Massimo Aria is associated to the OpenAlex-id A5069892096:
#'
#' query_auth <- oa_query(identifier = "A923435168", verbose = TRUE)
#' query_auth <- oa_query(identifier = "A5069892096", verbose = TRUE)
#'
#'
#' ### EXAMPLE 2: all works citing a particular work.
Expand Down Expand Up @@ -586,7 +588,15 @@
query = query
)

if (verbose) message("Requesting url: ", query_url)
if (is.null(oa_print())){
url_display <- query_url
} else {
query_url <- utils::URLdecode(query_url)
query_url_more <- if (oa_print() < nchar(query_url)) "..."
url_display <- paste0(substr(query_url, 1, oa_print()), query_url_more)

Check warning on line 596 in R/oa_fetch.R

View check run for this annotation

Codecov / codecov/patch

R/oa_fetch.R#L594-L596

Added lines #L594 - L596 were not covered by tests
}

if (verbose) message("Requesting url: ", url_display)

query_url
}
Expand Down
39 changes: 30 additions & 9 deletions R/oa_snowball.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#' for example, W2755950973.
#' If "original", the OpenAlex IDs are kept as are,
#' for example, https://openalex.org/W2755950973
#' @param citing_filter filters used in the search of works citing the input works.
#' @param cited_by_filter filters used in the search of works cited by the input works.
#' @param citing_params parameters used in the search of works citing the input works.
#' @param cited_by_params parameters used in the search of works cited by the input works.
#' @inheritParams oa_fetch
#'
#'
Expand All @@ -28,8 +28,8 @@
#'
#' snowball_docs <- oa_snowball(
#' identifier = c("W2741809807", "W2755950973"),
#' citing_filter = list(from_publication_date = "2022-01-01"),
#' cited_by_filter = list(),
#' citing_params = list(from_publication_date = "2022-01-01"),
#' cited_by_params = list(),
#' verbose = TRUE
#' )
#' }
Expand All @@ -39,8 +39,8 @@
mailto = oa_email(),
endpoint = "https://api.openalex.org",
verbose = FALSE,
citing_filter = list(),
cited_by_filter = list()) {
citing_params = list(),
cited_by_params = list()) {
id_type <- match.arg(id_type)
base_args <- list(
entity = "works",
Expand All @@ -51,20 +51,30 @@
)

# collecting records about the target papers
paper <- do.call(oa_fetch, c(base_args, list(identifier = identifier), list(...)))
paper <- fetch_snow(
c(base_args, list(identifier = identifier)),
list(...)
)

# fetching documents citing the target papers
identifier <- shorten_oaid(paper$id)
if (verbose) message("Collecting all documents citing the target papers...")
citing <- suppressWarnings(
do.call(oa_fetch, c(base_args, list(cites = identifier), citing_filter))
fetch_snow(
c(base_args, list(cites = identifier)),
citing_params
)
)

# fetching documents cited by the target papers
if (verbose) message("Collecting all documents cited by the target papers...")
cited <- suppressWarnings(
do.call(oa_fetch, c(base_args, list(cited_by = identifier), cited_by_filter))
fetch_snow(
c(base_args, list(cited_by = identifier)),
cited_by_params
)
)

# merging all documents in a single data frame
if (is.null(citing)) {
citing <- paper[0, TRUE]
Expand Down Expand Up @@ -110,3 +120,14 @@

list(nodes = nodes, edges = edges)
}


# Call `oa_fetch` with a set of base arguments plus user-supplied parameters.
#
# @param args List of arguments always passed to `oa_fetch`.
# @param filt List of additional arguments appended after `args`. When it
#   carries `options$select`, the fields "id" and "referenced_works" are
#   added to that selection.
# @return Whatever `oa_fetch` returns for the combined argument list.
fetch_snow <- function(args, filt) {
  # `[[` is used for exact name matching (`$` on a list partially matches).
  selected <- filt[["options"]][["select"]]
  if (!is.null(selected)) {
    # id and referenced_works is needed to find citing papers
    filt$options$select <- union(selected, c("id", "referenced_works"))
  }

  do.call(oa_fetch, c(args, filt))
}
13 changes: 9 additions & 4 deletions R/openalexR-internal.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Package attach hook: prints a citation reminder as a startup message.
#
# The message is skipped when the environment variable `openalexR.message`
# contains "suppress" (case-insensitive), e.g. `openalexR.message = suppressed`
# in .Renviron.
#
# @param libname,pkgname Arguments of the attach hook; not used in the body.
.onAttach <- function(libname, pkgname) {
  if (!grepl("suppress", Sys.getenv("openalexR.message"), ignore.case = TRUE)) {
    packageStartupMessage(
      "Thank you for using openalexR!\n",
      "To acknowledge our work, please cite the package by calling ",
      "`citation(\"openalexR\")`.\n",
      "To suppress this message, add `openalexR.message = suppressed` ",
      "to your .Renviron file."
    )
  }
}
23 changes: 10 additions & 13 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
# apply a function to all elements of a list

# Recursively walk `x`: lists are descended into element by element, and
# `fn(leaf, ...)` is applied to every non-list value. The list structure
# (including names) is kept as produced by `lapply`.
simple_rapply <- function(x, fn, ...) {
  if (!is.list(x)) {
    return(fn(x, ...))
  }
  lapply(x, simple_rapply, fn, ...)
}

# Null-coalescing operator: yields `x` unless it is NULL, otherwise `y`.
`%||%` <- function(x, y) {
  if (!is.null(x)) x else y
}

# Replace every NULL top-level element of `x` with NA; other elements are
# returned unchanged. The result is a list, as produced by `lapply`.
replace_w_na <- function(x) {
  lapply(x, function(el) if (is.null(el)) NA else el)
}

subs_na <- function(x, type = c("row_df", "col_df", "flat", "rbind_df", "identical"), prefix = NULL) {
type <- match.arg(type)
subs_na <- function(x, type, prefix = NULL) {
if (length(x) == 0) {
return(NA)
}
Expand All @@ -27,8 +17,7 @@
out <- switch(type,
row_df = as.data.frame(replace_w_na(x)),
flat = unlist(x),
rbind_df = do.call(rbind.data.frame, lapply(x, replace_w_na)
)
rbind_df = do.call(rbind.data.frame, lapply(x, replace_w_na))
)

if (!is.null(prefix)) {
Expand Down Expand Up @@ -82,6 +71,14 @@
email
}

# Read the optional display width for printed query URLs.
#
# @return The value of the environment variable `openalexR.print` as an
#   integer, or NULL when the variable is unset or cannot be read as a number.
oa_print <- function() {
  # An unset variable is read as "", which coerces to NA, as does a
  # non-numeric value. The coercion warning is silenced so that a malformed
  # value simply behaves like an unset one.
  p <- suppressWarnings(as.integer(Sys.getenv("openalexR.print")))
  if (is.na(p)) {
    return(NULL)
  }
  p
}

oa_apikey <- function() {
apikey <- Sys.getenv("openalexR.apikey")
if (apikey == "") {
Expand Down
6 changes: 4 additions & 2 deletions man/oa_fetch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/oa_ngrams.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions man/oa_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/oa_request.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 9 additions & 7 deletions man/oa_snowball.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading