Skip to content

Commit

Permalink
Merge branch 'ropensci:main' into prune-edges
Browse files Browse the repository at this point in the history
  • Loading branch information
yjunechoe authored Jul 20, 2023
2 parents 4c29065 + 3f6bde9 commit 586fef5
Show file tree
Hide file tree
Showing 22 changed files with 549 additions and 100 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/check-pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ jobs:
with:
extra-packages: any::devtools, any::pkgdown

- name: Install dependencies
run: devtools::install_github("https://github.com/ropensci-org/rotemplate")
shell: Rscript {0}

- name: Check pkgdown
run: pkgdown::check_pkgdown()
shell: Rscript {0}
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(authors2df)
export(concepts2df)
export(funders2df)
export(institutions2df)
export(oa2bibliometrix)
export(oa2df)
Expand All @@ -12,9 +13,11 @@ export(oa_query)
export(oa_random)
export(oa_request)
export(oa_snowball)
export(publishers2df)
export(show_authors)
export(show_works)
export(snowball2df)
export(sources2df)
export(venues2df)
export(works2df)
importFrom(stats,setNames)
278 changes: 270 additions & 8 deletions R/oa2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
#' The argument can be one of c("works", "authors", "venues", "institutions", "concepts", "funders", "sources", "publishers").
#' @param abstract Logical. If TRUE, the function returns also the abstract of each item.
#' Ignored if entity is different from "works". Defaults to TRUE.
#' @param verbose Logical.
#' If TRUE, print information about the dataframe conversion process.
#' Defaults to TRUE.
#'
#' @inheritParams oa_query
#' @inheritParams oa_request
#' @return A tibble/dataframe result of the original OpenAlex result list.
Expand Down Expand Up @@ -57,6 +61,9 @@ oa2df <- function(data, entity, count_only = FALSE, group_by = NULL, abstract =
institutions = institutions2df(data, verbose),
venues = venues2df(data, verbose),
concepts = concepts2df(data, verbose),
funders = funders2df(data, verbose),
sources = sources2df(data, verbose),
publishers = publishers2df(data, verbose),
snowball = snowball2df(data)
)
}
Expand Down Expand Up @@ -184,10 +191,10 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE) {

if (!is.null(paper$primary_location)) {
so_info <- paper$primary_location["source"]
so_info <- if (is.na(so_info)) NA else so_info[[1]]

so_info <- if (length(so_info[[1]]) == 0) NA else so_info[[1]]
venue_info <- replace_w_na(paper$primary_location[venue_cols])
venue <- setNames(
c(paper$primary_location[venue_cols], so_info[so_cols]),
c(venue_info, so_info[so_cols]),
c(names(venue_cols), names(so_cols))
)
}
Expand All @@ -205,12 +212,11 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE) {
}
first_inst <- prepend(first_inst, "institution")
aff_raw <- list(au_affiliation_raw = l$raw_affiliation_string[1])
l_author <- l_author <- if (length(l$author) > 0) {
prepend(l$author, "au")
l_author <- if (length(l$author) > 0) {
prepend(replace_w_na(l$author), "au")
} else {
empty_list(c("au_id", "au_display_name", "au_orcid"))
}

c(l_author, l["author_position"], aff_raw, first_inst)
}), "rbind_df"
)
Expand All @@ -220,8 +226,9 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE) {
if (!is.null(paper$abstract_inverted_index) && abstract) {
ab <- abstract_build(paper$abstract_inverted_index)
}
paper_biblio <- replace_w_na(paper$biblio) # TODO replace sapply with something else

out_ls <- c(sim_fields, venue, paper$biblio, list(author = author, ab = ab))
out_ls <- c(sim_fields, venue, paper_biblio, list(author = author, ab = ab))
out_ls[sapply(out_ls, is.null)] <- NULL
list_df[[i]] <- out_ls
}
Expand Down Expand Up @@ -334,7 +341,7 @@ authors2df <- function(data, verbose = TRUE) {
}
sub_affiliation <- prepend(sub_affiliation, "affiliation")
}

sub_affiliation <- replace_w_na(sub_affiliation)
list_df[[i]] <- c(sim_fields, sub_affiliation)
}

Expand Down Expand Up @@ -650,6 +657,261 @@ concepts2df <- function(data, verbose = TRUE) {
}


#' Convert OpenAlex collection of funders' records from list format to data frame
#'
#' Converts a collection of funders' records gathered from the OpenAlex database \href{https://openalex.org/}{https://openalex.org/} into a data frame.
#' The input is a list of funders' records obtained using \code{oa_request}; the output is a data frame/tibble.
#'
#' @inheritParams oa2df
#'
#' @return a data.frame.
#'
#' For more extensive information about OpenAlex API, please visit: <https://docs.openalex.org>
#'
#'
#' @examples
#' \dontrun{
#'
#' # Get funders located in Canada with more than 100,000 citations
#'
#' res <- oa_request(
#' "https://api.openalex.org/funders?filter=country_code:ca,cited_by_count:>100000"
#' )
#'
#' df <- oa2df(res, entity = "funders")
#'
#' df
#' }
#'
#' @export
funders2df <- function(data, verbose = TRUE) {
  # Replace NULL with NA throughout the input list.
  data <- simple_rapply(data, `%||%`, y = NA)

  # An input that carries a top-level `id` is treated as a single record and
  # wrapped, so the loop below always iterates over a list of records.
  if (!is.null(data$id)) {
    data <- list(data)
  }

  # Known funder fields, each paired with the `type` handed to subs_na().
  funder_process <- tibble::tribble(
    ~type, ~field,
    "identical", "id",
    "identical", "display_name",
    "col_df", "alternate_titles",
    "identical", "country_code",
    "identical", "description",
    "identical", "homepage_url",
    "identical", "image_url",
    "identical", "image_thumbnail_url",
    "identical", "grants_count",
    "identical", "works_count",
    "identical", "cited_by_count",
    "col_df", "summary_stats",
    "col_df", "ids",
    "rbind_df", "counts_by_year",
    "rbind_df", "roles",
    "identical", "updated_date",
    "identical", "created_date"
  )

  n <- length(data)
  pb <- oa_progress(n)
  list_df <- vector(mode = "list", length = n)

  # seq_len() instead of seq.int(): seq.int(0) yields c(1, 0), which would
  # index data[[1]] on an empty input. seq_len(0) skips the loop entirely.
  # NOTE(review): confirm rbind_oa_ls() accepts an empty list.
  for (i in seq_len(n)) {
    if (verbose) pb$tick()

    item <- data[[i]]
    # Only process the fields that are actually present in this record.
    fields <- funder_process[funder_process$field %in% names(item), ]
    sim_fields <- mapply(
      function(x, y) subs_na(item[[x]], type = y),
      fields$field,
      fields$type,
      SIMPLIFY = FALSE
    )
    list_df[[i]] <- sim_fields
  }

  # Bind the per-record lists into the returned data frame.
  rbind_oa_ls(list_df)
}



#' Convert OpenAlex collection of sources' records from list format to data frame
#'
#' Converts a collection of sources' records gathered from the OpenAlex database \href{https://openalex.org/}{https://openalex.org/} into a data frame.
#' The input is a list of sources' records obtained using \code{oa_request}; the output is a data frame/tibble.
#'
#' @inheritParams oa2df
#'
#' @return a data.frame.
#'
#' For more extensive information about OpenAlex API, please visit: <https://docs.openalex.org>
#'
#'
#' @examples
#' \dontrun{
#'
#' # Get sources from Nature
#'
#' res <- oa_request(
#' "https://api.openalex.org/sources?search=nature"
#' )
#'
#' df <- oa2df(res, entity = "sources")
#'
#' df
#' }
#'
#' @export
sources2df <- function(data, verbose = TRUE) {
  # Replace NULL with NA throughout the input list.
  data <- simple_rapply(data, `%||%`, y = NA)

  # An input that carries a top-level `id` is treated as a single record and
  # wrapped, so the loop below always iterates over a list of records.
  if (!is.null(data$id)) {
    data <- list(data)
  }

  # Known source fields, each paired with the `type` handed to subs_na().
  source_process <- tibble::tribble(
    ~type, ~field,
    "identical", "id",
    "identical", "issn_l",
    "col_df", "issn",
    "identical", "display_name",
    "identical", "host_organization",
    "identical", "host_organization_name",
    "col_df", "host_organization_lineage",
    "identical", "relevance_score",
    "identical", "works_count",
    "identical", "cited_by_count",
    "col_df", "summary_stats",
    "identical", "is_oa",
    "identical", "is_in_doaj",
    "col_df", "ids",
    "identical", "homepage_url",
    "identical", "apc_prices",
    "identical", "apc_usd",
    "identical", "country_code",
    "flat", "societies",
    "flat", "alternate_titles",
    "identical", "abbreviated_title",
    "identical", "type",
    "rbind_df", "x_concepts",
    "rbind_df", "counts_by_year",
    "identical", "works_api_url",
    "identical", "updated_date",
    "identical", "created_date"
  )

  n <- length(data)
  pb <- oa_progress(n)
  list_df <- vector(mode = "list", length = n)

  # seq_len() instead of seq.int(): seq.int(0) yields c(1, 0), which would
  # index data[[1]] on an empty input. seq_len(0) skips the loop entirely.
  # NOTE(review): confirm rbind_oa_ls() accepts an empty list.
  for (i in seq_len(n)) {
    if (verbose) pb$tick()

    item <- data[[i]]
    # Only process the fields that are actually present in this record.
    fields <- source_process[source_process$field %in% names(item), ]
    sim_fields <- mapply(
      function(x, y) subs_na(item[[x]], type = y),
      fields$field,
      fields$type,
      SIMPLIFY = FALSE
    )
    list_df[[i]] <- sim_fields
  }

  # Bind the per-record lists into the returned data frame.
  rbind_oa_ls(list_df)
}



#' Convert OpenAlex collection of publishers' records from list format to data frame
#'
#' Converts a collection of publishers' records gathered from the OpenAlex database \href{https://openalex.org/}{https://openalex.org/} into a data frame.
#' The input is a list of publishers' records obtained using \code{oa_request}; the output is a data frame/tibble.
#'
#' @inheritParams oa2df
#'
#' @return a data.frame.
#'
#' For more extensive information about OpenAlex API, please visit: <https://docs.openalex.org>
#'
#'
#' @examples
#' \dontrun{
#'
#' # Get publishers located in Canada
#'
#' res <- oa_request(
#' "https://api.openalex.org/publishers?filter=country_codes:ca"
#' )
#'
#' df <- oa2df(res, entity = "publishers")
#'
#' df
#' }
#'
#' @export
publishers2df <- function(data, verbose = TRUE) {
  # Replace NULL with NA throughout the input list.
  data <- simple_rapply(data, `%||%`, y = NA)

  # An input that carries a top-level `id` is treated as a single record and
  # wrapped, so the loop below always iterates over a list of records.
  if (!is.null(data$id)) {
    data <- list(data)
  }

  # Known publisher fields, each paired with the `type` handed to subs_na().
  publisher_process <- tibble::tribble(
    ~type, ~field,
    "identical", "id",
    "identical", "display_name",
    "flat", "alternate_titles",
    "identical", "hierarchy_level",
    "row_df", "parent_publisher",
    "flat", "lineage",
    "identical", "country_codes",
    "identical", "homepage_url",
    "identical", "image_url",
    "identical", "image_thumbnail_url",
    "identical", "works_count",
    "identical", "cited_by_count",
    "col_df", "summary_stats",
    "col_df", "ids",
    "rbind_df", "counts_by_year",
    "rbind_df", "roles",
    "identical", "sources_api_url",
    "identical", "updated_date",
    "identical", "created_date"
  )

  n <- length(data)
  pb <- oa_progress(n)
  list_df <- vector(mode = "list", length = n)

  # seq_len() instead of seq.int(): seq.int(0) yields c(1, 0), which would
  # index data[[1]] on an empty input. seq_len(0) skips the loop entirely.
  # NOTE(review): confirm rbind_oa_ls() accepts an empty list.
  for (i in seq_len(n)) {
    if (verbose) pb$tick()

    item <- data[[i]]
    # Only process the fields that are actually present in this record.
    fields <- publisher_process[publisher_process$field %in% names(item), ]
    sim_fields <- mapply(
      function(x, y) subs_na(item[[x]], type = y),
      fields$field,
      fields$type,
      SIMPLIFY = FALSE
    )
    list_df[[i]] <- sim_fields
  }

  # Bind the per-record lists into the returned data frame.
  rbind_oa_ls(list_df)
}



#' Flatten snowball result
#'
#' | id|title |...|cited_by_count| referenced_works |cited_by |...|
Expand Down
3 changes: 2 additions & 1 deletion R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#' @examples
#' oa_entities()
oa_entities <- function() {
  # Returns the character vector of entity names. The earlier five-element
  # vector that preceded this expression was evaluated and discarded, so it
  # has been removed; the return value is unchanged.
  c(
    "works", "authors", "venues", "institutions", "concepts",
    "funders", "sources", "publishers"
  )
}

#' A composition function to perform query building, requesting,
Expand Down
Loading

0 comments on commit 586fef5

Please sign in to comment.