Skip to content

Commit

Permalink
add publishers and sources
Browse files Browse the repository at this point in the history
  • Loading branch information
trangdata committed Jul 19, 2023
1 parent e5086a7 commit 6fbdef0
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ export(oa_query)
export(oa_random)
export(oa_request)
export(oa_snowball)
export(publishers2df)
export(show_authors)
export(show_works)
export(snowball2df)
export(sources2df)
export(venues2df)
export(works2df)
importFrom(stats,setNames)
180 changes: 177 additions & 3 deletions R/oa2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ oa2df <- function(data, entity, count_only = FALSE, group_by = NULL, abstract =
venues = venues2df(data, verbose),
concepts = concepts2df(data, verbose),
funders = funders2df(data, verbose),
sources = sources2df(data, verbose),
publishers = publishers2df(data, verbose),
snowball = snowball2df(data)
)
}
Expand Down Expand Up @@ -651,10 +653,10 @@ concepts2df <- function(data, verbose = TRUE) {
}


#' Convert OpenAlex collection of concepts' records from list format to data frame
#' Convert OpenAlex collection of funders' records from list format to data frame
#'
#' It converts bibliographic collection of concepts' records gathered from OpenAlex database \href{https://openalex.org/}{https://openalex.org/} into data frame.
#' The function converts a list of concepts' records obtained using \code{oa_request} into a data frame/tibble.
#' It converts bibliographic collection of funders' records gathered from OpenAlex database \href{https://openalex.org/}{https://openalex.org/} into data frame.
#' The function converts a list of funders' records obtained using \code{oa_request} into a data frame/tibble.
#'
#' @inheritParams oa2df
#'
Expand Down Expand Up @@ -731,6 +733,178 @@ funders2df <- function(data, verbose = TRUE) {



#' Convert OpenAlex collection of sources' records from list format to data frame
#'
#' It converts a bibliographic collection of sources' records gathered from
#' the OpenAlex database \href{https://openalex.org/}{https://openalex.org/}
#' into a data frame.
#' The function converts a list of sources' records obtained using
#' \code{oa_request} into a data frame/tibble.
#'
#' @inheritParams oa2df
#'
#' @return a data.frame.
#'
#' For more extensive information about OpenAlex API, please visit: <https://docs.openalex.org>
#'
#'
#' @examples
#' \dontrun{
#'
#' # Get sources matching the search term "nature"
#'
#' res <- oa_request(
#'   "https://api.openalex.org/sources?search=nature"
#' )
#'
#' df <- oa2df(res, entity = "sources")
#'
#' df
#' }
#'
#' @export
sources2df <- function(data, verbose = TRUE) {
  # replace NULL with NA
  data <- simple_rapply(data, `%||%`, y = NA)

  # A top-level `id` means `data` is a single record rather than a list of
  # records; wrap it so the loop below handles both shapes the same way.
  if (!is.null(data$id)) {
    data <- list(data)
  }

  # Fields extracted from each record, paired with the `type` that is
  # handed to subs_na() for that field.
  source_process <- tibble::tribble(
    ~type, ~field,
    "identical", "id",
    "identical", "issn_l",
    "col_df", "issn",
    "identical", "display_name",
    "identical", "host_organization",
    "identical", "host_organization_name",
    "col_df", "host_organization_lineage",
    "identical", "relevance_score",
    "identical", "works_count",
    "identical", "cited_by_count",
    "col_df", "summary_stats",
    "identical", "is_oa",
    "identical", "is_in_doaj",
    "col_df", "ids",
    "identical", "homepage_url",
    "identical", "apc_prices",
    "identical", "apc_usd",
    "identical", "country_code",
    "flat", "societies",
    "flat", "alternate_titles",
    "identical", "abbreviated_title",
    "identical", "type",
    "rbind_df", "x_concepts",
    "rbind_df", "counts_by_year",
    "identical", "works_api_url",
    "identical", "updated_date",
    "identical", "created_date"
  )

  n <- length(data)
  pb <- oa_progress(n)
  list_df <- vector(mode = "list", length = n)

  # seq_len() gives an empty sequence for n == 0, whereas seq.int(0) is
  # c(1, 0) and would index past the end of an empty `data`.
  for (i in seq_len(n)) {
    if (verbose) pb$tick()

    item <- data[[i]]
    # Only process the fields that are actually present in this record.
    fields <- source_process[source_process$field %in% names(item), ]
    sim_fields <- mapply(
      function(x, y) subs_na(item[[x]], type = y),
      fields$field,
      fields$type,
      SIMPLIFY = FALSE
    )
    list_df[[i]] <- sim_fields
  }

  # Return the combined result visibly; ending the function on an
  # assignment would return it invisibly and suppress auto-printing.
  rbind_oa_ls(list_df)
}



#' Convert OpenAlex collection of publishers' records from list format to data frame
#'
#' It converts a bibliographic collection of publishers' records gathered from
#' the OpenAlex database \href{https://openalex.org/}{https://openalex.org/}
#' into a data frame.
#' The function converts a list of publishers' records obtained using
#' \code{oa_request} into a data frame/tibble.
#'
#' @inheritParams oa2df
#'
#' @return a data.frame.
#'
#' For more extensive information about OpenAlex API, please visit: <https://docs.openalex.org>
#'
#'
#' @examples
#' \dontrun{
#'
#' # Get publishers located in Canada
#'
#' res <- oa_request(
#'   "https://api.openalex.org/publishers?filter=country_codes:ca"
#' )
#'
#' df <- oa2df(res, entity = "publishers")
#'
#' df
#' }
#'
#' @export
publishers2df <- function(data, verbose = TRUE) {
  # replace NULL with NA
  data <- simple_rapply(data, `%||%`, y = NA)

  # A top-level `id` means `data` is a single record rather than a list of
  # records; wrap it so the loop below handles both shapes the same way.
  if (!is.null(data$id)) {
    data <- list(data)
  }

  # Fields extracted from each record, paired with the `type` that is
  # handed to subs_na() for that field.
  publisher_process <- tibble::tribble(
    ~type, ~field,
    "identical", "id",
    "identical", "display_name",
    "flat", "alternate_titles",
    "identical", "hierarchy_level",
    "row_df", "parent_publisher",
    "flat", "lineage",
    "identical", "country_codes",
    "identical", "homepage_url",
    "identical", "image_url",
    "identical", "image_thumbnail_url",
    "identical", "works_count",
    "identical", "cited_by_count",
    "col_df", "summary_stats",
    "col_df", "ids",
    "rbind_df", "counts_by_year",
    "rbind_df", "roles",
    "identical", "sources_api_url",
    "identical", "updated_date",
    "identical", "created_date"
  )

  n <- length(data)
  pb <- oa_progress(n)
  list_df <- vector(mode = "list", length = n)

  # seq_len() gives an empty sequence for n == 0, whereas seq.int(0) is
  # c(1, 0) and would index past the end of an empty `data`.
  for (i in seq_len(n)) {
    if (verbose) pb$tick()

    item <- data[[i]]
    # Only process the fields that are actually present in this record.
    fields <- publisher_process[publisher_process$field %in% names(item), ]
    sim_fields <- mapply(
      function(x, y) subs_na(item[[x]], type = y),
      fields$field,
      fields$type,
      SIMPLIFY = FALSE
    )
    list_df[[i]] <- sim_fields
  }

  # Return the combined result visibly; ending the function on an
  # assignment would return it invisibly and suppress auto-printing.
  rbind_oa_ls(list_df)
}



#' Flatten snowball result
#'
#' | id|title |...|cited_by_count| referenced_works |cited_by |...|
Expand Down
2 changes: 1 addition & 1 deletion R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' oa_entities()
oa_entities <- function() {
c("works", "authors", "venues", "institutions", "concepts",
"grants", "funders", "locations", "sources", "publishers")
"funders", "sources", "publishers")
}

#' A composition function to perform query building, requesting,
Expand Down
6 changes: 3 additions & 3 deletions man/funders2df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/publishers2df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/sources2df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/testthat/test-oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,18 @@ test_that("oa_fetch works for funders", {
expect_equal(ncol(s), 17)
expect_true(nrow(s) > 1)
})

test_that("oa_fetch works for sources", {
  # NOTE(review): oa_fetch() appears to query the live OpenAlex API, so the
  # test is skipped on CRAN where network access cannot be relied on.
  skip_on_cran()

  s <- oa_fetch(entity = "sources", search = "nature")
  expect_s3_class(s, "data.frame")
  expect_equal(ncol(s), 27)
  # expect_gt() reports the actual row count when the expectation fails.
  expect_gt(nrow(s), 200)
})

test_that("oa_fetch works for publishers", {
  # NOTE(review): oa_fetch() appears to query the live OpenAlex API, so the
  # test is skipped on CRAN where network access cannot be relied on.
  skip_on_cran()

  s <- oa_fetch(entity = "publishers", country_codes = "ca")
  expect_s3_class(s, "data.frame")
  expect_equal(ncol(s), 19)
  # expect_gt() reports the actual row count when the expectation fails.
  expect_gt(nrow(s), 100)
})

0 comments on commit 6fbdef0

Please sign in to comment.