Skip to content

Commit

Permalink
Merge pull request #235 from ropensci/truncation-warning
Browse files Browse the repository at this point in the history
Truncation warning
  • Loading branch information
massimoaria authored Apr 29, 2024
2 parents acc3e7c + 610b6a6 commit 10aad43
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 60 deletions.
15 changes: 7 additions & 8 deletions R/oa2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ works2df <- function(data, abstract = TRUE, verbose = TRUE,
first_inst <- empty_inst
}
first_inst <- prepend(first_inst, "institution")
aff_raw <- list(au_affiliation_raw = l$raw_affiliation_string[1])
aff_raw <- list(au_affiliation_raw = replace_w_na(l$raw_affiliation_string[1]))
l_author <- if (length(l$author) > 0) {
prepend(replace_w_na(l$author), "au")
} else {
Expand Down Expand Up @@ -318,8 +318,8 @@ abstract_build <- function(ab) {
#' query_author <- oa_query(
#' identifier = NULL,
#' entity = "authors",
#' last_known_institution.id = "I71267560",
#' works_count = ">99"
#' last_known_institutions.id = "I71267560",
#' works_count = ">500"
#' )
#'
#' res <- oa_request(
Expand Down Expand Up @@ -370,14 +370,13 @@ authors2df <- function(data, verbose = TRUE,
fields$type,
SIMPLIFY = FALSE
)
sub_affiliation <- item$last_known_institution
if (!is.null(sub_affiliation)) {
sub_affiliation <- item$last_known_institutions
if (!is.null(sub_affiliation) && length(sub_affiliation)) {
sub_affiliation <- sub_affiliation[[1]]
if (is.na(sub_affiliation[[1]])) {
sub_affiliation <- empty_inst
}
if (length(sub_affiliation$lineage) > 1) {
sub_affiliation$lineage <- paste(sub_affiliation$lineage, collapse = ", ")
}
sub_affiliation$lineage <- paste(sub_affiliation$lineage, collapse = ", ")
sub_affiliation <- prepend(sub_affiliation, "affiliation")
}
sub_affiliation <- replace_w_na(sub_affiliation)
Expand Down
54 changes: 43 additions & 11 deletions R/oa_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,20 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten
if (length(final_res[[1]]) == 0) { # || is.null(final_res[[1]][[1]]$id)
return(NULL)
}
final_res <- unlist(final_res, recursive = FALSE)

if (output == "list") {
unlist(final_res, recursive = FALSE)
} else {
# Flatten out the initial chunking of 50 at a time
final_res <- list(unlist(final_res, recursive = FALSE))
do.call(rbind, lapply(
final_res, oa2df,
entity = entity, options = options, abstract = abstract,
count_only = count_only, group_by = group_by,
verbose = verbose
))
return(final_res)
}

# Flatten out the initial chunking of 50 at a time
final_res <- list(final_res)
do.call(rbind, lapply(
final_res, oa2df,
entity = entity, options = options, abstract = abstract,
count_only = count_only, group_by = group_by,
verbose = verbose
))
}

#' Get bibliographic records from OpenAlex database
Expand Down Expand Up @@ -397,7 +398,38 @@ oa_request <- function(query_url,
if (!is.null(res[[result_name]])) data[[i]] <- res[[result_name]]
}

unlist(data, recursive = FALSE)
data <- unlist(data, recursive = FALSE)

if (grepl("filter", query_url) && grepl("works", query_url)) {
truncated <- unlist(truncated_authors(data))
if (length(truncated)) {
truncated <- shorten_oaid(truncated)
warning(
"\nThe following work(s) have truncated lists of authors: ",
paste(truncated, collapse = ", "),
".\nQuery each work separately by its identifier to get full list of authors.\n",
"For example:\n ",
paste0(
"lapply(c(\"",
paste(utils::head(truncated, 2), collapse = "\", \""),
"\"), \\(x) oa_fetch(identifier = x))"
),
"\nDetails at https://docs.openalex.org/api-entities/authors/limitations."
)
}
}

data
}

#' Collect the ids of records flagged as having a truncated author list
#'
#' @param list_result A list of records. Each record is read as a list that
#'   may carry an `is_authors_truncated` flag and an `id` element.
#' @return A list the same length as `list_result`. An element holds the
#'   record's `id` when its `is_authors_truncated` flag is `TRUE`, and is
#'   `NULL` otherwise (flag absent, `FALSE`, or `NA`).
#' @noRd
truncated_authors <- function(list_result) {
  lapply(
    list_result,
    function(x) {
      # Exact-name extraction: `$` on a list partially matches names, so a
      # record lacking `id` could otherwise silently hand back `ids`.
      flag <- x[["is_authors_truncated"]]
      # isTRUE() treats NULL and NA as "not truncated" instead of letting
      # `if` fail with "missing value where TRUE/FALSE needed".
      if (isTRUE(flag)) x[["id"]] else NULL
    }
  )
}

get_next_page <- function(paging, i, res = NULL) {
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ Let's first check how many records match the query, then download the entire col
```{r}
my_arguments <- list(
entity = "authors",
last_known_institution.id = "I71267560",
last_known_institutions.id = "I71267560",
works_count = ">499"
)
Expand Down
76 changes: 43 additions & 33 deletions README.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions man/authors2df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified man/figures/README-big-journals-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-biological-concepts-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-concept-cloud-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-italy-insts-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-snowballing-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion vignettes/articles/A_Brief_Introduction_to_openalexR.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ Let's first check how many records match the query, then set `count_only = FALSE
```{r}
my_arguments <- list(
entity = "authors",
last_known_institution.id = "I71267560",
last_known_institutions.id = "I71267560",
works_count = ">499"
)
Expand Down
8 changes: 4 additions & 4 deletions vignettes/articles/Filters.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ See the [original documentation](https://docs.openalex.org/how-to-use-the-api/ge
|-----------------------------------------|--------------------------------------------------------------------------------------------|
| `works_count` | \">99\" |
| `cited_by_count` | \">1000\" |
| `last_known_institution.id` | \"I4200000001\" |
| `last_known_institution.ror` | \"02nr0ka47\" |
| `last_known_institution.country_code` | \"CA\" |
| `last_known_institution.type` | \"nonprofit\" |
| `last_known_institutions.id` | \"I4200000001\" |
| `last_known_institutions.ror` | \"02nr0ka47\" |
| `last_known_institutions.country_code` | \"CA\" |
| `last_known_institutions.type` | \"nonprofit\" |
| `openalex` | \"A2208157607\" |
| `orcid` | \"0000-0001-6187-6610\" |
| `mag` | \"2208157607\" |
Expand Down

0 comments on commit 10aad43

Please sign in to comment.