From aad7ab7962b4f0f46f3912c70886e34b1412d67e Mon Sep 17 00:00:00 2001 From: Jeroen Ooms Date: Wed, 5 Feb 2025 13:56:14 +0100 Subject: [PATCH] For CRAN and BIOC only consider recently modified repos. --- R/meta.R | 41 +++++++++++++++++++++++++++++++---------- R/monorepos.R | 2 +- R/update.R | 11 +++++++++-- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/R/meta.R b/R/meta.R index b7c5e26..f6edb34 100644 --- a/R/meta.R +++ b/R/meta.R @@ -37,11 +37,10 @@ check_and_trigger <- function(universe){ needs_update <- function(universe){ if(universe == 'cran') { - if(format(Sys.time(), '%H') == '00') return("Everything") - return(cran_recently_updated()) + return(github_last_update('cran')) } if(universe == 'bioc') { - return(metabioc_recently_updated()) + return(github_last_update('bioc')) } retry(git_clone(paste0('https://github.com/r-universe/', universe))) fullpath <- normalizePath(universe) @@ -105,20 +104,42 @@ trigger_workflow <- function(universe, workflow = 'sync.yml', inputs = NULL){ gh::gh(url, .method = 'POST', ref = 'master', inputs = inputs) } -# TODO: this may not work as intended because metacran lags behind CRAN -cran_recently_updated <- function(hours = 1){ +cran_recent_updates <- function(days = 1){ tmp <- tempfile() on.exit(unlink(tmp)) curl::curl_download('https://cloud.r-project.org/web/packages/packages.rds', destfile = tmp) cran <- as.data.frame(readRDS(tmp), stringsAsFactors = FALSE) - cran$age <- difftime(Sys.time(), as.POSIXct(cran[['Date/Publication']]), units = 'hours') - cran$Package[cran$age < hours] + cran$age <- difftime(Sys.time(), as.POSIXct(cran[['Date/Publication']]), units = 'days') + cran$Package[cran$age < days] } -metabioc_recently_updated <- function(hours = 1){ - latest <- gh::gh("/orgs/bioc/repos", sort='pushed', per_page = 1)[[1]] +bioc_recent_updates <- function(days = 1){ + yml <- yaml::read_yaml("https://bioconductor.org/config.yaml") + bioc <- jsonlite::read_json(sprintf('https://bioconductor.org/packages/json/%s/bioc/packages.json', yml$devel_version)) + stopifnot(length(bioc) > 2100) + dates <- as.Date(vapply(bioc, function(x) as.character(x$git_last_commit_date)[1], character(1))) + names(which(Sys.Date()-dates < days)) +} + +github_recent_updates <- function(org = 'cran', max = 100){ + repos <- gh::gh("/orgs/{org}/repos", sort='pushed', per_page = max, org = org) + vapply(repos, function(x) x$name, character(1)) +} + +make_filter_list <- function(org){ + if(org == 'cran'){ + return(c(cran_recent_updates(7), github_recent_updates('cran'))) + } + if(org == 'bioc'){ + # TODO: at time of a new bioc release this can be high for a while + return(c(bioc_recent_updates(30), github_recent_updates('bioc'))) + } +} + +github_last_update <- function(org, hours = 1){ + latest <- gh::gh("/orgs/{org}/repos", org = org, sort = 'pushed', per_page = 1)[[1]] pushed <- as.POSIXct(sub("T", " ", latest$pushed_at), tz = 'UTC') if(difftime(Sys.time(), pushed, units = 'hours') < hours){ - return('Everything') + return(latest$name) } } diff --git a/R/monorepos.R b/R/monorepos.R index 672d4fa..b415a88 100644 --- a/R/monorepos.R +++ b/R/monorepos.R @@ -112,7 +112,7 @@ sync_from_registry <- function(monorepo_url = Sys.getenv('MONOREPO_URL')){ print_message("Registry has DUPLICATED packages: '%s'", registry_pkgs[registry_dups]) } check_new_release_tags() - skiplist <- submodules_up_to_date(skip_broken = FALSE) + skiplist <- submodules_up_to_date(skip_broken = FALSE, filter_packages = make_filter_list(monorepo_name)) print_message("Submodules up-to-date: %d", length(skiplist)) dirty <- Filter(function(x){is.na(match(x$package, skiplist))}, registry[!registry_dups]) results1 <- lapply(dirty, try_update_package, update_pkg_remotes = TRUE) diff --git a/R/update.R b/R/update.R index ef46c94..484f875 100644 --- a/R/update.R +++ b/R/update.R @@ -42,13 +42,20 @@ update_submodules <- function(path = '.', skip = '.registry'){ #' @export #' @rdname sync -submodules_up_to_date <- function(skip_broken = TRUE, path = '.'){ +submodules_up_to_date <- function(skip_broken = TRUE, filter_packages = NULL, path = '.'){ withr::local_dir(path) repo <- gert::git_open(path) submodules <- gert::git_submodule_list(repo = repo) + skiplist <- NULL + if(length(filter_packages)){ + do_check <- submodules$path %in% filter_packages + skiplist <- submodules$path[!do_check] + submodules <- submodules[do_check,] + print_message("Skipping check for %d packages without recent activity", length(skiplist)) + } submodules$upstream <- remote_heads_in_batches(submodules$url, submodules$branch) isok <- which(submodules$upstream == submodules$head) - fine <- submodules$path[isok] + fine <- c(submodules$path[isok], skiplist) broken <- submodules[is.na(submodules$upstream),] for(i in seq_len(nrow(broken))){ module <- as.list(broken[i,])