diff --git a/DESCRIPTION b/DESCRIPTION index 8c0a307..a7e0408 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Authors@R: comment = c(ORCID = "0000-0001-6759-5406")), person("Lorin", "Crawford", email="lorin_crawford@brown.edu", role = "aut", comment = c(ORCID = "0000-0003-0178-8242"))) -Description: Understanding morphological variation is an important task in many applications. Recent studies in computational biology have focused on developing computational tools for the task of sub-image selection which aims at identifying structural features that best describe the variation between classes of shapes. A major part in assessing the utility of these approaches is to demonstrate their performance on both simulated and real datasets. However, when creating a model for shape statistics, real data can be difficult to access and the sample sizes for these data are often small due to them being expensive to collect. Meanwhile, the landscape of current shape simulation methods has been mostly limited to approaches that use black-box inference---making it difficult to systematically assess the power and calibration of sub-image models. In this R package, we introduce the alpha-shape sampler: a probabilistic framework for simulating realistic 2D and 3D shapes based on probability distributions which can be learned from real data or explicitly stated by the user. The 'ashapesampler' package supports two mechanisms for sampling shapes in two and three dimensions. The first, empirically sampling based on an existing data set, was highlighted in the original main text of the paper. The second, probabilistic sampling from a known distribution, is the computational implementation of the theory derived in that paper. Work based on Winn-Nunez et al.(2024). +Description: Understanding morphological variation is an important task in many applications. Recent studies in computational biology have focused on developing computational tools for the task of sub-image selection which aims at identifying structural features that best describe the variation between classes of shapes. A major part in assessing the utility of these approaches is to demonstrate their performance on both simulated and real datasets. However, when creating a model for shape statistics, real data can be difficult to access and the sample sizes for these data are often small due to them being expensive to collect. Meanwhile, the landscape of current shape simulation methods has been mostly limited to approaches that use black-box inference---making it difficult to systematically assess the power and calibration of sub-image models. In this R package, we introduce the alpha-shape sampler: a probabilistic framework for simulating realistic 2D and 3D shapes based on probability distributions which can be learned from real data or explicitly stated by the user. The 'ashapesampler' package supports two mechanisms for sampling shapes in two and three dimensions. The first, empirically sampling based on an existing data set, was highlighted in the original main text of the paper. The second, probabilistic sampling from a known distribution, is the computational implementation of the theory derived in that paper. Work based on Winn-Nunez et al. (2024) . License: GPL (>=3) Imports: pracma, diff --git a/NAMESPACE b/NAMESPACE index 6afd23a..6c25006 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(tau_bound) export(write_alpha_txt) import(doParallel) import(foreach) +import(parallel) importFrom(Rvcg,vcgGetEdge) importFrom(Rvcg,vcgImport) importFrom(TDA,alphaComplexFiltration) diff --git a/R/mcmc.R b/R/mcmc.R index 7029196..72dafdd 100644 --- a/R/mcmc.R +++ b/R/mcmc.R @@ -25,6 +25,7 @@ globalVariables(c("i")) #' @importFrom stats runif #' @import doParallel #' @import foreach +#' @import parallel generate_ashape3d <- function(point_cloud, J, tau, @@ -44,7 +45,8 @@ generate_ashape3d <- function(point_cloud, cores <- max(1L, parallel::detectCores(), na.rm = TRUE) } } - registerDoParallel(cores = cores) + cl <- makeCluster(cores) + registerDoParallel(cl) #Check: 3 columns on vertex list if (dim(point_cloud)[2] != 3) { stop("Point cloud does not have correct number of columns.") @@ -103,7 +105,6 @@ generate_ashape3d <- function(point_cloud, .combine = rbind, .export = c("runif_ball_3D", "euclid_dists_point_cloud_3D") ) %dopar% { - #for (i in 1:n_vert){ new_points = runif_ball_3D(m, sample_rad) + cbind(rep(point_cloud[i, 1], m), rep(point_cloud[i, 2], m), rep(point_cloud[i, 3], m)) @@ -123,6 +124,7 @@ generate_ashape3d <- function(point_cloud, } keep_pts } + stopCluster(cl) my_points = unique(my_points) #keeps error free if necessary. if (dim(my_points)[1] < 5) { stop("Not enough points accepted to make a shape. Need at least 5. Check tau and k_min parameters to @@ -158,6 +160,7 @@ generate_ashape3d <- function(point_cloud, #' @importFrom stats runif #' @import doParallel #' @import foreach +#' @import parallel generate_ashape2d <- function(point_cloud, J, tau, @@ -177,7 +180,8 @@ generate_ashape2d <- function(point_cloud, cores <- max(1L, parallel::detectCores(), na.rm = TRUE) } } - registerDoParallel(cores = cores) + cl = makeCluster(cores) + registerDoParallel(cl) #Check: 2 columns on vertex list if (dim(point_cloud)[2] != 2) { stop("Point cloud does not have correct number of columns.") @@ -237,7 +241,6 @@ generate_ashape2d <- function(point_cloud, .combine = rbind, .export = c("runif_disk", "euclid_dists_point_cloud_2D") ) %dopar% { - #for(i in 1:n_vert){ new_points = runif_disk(m, sample_rad) + cbind(rep(point_cloud[i, 1], m), rep(point_cloud[i, 2], m)) keep_pts = matrix(NA, nrow = 0, ncol = 2) for (j in 1:m) { @@ -255,6 +258,7 @@ generate_ashape2d <- function(point_cloud, } keep_pts } + stopCluster(cl) my_points = unique(my_points) #keeps error free if necessary. if (dim(my_points)[1] < 3) { stop("Not enough points accepted to make a shape. Need at least 3. Check tau and k_min parameters to diff --git a/R/tau_bound.R b/R/tau_bound.R index bb3744a..8c365a7 100644 --- a/R/tau_bound.R +++ b/R/tau_bound.R @@ -28,17 +28,19 @@ #' @importFrom stats median #' @import doParallel #' @import foreach +#' @import parallel #' @importFrom dplyr setdiff tau_bound <- function(v_list, complex, extremes=NULL, cores = 1, sumstat="mean"){ - ### Determine the number of Cores for Parallelization ### - # if(cores > 1){ - # if(cores>detectCores()){ - # warning("The number of cores you're setting is larger than available cores!") - # cores <- max(1L, detectCores(), na.rm = TRUE)} - # } - # - # ### Register those Cores ### - # registerDoParallel(cores=cores) + # ### Determine the number of Cores for Parallelization ### + if(cores > 1){ + if(cores>parallel::detectCores()){ + warning("The number of cores you're setting is larger than available cores!") + cores <- max(1L, parallel::detectCores(), na.rm = TRUE)} + } + + ### Register those Cores ### + cl <- makeCluster(cores) + registerDoParallel(cl) dimension = dim(v_list)[2] n = dim(v_list)[1] @@ -90,10 +92,10 @@ tau_bound <- function(v_list, complex, extremes=NULL, cores = 1, sumstat="mean") t_circ = circumcenter_tet(v_list, t_list) } tau_vec=vector("numeric", m) - #tau_vec = foreach(k=1:m, .combine=cbind, - #.export = c("euclid_dists_point_cloud_2D", - # "euclid_dists_point_cloud_3D"))%dopar%{ - for(k in 1:m){ + k=NULL + tau_vec = foreach(k=1:m, .combine=cbind, + .export = c("euclid_dists_point_cloud_2D", + "euclid_dists_point_cloud_3D"))%dopar%{ i = extremes[k] edge_list_zoom = c(which(e_list$ed1==i), which(e_list$ed2==i)) edge_list_zoom = c(e_list[edge_list_zoom,1], e_list[edge_list_zoom,2]) @@ -112,7 +114,7 @@ tau_bound <- function(v_list, complex, extremes=NULL, cores = 1, sumstat="mean") dist_vec_point = as.matrix(dist_matrix[,i]) #Find smallest distance from point that is longer than edges, face bc, or tet bc if (length(edge_list_zoom)==0){ - tau_vec[k] = min(dist_vec_point[dist_vec_point>0]) #case where isolated point + tau = min(dist_vec_point[dist_vec_point>0]) #case where isolated point } else { dist_vec = dist_vec_point[edge_list_zoom] dist_vec_b = c() @@ -134,13 +136,14 @@ tau_bound <- function(v_list, complex, extremes=NULL, cores = 1, sumstat="mean") } dist_vec = max(c(dist_vec, dist_vec_b)) if(length(dist_vec_point[dist_vec_point>dist_vec])==0){ - tau_vec[k] = dist_vec + tau = dist_vec } else { - tau_vec[k] = min(dist_vec_point[dist_vec_point>dist_vec]) + tau = min(dist_vec_point[dist_vec_point>dist_vec]) } } - #tau + tau } + stopCluster(cl) tau_keep = -1 if(sumstat=="max"){ tau_keep = max(tau_vec[tau_vec>0]) diff --git a/cran-comments.md b/cran-comments.md index d1a17a1..056721b 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,14 @@ +January 28, 2024 + +The following edits have been made to the Description file as requested: +* space added before and after the year (2024). + +Documents have been checked and ensured that no more than 2 cores are used in examples and vignettes. Specifically: +* inst/doc/annulus_demo.R -- Chunk 2/Line 25 sets cores to be the minimum of 2 or however many cores are detected. The maximum value cores can be is 2, but can still function if only one core is available. +* inst/doc/torus_demo.R -- Chunk 2/Line 23 sets cores to be the minimum of 2 or however many cores are detected. The maximum value cores can be is 2, but can still function if only one core is available. +* R/tau_bound.R -- Lines 35-39 is a safety check within the function to ensure no more cores are called than what is available. Default for the function is 1 core. If more cores are called than available, the number of cores will be at least 1 but no more than what is available. In the case of the vigenttes annulus_demo and torus_demo, no more than 2 cores will be called and where 2 cores are called, they have passed the availability check. +* R/mcmc.R -- Lines 42-47 and 177-182 are the same safety check as for tau_bound.R and function the same way. + January 26, 2024 The following edits have been made to the Description file as requested: diff --git a/vignettes/annulus_demo.Rmd b/vignettes/annulus_demo.Rmd index 7c541bc..40e0b2c 100644 --- a/vignettes/annulus_demo.Rmd +++ b/vignettes/annulus_demo.Rmd @@ -23,7 +23,6 @@ library(ggplot2) library(doParallel) library(parallel) cores <- min(2L, detectCores()) -registerDoParallel(cores=cores) ``` In this document, we demonstrate the $\alpha$-shape sampler pipeline by simulating diff --git a/vignettes/torus_demo.Rmd b/vignettes/torus_demo.Rmd index 541e116..0838766 100644 --- a/vignettes/torus_demo.Rmd +++ b/vignettes/torus_demo.Rmd @@ -21,7 +21,6 @@ library(alphahull) library(doParallel) library(parallel) cores <- min(2L, detectCores()) -registerDoParallel(cores=cores) library(rgl) options(rgl.useNULL = TRUE) ```