From c3555f72a4dafcf8e964962c5d1a07330bc9d3c1 Mon Sep 17 00:00:00 2001 From: rwdavies Date: Wed, 21 Apr 2021 12:21:50 +0100 Subject: [PATCH] finish adding flag to check removal of object --- QUILT/R/RcppExports.R | 4 +- QUILT/R/copied_from_stitch.R | 2 +- QUILT/R/functions.R | 68 ++++++++++++++-------- QUILT/R/quilt.R | 13 ++++- QUILT/R/reference-single.R | 7 ++- QUILT/man/QUILT.Rd | 5 +- QUILT/src/RcppExports.cpp | 23 ++++---- QUILT/src/gibbs-nipt.cpp | 107 +++++++++++++++++++++++++++-------- 8 files changed, 161 insertions(+), 68 deletions(-) diff --git a/QUILT/R/RcppExports.R b/QUILT/R/RcppExports.R index 50fdb1a..2a020fb 100644 --- a/QUILT/R/RcppExports.R +++ b/QUILT/R/RcppExports.R @@ -182,8 +182,8 @@ rcpp_fly_weighter <- function(i, gCurrent, gAverage, ll_current, log_mult, ll_re } #' @export -rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, wif0, grid_has_read, L_grid, smooth_cm, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_subset = FALSE, run_fb_grid_offset = 0L, return_genProbs = TRUE, return_hapProbs = TRUE, return_gamma = TRUE, return_alpha = FALSE, return_p_store = FALSE, return_extra = FALSE, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, use_starting_read_labels = FALSE, verbose = FALSE, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), return_gibbs_block_output = FALSE, return_advanced_gibbs_block_output = FALSE, rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE) { - .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, wif0, grid_has_read, L_grid, smooth_cm, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, return_genProbs, return_hapProbs, return_gamma, return_alpha, return_p_store, return_extra, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, return_gibbs_block_output, return_advanced_gibbs_block_output, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs) +rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_subset = FALSE, run_fb_grid_offset = 0L, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, use_starting_read_labels = FALSE, verbose = FALSE, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE, use_small_eHapsCurrent_tc = TRUE) { + .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc) } #' @export diff --git a/QUILT/R/copied_from_stitch.R b/QUILT/R/copied_from_stitch.R index 27638a7..3403433 100644 --- a/QUILT/R/copied_from_stitch.R +++ b/QUILT/R/copied_from_stitch.R @@ -9,7 +9,7 @@ print_message <- function(x, include_mem = FALSE) { } else { mem <- "" } - print( + message( paste0( "[", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "] ", mem, x ) diff --git a/QUILT/R/functions.R b/QUILT/R/functions.R index d80866c..3a5613f 100644 --- a/QUILT/R/functions.R +++ b/QUILT/R/functions.R @@ -528,7 +528,8 @@ get_and_impute_one_sample <- function( print_extra_timing_information, n_gibbs_burn_in_its, block_gibbs_iterations, - plot_per_sample_likelihoods + plot_per_sample_likelihoods, + use_small_eHapsCurrent_tc ) { @@ -803,6 +804,7 @@ get_and_impute_one_sample <- function( distinctHapsIE = distinctHapsIE, hapMatcher = hapMatcher, rhb_t = rhb_t, + ref_error = ref_error, nSNPs = nSNPs, sampleReads = sampleReads, small_eHapsCurrent_tc = small_eHapsCurrent_tc, @@ -824,7 +826,6 @@ get_and_impute_one_sample <- function( block_gibbs_iterations = block_gibbs_iterations, perform_block_gibbs = TRUE, make_plots = make_plots, - ref_error = ref_error, wif0 = wif0, grid_has_read = grid_has_read, plot_description = paste0("it", i_it, ".gibbs"), @@ -858,7 +859,8 @@ get_and_impute_one_sample <- function( sample_name = sample_name, regionStart = regionStart, regionEnd = regionEnd, - buffer = buffer + buffer = buffer, + use_small_eHapsCurrent_tc = use_small_eHapsCurrent_tc ) if (hla_run) { @@ -1849,6 +1851,7 @@ impute_one_sample <- function( distinctHapsIE, hapMatcher, rhb_t, + ref_error, nSNPs, sampleReads, small_eHapsCurrent_tc, @@ -1871,7 +1874,6 @@ impute_one_sample <- function( perform_block_gibbs, make_plots, maxDifferenceBetweenReads, - ref_error, wif0, grid_has_read, verbose, @@ -1908,7 +1910,8 @@ impute_one_sample <- function( suppressOutput = 1, use_smooth_cm_in_block_gibbs = TRUE, block_gibbs_quantile_prob = 0.95, - make_plots_block_gibbs = FALSE + make_plots_block_gibbs = FALSE, + use_small_eHapsCurrent_tc = TRUE ) { ## K <- length(which_haps_to_use) @@ -1950,13 +1953,15 @@ impute_one_sample <- function( ## argh ## print(paste0("start = ", Sys.time())) - inflate_fhb_t_in_place( - rhb_t, - small_eHapsCurrent_tc, - haps_to_get = which_haps_to_use - 1, - nSNPs = nSNPs, - ref_error = ref_error - ) + if (use_small_eHapsCurrent_tc) { + inflate_fhb_t_in_place( + rhb_t, + small_eHapsCurrent_tc, + haps_to_get = which_haps_to_use - 1, + nSNPs = nSNPs, + ref_error = ref_error + ) + } if (make_plots_block_gibbs) { return_gibbs_block_output <- TRUE return_advanced_gibbs_block_output <- TRUE @@ -1964,13 +1969,37 @@ impute_one_sample <- function( ## ## ugh, alphaMatCurrent_tc is a CONSTANT ## - + ## param_list <- list( + ## return_alpha = FALSE, + ## return_p_store = FALSE, + ## return_extra = FALSE, + ## return_genProbs = TRUE, + ## return_hapProbs = TRUE, + ## return_gamma = TRUE, + ## return_gibbs_block_output = FALSE, + ## return_advanced_gibbs_block_output = FALSE + ## ) + param_list <- list( + return_alpha = FALSE, + return_extra = FALSE, + return_genProbs = return_genProbs, + return_gamma = as.logical(return_gamma | make_plots), + return_hapProbs = return_hapProbs, + return_p_store = return_p_store, + return_gibbs_block_output = return_gibbs_block_output, + return_advanced_gibbs_block_output = return_advanced_gibbs_block_output + ) out <- rcpp_forwardBackwardGibbsNIPT( sampleReads = sampleReads, priorCurrent_m = small_priorCurrent_m, alphaMatCurrent_tc = small_alphaMatCurrent_tc, eHapsCurrent_tc = small_eHapsCurrent_tc, transMatRate_tc_H = small_transMatRate_tc_H, + hapMatcher = hapMatcher, + distinctHapsIE = distinctHapsIE, + rhb_t = rhb_t, + ref_error = ref_error, + which_haps_to_use = which_haps_to_use, ff = 0, maxDifferenceBetweenReads = maxDifferenceBetweenReads, Jmax = Jmax, @@ -1978,12 +2007,6 @@ impute_one_sample <- function( run_fb_subset = FALSE, run_fb_grid_offset = FALSE, blocks_for_output = array(0, c(1, 1)), - return_alpha = FALSE, - return_extra = FALSE, - return_genProbs = return_genProbs, - return_gamma = as.logical(return_gamma | make_plots), - return_hapProbs = return_hapProbs, - return_p_store = return_p_store, ## return_p_store grid = grid, pass_in_alphaBeta = TRUE, alphaHat_t1 = alphaHat_t1, @@ -2019,14 +2042,13 @@ impute_one_sample <- function( shuffle_bin_radius = shuffle_bin_radius, L_grid = L_grid, block_gibbs_iterations = block_gibbs_iterations, - return_gibbs_block_output = return_gibbs_block_output, - return_advanced_gibbs_block_output = return_advanced_gibbs_block_output, rescale_eMatRead_t = rescale_eMatRead_t, smooth_cm = smooth_cm, + param_list = param_list, use_smooth_cm_in_block_gibbs = use_smooth_cm_in_block_gibbs, - block_gibbs_quantile_prob = block_gibbs_quantile_prob + block_gibbs_quantile_prob = block_gibbs_quantile_prob, + use_small_eHapsCurrent_tc = use_small_eHapsCurrent_tc ) - ## genProbs_t <- out$genProbsM_t hapProbs_t <- out$happrobs_t diff --git a/QUILT/R/quilt.R b/QUILT/R/quilt.R index af10cca..69fbee7 100644 --- a/QUILT/R/quilt.R +++ b/QUILT/R/quilt.R @@ -64,6 +64,7 @@ #' @param block_gibbs_iterations What iterations to perform block Gibbs sampling for the Gibbs sampler #' @param n_gibbs_burn_in_its How many iterations to run the Gibbs sampler for each time it is run #' @param plot_per_sample_likelihoods Plot per sample likelihoods i.e. the likelihood as the method progresses through the Gibbs sampling iterations +#' @param use_small_eHapsCurrent_tc For testing purposes only #' @return Results in properly formatted version #' @author Robert Davies #' @export @@ -131,7 +132,8 @@ QUILT <- function( print_extra_timing_information = FALSE, block_gibbs_iterations = c(3,6,9), n_gibbs_burn_in_its = 20, - plot_per_sample_likelihoods = FALSE + plot_per_sample_likelihoods = FALSE, + use_small_eHapsCurrent_tc = TRUE ) { @@ -579,7 +581,11 @@ QUILT <- function( betaHat_t3 <- array(0, c(K, nGrids)) small_priorCurrent_m <- array(1 / K, c(K, S)) small_alphaMatCurrent_tc <- array(1 / K, c(K, nGrids - 1, S)) - small_eHapsCurrent_tc <- array(0, c(K, nSNPs, S)) + if (use_small_eHapsCurrent_tc) { + small_eHapsCurrent_tc <- array(0, c(K, nSNPs, S)) + } else { + small_eHapsCurrent_tc <- array(0, c(1, 1, 1)) + } results_across_samples <- as.list(sampleRange[2] - sampleRange[1] + 1) @@ -661,7 +667,8 @@ QUILT <- function( print_extra_timing_information = print_extra_timing_information, n_gibbs_burn_in_its = n_gibbs_burn_in_its, block_gibbs_iterations = block_gibbs_iterations, - plot_per_sample_likelihoods = plot_per_sample_likelihoods + plot_per_sample_likelihoods = plot_per_sample_likelihoods, + use_small_eHapsCurrent_tc = use_small_eHapsCurrent_tc ) results_across_samples[[iSample - sampleRange[1] + 1]] <- out diff --git a/QUILT/R/reference-single.R b/QUILT/R/reference-single.R index 1a48e6e..3410463 100644 --- a/QUILT/R/reference-single.R +++ b/QUILT/R/reference-single.R @@ -366,7 +366,8 @@ make_rhb_t_equality <- function( rhb_t, nSNPs, ref_error, - nMaxDH = NA + nMaxDH = NA, + verbose = TRUE ) { if (is.na(nMaxDH)) { nMaxDH_default <- 2 ** 8 - 1 @@ -417,7 +418,9 @@ make_rhb_t_equality <- function( max(c(2 ** 4 - 1, suggested_value)), nMaxDH_default ) - print_message(paste0("Using nMaxDH = ", nMaxDH)) + if (verbose) { + print_message(paste0("Using nMaxDH = ", nMaxDH)) + } distinctHapsB <- distinctHapsB[1:nMaxDH, ] hapMatcher[hapMatcher > (nMaxDH)] <- 0 } diff --git a/QUILT/man/QUILT.Rd b/QUILT/man/QUILT.Rd index 0d03d9a..c2946be 100644 --- a/QUILT/man/QUILT.Rd +++ b/QUILT/man/QUILT.Rd @@ -68,7 +68,8 @@ QUILT( print_extra_timing_information = FALSE, block_gibbs_iterations = c(3, 6, 9), n_gibbs_burn_in_its = 20, - plot_per_sample_likelihoods = FALSE + plot_per_sample_likelihoods = FALSE, + use_small_eHapsCurrent_tc = TRUE ) } \arguments{ @@ -199,6 +200,8 @@ QUILT( \item{n_gibbs_burn_in_its}{How many iterations to run the Gibbs sampler for each time it is run} \item{plot_per_sample_likelihoods}{Plot per sample likelihoods i.e. the likelihood as the method progresses through the Gibbs sampling iterations} + +\item{use_small_eHapsCurrent_tc}{For testing purposes only} } \value{ Results in properly formatted version diff --git a/QUILT/src/RcppExports.cpp b/QUILT/src/RcppExports.cpp index 431927c..be18e98 100644 --- a/QUILT/src/RcppExports.cpp +++ b/QUILT/src/RcppExports.cpp @@ -842,8 +842,8 @@ BEGIN_RCPP END_RCPP } // rcpp_forwardBackwardGibbsNIPT -Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::cube& hapSum_tc, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const bool run_fb_subset, const int run_fb_grid_offset, const bool return_genProbs, const bool return_hapProbs, bool return_gamma, const bool return_alpha, const bool return_p_store, const bool return_extra, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const bool use_starting_read_labels, const bool verbose, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool return_gibbs_block_output, const bool return_advanced_gibbs_block_output, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs); -RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP hapSum_tcSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_subsetSEXP, SEXP run_fb_grid_offsetSEXP, SEXP return_genProbsSEXP, SEXP return_hapProbsSEXP, SEXP return_gammaSEXP, SEXP return_alphaSEXP, SEXP return_p_storeSEXP, SEXP return_extraSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP use_starting_read_labelsSEXP, SEXP verboseSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP return_gibbs_block_outputSEXP, SEXP return_advanced_gibbs_block_outputSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP) { +Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, const arma::imat& rhb_t, double ref_error, const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, Rcpp::List param_list, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const bool run_fb_subset, const int run_fb_grid_offset, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const bool use_starting_read_labels, const bool verbose, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs, const bool use_small_eHapsCurrent_tc); +RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP hapSum_tcSEXP, SEXP hapMatcherSEXP, SEXP distinctHapsIESEXP, SEXP rhb_tSEXP, SEXP ref_errorSEXP, SEXP which_haps_to_useSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP param_listSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_subsetSEXP, SEXP run_fb_grid_offsetSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP use_starting_read_labelsSEXP, SEXP verboseSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP, SEXP use_small_eHapsCurrent_tcSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -861,21 +861,21 @@ BEGIN_RCPP Rcpp::traits::input_parameter< arma::mat& >::type alphaHat_t3(alphaHat_t3SEXP); Rcpp::traits::input_parameter< arma::mat& >::type betaHat_t3(betaHat_t3SEXP); Rcpp::traits::input_parameter< arma::cube& >::type hapSum_tc(hapSum_tcSEXP); + Rcpp::traits::input_parameter< arma::imat& >::type hapMatcher(hapMatcherSEXP); + Rcpp::traits::input_parameter< arma::mat& >::type distinctHapsIE(distinctHapsIESEXP); + Rcpp::traits::input_parameter< const arma::imat& >::type rhb_t(rhb_tSEXP); + Rcpp::traits::input_parameter< double >::type ref_error(ref_errorSEXP); + Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type which_haps_to_use(which_haps_to_useSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector& >::type wif0(wif0SEXP); Rcpp::traits::input_parameter< Rcpp::LogicalVector& >::type grid_has_read(grid_has_readSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector& >::type L_grid(L_gridSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector& >::type smooth_cm(smooth_cmSEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type param_list(param_listSEXP); Rcpp::traits::input_parameter< const int >::type Jmax_local(Jmax_localSEXP); Rcpp::traits::input_parameter< const double >::type maxDifferenceBetweenReads(maxDifferenceBetweenReadsSEXP); Rcpp::traits::input_parameter< const double >::type maxEmissionMatrixDifference(maxEmissionMatrixDifferenceSEXP); Rcpp::traits::input_parameter< const bool >::type run_fb_subset(run_fb_subsetSEXP); Rcpp::traits::input_parameter< const int >::type run_fb_grid_offset(run_fb_grid_offsetSEXP); - Rcpp::traits::input_parameter< const bool >::type return_genProbs(return_genProbsSEXP); - Rcpp::traits::input_parameter< const bool >::type return_hapProbs(return_hapProbsSEXP); - Rcpp::traits::input_parameter< bool >::type return_gamma(return_gammaSEXP); - Rcpp::traits::input_parameter< const bool >::type return_alpha(return_alphaSEXP); - Rcpp::traits::input_parameter< const bool >::type return_p_store(return_p_storeSEXP); - Rcpp::traits::input_parameter< const bool >::type return_extra(return_extraSEXP); Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type grid(gridSEXP); Rcpp::traits::input_parameter< int >::type snp_start_1_based(snp_start_1_basedSEXP); Rcpp::traits::input_parameter< int >::type snp_end_1_based(snp_end_1_basedSEXP); @@ -903,13 +903,12 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const bool >::type perform_block_gibbs(perform_block_gibbsSEXP); Rcpp::traits::input_parameter< const int >::type shuffle_bin_radius(shuffle_bin_radiusSEXP); Rcpp::traits::input_parameter< const Rcpp::IntegerVector >::type block_gibbs_iterations(block_gibbs_iterationsSEXP); - Rcpp::traits::input_parameter< const bool >::type return_gibbs_block_output(return_gibbs_block_outputSEXP); - Rcpp::traits::input_parameter< const bool >::type return_advanced_gibbs_block_output(return_advanced_gibbs_block_outputSEXP); Rcpp::traits::input_parameter< const bool >::type rescale_eMatRead_t(rescale_eMatRead_tSEXP); Rcpp::traits::input_parameter< const bool >::type use_smooth_cm_in_block_gibbs(use_smooth_cm_in_block_gibbsSEXP); Rcpp::traits::input_parameter< const double >::type block_gibbs_quantile_prob(block_gibbs_quantile_probSEXP); Rcpp::traits::input_parameter< const bool >::type do_shard_ff0_block_gibbs(do_shard_ff0_block_gibbsSEXP); - rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, wif0, grid_has_read, L_grid, smooth_cm, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, return_genProbs, return_hapProbs, return_gamma, return_alpha, return_p_store, return_extra, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, return_gibbs_block_output, return_advanced_gibbs_block_output, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs)); + Rcpp::traits::input_parameter< const bool >::type use_small_eHapsCurrent_tc(use_small_eHapsCurrent_tcSEXP); + rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc)); return rcpp_result_gen; END_RCPP } @@ -1265,7 +1264,7 @@ static const R_CallMethodDef CallEntries[] = { {"_QUILT_rcpp_gibbs_nipt_initialize", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_initialize, 31}, {"_QUILT_rcpp_gibbs_nipt_iterate", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_iterate, 49}, {"_QUILT_rcpp_fly_weighter", (DL_FUNC) &_QUILT_rcpp_fly_weighter, 14}, - {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 62}, + {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 61}, {"_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects", (DL_FUNC) &_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects, 11}, {"_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects", (DL_FUNC) &_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects, 11}, {"_QUILT_Rcpp_quilt_test_doubler", (DL_FUNC) &_QUILT_Rcpp_quilt_test_doubler, 1}, diff --git a/QUILT/src/gibbs-nipt.cpp b/QUILT/src/gibbs-nipt.cpp index 5e6bf52..eb6adec 100644 --- a/QUILT/src/gibbs-nipt.cpp +++ b/QUILT/src/gibbs-nipt.cpp @@ -42,7 +42,33 @@ void rcpp_make_eMatRead_t( const bool rescale_eMatRead_t = true ); +void Rcpp_make_eMatRead_t_for_gibbs_using_objects( + arma::mat& eMatRead_t, + const Rcpp::List& sampleReads, + const arma::imat& hapMatcher, + const Rcpp::IntegerVector& grid, + const arma::imat& rhb_t, + const arma::mat& distinctHapsIE, + const double ref_error, + const Rcpp::IntegerVector& which_haps_to_use, + const bool rescale_eMatRead_t, + const int Jmax, + const double maxDifferenceBetweenReads +); +void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( + arma::mat& genProbsM_t, + arma::mat& genProbsF_t, + arma::mat& hapProbs_t, + const arma::mat& gammaMT_t, + const arma::mat& gammaMU_t, + const arma::mat& gammaP_t, + const arma::imat& hapMatcher, + const arma::mat& distinctHapsIE, + const Rcpp::IntegerVector& which_haps_to_use, + const double ref_error, + const arma::imat& rhb_t +); void rcpp_make_eMatGrid_t( arma::mat& eMatGrid_t, @@ -1748,6 +1774,12 @@ void set_seed(double seed) { Rcpp::NumericMatrix unpack_gammas( + const bool use_small_eHapsCurrent_tc, + const arma::imat& hapMatcher, + const arma::mat& distinctHapsIE, + const Rcpp::IntegerVector& which_haps_to_use, + const double ref_error, + const arma::imat& rhb_t, int s, std::string& prev_section, std::string& next_section, @@ -1885,20 +1917,20 @@ Rcpp::NumericMatrix unpack_gammas( next_section="calculate genProbs"; prev=print_times(prev, suppressOutput, prev_section, next_section); prev_section=next_section; - rcpp_calculate_gn_genProbs_and_hapProbs( - genProbsM_t_local, - genProbsF_t_local, - hapProbs_t_local, - s, - eHapsCurrent_tc, - gammaMT_t_local, - gammaMU_t_local, - gammaP_t_local, - grid, - snp_start_1_based, - snp_end_1_based, - run_fb_grid_offset - ); + if (use_small_eHapsCurrent_tc) { + rcpp_calculate_gn_genProbs_and_hapProbs( + genProbsM_t_local, genProbsF_t_local, hapProbs_t_local, + s, eHapsCurrent_tc, + gammaMT_t_local, gammaMU_t_local, gammaP_t_local, + grid, snp_start_1_based, snp_end_1_based, run_fb_grid_offset + ); + } else { + rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( + genProbsM_t_local, genProbsF_t_local, hapProbs_t_local, + gammaMT_t_local, gammaMU_t_local, gammaP_t_local, + hapMatcher, distinctHapsIE, which_haps_to_use, ref_error, rhb_t + ); + } } // duplicate sometimes, but OK, is easy next_section="fly weighter"; @@ -1974,21 +2006,21 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::cube& hapSum_tc, + arma::imat& hapMatcher, + arma::mat& distinctHapsIE, + const arma::imat& rhb_t, + double ref_error, + const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, + Rcpp::List param_list, const int Jmax_local = 100, const double maxDifferenceBetweenReads = 1000, const double maxEmissionMatrixDifference = 10000000000, const bool run_fb_subset = false, const int run_fb_grid_offset = 0, - const bool return_genProbs = true, - const bool return_hapProbs = true, - bool return_gamma = true, - const bool return_alpha = false, - const bool return_p_store = false, - const bool return_extra = false, const Rcpp::IntegerVector& grid = 0, int snp_start_1_based = -1, int snp_end_1_based = -1, @@ -2016,12 +2048,11 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( const bool perform_block_gibbs = false, const int shuffle_bin_radius = 5000, const Rcpp::IntegerVector block_gibbs_iterations = Rcpp::IntegerVector::create(0), - const bool return_gibbs_block_output = false, - const bool return_advanced_gibbs_block_output = false, const bool rescale_eMatRead_t = true, const bool use_smooth_cm_in_block_gibbs = false, const double block_gibbs_quantile_prob = 0.9, - const bool do_shard_ff0_block_gibbs = true + const bool do_shard_ff0_block_gibbs = true, + const bool use_small_eHapsCurrent_tc = true ) { // I think these break the gibbs-ness - disable for now! // rescale_eMatRead_t should be fine to reset - will be constant across reads - only the read not per-base input considered @@ -2033,6 +2064,28 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( prev=print_times(prev, suppressOutput, prev_section, next_section); prev_section=next_section; // + // unpack list elements + // + // ## R code for the below + // ## param_list <- list( + // ## return_alpha = FALSE, + // ## return_p_store = FALSE, + // ## return_extra = FALSE, + // ## return_genProbs = TRUE, + // ## return_hapProbs = TRUE, + // ## return_gamma = TRUE, + // ## return_gibbs_block_output = FALSE, + // ## return_advanced_gibbs_block_output = FALSE + // ## ) + const bool return_alpha = as(param_list["return_alpha"]); + const bool return_p_store = as(param_list["return_p_store"]); + const bool return_extra = as(param_list["return_extra"]); + const bool return_genProbs = as(param_list["return_genProbs"]); + const bool return_hapProbs = as(param_list["return_hapProbs"]); + const bool return_gamma = as(param_list["return_gamma"]); + const bool return_gibbs_block_output = as(param_list["return_gibbs_block_output"]); + const bool return_advanced_gibbs_block_output = as(param_list["return_advanced_gibbs_block_output"]); + // // // initialize variables // @@ -2265,7 +2318,11 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( next_section="Initialize eMatRead_t"; prev=print_times(prev, suppressOutput, prev_section, next_section); prev_section=next_section; - rcpp_make_eMatRead_t(eMatRead_t, sampleReads, eHapsCurrent_tc, s, maxDifferenceBetweenReads, Jmax_local, eMatHapOri_t, pRgivenH1, pRgivenH2, prev, suppressOutput, prev_section, next_section, run_pseudo_haploid, rescale_eMatRead_t); + if (use_small_eHapsCurrent_tc) { + rcpp_make_eMatRead_t(eMatRead_t, sampleReads, eHapsCurrent_tc, s, maxDifferenceBetweenReads, Jmax_local, eMatHapOri_t, pRgivenH1, pRgivenH2, prev, suppressOutput, prev_section, next_section, run_pseudo_haploid, rescale_eMatRead_t); + } else { + Rcpp_make_eMatRead_t_for_gibbs_using_objects(eMatRead_t, sampleReads, hapMatcher, grid, rhb_t, distinctHapsIE, ref_error, which_haps_to_use, rescale_eMatRead_t, Jmax_local, maxDifferenceBetweenReads); + } // // // @@ -2338,6 +2395,7 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( i_result_it = i_outer; i_ever_it = i_outer; hap_label_prob_matrix = unpack_gammas( + use_small_eHapsCurrent_tc, hapMatcher, distinctHapsIE, which_haps_to_use, ref_error, rhb_t, s, prev_section, next_section, suppressOutput, prev, verbose, nReads, H, nGrids, eHapsCurrent_tc, grid, snp_start_1_based, snp_end_1_based, run_fb_grid_offset, @@ -2485,6 +2543,7 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( // if ((iteration + 1) > n_gibbs_burn_in_its) { hap_label_prob_matrix = unpack_gammas( + use_small_eHapsCurrent_tc, hapMatcher, distinctHapsIE, which_haps_to_use, ref_error, rhb_t, s, prev_section, next_section, suppressOutput, prev, verbose, nReads, H, nGrids, eHapsCurrent_tc, grid, snp_start_1_based, snp_end_1_based, run_fb_grid_offset,