From 6bd7b0b3d3ea78e89cd68dd014cb1748ca1e661d Mon Sep 17 00:00:00 2001 From: rwdavies Date: Thu, 22 Apr 2021 20:56:06 +0100 Subject: [PATCH] 0.1.7 speedups and RAM decrease --- CHANGELOG.md | 2 + QUILT.R | 6 +- QUILT/DESCRIPTION | 4 +- QUILT/R/RcppExports.R | 4 +- QUILT/R/functions.R | 78 +++---- QUILT/R/gibbs-small.R | 80 ------- QUILT/R/quilt.R | 21 +- QUILT/R/reference-single.R | 16 +- QUILT/man/QUILT.Rd | 2 +- QUILT/src/RcppExports.cpp | 17 +- QUILT/src/gibbs-nipt.cpp | 31 ++- QUILT/src/gibbs-small.cpp | 134 +++++++++--- QUILT/tests/testthat/test-unit-gibbs-small.R | 216 ++++++++++++++++++- README.md | 8 +- scripts/test-cli.R | 4 +- 15 files changed, 431 insertions(+), 192 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adc2d48..015b811 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +* v0.1.7 + * Speedups and RAM decrease when using reference panels with fewer haplotypes * v0.1.6 * Discovered bug where only 1 read caused an error. Fix (obviate) this by requiring 2 of more reads through parameter minimum_number_of_sample_reads with default currently 2, where samples with fewer than 2 reads have all missing output * v0.1.5 diff --git a/QUILT.R b/QUILT.R index e73ad31..bb1a1d8 100755 --- a/QUILT.R +++ b/QUILT.R @@ -388,9 +388,9 @@ option_list <- list( ), make_option( "--use_small_eHapsCurrent_tc", - type = "integer", - help = "For testing purposes only [default TRUE] ", - default = TRUE + type = "logical", + help = "For testing purposes only [default FALSE] ", + default = FALSE ) ) opt <- suppressWarnings(parse_args(OptionParser(option_list = option_list))) diff --git a/QUILT/DESCRIPTION b/QUILT/DESCRIPTION index 3f9f628..be7fd2d 100644 --- a/QUILT/DESCRIPTION +++ b/QUILT/DESCRIPTION @@ -1,8 +1,8 @@ Package: QUILT Type: Package Title: QUILT -Version: 0.1.6 -Date: 2021-04-09 +Version: 0.1.7 +Date: 2021-04-22 Author: Robert William Davies Maintainer: Robert William Davies Description: QUILT performs imputation of individuals sequenced to low coverage diff --git a/QUILT/R/RcppExports.R b/QUILT/R/RcppExports.R index 2a020fb..d7f4505 100644 --- a/QUILT/R/RcppExports.R +++ b/QUILT/R/RcppExports.R @@ -182,8 +182,8 @@ rcpp_fly_weighter <- function(i, gCurrent, gAverage, ll_current, log_mult, ll_re } #' @export -rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_subset = FALSE, run_fb_grid_offset = 0L, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, use_starting_read_labels = FALSE, verbose = FALSE, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE, use_small_eHapsCurrent_tc = TRUE) { - .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc) +rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_grid_offset = 0L, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE, use_small_eHapsCurrent_tc = TRUE) { + .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc) } #' @export diff --git a/QUILT/R/functions.R b/QUILT/R/functions.R index 3a5613f..5b83cb5 100644 --- a/QUILT/R/functions.R +++ b/QUILT/R/functions.R @@ -467,10 +467,16 @@ get_and_impute_one_sample <- function( small_transMatRate_tc_H, alphaHat_t1, betaHat_t1, + eMatGrid_t1, alphaHat_t2, betaHat_t2, + eMatGrid_t2, alphaHat_t3, betaHat_t3, + eMatGrid_t3, + gammaMT_t_local, + gammaMU_t_local, + gammaP_t_local, small_alphaMatCurrent_tc, small_priorCurrent_m, small_eHapsCurrent_tc, @@ -811,10 +817,16 @@ get_and_impute_one_sample <- function( small_transMatRate_tc_H = small_transMatRate_tc_H, alphaHat_t1 = alphaHat_t1, betaHat_t1 = betaHat_t1, + eMatGrid_t1 = eMatGrid_t1, alphaHat_t2 = alphaHat_t2, betaHat_t2 = betaHat_t2, + eMatGrid_t2 = eMatGrid_t2, alphaHat_t3 = alphaHat_t3, betaHat_t3 = betaHat_t3, + eMatGrid_t3 = eMatGrid_t3, + gammaMT_t_local = gammaMT_t_local, + gammaMU_t_local = gammaMU_t_local, + gammaP_t_local = gammaP_t_local, small_alphaMatCurrent_tc = small_alphaMatCurrent_tc, small_priorCurrent_m = small_priorCurrent_m, smooth_cm = smooth_cm, @@ -846,7 +858,7 @@ get_and_impute_one_sample <- function( return_extra = FALSE, return_genProbs = return_genProbs, return_hapProbs = return_hapProbs, - return_gibbs_block_output = TRUE, + return_gibbs_block_output = FALSE, gibbs_initialize_iteratively = gibbs_initialize_iteratively, gibbs_initialize_at_first_read = FALSE, maxDifferenceBetweenReads = maxDifferenceBetweenReads, @@ -962,7 +974,7 @@ get_and_impute_one_sample <- function( if (have_truth_haplotypes) { w <- i_it + n_seek_its * (i_gibbs_sample - 1) - x <- calculate_pse_and_r2_during_gibbs(inRegion2 = inRegion2, hap1 = hap1, hap2 = hap2, truth_haps = truth_haps, af = af, verbose = FALSE) + x <- calculate_pse_and_r2_during_gibbs(inRegion2 = inRegion2, hap1 = hap1, hap2 = hap2, truth_haps = truth_haps, af = af, verbose = verbose) pse_mat[w, ] <- c(i_gibbs_sample, i_it, as.integer(phasing_it), x) } } @@ -1858,10 +1870,16 @@ impute_one_sample <- function( small_transMatRate_tc_H, alphaHat_t1, betaHat_t1, + eMatGrid_t1, alphaHat_t2, betaHat_t2, + eMatGrid_t2, alphaHat_t3, betaHat_t3, + eMatGrid_t3, + gammaMT_t_local, + gammaMU_t_local, + gammaP_t_local, small_alphaMatCurrent_tc, small_priorCurrent_m, smooth_cm, @@ -1916,42 +1934,6 @@ impute_one_sample <- function( ## K <- length(which_haps_to_use) S <- 1 - - ## print("TEMPORARY") - ## print(paste0("/dev/shm/rwdavies/", sample_name, ".RData")) - ## save( - ## rhb_t, - ## small_eHapsCurrent_tc, - ## which_haps_to_use, - ## nSNPs, - ## ref_error, - ## maxDifferenceBetweenReads, - ## Jmax, - ## hapMatcher, - ## grid, - ## distinctHapsIE, - ## sampleReads, - ## file = paste0("/dev/shm/rwdavies/", sample_name, ".RData"), - ## compress = FALSE - ## ) - ## stop("WER") - ## load("~/temp.RData") - ## temp_compare_two_versions( - ## rhb_t, - ## small_eHapsCurrent_tc, - ## which_haps_to_use, - ## nSNPs, - ## ref_error, - ## maxDifferenceBetweenReads, - ## Jmax, - ## hapMatcher, - ## grid, - ## distinctHapsIE, - ## sampleReads - ## ) - ## print("TEMPORARY") - - ## argh ## print(paste0("start = ", Sys.time())) if (use_small_eHapsCurrent_tc) { inflate_fhb_t_in_place( @@ -1977,7 +1959,10 @@ impute_one_sample <- function( ## return_hapProbs = TRUE, ## return_gamma = TRUE, ## return_gibbs_block_output = FALSE, - ## return_advanced_gibbs_block_output = FALSE + ## return_advanced_gibbs_block_output = FALSE, + ## use_starting_read_labels = FALSE, + ## verbose = FALSE, + ## run_fb_subset = FALSE ## ) param_list <- list( return_alpha = FALSE, @@ -1987,8 +1972,12 @@ impute_one_sample <- function( return_hapProbs = return_hapProbs, return_p_store = return_p_store, return_gibbs_block_output = return_gibbs_block_output, - return_advanced_gibbs_block_output = return_advanced_gibbs_block_output + return_advanced_gibbs_block_output = return_advanced_gibbs_block_output, + use_starting_read_labels = TRUE, + verbose = verbose, + run_fb_subset = FALSE ) + ## out <- rcpp_forwardBackwardGibbsNIPT( sampleReads = sampleReads, priorCurrent_m = small_priorCurrent_m, @@ -2004,7 +1993,6 @@ impute_one_sample <- function( maxDifferenceBetweenReads = maxDifferenceBetweenReads, Jmax = Jmax, maxEmissionMatrixDifference = maxEmissionMatrixDifference, - run_fb_subset = FALSE, run_fb_grid_offset = FALSE, blocks_for_output = array(0, c(1, 1)), grid = grid, @@ -2015,6 +2003,12 @@ impute_one_sample <- function( betaHat_t1 = betaHat_t1, betaHat_t2 = betaHat_t2, betaHat_t3 = betaHat_t3, + eMatGrid_t1 = eMatGrid_t1, + eMatGrid_t2 = eMatGrid_t2, + eMatGrid_t3 = eMatGrid_t3, + gammaMT_t_local = gammaMT_t_local, + gammaMU_t_local = gammaMU_t_local, + gammaP_t_local = gammaP_t_local, hapSum_tc = array(0, c(1, 1, 1)), snp_start_1_based = -1, snp_end_1_based = -1, @@ -2022,13 +2016,11 @@ impute_one_sample <- function( suppressOutput = suppressOutput, n_gibbs_burn_in_its = n_gibbs_burn_in_its, n_gibbs_sample_its = n_gibbs_sample_its, - use_starting_read_labels = TRUE, n_gibbs_starts = n_gibbs_starts, double_list_of_starting_read_labels = double_list_of_starting_read_labels, prev_list_of_alphaBetaBlocks = as.list(c(1, 2)), i_snp_block_for_alpha_beta = -1, haploid_gibbs_equal_weighting = TRUE, - verbose = verbose, gibbs_initialize_iteratively = gibbs_initialize_iteratively, gibbs_initialize_at_first_read = gibbs_initialize_at_first_read, ## experiment with do_block_resampling = FALSE, ## turn off for now diff --git a/QUILT/R/gibbs-small.R b/QUILT/R/gibbs-small.R index 6088971..0c6b707 100644 --- a/QUILT/R/gibbs-small.R +++ b/QUILT/R/gibbs-small.R @@ -70,83 +70,3 @@ R_make_eMatRead_t_for_gibbs_using_objects <- function( -temp_compare_two_versions <- function( - rhb_t, - small_eHapsCurrent_tc, - which_haps_to_use, - nSNPs, - ref_error, - maxDifferenceBetweenReads, - Jmax, - hapMatcher, - grid, - distinctHapsIE, - sampleReads -) { - - K <- length(which_haps_to_use) - rescale_eMatRead_t <- TRUE - nReads <- length(sampleReads) - - f1 <- function() { - ## - S <- 1 - ## argh - ## print(paste0("start = ", Sys.time())) - inflate_fhb_t_in_place( - rhb_t, - small_eHapsCurrent_tc, - haps_to_get = which_haps_to_use - 1, - nSNPs = nSNPs, - ref_error = ref_error - ) - eMatRead_t_old <- array(1, c(K, nReads)) - rcpp_make_eMatRead_t( - eMatRead_t = eMatRead_t_old, - sampleReads = sampleReads, - eHapsCurrent_tc = small_eHapsCurrent_tc, - s = 0, - maxDifferenceBetweenReads = maxDifferenceBetweenReads, - Jmax = Jmax, - eMatHapOri_t = matrix(0, nrow = 0, ncol = 0), - pRgivenH1 = vector(), - pRgivenH2 = vector(), - prev = 1, - suppressOutput = 1, - prev_section = "N/A", - next_section = "N/A", - run_pseudo_haploid = FALSE, - rescale_eMatRead_t = rescale_eMatRead_t - ) - eMatRead_t_old - } - - - f2 <- function() { - eMatRead_t_new <- array(1, c(K, nReads)) - Rcpp_make_eMatRead_t_for_gibbs_using_objects( - eMatRead_t = eMatRead_t_new, - sampleReads = sampleReads, - hapMatcher = hapMatcher, - grid = grid, - rhb_t = rhb_t, - distinctHapsIE = distinctHapsIE, - ref_error = ref_error, - which_haps_to_use = which_haps_to_use, - rescale_eMatRead_t = rescale_eMatRead_t, - Jmax = Jmax, - maxDifferenceBetweenReads = maxDifferenceBetweenReads - ) - eMatRead_t_new - } - - library("testthat") - expect_equal(f1(), f2()) - - library("microbenchmark") - ## can be slower (on tall data, HRC sized) - ## so can be faster (on wide data) - print(microbenchmark(f1(), f2(), times = 2)) - ## so def slower. hmm - -} diff --git a/QUILT/R/quilt.R b/QUILT/R/quilt.R index 69fbee7..34f2aac 100644 --- a/QUILT/R/quilt.R +++ b/QUILT/R/quilt.R @@ -133,7 +133,7 @@ QUILT <- function( block_gibbs_iterations = c(3,6,9), n_gibbs_burn_in_its = 20, plot_per_sample_likelihoods = FALSE, - use_small_eHapsCurrent_tc = TRUE + use_small_eHapsCurrent_tc = FALSE ) { @@ -575,10 +575,17 @@ QUILT <- function( S <- 1 alphaHat_t1 <- array(0, c(K, nGrids)) betaHat_t1 <- array(0, c(K, nGrids)) + eMatGrid_t1 <- array(0, c(K, nGrids)) alphaHat_t2 <- array(0, c(K, nGrids)) betaHat_t2 <- array(0, c(K, nGrids)) + eMatGrid_t2 <- array(0, c(K, nGrids)) alphaHat_t3 <- array(0, c(K, nGrids)) betaHat_t3 <- array(0, c(K, nGrids)) + eMatGrid_t3 <- array(0, c(K, nGrids)) + gammaMT_t_local <- array(0, c(K, nGrids)) + gammaMU_t_local <- array(0, c(K, nGrids)) + gammaP_t_local <- array(0, c(K, nGrids)) + ## small_priorCurrent_m <- array(1 / K, c(K, S)) small_alphaMatCurrent_tc <- array(1 / K, c(K, nGrids - 1, S)) if (use_small_eHapsCurrent_tc) { @@ -587,6 +594,9 @@ QUILT <- function( small_eHapsCurrent_tc <- array(0, c(1, 1, 1)) } + + + results_across_samples <- as.list(sampleRange[2] - sampleRange[1] + 1) for(iSample in sampleRange[1]:sampleRange[2]) { @@ -606,10 +616,16 @@ QUILT <- function( small_transMatRate_tc_H = small_transMatRate_tc_H, alphaHat_t1 = alphaHat_t1, betaHat_t1 = betaHat_t1, + eMatGrid_t1 = eMatGrid_t1, alphaHat_t2 = alphaHat_t2, betaHat_t2 = betaHat_t2, + eMatGrid_t2 = eMatGrid_t2, alphaHat_t3 = alphaHat_t3, betaHat_t3 = betaHat_t3, + eMatGrid_t3 = eMatGrid_t3, + gammaMT_t_local = gammaMT_t_local, + gammaMU_t_local = gammaMU_t_local, + gammaP_t_local = gammaP_t_local, small_alphaMatCurrent_tc = small_alphaMatCurrent_tc, small_priorCurrent_m = small_priorCurrent_m, small_eHapsCurrent_tc = small_eHapsCurrent_tc, @@ -683,12 +699,11 @@ QUILT <- function( } ## optionally, do some gc here, if longer running job - if (as.numeric(K) * as.numeric(nSNPs) > (1e8)) { + if (as.numeric(K) * as.numeric(nSNPs) > (1e6)) { for(i in 1:5) { gc(reset = TRUE) } } - } diff --git a/QUILT/R/reference-single.R b/QUILT/R/reference-single.R index 3410463..ed792ec 100644 --- a/QUILT/R/reference-single.R +++ b/QUILT/R/reference-single.R @@ -413,7 +413,19 @@ make_rhb_t_equality <- function( ## if (infer_nMaxDH) { running_count <- cumsum(rowSums(temp_counter) / (nrow(rhb_t) * nGrids)) - suggested_value <- which.max(running_count > 0.99) + ## really need to tune this better + ## basically, larger K, important to set large + if (K > 50000) { + thresh <- 0.9999 + } else if (K > 10000) { + thresh <- 0.9995 + } else if (K > 1000) { + thresh <- 0.999 + } else { + thresh <- 0.99 + } + ## really want to almost never need this, within reason, for large K + suggested_value <- which.max(running_count > thresh) nMaxDH <- min( max(c(2 ** 4 - 1, suggested_value)), nMaxDH_default @@ -422,7 +434,7 @@ make_rhb_t_equality <- function( print_message(paste0("Using nMaxDH = ", nMaxDH)) } distinctHapsB <- distinctHapsB[1:nMaxDH, ] - hapMatcher[hapMatcher > (nMaxDH)] <- 0 + hapMatcher[hapMatcher > (nMaxDH)] <- 0L } ## ## inflate them too, they're pretty small diff --git a/QUILT/man/QUILT.Rd b/QUILT/man/QUILT.Rd index c2946be..5545976 100644 --- a/QUILT/man/QUILT.Rd +++ b/QUILT/man/QUILT.Rd @@ -69,7 +69,7 @@ QUILT( block_gibbs_iterations = c(3, 6, 9), n_gibbs_burn_in_its = 20, plot_per_sample_likelihoods = FALSE, - use_small_eHapsCurrent_tc = TRUE + use_small_eHapsCurrent_tc = FALSE ) } \arguments{ diff --git a/QUILT/src/RcppExports.cpp b/QUILT/src/RcppExports.cpp index be18e98..9096da1 100644 --- a/QUILT/src/RcppExports.cpp +++ b/QUILT/src/RcppExports.cpp @@ -842,8 +842,8 @@ BEGIN_RCPP END_RCPP } // rcpp_forwardBackwardGibbsNIPT -Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, const arma::imat& rhb_t, double ref_error, const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, Rcpp::List param_list, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const bool run_fb_subset, const int run_fb_grid_offset, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const bool use_starting_read_labels, const bool verbose, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs, const bool use_small_eHapsCurrent_tc); -RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP hapSum_tcSEXP, SEXP hapMatcherSEXP, SEXP distinctHapsIESEXP, SEXP rhb_tSEXP, SEXP ref_errorSEXP, SEXP which_haps_to_useSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP param_listSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_subsetSEXP, SEXP run_fb_grid_offsetSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP use_starting_read_labelsSEXP, SEXP verboseSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP, SEXP use_small_eHapsCurrent_tcSEXP) { +Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::mat& eMatGrid_t1, arma::mat& eMatGrid_t2, arma::mat& eMatGrid_t3, arma::mat& gammaMT_t_local, arma::mat& gammaMU_t_local, arma::mat& gammaP_t_local, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, const arma::imat& rhb_t, double ref_error, const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, Rcpp::List param_list, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const int run_fb_grid_offset, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs, const bool use_small_eHapsCurrent_tc); +RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP eMatGrid_t1SEXP, SEXP eMatGrid_t2SEXP, SEXP eMatGrid_t3SEXP, SEXP gammaMT_t_localSEXP, SEXP gammaMU_t_localSEXP, SEXP gammaP_t_localSEXP, SEXP hapSum_tcSEXP, SEXP hapMatcherSEXP, SEXP distinctHapsIESEXP, SEXP rhb_tSEXP, SEXP ref_errorSEXP, SEXP which_haps_to_useSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP param_listSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_grid_offsetSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP, SEXP use_small_eHapsCurrent_tcSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -860,6 +860,12 @@ BEGIN_RCPP Rcpp::traits::input_parameter< arma::mat& >::type betaHat_t2(betaHat_t2SEXP); Rcpp::traits::input_parameter< arma::mat& >::type alphaHat_t3(alphaHat_t3SEXP); Rcpp::traits::input_parameter< arma::mat& >::type betaHat_t3(betaHat_t3SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t1(eMatGrid_t1SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t2(eMatGrid_t2SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t3(eMatGrid_t3SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type gammaMT_t_local(gammaMT_t_localSEXP); + Rcpp::traits::input_parameter< arma::mat& >::type gammaMU_t_local(gammaMU_t_localSEXP); + Rcpp::traits::input_parameter< arma::mat& >::type gammaP_t_local(gammaP_t_localSEXP); Rcpp::traits::input_parameter< arma::cube& >::type hapSum_tc(hapSum_tcSEXP); Rcpp::traits::input_parameter< arma::imat& >::type hapMatcher(hapMatcherSEXP); Rcpp::traits::input_parameter< arma::mat& >::type distinctHapsIE(distinctHapsIESEXP); @@ -874,7 +880,6 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const int >::type Jmax_local(Jmax_localSEXP); Rcpp::traits::input_parameter< const double >::type maxDifferenceBetweenReads(maxDifferenceBetweenReadsSEXP); Rcpp::traits::input_parameter< const double >::type maxEmissionMatrixDifference(maxEmissionMatrixDifferenceSEXP); - Rcpp::traits::input_parameter< const bool >::type run_fb_subset(run_fb_subsetSEXP); Rcpp::traits::input_parameter< const int >::type run_fb_grid_offset(run_fb_grid_offsetSEXP); Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type grid(gridSEXP); Rcpp::traits::input_parameter< int >::type snp_start_1_based(snp_start_1_basedSEXP); @@ -884,8 +889,6 @@ BEGIN_RCPP Rcpp::traits::input_parameter< int >::type n_gibbs_starts(n_gibbs_startsSEXP); Rcpp::traits::input_parameter< const int >::type n_gibbs_sample_its(n_gibbs_sample_itsSEXP); Rcpp::traits::input_parameter< const int >::type n_gibbs_burn_in_its(n_gibbs_burn_in_itsSEXP); - Rcpp::traits::input_parameter< const bool >::type use_starting_read_labels(use_starting_read_labelsSEXP); - Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP); Rcpp::traits::input_parameter< const Rcpp::List& >::type double_list_of_starting_read_labels(double_list_of_starting_read_labelsSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type seed_vector(seed_vectorSEXP); Rcpp::traits::input_parameter< const Rcpp::List& >::type prev_list_of_alphaBetaBlocks(prev_list_of_alphaBetaBlocksSEXP); @@ -908,7 +911,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const double >::type block_gibbs_quantile_prob(block_gibbs_quantile_probSEXP); Rcpp::traits::input_parameter< const bool >::type do_shard_ff0_block_gibbs(do_shard_ff0_block_gibbsSEXP); Rcpp::traits::input_parameter< const bool >::type use_small_eHapsCurrent_tc(use_small_eHapsCurrent_tcSEXP); - rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc)); + rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc)); return rcpp_result_gen; END_RCPP } @@ -1264,7 +1267,7 @@ static const R_CallMethodDef CallEntries[] = { {"_QUILT_rcpp_gibbs_nipt_initialize", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_initialize, 31}, {"_QUILT_rcpp_gibbs_nipt_iterate", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_iterate, 49}, {"_QUILT_rcpp_fly_weighter", (DL_FUNC) &_QUILT_rcpp_fly_weighter, 14}, - {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 61}, + {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 64}, {"_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects", (DL_FUNC) &_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects, 11}, {"_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects", (DL_FUNC) &_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects, 11}, {"_QUILT_Rcpp_quilt_test_doubler", (DL_FUNC) &_QUILT_Rcpp_quilt_test_doubler, 1}, diff --git a/QUILT/src/gibbs-nipt.cpp b/QUILT/src/gibbs-nipt.cpp index eb6adec..3a1e266 100644 --- a/QUILT/src/gibbs-nipt.cpp +++ b/QUILT/src/gibbs-nipt.cpp @@ -2005,6 +2005,12 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, + arma::mat& eMatGrid_t1, + arma::mat& eMatGrid_t2, + arma::mat& eMatGrid_t3, + arma::mat& gammaMT_t_local, + arma::mat& gammaMU_t_local, + arma::mat& gammaP_t_local, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, @@ -2019,7 +2025,6 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( const int Jmax_local = 100, const double maxDifferenceBetweenReads = 1000, const double maxEmissionMatrixDifference = 10000000000, - const bool run_fb_subset = false, const int run_fb_grid_offset = 0, const Rcpp::IntegerVector& grid = 0, int snp_start_1_based = -1, @@ -2029,8 +2034,6 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( int n_gibbs_starts = 1, const int n_gibbs_sample_its = 1, const int n_gibbs_burn_in_its = 1, - const bool use_starting_read_labels = false, - const bool verbose = false, const Rcpp::List& double_list_of_starting_read_labels = R_NilValue, Rcpp::IntegerVector seed_vector = -1, const Rcpp::List& prev_list_of_alphaBetaBlocks = R_NilValue, @@ -2075,7 +2078,10 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( // ## return_hapProbs = TRUE, // ## return_gamma = TRUE, // ## return_gibbs_block_output = FALSE, - // ## return_advanced_gibbs_block_output = FALSE + // ## return_advanced_gibbs_block_output = FALSE, + // ## use_starting_read_labels = FALSE, + // ## verbose = FALSE, + // ## run_fb_subset = FALSE // ## ) const bool return_alpha = as(param_list["return_alpha"]); const bool return_p_store = as(param_list["return_p_store"]); @@ -2085,6 +2091,9 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( const bool return_gamma = as(param_list["return_gamma"]); const bool return_gibbs_block_output = as(param_list["return_gibbs_block_output"]); const bool return_advanced_gibbs_block_output = as(param_list["return_advanced_gibbs_block_output"]); + const bool use_starting_read_labels = as(param_list["use_starting_read_labels"]); + const bool verbose = as(param_list["verbose"]); + const bool run_fb_subset = as(param_list["run_fb_subset"]); // // // initialize variables @@ -2126,9 +2135,11 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( gammaMU_t = arma::zeros(K, nGrids); gammaP_t = arma::zeros(K, nGrids); } - arma::mat gammaMT_t_local = arma::zeros(K, nGrids); - arma::mat gammaMU_t_local = arma::zeros(K, nGrids); - arma::mat gammaP_t_local = arma::zeros(K, nGrids); + if (!pass_in_alphaBeta) { + gammaMT_t_local = arma::zeros(K, nGrids); + gammaMU_t_local = arma::zeros(K, nGrids); + gammaP_t_local = arma::zeros(K, nGrids); + } arma::mat genProbsM_t_master = arma::zeros(3, nSNPsLocal); arma::mat genProbsF_t_master = arma::zeros(3, nSNPsLocal); arma::mat genProbsM_t = arma::zeros(3, nSNPsLocal); @@ -2196,13 +2207,13 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT( betaHat_t2 = arma::zeros(K, nGrids); alphaHat_t3 = arma::zeros(K, nGrids); betaHat_t3 = arma::zeros(K, nGrids); + eMatGrid_t1 = arma::ones(K, nGrids); + eMatGrid_t2 = arma::ones(K, nGrids); + eMatGrid_t3 = arma::ones(K, nGrids); } arma::rowvec c1 = arma::zeros(1, nGrids); - arma::mat eMatGrid_t1 = arma::ones(K, nGrids); arma::rowvec c2 = arma::zeros(1, nGrids); - arma::mat eMatGrid_t2 = arma::ones(K, nGrids); arma::rowvec c3 = arma::zeros(1, nGrids); - arma::mat eMatGrid_t3 = arma::ones(K, nGrids); if (update_hapSum & !update_in_place) { hapSum_tc = arma::zeros(K, nGrids, S); } diff --git a/QUILT/src/gibbs-small.cpp b/QUILT/src/gibbs-small.cpp index 246c136..0b0bda0 100644 --- a/QUILT/src/gibbs-small.cpp +++ b/QUILT/src/gibbs-small.cpp @@ -152,6 +152,7 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( const int K = gammaMT_t.n_rows; const int nGrids = gammaMT_t.n_cols; const int nSNPs = genProbsM_t.n_cols; + const int nMaxDH = distinctHapsIE.n_rows; const double ref_one_minus_error = 1 - ref_error; arma::icolvec dh_col(K); arma::vec g0(32); @@ -160,7 +161,16 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( g1.fill(0); arma::vec g2(32); g2.fill(0); + arma::vec h0(32); + h0.fill(0); + arma::vec h1(32); + h1.fill(0); + arma::vec h2(32); + h2.fill(0); double gk0, gk1, gk2; + arma::vec mg0(nMaxDH + 1); + arma::vec mg1(nMaxDH + 1); + arma::vec mg2(nMaxDH + 1); // for(iGrid = 0; iGrid < nGrids; iGrid++) { s = 32 * (iGrid); // 0-based here @@ -169,45 +179,40 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( e = nSNPs - 1; } nSNPsLocal = e - s + 1; - for(k = 0; k < K; k++) { - dh_col(k) = hapMatcher(which_haps_to_use(k) - 1, iGrid); - } g0.fill(0); g1.fill(0); g2.fill(0); - // - // loop - // + h0.fill(0); + h1.fill(0); + h2.fill(0); for(k = 0; k < K; k++) { - dh = dh_col(k); - if (dh == 0) { - // need to build this one - gk0 = gammaMT_t(k, iGrid); - gk1 = gammaMU_t(k, iGrid); - gk2 = gammaP_t(k, iGrid); - std::uint32_t tmp(rhb_t(which_haps_to_use(k) - 1, iGrid)); - for(b = 0; b < nSNPsLocal; b++) { - if (tmp & (1<>= 1) { + if ((tmp & 0x1) == 0) { + //if (tmp & (1< (nSNPs - 1)) { + // e = nSNPs - 1; + // } + // nSNPsLocal = e - s + 1; + // // + // g0.fill(0); + // g1.fill(0); + // g2.fill(0); + // mg0.fill(0); + // mg1.fill(0); + // mg2.fill(0); + // // + // // loop + // // + // for(k = 0; k < K; k++) { + // dh = hapMatcher(which_haps_to_use(k) - 1, iGrid); + // mg0(dh) += gammaMT_t(k, iGrid); + // mg1(dh) += gammaMU_t(k, iGrid); + // mg2(dh) += gammaP_t(k, iGrid); + // if (dh == 0) { + // // need to build this one + // gk0 = gammaMT_t(k, iGrid); + // gk1 = gammaMU_t(k, iGrid); + // gk2 = gammaP_t(k, iGrid); + // std::uint32_t tmp(rhb_t(which_haps_to_use(k) - 1, iGrid)); + // for(b = 0; b < nSNPsLocal; b++) { + // if (tmp & (1< 0) { + a <- a[-b] + } + o <- sapply(a, source) + +} + + + test_that("can avoid using eHapsCurrent_tc in genProbs calculation", { ## OK am here @@ -254,7 +271,7 @@ test_that("can avoid inflating fhb_t using eHapsCurrent_tc to make eMatRead_t", test_that("profile using riyan fish data", { - skip("WER") + skip("speed test") setwd("/data/smew1/rdavies/riyan_debug_2021_03_15") i_chr <- 2 @@ -316,7 +333,7 @@ test_that("profile using riyan fish data", { test_that("profile using HRC data", { - skip("WER") + skip("speed test") setwd("~/proj/QUILT/") dir <- "/data/smew1/rdavies/quilt_data/hrc_2021_04_20/2021_04_20_bams" @@ -336,6 +353,88 @@ test_that("profile using HRC data", { if (1 == 0) { + temp_compare_two_versions <- function( + rhb_t, + small_eHapsCurrent_tc, + which_haps_to_use, + nSNPs, + ref_error, + maxDifferenceBetweenReads, + Jmax, + hapMatcher, + grid, + distinctHapsIE, + sampleReads + ) { + + K <- length(which_haps_to_use) + rescale_eMatRead_t <- TRUE + nReads <- length(sampleReads) + + f1 <- function() { + ## + S <- 1 + ## argh + ## print(paste0("start = ", Sys.time())) + inflate_fhb_t_in_place( + rhb_t, + small_eHapsCurrent_tc, + haps_to_get = which_haps_to_use - 1, + nSNPs = nSNPs, + ref_error = ref_error + ) + eMatRead_t_old <- array(1, c(K, nReads)) + rcpp_make_eMatRead_t( + eMatRead_t = eMatRead_t_old, + sampleReads = sampleReads, + eHapsCurrent_tc = small_eHapsCurrent_tc, + s = 0, + maxDifferenceBetweenReads = maxDifferenceBetweenReads, + Jmax = Jmax, + eMatHapOri_t = matrix(0, nrow = 0, ncol = 0), + pRgivenH1 = vector(), + pRgivenH2 = vector(), + prev = 1, + suppressOutput = 1, + prev_section = "N/A", + next_section = "N/A", + run_pseudo_haploid = FALSE, + rescale_eMatRead_t = rescale_eMatRead_t + ) + eMatRead_t_old + } + + + f2 <- function() { + eMatRead_t_new <- array(1, c(K, nReads)) + Rcpp_make_eMatRead_t_for_gibbs_using_objects( + eMatRead_t = eMatRead_t_new, + sampleReads = sampleReads, + hapMatcher = hapMatcher, + grid = grid, + rhb_t = rhb_t, + distinctHapsIE = distinctHapsIE, + ref_error = ref_error, + which_haps_to_use = which_haps_to_use, + rescale_eMatRead_t = rescale_eMatRead_t, + Jmax = Jmax, + maxDifferenceBetweenReads = maxDifferenceBetweenReads + ) + eMatRead_t_new + } + + library("testthat") + expect_equal(f1(), f2()) + + library("microbenchmark") + ## can be slower (on tall data, HRC sized) + ## so can be faster (on wide data) + print(microbenchmark(f1(), f2(), times = 2)) + ## so def slower. hmm + + } + + skip("woo") for(sample_name in c("NA12878", "NA12878HT", "NA12878ONT")) { print(sample_name) @@ -360,3 +459,116 @@ test_that("profile using HRC data", { } }) + + +test_that("profile genProbs making with or without eHapsCurrent_tc", { + + skip("test") + + for(sample_name in c("NA12878", "NA12878HT", "NA12878ONT")) { + + print(sample_name) + load(paste0("/dev/shm/rwdavies/", sample_name, ".RData") ) +snp_start_1_based <- 1 + snp_end_1_based <- nSNPs +class(hapMatcher) <- "integer" + + f1 <- function() { + inflate_fhb_t_in_place( + rhb_t, + small_eHapsCurrent_tc, + haps_to_get = which_haps_to_use - 1, + nSNPs = nSNPs, + ref_error = ref_error + ) + rcpp_calculate_gn_genProbs_and_hapProbs( + genProbsM_t = genProbsM_t, + genProbsF_t = genProbsF_t, + hapProbs_t = hapProbs_t, + s = 0, + eHapsCurrent_tc = small_eHapsCurrent_tc, + gammaMT_t = gammaMT_t, + gammaMU_t = gammaMU_t, + gammaP_t = gammaP_t, + grid = grid, + snp_start_1_based = snp_start_1_based, + snp_end_1_based = snp_end_1_based, + grid_offset = 0 + ) + list(genProbsM_t = genProbsM_t, + genProbsF_t = genProbsF_t, + hapProbs_t = hapProbs_t) + } + + + + f2 <- function() { + ## rhb_t_subset <- rhb_t[which_haps_to_use, ] + rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects( + genProbsM_t = genProbsM_t_new, + genProbsF_t = genProbsF_t_new, + hapProbs_t = hapProbs_t_new, + gammaMT_t = gammaMT_t, + gammaMU_t = gammaMU_t, + gammaP_t = gammaP_t, + hapMatcher = hapMatcher, + distinctHapsIE = distinctHapsIE, + which_haps_to_use = which_haps_to_use, + ref_error = ref_error, + rhb_t = rhb_t + ) + list(genProbsM_t = genProbsM_t_new, + genProbsF_t = genProbsF_t_new, + hapProbs_t = hapProbs_t_new) + } + + genProbsM_t <- array(0, c(3, nSNPs)) + genProbsF_t <- array(0, c(3, nSNPs)) + hapProbs_t <- array(0, c(3, nSNPs)) + + + genProbsM_t_new <- array(0, c(3, nSNPs)) + genProbsF_t_new <- array(0, c(3, nSNPs)) + hapProbs_t_new <- array(0, c(3, nSNPs)) + + inflate_fhb_t_in_place( + rhb_t, + small_eHapsCurrent_tc, + haps_to_get = which_haps_to_use - 1, + nSNPs = nSNPs, + ref_error = ref_error + ) + + f <- function() { + K <- nrow(small_eHapsCurrent_tc) + nGrids <- ncol(rhb_t) + gammaMT_t <- array(runif(K * nGrids), c(K, nGrids)) + for(iGrid in 1:nGrids) { + gammaMT_t[, iGrid] <- gammaMT_t[, iGrid] / sum(gammaMT_t[, iGrid]) + } + gammaMT_t + } + gammaMT_t <- f() + gammaMU_t <- f() + gammaP_t <- f() + library("testthat") + expect_equal(f1(), f2(), tol = 1e-5) + + + library("microbenchmark") + ## can be slower (on tall data, HRC sized) + ## so can be faster (on wide data) + print(microbenchmark(f1(), f2(), times = 2)) + ## so DEF slower, much. hmm + + + } + + ## other one + + ## slower but no problems! + ## hmm, though does add up, when run repeatedly + ## can I make faster, or is it just what it is? + + +}) diff --git a/README.md b/README.md index 6a3afdd..29e101d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ QUILT ===== -**__Current Version: 0.1.6__** -Release date: April 9 2021 +**__Current Version: 0.1.7__** +Release date: April 22 2021 [![Build Status](https://img.shields.io/travis/rwdavies/QUILT/master.svg)](https://travis-ci.org/rwdavies/QUILT/) @@ -49,8 +49,8 @@ git clone --recursive https://github.com/rwdavies/QUILT.git cd QUILT ./scripts/install-dependencies.sh cd releases -wget https://github.com/rwdavies/quilt/releases/download/0.1.6/QUILT_0.1.6.tar.gz ## or curl -O -R CMD INSTALL QUILT_0.1.6.tar.gz +wget https://github.com/rwdavies/quilt/releases/download/0.1.7/QUILT_0.1.7.tar.gz ## or curl -O +R CMD INSTALL QUILT_0.1.7.tar.gz ``` ### conda diff --git a/scripts/test-cli.R b/scripts/test-cli.R index 1cc8148..a56238c 100755 --- a/scripts/test-cli.R +++ b/scripts/test-cli.R @@ -36,8 +36,8 @@ STITCH::make_STITCH_cli( other_character_params = c("phasefile", "output_RData_filename", "RData_objects_to_save", "prepared_reference_filename", "reference_exclude_samplelist_file", "output_sites_filename", "region_exclude_file", "block_gibbs_iterations"), integer_vectors = c("block_gibbs_iterations"), character_vectors = c("RData_objects_to_save"), - other_logical_params = c("make_plots", "verbose", "record_read_label_usage", "record_interim_dosages", "use_bx_tag", "addOptimalHapsToVCF", "estimate_bq_using_truth_read_labels", "make_plots_block_gibbs", "override_default_params_for_small_ref_panel", "hla_run", "make_fake_vcf_with_sites_list", "print_extra_timing_information", "plot_per_sample_likelihoods", "save_prepared_reference"), - other_integer_params = c("nGibbsSamples", "n_seek_its", "Ksubset", "Knew", "K_top_matches", "panel_size", "bxTagUpperLimit", "seed", "gamma_physically_closest_to", "nMaxDH", "minimum_number_of_sample_reads", "n_gibbs_burn_in_its", "use_small_eHapsCurrent_tc"), + other_logical_params = c("make_plots", "verbose", "record_read_label_usage", "record_interim_dosages", "use_bx_tag", "addOptimalHapsToVCF", "estimate_bq_using_truth_read_labels", "make_plots_block_gibbs", "override_default_params_for_small_ref_panel", "hla_run", "make_fake_vcf_with_sites_list", "print_extra_timing_information", "plot_per_sample_likelihoods", "save_prepared_reference", "use_small_eHapsCurrent_tc"), + other_integer_params = c("nGibbsSamples", "n_seek_its", "Ksubset", "Knew", "K_top_matches", "panel_size", "bxTagUpperLimit", "seed", "gamma_physically_closest_to", "nMaxDH", "minimum_number_of_sample_reads", "n_gibbs_burn_in_its"), other_double_params = c("heuristic_match_thin", "minGLValue"), function_name = "QUILT", library_name = "QUILT"