From 6bd7b0b3d3ea78e89cd68dd014cb1748ca1e661d Mon Sep 17 00:00:00 2001
From: rwdavies <robertwilliamdavies@gmail.com>
Date: Thu, 22 Apr 2021 20:56:06 +0100
Subject: [PATCH] 0.1.7 speedups and RAM decrease

---
 CHANGELOG.md                                 |   2 +
 QUILT.R                                      |   6 +-
 QUILT/DESCRIPTION                            |   4 +-
 QUILT/R/RcppExports.R                        |   4 +-
 QUILT/R/functions.R                          |  78 +++----
 QUILT/R/gibbs-small.R                        |  80 -------
 QUILT/R/quilt.R                              |  21 +-
 QUILT/R/reference-single.R                   |  16 +-
 QUILT/man/QUILT.Rd                           |   2 +-
 QUILT/src/RcppExports.cpp                    |  17 +-
 QUILT/src/gibbs-nipt.cpp                     |  31 ++-
 QUILT/src/gibbs-small.cpp                    | 134 +++++++++---
 QUILT/tests/testthat/test-unit-gibbs-small.R | 216 ++++++++++++++++++-
 README.md                                    |   8 +-
 scripts/test-cli.R                           |   4 +-
 15 files changed, 431 insertions(+), 192 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index adc2d48..015b811 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,5 @@
+* v0.1.7
+	* Speedups and RAM decrease when using reference panels with fewer haplotypes
 * v0.1.6
 	* Discovered bug where only 1 read caused an error. Fix (obviate) this by requiring 2 of more reads through parameter minimum_number_of_sample_reads with default currently 2, where samples with fewer than 2 reads have all missing output
 * v0.1.5
diff --git a/QUILT.R b/QUILT.R
index e73ad31..bb1a1d8 100755
--- a/QUILT.R
+++ b/QUILT.R
@@ -388,9 +388,9 @@ option_list <- list(
     ), 
     make_option(
         "--use_small_eHapsCurrent_tc",
-        type = "integer",
-        help = "For testing purposes only [default TRUE] ",
-        default = TRUE
+        type = "logical",
+        help = "For testing purposes only [default FALSE] ",
+        default = FALSE
     )
 )
 opt <- suppressWarnings(parse_args(OptionParser(option_list = option_list)))
diff --git a/QUILT/DESCRIPTION b/QUILT/DESCRIPTION
index 3f9f628..be7fd2d 100644
--- a/QUILT/DESCRIPTION
+++ b/QUILT/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: QUILT
 Type: Package
 Title: QUILT
-Version: 0.1.6
-Date: 2021-04-09
+Version: 0.1.7
+Date: 2021-04-22
 Author: Robert William Davies
 Maintainer: Robert William Davies <robertwilliamdavies@gmail.com>
 Description: QUILT performs imputation of individuals sequenced to low coverage
diff --git a/QUILT/R/RcppExports.R b/QUILT/R/RcppExports.R
index 2a020fb..d7f4505 100644
--- a/QUILT/R/RcppExports.R
+++ b/QUILT/R/RcppExports.R
@@ -182,8 +182,8 @@ rcpp_fly_weighter <- function(i, gCurrent, gAverage, ll_current, log_mult, ll_re
 }
 
 #' @export
-rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_subset = FALSE, run_fb_grid_offset = 0L, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, use_starting_read_labels = FALSE, verbose = FALSE, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE, use_small_eHapsCurrent_tc = TRUE) {
-    .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc)
+rcpp_forwardBackwardGibbsNIPT <- function(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local = 100L, maxDifferenceBetweenReads = 1000, maxEmissionMatrixDifference = 10000000000, run_fb_grid_offset = 0L, grid = 0L, snp_start_1_based = -1L, snp_end_1_based = -1L, generate_fb_snp_offsets = FALSE, suppressOutput = 1L, n_gibbs_starts = 1L, n_gibbs_sample_its = 1L, n_gibbs_burn_in_its = 1L, double_list_of_starting_read_labels = NULL, seed_vector = -1L, prev_list_of_alphaBetaBlocks = NULL, i_snp_block_for_alpha_beta = 1L, haploid_gibbs_equal_weighting = TRUE, gibbs_initialize_iteratively = FALSE, gibbs_initialize_at_first_read = TRUE, do_block_resampling = FALSE, artificial_relabel = -1L, pass_in_alphaBeta = FALSE, update_in_place = FALSE, update_hapSum = FALSE, record_read_set = FALSE, class_sum_cutoff = 0.06, perform_block_gibbs = FALSE, shuffle_bin_radius = 5000L, block_gibbs_iterations = as.integer( c(0)), rescale_eMatRead_t = TRUE, use_smooth_cm_in_block_gibbs = FALSE, block_gibbs_quantile_prob = 0.9, do_shard_ff0_block_gibbs = TRUE, use_small_eHapsCurrent_tc = TRUE) {
+    .Call('_QUILT_rcpp_forwardBackwardGibbsNIPT', PACKAGE = 'QUILT', sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc)
 }
 
 #' @export
diff --git a/QUILT/R/functions.R b/QUILT/R/functions.R
index 3a5613f..5b83cb5 100644
--- a/QUILT/R/functions.R
+++ b/QUILT/R/functions.R
@@ -467,10 +467,16 @@ get_and_impute_one_sample <- function(
     small_transMatRate_tc_H,
     alphaHat_t1,
     betaHat_t1,
+    eMatGrid_t1,
     alphaHat_t2,
     betaHat_t2,
+    eMatGrid_t2,    
     alphaHat_t3,
     betaHat_t3,
+    eMatGrid_t3,
+    gammaMT_t_local,
+    gammaMU_t_local,
+    gammaP_t_local,
     small_alphaMatCurrent_tc,
     small_priorCurrent_m,
     small_eHapsCurrent_tc,
@@ -811,10 +817,16 @@ get_and_impute_one_sample <- function(
                 small_transMatRate_tc_H = small_transMatRate_tc_H,
                 alphaHat_t1 = alphaHat_t1,
                 betaHat_t1 = betaHat_t1,
+                eMatGrid_t1 = eMatGrid_t1,                
                 alphaHat_t2 = alphaHat_t2,
                 betaHat_t2 = betaHat_t2,
+                eMatGrid_t2 = eMatGrid_t2,
                 alphaHat_t3 = alphaHat_t3,
                 betaHat_t3 = betaHat_t3,
+                eMatGrid_t3 = eMatGrid_t3,
+                gammaMT_t_local = gammaMT_t_local,
+                gammaMU_t_local = gammaMU_t_local,
+                gammaP_t_local = gammaP_t_local,
                 small_alphaMatCurrent_tc = small_alphaMatCurrent_tc,
                 small_priorCurrent_m = small_priorCurrent_m,
                 smooth_cm = smooth_cm,
@@ -846,7 +858,7 @@ get_and_impute_one_sample <- function(
                 return_extra = FALSE,
                 return_genProbs = return_genProbs,
                 return_hapProbs = return_hapProbs,
-                return_gibbs_block_output = TRUE,
+                return_gibbs_block_output = FALSE,
                 gibbs_initialize_iteratively = gibbs_initialize_iteratively,
                 gibbs_initialize_at_first_read = FALSE,
                 maxDifferenceBetweenReads = maxDifferenceBetweenReads,
@@ -962,7 +974,7 @@ get_and_impute_one_sample <- function(
 
             if (have_truth_haplotypes) {
                 w <- i_it + n_seek_its * (i_gibbs_sample - 1)
-                x <- calculate_pse_and_r2_during_gibbs(inRegion2 = inRegion2, hap1 = hap1, hap2 = hap2, truth_haps = truth_haps, af = af, verbose = FALSE)
+                x <- calculate_pse_and_r2_during_gibbs(inRegion2 = inRegion2, hap1 = hap1, hap2 = hap2, truth_haps = truth_haps, af = af, verbose = verbose)
                 pse_mat[w, ] <- c(i_gibbs_sample, i_it, as.integer(phasing_it), x)
             }
         }
@@ -1858,10 +1870,16 @@ impute_one_sample <- function(
     small_transMatRate_tc_H,
     alphaHat_t1,
     betaHat_t1,
+    eMatGrid_t1,
     alphaHat_t2,
     betaHat_t2,
+    eMatGrid_t2,    
     alphaHat_t3,
     betaHat_t3,
+    eMatGrid_t3,
+    gammaMT_t_local,
+    gammaMU_t_local,
+    gammaP_t_local,
     small_alphaMatCurrent_tc,
     small_priorCurrent_m,
     smooth_cm,
@@ -1916,42 +1934,6 @@ impute_one_sample <- function(
     ##
     K <- length(which_haps_to_use)
     S <- 1
-
-    ## print("TEMPORARY")
-    ## print(paste0("/dev/shm/rwdavies/", sample_name, ".RData"))
-    ## save(
-    ##     rhb_t,
-    ##     small_eHapsCurrent_tc,
-    ##     which_haps_to_use,
-    ##     nSNPs,
-    ##     ref_error,
-    ##     maxDifferenceBetweenReads,
-    ##     Jmax,
-    ##     hapMatcher,
-    ##     grid,
-    ##     distinctHapsIE,
-    ##     sampleReads,
-    ##     file = paste0("/dev/shm/rwdavies/", sample_name, ".RData"),
-    ##     compress = FALSE
-    ## )
-    ## stop("WER")
-    ## load("~/temp.RData")
-    ## temp_compare_two_versions(
-    ##         rhb_t,
-    ##         small_eHapsCurrent_tc,
-    ##         which_haps_to_use,
-    ##         nSNPs,
-    ##         ref_error,
-    ##         maxDifferenceBetweenReads,
-    ##         Jmax,
-    ##         hapMatcher,
-    ##         grid,
-    ##     distinctHapsIE,
-    ##     sampleReads
-    ## )
-    ## print("TEMPORARY")
-    
-    ## argh
     ## print(paste0("start = ", Sys.time()))
     if (use_small_eHapsCurrent_tc) {
         inflate_fhb_t_in_place(
@@ -1977,7 +1959,10 @@ impute_one_sample <- function(
     ##     return_hapProbs = TRUE,
     ##     return_gamma = TRUE,
     ##     return_gibbs_block_output = FALSE,
-    ##     return_advanced_gibbs_block_output = FALSE
+    ##     return_advanced_gibbs_block_output = FALSE,
+    ##     use_starting_read_labels = FALSE,
+    ##     verbose = FALSE,
+    ##     run_fb_subset = FALSE
     ## )
     param_list <- list(
         return_alpha = FALSE,
@@ -1987,8 +1972,12 @@ impute_one_sample <- function(
         return_hapProbs = return_hapProbs,
         return_p_store = return_p_store,
         return_gibbs_block_output = return_gibbs_block_output,
-        return_advanced_gibbs_block_output = return_advanced_gibbs_block_output
+        return_advanced_gibbs_block_output = return_advanced_gibbs_block_output,
+        use_starting_read_labels = TRUE,
+        verbose = verbose,
+        run_fb_subset = FALSE
     )
+    ## 
     out <- rcpp_forwardBackwardGibbsNIPT(
         sampleReads = sampleReads,
         priorCurrent_m = small_priorCurrent_m,        
@@ -2004,7 +1993,6 @@ impute_one_sample <- function(
         maxDifferenceBetweenReads = maxDifferenceBetweenReads,
         Jmax = Jmax,
         maxEmissionMatrixDifference = maxEmissionMatrixDifference,
-        run_fb_subset = FALSE,
         run_fb_grid_offset = FALSE,
         blocks_for_output = array(0, c(1, 1)),
         grid = grid,
@@ -2015,6 +2003,12 @@ impute_one_sample <- function(
         betaHat_t1 = betaHat_t1,
         betaHat_t2 = betaHat_t2,
         betaHat_t3 = betaHat_t3,
+        eMatGrid_t1 = eMatGrid_t1,
+        eMatGrid_t2 = eMatGrid_t2,
+        eMatGrid_t3 = eMatGrid_t3,
+        gammaMT_t_local = gammaMT_t_local,
+        gammaMU_t_local = gammaMU_t_local,
+        gammaP_t_local = gammaP_t_local,
         hapSum_tc = array(0, c(1, 1, 1)),
         snp_start_1_based = -1,
         snp_end_1_based = -1,
@@ -2022,13 +2016,11 @@ impute_one_sample <- function(
         suppressOutput = suppressOutput,
         n_gibbs_burn_in_its = n_gibbs_burn_in_its,
         n_gibbs_sample_its = n_gibbs_sample_its,            
-        use_starting_read_labels = TRUE,
         n_gibbs_starts = n_gibbs_starts,
         double_list_of_starting_read_labels = double_list_of_starting_read_labels,
         prev_list_of_alphaBetaBlocks = as.list(c(1, 2)),
         i_snp_block_for_alpha_beta = -1,
         haploid_gibbs_equal_weighting = TRUE,
-        verbose = verbose,
         gibbs_initialize_iteratively = gibbs_initialize_iteratively,
         gibbs_initialize_at_first_read = gibbs_initialize_at_first_read, ## experiment with
         do_block_resampling = FALSE, ## turn off for now
diff --git a/QUILT/R/gibbs-small.R b/QUILT/R/gibbs-small.R
index 6088971..0c6b707 100644
--- a/QUILT/R/gibbs-small.R
+++ b/QUILT/R/gibbs-small.R
@@ -70,83 +70,3 @@ R_make_eMatRead_t_for_gibbs_using_objects <- function(
 
 
 
-temp_compare_two_versions <- function(
-            rhb_t,
-            small_eHapsCurrent_tc,
-            which_haps_to_use,
-            nSNPs,
-            ref_error,
-            maxDifferenceBetweenReads,
-            Jmax,
-            hapMatcher,
-            grid,
-            distinctHapsIE,
-            sampleReads
-) {
-
-    K <- length(which_haps_to_use)
-    rescale_eMatRead_t <- TRUE
-    nReads <- length(sampleReads)
-    
-    f1 <- function() {
-        ##
-        S <- 1
-        ## argh
-        ## print(paste0("start = ", Sys.time()))
-        inflate_fhb_t_in_place(
-            rhb_t,
-            small_eHapsCurrent_tc,
-            haps_to_get = which_haps_to_use - 1,
-            nSNPs = nSNPs,
-            ref_error = ref_error
-        )
-        eMatRead_t_old <- array(1, c(K, nReads))        
-        rcpp_make_eMatRead_t(
-            eMatRead_t = eMatRead_t_old,
-            sampleReads = sampleReads,
-            eHapsCurrent_tc = small_eHapsCurrent_tc,
-            s = 0,
-            maxDifferenceBetweenReads = maxDifferenceBetweenReads,
-            Jmax = Jmax,
-            eMatHapOri_t = matrix(0, nrow = 0, ncol = 0),
-            pRgivenH1 = vector(),
-            pRgivenH2 = vector(),
-            prev = 1,
-            suppressOutput = 1,
-            prev_section = "N/A",
-            next_section = "N/A",
-            run_pseudo_haploid = FALSE,
-            rescale_eMatRead_t = rescale_eMatRead_t
-        )
-        eMatRead_t_old
-    }
-
-
-    f2 <- function() {
-        eMatRead_t_new <- array(1, c(K, nReads))
-        Rcpp_make_eMatRead_t_for_gibbs_using_objects(
-            eMatRead_t = eMatRead_t_new,
-            sampleReads = sampleReads,
-            hapMatcher = hapMatcher,
-            grid = grid,
-            rhb_t = rhb_t,
-            distinctHapsIE = distinctHapsIE,
-            ref_error = ref_error,
-            which_haps_to_use = which_haps_to_use,
-            rescale_eMatRead_t = rescale_eMatRead_t,
-            Jmax = Jmax,
-            maxDifferenceBetweenReads = maxDifferenceBetweenReads
-        )
-        eMatRead_t_new
-    }
-
-    library("testthat")
-    expect_equal(f1(), f2())
-
-    library("microbenchmark")
-    ## can be slower (on tall data, HRC sized)
-    ## so can be faster (on wide data)
-    print(microbenchmark(f1(), f2(), times = 2))
-    ## so def slower. hmm
-
-}
diff --git a/QUILT/R/quilt.R b/QUILT/R/quilt.R
index 69fbee7..34f2aac 100644
--- a/QUILT/R/quilt.R
+++ b/QUILT/R/quilt.R
@@ -133,7 +133,7 @@ QUILT <- function(
     block_gibbs_iterations = c(3,6,9),
     n_gibbs_burn_in_its = 20,
     plot_per_sample_likelihoods = FALSE,
-    use_small_eHapsCurrent_tc = TRUE
+    use_small_eHapsCurrent_tc = FALSE
 ) {
 
 
@@ -575,10 +575,17 @@ QUILT <- function(
         S <- 1
         alphaHat_t1 <- array(0, c(K, nGrids))
         betaHat_t1 <- array(0, c(K, nGrids))
+        eMatGrid_t1 <- array(0, c(K, nGrids))
         alphaHat_t2 <- array(0, c(K, nGrids))
         betaHat_t2 <- array(0, c(K, nGrids))
+        eMatGrid_t2 <- array(0, c(K, nGrids))        
         alphaHat_t3 <- array(0, c(K, nGrids))
         betaHat_t3 <- array(0, c(K, nGrids))
+        eMatGrid_t3 <- array(0, c(K, nGrids))
+        gammaMT_t_local <- array(0, c(K, nGrids))
+        gammaMU_t_local <- array(0, c(K, nGrids))
+        gammaP_t_local <- array(0, c(K, nGrids))
+        ## 
         small_priorCurrent_m <- array(1 / K, c(K, S))
         small_alphaMatCurrent_tc <- array(1 / K, c(K, nGrids - 1, S))
         if (use_small_eHapsCurrent_tc) {
@@ -587,6 +594,9 @@ QUILT <- function(
             small_eHapsCurrent_tc <- array(0, c(1, 1, 1))
         }
 
+
+        
+
         results_across_samples <- as.list(sampleRange[2] - sampleRange[1] + 1)
         
         for(iSample in sampleRange[1]:sampleRange[2]) {
@@ -606,10 +616,16 @@ QUILT <- function(
                 small_transMatRate_tc_H = small_transMatRate_tc_H,
                 alphaHat_t1 = alphaHat_t1,
                 betaHat_t1 = betaHat_t1,
+                eMatGrid_t1 = eMatGrid_t1,                
                 alphaHat_t2 = alphaHat_t2,
                 betaHat_t2 = betaHat_t2,
+                eMatGrid_t2 = eMatGrid_t2,
                 alphaHat_t3 = alphaHat_t3,
                 betaHat_t3 = betaHat_t3,
+                eMatGrid_t3 = eMatGrid_t3,
+                gammaMT_t_local = gammaMT_t_local,
+                gammaMU_t_local = gammaMU_t_local,
+                gammaP_t_local = gammaP_t_local,
                 small_alphaMatCurrent_tc = small_alphaMatCurrent_tc,
                 small_priorCurrent_m = small_priorCurrent_m,
                 small_eHapsCurrent_tc = small_eHapsCurrent_tc,
@@ -683,12 +699,11 @@ QUILT <- function(
             }
 
             ## optionally, do some gc here, if longer running job
-            if (as.numeric(K) * as.numeric(nSNPs) > (1e8)) {
+            if (as.numeric(K) * as.numeric(nSNPs) > (1e6)) {
                 for(i in 1:5) {
                     gc(reset = TRUE)
                 }
             }
-                
             
         }
 
diff --git a/QUILT/R/reference-single.R b/QUILT/R/reference-single.R
index 3410463..ed792ec 100644
--- a/QUILT/R/reference-single.R
+++ b/QUILT/R/reference-single.R
@@ -413,7 +413,19 @@ make_rhb_t_equality <- function(
     ##
     if (infer_nMaxDH) {
         running_count <- cumsum(rowSums(temp_counter) / (nrow(rhb_t) * nGrids))
-        suggested_value <- which.max(running_count > 0.99)
+        ## really need to tune this better
+        ## basically, larger K, important to set large
+        if (K > 50000) {
+            thresh <- 0.9999
+        } else if (K > 10000) {
+            thresh <- 0.9995
+        } else if (K > 1000) {
+            thresh <- 0.999
+        } else {
+            thresh <- 0.99
+        }
+        ## really want to almost never need this, within reason, for large K
+        suggested_value <- which.max(running_count > thresh)
         nMaxDH <- min(
             max(c(2 ** 4 - 1, suggested_value)),
             nMaxDH_default
@@ -422,7 +434,7 @@ make_rhb_t_equality <- function(
             print_message(paste0("Using nMaxDH = ", nMaxDH))
         }
         distinctHapsB <- distinctHapsB[1:nMaxDH, ]
-        hapMatcher[hapMatcher > (nMaxDH)] <- 0
+        hapMatcher[hapMatcher > (nMaxDH)] <- 0L
     }
     ##
     ## inflate them too, they're pretty small
diff --git a/QUILT/man/QUILT.Rd b/QUILT/man/QUILT.Rd
index c2946be..5545976 100644
--- a/QUILT/man/QUILT.Rd
+++ b/QUILT/man/QUILT.Rd
@@ -69,7 +69,7 @@ QUILT(
   block_gibbs_iterations = c(3, 6, 9),
   n_gibbs_burn_in_its = 20,
   plot_per_sample_likelihoods = FALSE,
-  use_small_eHapsCurrent_tc = TRUE
+  use_small_eHapsCurrent_tc = FALSE
 )
 }
 \arguments{
diff --git a/QUILT/src/RcppExports.cpp b/QUILT/src/RcppExports.cpp
index be18e98..9096da1 100644
--- a/QUILT/src/RcppExports.cpp
+++ b/QUILT/src/RcppExports.cpp
@@ -842,8 +842,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // rcpp_forwardBackwardGibbsNIPT
-Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, const arma::imat& rhb_t, double ref_error, const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, Rcpp::List param_list, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const bool run_fb_subset, const int run_fb_grid_offset, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const bool use_starting_read_labels, const bool verbose, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs, const bool use_small_eHapsCurrent_tc);
-RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP hapSum_tcSEXP, SEXP hapMatcherSEXP, SEXP distinctHapsIESEXP, SEXP rhb_tSEXP, SEXP ref_errorSEXP, SEXP which_haps_to_useSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP param_listSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_subsetSEXP, SEXP run_fb_grid_offsetSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP use_starting_read_labelsSEXP, SEXP verboseSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP, SEXP use_small_eHapsCurrent_tcSEXP) {
+Rcpp::List rcpp_forwardBackwardGibbsNIPT(const Rcpp::List& sampleReads, const arma::mat& priorCurrent_m, const arma::cube& alphaMatCurrent_tc, const arma::cube& eHapsCurrent_tc, const arma::cube& transMatRate_tc_H, const double ff, const arma::mat& blocks_for_output, arma::mat& alphaHat_t1, arma::mat& betaHat_t1, arma::mat& alphaHat_t2, arma::mat& betaHat_t2, arma::mat& alphaHat_t3, arma::mat& betaHat_t3, arma::mat& eMatGrid_t1, arma::mat& eMatGrid_t2, arma::mat& eMatGrid_t3, arma::mat& gammaMT_t_local, arma::mat& gammaMU_t_local, arma::mat& gammaP_t_local, arma::cube& hapSum_tc, arma::imat& hapMatcher, arma::mat& distinctHapsIE, const arma::imat& rhb_t, double ref_error, const Rcpp::IntegerVector& which_haps_to_use, Rcpp::IntegerVector& wif0, Rcpp::LogicalVector& grid_has_read, Rcpp::IntegerVector& L_grid, Rcpp::NumericVector& smooth_cm, Rcpp::List param_list, const int Jmax_local, const double maxDifferenceBetweenReads, const double maxEmissionMatrixDifference, const int run_fb_grid_offset, const Rcpp::IntegerVector& grid, int snp_start_1_based, int snp_end_1_based, const bool generate_fb_snp_offsets, const int suppressOutput, int n_gibbs_starts, const int n_gibbs_sample_its, const int n_gibbs_burn_in_its, const Rcpp::List& double_list_of_starting_read_labels, Rcpp::IntegerVector seed_vector, const Rcpp::List& prev_list_of_alphaBetaBlocks, const int i_snp_block_for_alpha_beta, const bool haploid_gibbs_equal_weighting, const bool gibbs_initialize_iteratively, const bool gibbs_initialize_at_first_read, const bool do_block_resampling, const int artificial_relabel, const bool pass_in_alphaBeta, const bool update_in_place, const bool update_hapSum, const bool record_read_set, const double class_sum_cutoff, const bool perform_block_gibbs, const int shuffle_bin_radius, const Rcpp::IntegerVector block_gibbs_iterations, const bool rescale_eMatRead_t, const bool use_smooth_cm_in_block_gibbs, const double block_gibbs_quantile_prob, const bool do_shard_ff0_block_gibbs, const bool use_small_eHapsCurrent_tc);
+RcppExport SEXP _QUILT_rcpp_forwardBackwardGibbsNIPT(SEXP sampleReadsSEXP, SEXP priorCurrent_mSEXP, SEXP alphaMatCurrent_tcSEXP, SEXP eHapsCurrent_tcSEXP, SEXP transMatRate_tc_HSEXP, SEXP ffSEXP, SEXP blocks_for_outputSEXP, SEXP alphaHat_t1SEXP, SEXP betaHat_t1SEXP, SEXP alphaHat_t2SEXP, SEXP betaHat_t2SEXP, SEXP alphaHat_t3SEXP, SEXP betaHat_t3SEXP, SEXP eMatGrid_t1SEXP, SEXP eMatGrid_t2SEXP, SEXP eMatGrid_t3SEXP, SEXP gammaMT_t_localSEXP, SEXP gammaMU_t_localSEXP, SEXP gammaP_t_localSEXP, SEXP hapSum_tcSEXP, SEXP hapMatcherSEXP, SEXP distinctHapsIESEXP, SEXP rhb_tSEXP, SEXP ref_errorSEXP, SEXP which_haps_to_useSEXP, SEXP wif0SEXP, SEXP grid_has_readSEXP, SEXP L_gridSEXP, SEXP smooth_cmSEXP, SEXP param_listSEXP, SEXP Jmax_localSEXP, SEXP maxDifferenceBetweenReadsSEXP, SEXP maxEmissionMatrixDifferenceSEXP, SEXP run_fb_grid_offsetSEXP, SEXP gridSEXP, SEXP snp_start_1_basedSEXP, SEXP snp_end_1_basedSEXP, SEXP generate_fb_snp_offsetsSEXP, SEXP suppressOutputSEXP, SEXP n_gibbs_startsSEXP, SEXP n_gibbs_sample_itsSEXP, SEXP n_gibbs_burn_in_itsSEXP, SEXP double_list_of_starting_read_labelsSEXP, SEXP seed_vectorSEXP, SEXP prev_list_of_alphaBetaBlocksSEXP, SEXP i_snp_block_for_alpha_betaSEXP, SEXP haploid_gibbs_equal_weightingSEXP, SEXP gibbs_initialize_iterativelySEXP, SEXP gibbs_initialize_at_first_readSEXP, SEXP do_block_resamplingSEXP, SEXP artificial_relabelSEXP, SEXP pass_in_alphaBetaSEXP, SEXP update_in_placeSEXP, SEXP update_hapSumSEXP, SEXP record_read_setSEXP, SEXP class_sum_cutoffSEXP, SEXP perform_block_gibbsSEXP, SEXP shuffle_bin_radiusSEXP, SEXP block_gibbs_iterationsSEXP, SEXP rescale_eMatRead_tSEXP, SEXP use_smooth_cm_in_block_gibbsSEXP, SEXP block_gibbs_quantile_probSEXP, SEXP do_shard_ff0_block_gibbsSEXP, SEXP use_small_eHapsCurrent_tcSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -860,6 +860,12 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< arma::mat& >::type betaHat_t2(betaHat_t2SEXP);
     Rcpp::traits::input_parameter< arma::mat& >::type alphaHat_t3(alphaHat_t3SEXP);
     Rcpp::traits::input_parameter< arma::mat& >::type betaHat_t3(betaHat_t3SEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t1(eMatGrid_t1SEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t2(eMatGrid_t2SEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type eMatGrid_t3(eMatGrid_t3SEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type gammaMT_t_local(gammaMT_t_localSEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type gammaMU_t_local(gammaMU_t_localSEXP);
+    Rcpp::traits::input_parameter< arma::mat& >::type gammaP_t_local(gammaP_t_localSEXP);
     Rcpp::traits::input_parameter< arma::cube& >::type hapSum_tc(hapSum_tcSEXP);
     Rcpp::traits::input_parameter< arma::imat& >::type hapMatcher(hapMatcherSEXP);
     Rcpp::traits::input_parameter< arma::mat& >::type distinctHapsIE(distinctHapsIESEXP);
@@ -874,7 +880,6 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const int >::type Jmax_local(Jmax_localSEXP);
     Rcpp::traits::input_parameter< const double >::type maxDifferenceBetweenReads(maxDifferenceBetweenReadsSEXP);
     Rcpp::traits::input_parameter< const double >::type maxEmissionMatrixDifference(maxEmissionMatrixDifferenceSEXP);
-    Rcpp::traits::input_parameter< const bool >::type run_fb_subset(run_fb_subsetSEXP);
     Rcpp::traits::input_parameter< const int >::type run_fb_grid_offset(run_fb_grid_offsetSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type grid(gridSEXP);
     Rcpp::traits::input_parameter< int >::type snp_start_1_based(snp_start_1_basedSEXP);
@@ -884,8 +889,6 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< int >::type n_gibbs_starts(n_gibbs_startsSEXP);
     Rcpp::traits::input_parameter< const int >::type n_gibbs_sample_its(n_gibbs_sample_itsSEXP);
     Rcpp::traits::input_parameter< const int >::type n_gibbs_burn_in_its(n_gibbs_burn_in_itsSEXP);
-    Rcpp::traits::input_parameter< const bool >::type use_starting_read_labels(use_starting_read_labelsSEXP);
-    Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP);
     Rcpp::traits::input_parameter< const Rcpp::List& >::type double_list_of_starting_read_labels(double_list_of_starting_read_labelsSEXP);
     Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type seed_vector(seed_vectorSEXP);
     Rcpp::traits::input_parameter< const Rcpp::List& >::type prev_list_of_alphaBetaBlocks(prev_list_of_alphaBetaBlocksSEXP);
@@ -908,7 +911,7 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const double >::type block_gibbs_quantile_prob(block_gibbs_quantile_probSEXP);
     Rcpp::traits::input_parameter< const bool >::type do_shard_ff0_block_gibbs(do_shard_ff0_block_gibbsSEXP);
     Rcpp::traits::input_parameter< const bool >::type use_small_eHapsCurrent_tc(use_small_eHapsCurrent_tcSEXP);
-    rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_subset, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, use_starting_read_labels, verbose, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc));
+    rcpp_result_gen = Rcpp::wrap(rcpp_forwardBackwardGibbsNIPT(sampleReads, priorCurrent_m, alphaMatCurrent_tc, eHapsCurrent_tc, transMatRate_tc_H, ff, blocks_for_output, alphaHat_t1, betaHat_t1, alphaHat_t2, betaHat_t2, alphaHat_t3, betaHat_t3, eMatGrid_t1, eMatGrid_t2, eMatGrid_t3, gammaMT_t_local, gammaMU_t_local, gammaP_t_local, hapSum_tc, hapMatcher, distinctHapsIE, rhb_t, ref_error, which_haps_to_use, wif0, grid_has_read, L_grid, smooth_cm, param_list, Jmax_local, maxDifferenceBetweenReads, maxEmissionMatrixDifference, run_fb_grid_offset, grid, snp_start_1_based, snp_end_1_based, generate_fb_snp_offsets, suppressOutput, n_gibbs_starts, n_gibbs_sample_its, n_gibbs_burn_in_its, double_list_of_starting_read_labels, seed_vector, prev_list_of_alphaBetaBlocks, i_snp_block_for_alpha_beta, haploid_gibbs_equal_weighting, gibbs_initialize_iteratively, gibbs_initialize_at_first_read, do_block_resampling, artificial_relabel, pass_in_alphaBeta, update_in_place, update_hapSum, record_read_set, class_sum_cutoff, perform_block_gibbs, shuffle_bin_radius, block_gibbs_iterations, rescale_eMatRead_t, use_smooth_cm_in_block_gibbs, block_gibbs_quantile_prob, do_shard_ff0_block_gibbs, use_small_eHapsCurrent_tc));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -1264,7 +1267,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_QUILT_rcpp_gibbs_nipt_initialize", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_initialize, 31},
     {"_QUILT_rcpp_gibbs_nipt_iterate", (DL_FUNC) &_QUILT_rcpp_gibbs_nipt_iterate, 49},
     {"_QUILT_rcpp_fly_weighter", (DL_FUNC) &_QUILT_rcpp_fly_weighter, 14},
-    {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 61},
+    {"_QUILT_rcpp_forwardBackwardGibbsNIPT", (DL_FUNC) &_QUILT_rcpp_forwardBackwardGibbsNIPT, 64},
     {"_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects", (DL_FUNC) &_QUILT_Rcpp_make_eMatRead_t_for_gibbs_using_objects, 11},
     {"_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects", (DL_FUNC) &_QUILT_rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects, 11},
     {"_QUILT_Rcpp_quilt_test_doubler", (DL_FUNC) &_QUILT_Rcpp_quilt_test_doubler, 1},
diff --git a/QUILT/src/gibbs-nipt.cpp b/QUILT/src/gibbs-nipt.cpp
index eb6adec..3a1e266 100644
--- a/QUILT/src/gibbs-nipt.cpp
+++ b/QUILT/src/gibbs-nipt.cpp
@@ -2005,6 +2005,12 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
     arma::mat& betaHat_t2, 
     arma::mat& alphaHat_t3, 
     arma::mat& betaHat_t3,
+    arma::mat& eMatGrid_t1,
+    arma::mat& eMatGrid_t2,
+    arma::mat& eMatGrid_t3,
+    arma::mat& gammaMT_t_local,
+    arma::mat& gammaMU_t_local,
+    arma::mat& gammaP_t_local,
     arma::cube& hapSum_tc,
     arma::imat& hapMatcher,
     arma::mat& distinctHapsIE,
@@ -2019,7 +2025,6 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
     const int Jmax_local = 100,
     const double maxDifferenceBetweenReads = 1000,
     const double maxEmissionMatrixDifference = 10000000000,
-    const bool run_fb_subset = false,
     const int run_fb_grid_offset = 0,
     const Rcpp::IntegerVector& grid = 0,
     int snp_start_1_based = -1,
@@ -2029,8 +2034,6 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
     int n_gibbs_starts = 1,    
     const int n_gibbs_sample_its = 1,
     const int n_gibbs_burn_in_its = 1,    
-    const bool use_starting_read_labels = false,
-    const bool verbose = false,
     const Rcpp::List& double_list_of_starting_read_labels = R_NilValue,
     Rcpp::IntegerVector seed_vector = -1,
     const Rcpp::List& prev_list_of_alphaBetaBlocks = R_NilValue,
@@ -2075,7 +2078,10 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
     // ##     return_hapProbs = TRUE,
     // ##     return_gamma = TRUE,
     // ##     return_gibbs_block_output = FALSE,
-    // ##     return_advanced_gibbs_block_output = FALSE
+    // ##     return_advanced_gibbs_block_output = FALSE,
+    // ##     use_starting_read_labels = FALSE,
+    // ##     verbose = FALSE,
+    // ##     run_fb_subset = FALSE
     // ## )
     const bool return_alpha = as<bool>(param_list["return_alpha"]);
     const bool return_p_store = as<bool>(param_list["return_p_store"]);
@@ -2085,6 +2091,9 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
     const bool return_gamma = as<bool>(param_list["return_gamma"]);
     const bool return_gibbs_block_output = as<bool>(param_list["return_gibbs_block_output"]);
     const bool return_advanced_gibbs_block_output = as<bool>(param_list["return_advanced_gibbs_block_output"]);
+    const bool use_starting_read_labels = as<bool>(param_list["use_starting_read_labels"]);
+    const bool verbose = as<bool>(param_list["verbose"]);
+    const bool run_fb_subset = as<bool>(param_list["run_fb_subset"]);    
     //
     //
     // initialize variables 
@@ -2126,9 +2135,11 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
         gammaMU_t = arma::zeros(K, nGrids);
         gammaP_t = arma::zeros(K, nGrids);
     }
-    arma::mat gammaMT_t_local = arma::zeros(K, nGrids);
-    arma::mat gammaMU_t_local = arma::zeros(K, nGrids);
-    arma::mat gammaP_t_local = arma::zeros(K, nGrids);
+    if (!pass_in_alphaBeta) {    
+        gammaMT_t_local = arma::zeros(K, nGrids);
+        gammaMU_t_local = arma::zeros(K, nGrids);
+        gammaP_t_local = arma::zeros(K, nGrids);
+    }    
     arma::mat genProbsM_t_master = arma::zeros(3, nSNPsLocal);
     arma::mat genProbsF_t_master = arma::zeros(3, nSNPsLocal);
     arma::mat genProbsM_t = arma::zeros(3, nSNPsLocal);
@@ -2196,13 +2207,13 @@ Rcpp::List rcpp_forwardBackwardGibbsNIPT(
         betaHat_t2 = arma::zeros(K, nGrids);
         alphaHat_t3 = arma::zeros(K, nGrids);
         betaHat_t3 = arma::zeros(K, nGrids);
+        eMatGrid_t1 = arma::ones(K, nGrids);    
+        eMatGrid_t2 = arma::ones(K, nGrids);    
+        eMatGrid_t3 = arma::ones(K, nGrids);
     }
     arma::rowvec c1 = arma::zeros(1, nGrids);
-    arma::mat eMatGrid_t1 = arma::ones(K, nGrids);    
     arma::rowvec c2 = arma::zeros(1, nGrids);
-    arma::mat eMatGrid_t2 = arma::ones(K, nGrids);    
     arma::rowvec c3 = arma::zeros(1, nGrids);
-    arma::mat eMatGrid_t3 = arma::ones(K, nGrids);
     if (update_hapSum & !update_in_place) {
         hapSum_tc = arma::zeros(K, nGrids, S);
     }
diff --git a/QUILT/src/gibbs-small.cpp b/QUILT/src/gibbs-small.cpp
index 246c136..0b0bda0 100644
--- a/QUILT/src/gibbs-small.cpp
+++ b/QUILT/src/gibbs-small.cpp
@@ -152,6 +152,7 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects(
     const int K = gammaMT_t.n_rows;
     const int nGrids = gammaMT_t.n_cols;
     const int nSNPs = genProbsM_t.n_cols;
+    const int nMaxDH = distinctHapsIE.n_rows;    
     const double ref_one_minus_error = 1 - ref_error;
     arma::icolvec dh_col(K);
     arma::vec g0(32);
@@ -160,7 +161,16 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects(
     g1.fill(0);
     arma::vec g2(32);
     g2.fill(0);
+    arma::vec h0(32);
+    h0.fill(0);
+    arma::vec h1(32);
+    h1.fill(0);
+    arma::vec h2(32);
+    h2.fill(0);
     double gk0, gk1, gk2;
+    arma::vec mg0(nMaxDH + 1);
+    arma::vec mg1(nMaxDH + 1);
+    arma::vec mg2(nMaxDH + 1);        
     //
     for(iGrid = 0; iGrid < nGrids; iGrid++) {
         s = 32 * (iGrid); // 0-based here
@@ -169,45 +179,40 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects(
             e = nSNPs - 1;
         }
         nSNPsLocal = e - s + 1;
-        for(k = 0; k < K; k++) {
-            dh_col(k) = hapMatcher(which_haps_to_use(k) - 1, iGrid);
-        }
         g0.fill(0);
         g1.fill(0);
         g2.fill(0);
-        //
-        // loop
-        //
+        h0.fill(0);
+        h1.fill(0);
+        h2.fill(0);
         for(k = 0; k < K; k++) {
-            dh = dh_col(k);
-            if (dh == 0) {
-                // need to build this one
-                gk0 = gammaMT_t(k, iGrid);
-                gk1 = gammaMU_t(k, iGrid);
-                gk2 = gammaP_t(k, iGrid);                
-                std::uint32_t tmp(rhb_t(which_haps_to_use(k) - 1, iGrid));
-                for(b = 0; b < nSNPsLocal; b++) {
-                    if (tmp & (1<<b)) {
-                        // alternate
-                        g0(b) += gk0 * (ref_one_minus_error);
-                        g1(b) += gk1 * (ref_one_minus_error);
-                        g2(b) += gk2 * (ref_one_minus_error);                        
-                    } else {
-                        // reference
-                        g0(b) += gk0 * (ref_error);
-                        g1(b) += gk1 * (ref_error);
-                        g2(b) += gk2 * (ref_error);
-                    }
-                }
-            } else {
-                for(b = 0; b < nSNPsLocal; b++) {
-                    g0(b) += distinctHapsIE(dh - 1, s + b) * gammaMT_t(k, iGrid);
-                    g1(b) += distinctHapsIE(dh - 1, s + b) * gammaMU_t(k, iGrid);
-                    g2(b) += distinctHapsIE(dh - 1, s + b) * gammaP_t(k, iGrid);
+            gk0 = gammaMT_t(k, iGrid);
+            gk1 = gammaMU_t(k, iGrid);
+            gk2 = gammaP_t(k, iGrid);                
+            std::uint32_t tmp(rhb_t(which_haps_to_use(k) - 1, iGrid));
+            //std::uint32_t tmp(rhb_t_subset(k, iGrid));
+            for(b = 0; b < nSNPsLocal; b++, tmp >>= 1) {
+                if ((tmp & 0x1) == 0) {
+                //if (tmp & (1<<b)) {                    
+                    // alternate
+                    h0(b) += gk0;
+                    h1(b) += gk1;
+                    h2(b) += gk2;
+                } else {
+                    g0(b) += gk0;
+                    g1(b) += gk1;
+                    g2(b) += gk2;
                 }
             }
         }
+        for(b = 0; b < nSNPsLocal; b++) {
+            g0(b) = g0(b) * ref_one_minus_error + h0(b) * ref_error;
+            g1(b) = g1(b) * ref_one_minus_error + h1(b) * ref_error;
+            g2(b) = g2(b) * ref_one_minus_error + h2(b) * ref_error;            
+        }
+        //
         // add back in properly
+        //
         for(b = 0; b < nSNPsLocal; b++) {
             genProbsM_t(0, s + b) = (1 - g0(b)) * (1 - g1(b));
             genProbsM_t(1, s + b) = (g0(b) * (1 - g1(b)) + (1 - g0(b)) * g1(b));
@@ -223,4 +228,71 @@ void rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects(
         }
     }
     return;
+    //     s = 32 * (iGrid); // 0-based here
+    //     e = 32 * (iGrid + 1) - 1;
+    //     if (e > (nSNPs - 1)) {
+    //         e = nSNPs - 1;
+    //     }
+    //     nSNPsLocal = e - s + 1;
+    //     //
+    //     g0.fill(0);
+    //     g1.fill(0);
+    //     g2.fill(0);
+    //     mg0.fill(0);
+    //     mg1.fill(0);
+    //     mg2.fill(0);                
+    //     //
+    //     // loop
+    //     //
+    //     for(k = 0; k < K; k++) {
+    //         dh = hapMatcher(which_haps_to_use(k) - 1, iGrid);
+    //         mg0(dh) += gammaMT_t(k, iGrid);
+    //         mg1(dh) += gammaMU_t(k, iGrid);
+    //         mg2(dh) += gammaP_t(k, iGrid);            
+    //         if (dh == 0) {
+    //             // need to build this one
+    //             gk0 = gammaMT_t(k, iGrid);
+    //             gk1 = gammaMU_t(k, iGrid);
+    //             gk2 = gammaP_t(k, iGrid);                
+    //             std::uint32_t tmp(rhb_t(which_haps_to_use(k) - 1, iGrid));
+    //             for(b = 0; b < nSNPsLocal; b++) {
+    //                 if (tmp & (1<<b)) {
+    //                     // alternate
+    //                     g0(b) += gk0 * (ref_one_minus_error);
+    //                     g1(b) += gk1 * (ref_one_minus_error);
+    //                     g2(b) += gk2 * (ref_one_minus_error);                        
+    //                 } else {
+    //                     // reference
+    //                     g0(b) += gk0 * (ref_error);
+    //                     g1(b) += gk1 * (ref_error);
+    //                     g2(b) += gk2 * (ref_error);
+    //                 }
+    //             }
+    //         }
+    //     }
+    //     //
+    //     // now do the rest!
+    //     //
+    //     for(b = 0; b < nSNPsLocal; b++) {
+    //         for(dh = 0; dh < nMaxDH; dh++) {
+    //             g0(b) += distinctHapsIE(dh, s + b) * mg0(dh + 1);
+    //             g1(b) += distinctHapsIE(dh, s + b) * mg1(dh + 1);
+    //             g2(b) += distinctHapsIE(dh, s + b) * mg2(dh + 1);
+    //         }
+    //     }
+    //     // add back in properly
+    //     for(b = 0; b < nSNPsLocal; b++) {
+    //         genProbsM_t(0, s + b) = (1 - g0(b)) * (1 - g1(b));
+    //         genProbsM_t(1, s + b) = (g0(b) * (1 - g1(b)) + (1 - g0(b)) * g1(b));
+    //         genProbsM_t(2, s + b) = g0(b) * g1(b);
+    //         //
+    //         genProbsF_t(0, s + b) = (1 - g0(b)) * (1 - g2(b));
+    //         genProbsF_t(1, s + b) = (g0(b) * (1 - g2(b)) + (1 - g0(b)) * g2(b));
+    //         genProbsF_t(2, s + b) = g0(b) * g2(b);
+    //         //
+    //         hapProbs_t(0, s + b) = g0(b);
+    //         hapProbs_t(1, s + b) = g1(b);
+    //         hapProbs_t(2, s + b) = g2(b);
+    //     }
+    // }
 }
diff --git a/QUILT/tests/testthat/test-unit-gibbs-small.R b/QUILT/tests/testthat/test-unit-gibbs-small.R
index eb3d3bb..cc4ec55 100644
--- a/QUILT/tests/testthat/test-unit-gibbs-small.R
+++ b/QUILT/tests/testthat/test-unit-gibbs-small.R
@@ -1,3 +1,20 @@
+if ( 1 == 0 ) {
+    
+    library("testthat")
+    library("QUILT")
+    dir <- "~/proj/QUILT/"
+    setwd(paste0(dir, "/QUILT/R"))
+    a <- dir(pattern = "*.R")
+    b <- grep("~", a)
+    if (length(b) > 0) {
+        a <- a[-b]
+    }
+    o <- sapply(a, source)
+
+}
+
+
+
 test_that("can avoid using eHapsCurrent_tc in genProbs calculation", {
 
     ## OK am here
@@ -254,7 +271,7 @@ test_that("can avoid inflating fhb_t using eHapsCurrent_tc to make eMatRead_t",
 
 test_that("profile using riyan fish data", {
 
-    skip("WER")
+    skip("speed test")
     setwd("/data/smew1/rdavies/riyan_debug_2021_03_15")
 
     i_chr <- 2
@@ -316,7 +333,7 @@ test_that("profile using riyan fish data", {
 
 test_that("profile using HRC data", {
 
-    skip("WER")
+    skip("speed test")
     
     setwd("~/proj/QUILT/")
     dir <- "/data/smew1/rdavies/quilt_data/hrc_2021_04_20/2021_04_20_bams"
@@ -336,6 +353,88 @@ test_that("profile using HRC data", {
 
     if (1 == 0) {
 
+        temp_compare_two_versions <- function(
+                                              rhb_t,
+                                              small_eHapsCurrent_tc,
+                                              which_haps_to_use,
+                                              nSNPs,
+                                              ref_error,
+                                              maxDifferenceBetweenReads,
+                                              Jmax,
+                                              hapMatcher,
+                                              grid,
+                                              distinctHapsIE,
+                                              sampleReads
+                                              ) {
+
+            K <- length(which_haps_to_use)
+            rescale_eMatRead_t <- TRUE
+            nReads <- length(sampleReads)
+            
+            f1 <- function() {
+                ##
+                S <- 1
+                ## argh
+                ## print(paste0("start = ", Sys.time()))
+                inflate_fhb_t_in_place(
+                    rhb_t,
+                    small_eHapsCurrent_tc,
+                    haps_to_get = which_haps_to_use - 1,
+                    nSNPs = nSNPs,
+                    ref_error = ref_error
+                )
+                eMatRead_t_old <- array(1, c(K, nReads))        
+                rcpp_make_eMatRead_t(
+                    eMatRead_t = eMatRead_t_old,
+                    sampleReads = sampleReads,
+                    eHapsCurrent_tc = small_eHapsCurrent_tc,
+                    s = 0,
+                    maxDifferenceBetweenReads = maxDifferenceBetweenReads,
+                    Jmax = Jmax,
+                    eMatHapOri_t = matrix(0, nrow = 0, ncol = 0),
+                    pRgivenH1 = vector(),
+                    pRgivenH2 = vector(),
+                    prev = 1,
+                    suppressOutput = 1,
+                    prev_section = "N/A",
+                    next_section = "N/A",
+                    run_pseudo_haploid = FALSE,
+                    rescale_eMatRead_t = rescale_eMatRead_t
+                )
+                eMatRead_t_old
+            }
+
+
+            f2 <- function() {
+                eMatRead_t_new <- array(1, c(K, nReads))
+                Rcpp_make_eMatRead_t_for_gibbs_using_objects(
+                    eMatRead_t = eMatRead_t_new,
+                    sampleReads = sampleReads,
+                    hapMatcher = hapMatcher,
+                    grid = grid,
+                    rhb_t = rhb_t,
+                    distinctHapsIE = distinctHapsIE,
+                    ref_error = ref_error,
+                    which_haps_to_use = which_haps_to_use,
+                    rescale_eMatRead_t = rescale_eMatRead_t,
+                    Jmax = Jmax,
+                    maxDifferenceBetweenReads = maxDifferenceBetweenReads
+                )
+                eMatRead_t_new
+            }
+
+            library("testthat")
+            expect_equal(f1(), f2())
+
+            library("microbenchmark")
+            ## can be slower (on tall data, HRC sized)
+            ## so can be faster (on wide data)
+            print(microbenchmark(f1(), f2(), times = 2))
+            ## so def slower. hmm
+
+        }
+
+        
         skip("woo")
         for(sample_name in c("NA12878", "NA12878HT", "NA12878ONT")) {
             print(sample_name)
@@ -360,3 +459,116 @@ test_that("profile using HRC data", {
     }
 
 })
+
+
+test_that("profile genProbs making with or without eHapsCurrent_tc", {
+
+    skip("test")
+    
+    for(sample_name in c("NA12878", "NA12878HT", "NA12878ONT")) {
+        
+            print(sample_name)
+            load(paste0("/dev/shm/rwdavies/", sample_name, ".RData")    )
+snp_start_1_based <- 1
+    snp_end_1_based <- nSNPs    
+class(hapMatcher) <- "integer"
+    
+            f1 <- function() {
+        inflate_fhb_t_in_place(
+            rhb_t,
+            small_eHapsCurrent_tc,
+            haps_to_get = which_haps_to_use - 1,
+            nSNPs = nSNPs,
+            ref_error = ref_error
+        )
+        rcpp_calculate_gn_genProbs_and_hapProbs(
+            genProbsM_t = genProbsM_t,
+            genProbsF_t = genProbsF_t,
+            hapProbs_t = hapProbs_t,
+            s = 0,
+            eHapsCurrent_tc = small_eHapsCurrent_tc,
+            gammaMT_t = gammaMT_t,
+            gammaMU_t = gammaMU_t,
+            gammaP_t = gammaP_t,
+            grid = grid,
+            snp_start_1_based = snp_start_1_based,
+            snp_end_1_based = snp_end_1_based,
+            grid_offset = 0
+        )
+        list(genProbsM_t = genProbsM_t,
+            genProbsF_t = genProbsF_t,
+            hapProbs_t = hapProbs_t)
+    }
+
+
+        
+        f2 <- function() {
+            ## rhb_t_subset <- rhb_t[which_haps_to_use, ]
+    rcpp_calculate_gibbs_small_genProbs_and_hapProbs_using_binary_objects(
+        genProbsM_t = genProbsM_t_new,
+        genProbsF_t = genProbsF_t_new,
+        hapProbs_t = hapProbs_t_new,
+        gammaMT_t = gammaMT_t,
+        gammaMU_t = gammaMU_t,
+        gammaP_t = gammaP_t,
+        hapMatcher = hapMatcher,
+        distinctHapsIE = distinctHapsIE,
+        which_haps_to_use = which_haps_to_use,
+        ref_error = ref_error,
+        rhb_t = rhb_t
+    )
+        list(genProbsM_t = genProbsM_t_new,
+            genProbsF_t = genProbsF_t_new,
+            hapProbs_t = hapProbs_t_new)
+    }
+
+    genProbsM_t <- array(0, c(3, nSNPs))
+    genProbsF_t <- array(0, c(3, nSNPs))
+    hapProbs_t <- array(0, c(3, nSNPs))
+    
+    
+    genProbsM_t_new <- array(0, c(3, nSNPs))
+    genProbsF_t_new <- array(0, c(3, nSNPs))
+    hapProbs_t_new <- array(0, c(3, nSNPs))
+
+        inflate_fhb_t_in_place(
+            rhb_t,
+            small_eHapsCurrent_tc,
+            haps_to_get = which_haps_to_use - 1,
+            nSNPs = nSNPs,
+            ref_error = ref_error
+        )
+    
+    f <- function() {
+        K <- nrow(small_eHapsCurrent_tc)
+        nGrids <- ncol(rhb_t)
+        gammaMT_t <- array(runif(K * nGrids), c(K, nGrids))
+        for(iGrid in 1:nGrids) {
+            gammaMT_t[, iGrid] <- gammaMT_t[, iGrid] / sum(gammaMT_t[, iGrid])
+        }
+        gammaMT_t
+    }
+    gammaMT_t <- f()
+    gammaMU_t <- f()
+    gammaP_t <- f()
+    library("testthat")
+    expect_equal(f1(), f2(), tol = 1e-5)
+
+
+    library("microbenchmark")
+    ## can be slower (on tall data, HRC sized)
+    ## so can be faster (on wide data)
+    print(microbenchmark(f1(), f2(), times = 2))
+    ## so DEF slower, much. hmm
+
+
+        }
+
+        ## other one
+        
+        ## slower but no problems!
+        ## hmm, though does add up, when run repeatedly
+        ## can I make faster, or is it just what it is?
+
+
+})
diff --git a/README.md b/README.md
index 6a3afdd..29e101d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 QUILT
 =====
-**__Current Version: 0.1.6__**
-Release date: April 9 2021
+**__Current Version: 0.1.7__**
+Release date: April 22 2021
 
 [![Build Status](https://img.shields.io/travis/rwdavies/QUILT/master.svg)](https://travis-ci.org/rwdavies/QUILT/)
 
@@ -49,8 +49,8 @@ git clone --recursive https://github.com/rwdavies/QUILT.git
 cd QUILT
 ./scripts/install-dependencies.sh
 cd releases
-wget https://github.com/rwdavies/quilt/releases/download/0.1.6/QUILT_0.1.6.tar.gz ## or curl -O
-R CMD INSTALL QUILT_0.1.6.tar.gz
+wget https://github.com/rwdavies/quilt/releases/download/0.1.7/QUILT_0.1.7.tar.gz ## or curl -O
+R CMD INSTALL QUILT_0.1.7.tar.gz
 ```
 
 ### conda <a name="paragraph-installation-conda"></a>
diff --git a/scripts/test-cli.R b/scripts/test-cli.R
index 1cc8148..a56238c 100755
--- a/scripts/test-cli.R
+++ b/scripts/test-cli.R
@@ -36,8 +36,8 @@ STITCH::make_STITCH_cli(
     other_character_params = c("phasefile", "output_RData_filename", "RData_objects_to_save", "prepared_reference_filename", "reference_exclude_samplelist_file", "output_sites_filename", "region_exclude_file", "block_gibbs_iterations"),
     integer_vectors = c("block_gibbs_iterations"),
     character_vectors = c("RData_objects_to_save"),
-    other_logical_params = c("make_plots", "verbose", "record_read_label_usage", "record_interim_dosages", "use_bx_tag", "addOptimalHapsToVCF", "estimate_bq_using_truth_read_labels", "make_plots_block_gibbs", "override_default_params_for_small_ref_panel", "hla_run", "make_fake_vcf_with_sites_list", "print_extra_timing_information", "plot_per_sample_likelihoods", "save_prepared_reference"),
-    other_integer_params = c("nGibbsSamples", "n_seek_its", "Ksubset", "Knew", "K_top_matches", "panel_size", "bxTagUpperLimit", "seed", "gamma_physically_closest_to", "nMaxDH", "minimum_number_of_sample_reads", "n_gibbs_burn_in_its", "use_small_eHapsCurrent_tc"),
+    other_logical_params = c("make_plots", "verbose", "record_read_label_usage", "record_interim_dosages", "use_bx_tag", "addOptimalHapsToVCF", "estimate_bq_using_truth_read_labels", "make_plots_block_gibbs", "override_default_params_for_small_ref_panel", "hla_run", "make_fake_vcf_with_sites_list", "print_extra_timing_information", "plot_per_sample_likelihoods", "save_prepared_reference", "use_small_eHapsCurrent_tc"),
+    other_integer_params = c("nGibbsSamples", "n_seek_its", "Ksubset", "Knew", "K_top_matches", "panel_size", "bxTagUpperLimit", "seed", "gamma_physically_closest_to", "nMaxDH", "minimum_number_of_sample_reads", "n_gibbs_burn_in_its"),
     other_double_params = c("heuristic_match_thin", "minGLValue"),
     function_name = "QUILT",
     library_name = "QUILT"