diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f3ba9f4..ed7a984 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,7 +91,7 @@ jobs: fi popd pushd src/testdata - conda install --yes -c conda-forge h5py + conda install --yes -c conda-forge h5py python=3.10 # weighted_unnormalized time ssu -m weighted_unnormalized_fp32 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5 ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5 diff --git a/src/api.cpp b/src/api.cpp index 83af3a6..7e69a13 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -510,14 +510,33 @@ compute_status faith_pd_one_off(const char* biom_filename, const char* tree_file SETUP_TDBG("faith_pd_one_off") CHECK_FILE(biom_filename, table_missing) CHECK_FILE(tree_filename, tree_missing) - PARSE_SYNC_TREE_TABLE(tree_filename, table_filename) - + PARSE_TREE_TABLE(tree_filename, biom_filename) TDBG_STEP("load_files") - initialize_results_vec(*result, table); - // compute faithpd - su::faith_pd(table, tree_sheared, std::ref((*result)->values)); - TDBG_STEP("faith_pd") + // Filter out any elements with zero counts + su::biom_inmem table_nz(table,1.0); + if ((table_nz.n_samples==0) || (table_nz.n_obs==0)) { + fprintf(stderr, "WARNING: All samples had zero counts. Forcing zero result.\n"); + SYNC_TREE_TABLE(tree, table) + + TDBG_STEP("sync_tree_table") + initialize_results_vec(*result, table); + // nothing else to do... results already initialized to 0 + TDBG_STEP("faith_pd") + } else { + if ((table_nz.n_samples!=table.n_samples) || (table_nz.n_obs!=table.n_obs)) { + fprintf(stderr, "WARNING: Some samples had zero counts and were filtered out.\n"); + } + SYNC_TREE_TABLE(tree, table_nz) + + TDBG_STEP("sync_tree_table") + + initialize_results_vec(*result, table_nz); + + // compute faithpd + su::faith_pd(table_nz, tree_sheared, std::ref((*result)->values)); + TDBG_STEP("faith_pd") + } return okay; } diff --git a/src/biom_inmem.cpp b/src/biom_inmem.cpp index b444dea..69a8d61 100644 --- a/src/biom_inmem.cpp +++ b/src/biom_inmem.cpp @@ -257,8 +257,9 @@ biom_inmem::biom_inmem(const biom_inmem &other, const double min_sample_counts) , sample_ids() , obs_ids() { - #pragma omp parallel for schedule(static) - for(int i = 0; i < 2; i++) { + if ((resident_obj.n_obs>0) && (resident_obj.n_samples>0)) { + #pragma omp parallel for schedule(static) + for(int i = 0; i < 2; i++) { if (i==0) { // filter out zero obs uint32_t obs_cnt = 0; @@ -293,7 +294,14 @@ biom_inmem::biom_inmem(const biom_inmem &other, const double min_sample_counts) } create_id_index(sample_ids, sample_id_index); } - } + } + } //((resident_obj.n_obs>0) && (resident_obj.n_samples>0)) + else + { + // degenerate case, just set to 0 + n_samples = 0; + n_obs = 0; + } } diff --git a/src/unifrac.cpp b/src/unifrac.cpp index 6d3bc4b..e2ea020 100644 --- a/src/unifrac.cpp +++ b/src/unifrac.cpp @@ -369,7 +369,8 @@ void su::faith_pd(biom_interface &table, double length; // for node in postorderselect - for(unsigned int k = 0; k < (tree.nparens / 2) - 1; k++) { + const unsigned int max_k = (tree.nparens>1) ? ((tree.nparens / 2) - 1) : 0; + for(unsigned int k = 0; k < max_k; k++) { node = tree.postorderselect(k); // get branch length length = tree.lengths[node]; diff --git a/src/unifrac_cmp.cpp b/src/unifrac_cmp.cpp index e40c9e8..7ae65e6 100644 --- a/src/unifrac_cmp.cpp +++ b/src/unifrac_cmp.cpp @@ -137,7 +137,7 @@ inline void unifracTT(const su::biom_interface &table, */ unsigned int k = 0; // index in tree - const unsigned int max_k = (tree.nparens / 2) - 1; + const unsigned int max_k = (tree.nparens>1) ? ((tree.nparens / 2) - 1) : 0; const unsigned int num_prop_chunks = propstack_multi.get_num_stacks(); while (k1) ? ((tree.nparens / 2) - 1) : 0; const unsigned int num_prop_chunks = propstack_multi.get_num_stacks(); while (k