Skip to content

Commit

Permalink
Merge pull request #53 from sfiligoi/filter0_231006
Browse files Browse the repository at this point in the history
Filter elements with zero sample size in fathpd
  • Loading branch information
sfiligoi authored Oct 9, 2023
2 parents 37ee976 + ed1485b commit 1834378
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ jobs:
fi
popd
pushd src/testdata
conda install --yes -c conda-forge h5py
conda install --yes -c conda-forge h5py python=3.10
# weighted_unnormalized
time ssu -m weighted_unnormalized_fp32 -i test500.biom -t test500.tre --pcoa 4 -r hdf5_fp32 -o t1.h5
./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5
Expand Down
31 changes: 25 additions & 6 deletions src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,14 +510,33 @@ compute_status faith_pd_one_off(const char* biom_filename, const char* tree_file
SETUP_TDBG("faith_pd_one_off")
CHECK_FILE(biom_filename, table_missing)
CHECK_FILE(tree_filename, tree_missing)
PARSE_SYNC_TREE_TABLE(tree_filename, table_filename)

PARSE_TREE_TABLE(tree_filename, biom_filename)
TDBG_STEP("load_files")
initialize_results_vec(*result, table);

// compute faithpd
su::faith_pd(table, tree_sheared, std::ref((*result)->values));
TDBG_STEP("faith_pd")
// Filter out any elements with zero counts
su::biom_inmem table_nz(table,1.0);
if ((table_nz.n_samples==0) || (table_nz.n_obs==0)) {
fprintf(stderr, "WARNING: All samples had zero counts. Forcing zero result.\n");
SYNC_TREE_TABLE(tree, table)

TDBG_STEP("sync_tree_table")
initialize_results_vec(*result, table);
// nothing else to do... results already initialized to 0
TDBG_STEP("faith_pd")
} else {
if ((table_nz.n_samples!=table.n_samples) || (table_nz.n_obs!=table.n_obs)) {
fprintf(stderr, "WARNING: Some samples had zero counts and were filtered out.\n");
}
SYNC_TREE_TABLE(tree, table_nz)

TDBG_STEP("sync_tree_table")

initialize_results_vec(*result, table_nz);

// compute faithpd
su::faith_pd(table_nz, tree_sheared, std::ref((*result)->values));
TDBG_STEP("faith_pd")
}

return okay;
}
Expand Down
14 changes: 11 additions & 3 deletions src/biom_inmem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,9 @@ biom_inmem::biom_inmem(const biom_inmem &other, const double min_sample_counts)
, sample_ids()
, obs_ids()
{
#pragma omp parallel for schedule(static)
for(int i = 0; i < 2; i++) {
if ((resident_obj.n_obs>0) && (resident_obj.n_samples>0)) {
#pragma omp parallel for schedule(static)
for(int i = 0; i < 2; i++) {
if (i==0) {
// filter out zero obs
uint32_t obs_cnt = 0;
Expand Down Expand Up @@ -293,7 +294,14 @@ biom_inmem::biom_inmem(const biom_inmem &other, const double min_sample_counts)
}
create_id_index(sample_ids, sample_id_index);
}
}
}
} //((resident_obj.n_obs>0) && (resident_obj.n_samples>0))
else
{
// degenerate case, just set to 0
n_samples = 0;
n_obs = 0;
}
}


Expand Down
3 changes: 2 additions & 1 deletion src/unifrac.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,8 @@ void su::faith_pd(biom_interface &table,
double length;

// for node in postorderselect
for(unsigned int k = 0; k < (tree.nparens / 2) - 1; k++) {
const unsigned int max_k = (tree.nparens>1) ? ((tree.nparens / 2) - 1) : 0;
for(unsigned int k = 0; k < max_k; k++) {
node = tree.postorderselect(k);
// get branch length
length = tree.lengths[node];
Expand Down
4 changes: 2 additions & 2 deletions src/unifrac_cmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ inline void unifracTT(const su::biom_interface &table,
*/

unsigned int k = 0; // index in tree
const unsigned int max_k = (tree.nparens / 2) - 1;
const unsigned int max_k = (tree.nparens>1) ? ((tree.nparens / 2) - 1) : 0;

const unsigned int num_prop_chunks = propstack_multi.get_num_stacks();
while (k<max_k) {
Expand Down Expand Up @@ -277,7 +277,7 @@ inline void unifrac_vawTT(const su::biom_interface &table,
TFloat *lengths = taskObj.lengths;

unsigned int k = 0; // index in tree
const unsigned int max_k = (tree.nparens / 2) - 1;
const unsigned int max_k = (tree.nparens>1) ? ((tree.nparens / 2) - 1) : 0;

const unsigned int num_prop_chunks = propstack_multi.get_num_stacks();
while (k<max_k) {
Expand Down

0 comments on commit 1834378

Please sign in to comment.