Skip to content

Commit

Permalink
BUG: samples dropped by rarefying are now handled by mismatched pairs (
Browse files Browse the repository at this point in the history
  • Loading branch information
cherman2 authored Dec 12, 2024
1 parent 9275844 commit 7a88753
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
6 changes: 6 additions & 0 deletions q2_fmt/_peds.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,10 @@ def pedf_permutation_test(table: pd.DataFrame, metadata: qiime2.Metadata,
actual_pedf = pedf_df[['id', 'measure']].set_index('id')['measure']

# Mismatch simulation:

# Filter out any samples that would be dropped by rarefying before we
# assign donor-recipient mismatches.
table = table[table.sum(axis=1) >= sampling_depth]
recip_df = _create_recipient_table(used_references, metadata_df, table)
donor_df = table[table.index.isin(used_references)]
mismatched_series = \
Expand All @@ -529,10 +533,12 @@ def pedf_permutation_test(table: pd.DataFrame, metadata: qiime2.Metadata,
simulated_recip_table, simulated_donor_table =\
_create_duplicated_tables(simulated_mismatched_series, recip_df,
donor_df)

# Concatenate our recip and donor tables so the column number stays the
# same after subsampling
simulated_table = pd.concat([simulated_recip_table,
simulated_donor_table])

rarefied_simulated_table = _subsample(table=simulated_table,
sampling_depth=sampling_depth)

Expand Down
35 changes: 35 additions & 0 deletions q2_fmt/tests/test_engraftment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2014,6 +2014,41 @@ def test_peds_sim_stats_99_iters(self):
self.assertEqual(count_less, exp_count_less)
self.assertEqual(per_subject_p, exp_per_subject_p)

def test_samples_drop(self):
    """Check that a sample below the sampling depth is absent from the output.

    'sample2' has a total feature count of 1, which is below the
    ``sampling_depth`` of 9 passed to ``pedf_permutation_test``; the test
    asserts that its id does not appear in the returned ``peds`` frame.
    """
    # NOTE: this test has a very small chance of failing by random chance
    # if, out of the 9 random samples, the present feature doesn't get
    # subsampled at least one time, but that is pretty unlikely.
    recipient_ids = ['sample1', 'sample2', 'sample3']
    donor_ids = ['donor1', 'donor2', 'donor3']
    all_ids = recipient_ids + donor_ids
    missing = [np.nan] * 3

    # Recipients carry Ref/subject/group values; donors only carry Location.
    md_columns = {
        'id': all_ids,
        'Ref': donor_ids + missing,
        'subject': ['sub1', 'sub2', 'sub3'] + missing,
        'group': [1, 1, 1] + missing,
        "Location": missing + ['test'] * 3,
    }
    metadata = Metadata(pd.DataFrame(md_columns).set_index('id'))

    # sample2's only non-zero count is Feature2 == 1 (row total of 1).
    feature_counts = {
        'id': all_ids,
        'Feature1': [10, 0, 0, 10, 0, 0],
        'Feature2': [0, 1, 0, 0, 10, 0],
        'Feature3': [0, 0, 10, 0, 0, 10],
    }
    table_df = pd.DataFrame(feature_counts).set_index('id')

    peds, _, _ = pedf_permutation_test(
        table=table_df,
        metadata=metadata,
        time_column="group",
        reference_column="Ref",
        subject_column="subject",
        sampling_depth=9,
        num_resamples=999,
    )

    sample2_present = peds['id'].isin(['sample2']).any()
    self.assertFalse(sample2_present)


class detect(TestBase):
def test_baseline_donor_md(self):
Expand Down

0 comments on commit 7a88753

Please sign in to comment.