qiime2 · gregcaporaso · Jan 24, 2025 · Dec 5, 2024 · Dec 5, 2024 · Jan 8, 2025
diff --git a/q2_feature_table/_normalize.py b/q2_feature_table/_normalize.py
@@ -5,7 +5,6 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
-
 import biom
 
 import os
@@ -15,13 +14,19 @@
 from rnanorm import CPM, CTF, CUF, FPKM, TMM, TPM, UQ
 
 
-def rarefy(table: biom.Table, sampling_depth: int,
-           with_replacement: bool = False) -> biom.Table:
+def rarefy(table: biom.Table,
+           sampling_depth: int,
+           with_replacement: bool = False,
+           random_seed: int = None
+           ) -> biom.Table:
+
     if with_replacement:
         table = table.filter(lambda v, i, m: v.sum() >= sampling_depth,
                              inplace=False, axis='sample')
+
     table = table.subsample(sampling_depth, axis='sample', by_id=False,
-                            with_replacement=with_replacement)
+                            with_replacement=with_replacement,
+                            seed=random_seed)
 
     if table.is_empty():
         raise ValueError('The rarefied table contains no samples or features. '

diff --git a/q2_feature_table/plugin_setup.py b/q2_feature_table/plugin_setup.py
@@ -37,7 +37,8 @@
     function=q2_feature_table.rarefy,
     inputs={'table': FeatureTable[Frequency]},
     parameters={'sampling_depth': Int % Range(1, None),
-                'with_replacement': Bool},
+                'with_replacement': Bool,
+                'random_seed': Int % Range(0, None)},
     outputs=[('rarefied_table', FeatureTable[Frequency])],
     input_descriptions={'table': 'The feature table to be rarefied.'},
     parameter_descriptions={
@@ -47,7 +48,10 @@
                            'included in the resulting table.'),
         'with_replacement': ('Rarefy with replacement by sampling from the '
                              'multinomial distribution instead of rarefying '
-                             'without replacement.')
+                             'without replacement.'),
+        'random_seed': ('Set the seed for the subsampling. Using the same '
+                        'seed with the same table will always lead to the '
+                        'same result. Defaults to a random seed.')
     },
     output_descriptions={
         'rarefied_table': 'The resulting rarefied feature table.'

diff --git a/q2_feature_table/tests/test_normalize.py b/q2_feature_table/tests/test_normalize.py
@@ -23,15 +23,40 @@
 
 class RarefyTests(TestCase):
 
-    def test_rarefy(self):
+    def test_rarefy_random_seed_is_randomized(self):
+        """Unseeded calls should not all return the same rarefied table."""
         t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
                   ['O1', 'O2'],
                   ['S1', 'S2', 'S3'])
         a = rarefy(t, 2)
-        self.assertEqual(a.shape, (2, 2))
-        self.assertEqual(set(a.ids(axis='sample')), set(['S2', 'S3']))
-        self.assertEqual(set(a.ids(axis='observation')), set(['O1', 'O2']))
-        npt.assert_array_equal(a.sum(axis='sample'), np.array([2., 2.]))
+        n_iterations = 100
+        # Draw every unseeded repeat first, then validate and compare them.
+        repeats = [rarefy(t, 2) for _ in range(n_iterations)]
+        for b in repeats:
+            self.assertEqual(b.shape, (2, 2))
+            self.assertEqual(set(b.ids(axis='sample')), {'S2', 'S3'})
+            self.assertEqual(set(b.ids(axis='observation')), {'O1', 'O2'})
+            npt.assert_array_equal(b.sum(axis='sample'), np.array([2., 2.]))
+        self.assertIn(False, [a == b for b in repeats],
+                      f"After {n_iterations} iterations, all resulting "
+                      "feature tables are identical. It therefore seems "
+                      "that a randomized seed is not being used.")
+
+    def test_rarefy_seed_is_not_randomized(self):
+        """A fixed ``random_seed`` must yield identical tables every call."""
+        t = Table(np.array([[0, 1, 3], [1, 1, 2]]),
+                  ['O1', 'O2'],
+                  ['S1', 'S2', 'S3'])
+        a = rarefy(t, 2, random_seed=1)
+        n_iterations = 100
+        # Re-run with the identical seed and record whether each run matches.
+        matches = [a == rarefy(t, 2, random_seed=1)
+                   for _ in range(n_iterations)]
+        self.assertNotIn(False, matches,
+                         f"After {n_iterations} iterations, at least one "
+                         "feature table differed from the others. It "
+                         "therefore seems that a randomized seed is being "
+                         "used.")
 
     def test_rarefy_replacement(self):
         t = Table(np.array([[0, 10, 30], [10, 10, 20]]),