From cd57f45f25aaf0b06d0314f6f6402c56aa3fa4b4 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 2 Aug 2024 18:03:09 -0700 Subject: [PATCH 1/3] BUG: Fix #562 Ideally we would test this on numpy v2, but I can't seem to get empress running with numpy v2 on my system (I get a Cython error from iow). In any case, this should address the problem. --- empress/tools.py | 12 +++++++++--- tests/python/test_tools.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/empress/tools.py b/empress/tools.py index 850a51fcb..c5b7b3c4c 100644 --- a/empress/tools.py +++ b/empress/tools.py @@ -345,19 +345,23 @@ def shifting(bitlist, size=51): Parameters ---------- bitlist: list of int - The input list of 0-1 + The input list of bits (0 or 1). Depending on the version of iow + installed, the entries in this list might be of a slightly different + type (e.g. np.uint8). size: int The size of the buffer Returns ------- list of int - Representation of the 0-1s as a list of int + Representation of the bits as a list of int. Regardless of the types in + the input bitlist, the entries in the output list will always have type + int. Raises ------ ValueError - If any of the list values is different than 0 or 1 + If any of the entries in bitlist is not equal to 0 or 1. References ---------- @@ -370,6 +374,8 @@ def shifting(bitlist, size=51): if not all(x in [0, 1] for x in bitlist): raise ValueError('Your list has values other than 0-1s') + bitlist = [int(x) for x in bitlist] + values = [iter(bitlist)] * size ints = [] for num in zip_longest(*values): diff --git a/tests/python/test_tools.py b/tests/python/test_tools.py index 6f3b87b62..90dc37516 100644 --- a/tests/python/test_tools.py +++ b/tests/python/test_tools.py @@ -583,6 +583,22 @@ def _count_bits(n): "than 0-1s"): tools.shifting([10]) + def test_shifting_np_uint8(self): + # Verifies that https://github.com/biocore/empress/issues/562 is fixed. 
+ # Checks that, whether the inputs are ints or np.uint8s, the output is + # the same. + bits = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1] + np_bits = [np.uint8(b) for b in bits] + expected = [4035] + + o1 = tools.shifting(bits) + assert o1 == expected + assert type(o1[0]) is int + + o2 = tools.shifting(np_bits) + assert o2 == expected + assert type(o2[0]) is int + def test_filter_feature_metadata_to_tree_1_tip_filtered(self): ft, fi = tools.filter_feature_metadata_to_tree( self.tip_md, self.int_md, self.shorn_tree From 58d4348154a721e0e167099cd47c18d58cd435a7 Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Fri, 2 Aug 2024 18:14:58 -0700 Subject: [PATCH 2/3] STY: fix line length in an unrelated file As far as I can tell, both my system and GitHub Actions are using flake8 7.1.0, so I don't know why only GitHub Actions complained about the 80-character line length. Either way, it's worth fixing. --- tests/python/test_taxonomy_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python/test_taxonomy_utils.py b/tests/python/test_taxonomy_utils.py index 657b451ad..cb8ab23d9 100644 --- a/tests/python/test_taxonomy_utils.py +++ b/tests/python/test_taxonomy_utils.py @@ -57,9 +57,9 @@ def setUp(self): ) def _check_basic_case_worked(self, split_fm, taxcols): - """Checks that a given DataFrame (and list of split-up taxonomy columns) - matches the expected output from running split_taxonomy() on - self.feature_metadata. + """Checks that a given DataFrame (and list of split-up taxonomy + columns) matches the expected output from running split_taxonomy() + on self.feature_metadata. """ # Let's verify that split_fm looks how we expect it to look. 
From 68e2b8c5747ecc826b1d2ec525d28ad7c54c8efc Mon Sep 17 00:00:00 2001 From: Marcus Fedarko Date: Sun, 4 Aug 2024 07:06:15 -0700 Subject: [PATCH 3/3] DOC: add comment to change made ref'ing #562 --- empress/tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/empress/tools.py b/empress/tools.py index c5b7b3c4c..678e5c3b0 100644 --- a/empress/tools.py +++ b/empress/tools.py @@ -374,6 +374,9 @@ def shifting(bitlist, size=51): if not all(x in [0, 1] for x in bitlist): raise ValueError('Your list has values other than 0-1s') + # Convert all the bits to ints -- if they are np.uint8, etc. instead and + # we don't convert them, then this can cause problems: see + # https://github.com/biocore/empress/issues/562 bitlist = [int(x) for x in bitlist] values = [iter(bitlist)] * size