Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix BP tree corruption due to numpy uints #563

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions empress/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,19 +345,23 @@ def shifting(bitlist, size=51):
Parameters
----------
bitlist: list of int
The input list of 0-1
The input list of bits (0 or 1). Depending on the version of iow
installed, the entries in this list might be of a slightly different
type (e.g. np.uint8).
size: int
The size of the buffer

Returns
-------
list of int
Representation of the 0-1s as a list of int
Representation of the bits as a list of int. Regardless of the types in
the input bitlist, the entries in the output list will always have type
int.

Raises
------
ValueError
If any of the list values is different than 0 or 1
If any of the entries in bitlist is not equal to 0 or 1.

References
----------
Expand All @@ -370,6 +374,11 @@ def shifting(bitlist, size=51):
if not all(x in [0, 1] for x in bitlist):
raise ValueError('Your list has values other than 0-1s')

# Convert all the bits to ints -- if they are np.uint8, etc. instead and
# we don't convert them, then this can cause problems: see
# https://github.com/biocore/empress/issues/562
bitlist = [int(x) for x in bitlist]

values = [iter(bitlist)] * size
ints = []
for num in zip_longest(*values):
Expand Down
6 changes: 3 additions & 3 deletions tests/python/test_taxonomy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def setUp(self):
)

def _check_basic_case_worked(self, split_fm, taxcols):
"""Checks that a given DataFrame (and list of split-up taxonomy columns)
matches the expected output from running split_taxonomy() on
self.feature_metadata.
"""Checks that a given DataFrame (and list of split-up taxonomy
columns) matches the expected output from running split_taxonomy()
on self.feature_metadata.
"""

# Let's verify that split_fm looks how we expect it to look.
Expand Down
16 changes: 16 additions & 0 deletions tests/python/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,22 @@ def _count_bits(n):
"than 0-1s"):
tools.shifting([10])

def test_shifting_np_uint8(self):
    # Regression check for https://github.com/biocore/empress/issues/562:
    # tools.shifting() must return the same list, with entries of the
    # built-in int type, whether the input bits are ints or np.uint8s.
    plain_bits = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1]
    uint8_bits = [np.uint8(bit) for bit in plain_bits]
    expected = [4035]

    # Run the plain-int input first, then the np.uint8 input, applying the
    # same two checks to each result.
    for bitlist in (plain_bits, uint8_bits):
        observed = tools.shifting(bitlist)
        assert observed == expected
        assert type(observed[0]) is int


def test_filter_feature_metadata_to_tree_1_tip_filtered(self):
ft, fi = tools.filter_feature_metadata_to_tree(
self.tip_md, self.int_md, self.shorn_tree
Expand Down
Loading