From bb6b7005a5f624dab5ac781e50e30e3bc8ed855b Mon Sep 17 00:00:00 2001 From: asistradition Date: Wed, 16 Feb 2022 15:22:04 -0500 Subject: [PATCH] Fix parsing error v0.3.2 --- CHANGELOG.md | 4 ++++ inferelator_prior/motifs/meme.py | 4 ++++ inferelator_prior/motifs/pwm.py | 13 +++++++------ setup.py | 4 ++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34343d9..12cd0b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### Version 0.3.2 + +* Corrected a parsing error when reading CisBP PWM files + ### Version 0.3.1 * Added `link_atac_bed_to_genes` module to link specific peaks from a BED file to nearby genes diff --git a/inferelator_prior/motifs/meme.py b/inferelator_prior/motifs/meme.py index 7a95a59..49a3e46 100644 --- a/inferelator_prior/motifs/meme.py +++ b/inferelator_prior/motifs/meme.py @@ -39,6 +39,10 @@ def read(file_descript): def write(file_descript, motifs, alphabet=None, background=None, mode="w"): motifs = [motifs] if not isinstance(motifs, (list, tuple, pd.Series)) else motifs + + if len(motifs) == 0: + raise RuntimeError("Unable to write motif file with zero motifs") + alphabet = alphabet if alphabet is not None else motifs[0].alphabet background = np.array([[1 / len(alphabet)] * len(alphabet)]) if background is None else background diff --git a/inferelator_prior/motifs/pwm.py b/inferelator_prior/motifs/pwm.py index 48300fd..3bc3d51 100644 --- a/inferelator_prior/motifs/pwm.py +++ b/inferelator_prior/motifs/pwm.py @@ -23,19 +23,20 @@ def read(pwm_file_list, info_file, background=None, direct_only=False, pwm_has_i for pwm_file in pwm_file_list: pwm_id = os.path.splitext(os.path.basename(pwm_file))[0] + match_id = info_df[MOTIF_COL] == pwm_id - if pwm_id not in info_df[MOTIF_COL]: + if (match_id).sum() == 0: pwm_not_present.append(pwm_id) continue if direct_only: - direct = info_df.loc[info_df[MOTIF_COL] == pwm_id, TF_STATUS_COL].str.contains("D") + direct = info_df.loc[match_id, TF_STATUS_COL].str.contains("D") if not direct.any(): continue else: - pwm_names = info_df.loc[(info_df[MOTIF_COL] == pwm_id) & (info_df[TF_STATUS_COL] == "D"), TF_NAME_COL] + pwm_names = info_df.loc[match_id & (info_df[TF_STATUS_COL] == "D"), TF_NAME_COL] else: - pwm_names = info_df.loc[info_df[MOTIF_COL] == pwm_id, TF_NAME_COL] + pwm_names = info_df.loc[match_id, TF_NAME_COL] pwm_name = "/".join(pwm_names) @@ -63,8 +64,8 @@ def read(pwm_file_list, info_file, background=None, direct_only=False, pwm_has_i motifs.append(motif) if len(pwm_not_present) > 0: - print("{pwm} PWM files not found in in {c} of {cf}".format(pwm=len(pwm_not_present), c=MOTIF_COL, cf=info_file)) + print(f"{len(pwm_not_present)} PWM files not found in in {MOTIF_COL} of {info_file}") if len(pwm_malformed) > 0: - print("{pwm} PWM files malformed and improperly parsed".format(pwm=len(pwm_malformed))) + print(f"{len(pwm_malformed)} PWM files malformed and improperly parsed") return motifs diff --git a/setup.py b/setup.py index 43d09fd..fa5f01e 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,9 @@ import os from setuptools import setup, find_packages -install_requires = ["numpy", "pandas>=1.0", "HTSeq", "pybedtools", "scipy", "pathos", "sklearn"] +install_requires = ["numpy", "pandas>=1.0", "HTSeq", "pybedtools", "scipy", "pathos", "sklearn", "tqdm"] tests_require = ["coverage", "nose", "pysam"] -version = "0.3.1" +version = "0.3.2" # Description from README.md base_dir = os.path.dirname(os.path.abspath(__file__))