Skip to content

Commit

Permalink
Merge pull request #45 from cmatKhan/develop
Browse files Browse the repository at this point in the history
debugging after full run
  • Loading branch information
cmatKhan authored Aug 31, 2023
2 parents 0b6ab79 + 0c75c77 commit 9ddf2d2
Show file tree
Hide file tree
Showing 19 changed files with 116 additions and 189 deletions.
156 changes: 41 additions & 115 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "isocomp"
version = "0.1.0"
version = "0.2.1"
description = ""
authors = ["Yutong Qiu <[email protected]>", "Chia Sin Liew <[email protected]>", "Rupesh Kesharwani <[email protected]>",
"Bida Gu <[email protected]>", "chase mateusiak <[email protected]>",
Expand Down Expand Up @@ -30,6 +30,7 @@ isocomp = "isocomp:__main__.main"

[tool.poetry.group.dev.dependencies]
matplotlib = "^3.7.1"
autopep8 = "^2.0.4"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
6 changes: 3 additions & 3 deletions scripts/create_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

warnings.filterwarnings("ignore")

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['get_all_windows','further_merge','main']

Expand Down Expand Up @@ -192,11 +192,11 @@ def parse_args(args=None):

def main(args=None):

logging.debug('cmd ling arguments: {args}')
logger.debug('cmd ling arguments: {args}')
args = parse_args(args)

# Check inputs
logging.info('checking input...')
logger.info('checking input...')
input_path_list = [args.gene_window_name]
for input_path in input_path_list:
if not os.path.exists(input_path):
Expand Down
16 changes: 8 additions & 8 deletions scripts/find_unique_isoform.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@

# strict positional/optional arguments checking
argsDict = vars(parser.parse_args())
logging.info('Parsing Input Arguements...')
logging.info('Parsing Input Arguements...')
logger.info('Parsing Input Arguements...')
logger.info('Parsing Input Arguements...')
for key, value in argsDict.items():
if key in posList: logging.info('Required Argument - %s: %s' %(key, value))
if key in optList: logging.info('Optional Argument - %s: %s' %(key, value))
if key in posList: logging.info('Required Argument - %s: %s' %(key, value))
if key in optList: logging.info('Optional Argument - %s: %s' %(key, value))
if key in posList: logger.info('Required Argument - %s: %s' %(key, value))
if key in optList: logger.info('Optional Argument - %s: %s' %(key, value))
if key in posList: logger.info('Required Argument - %s: %s' %(key, value))
if key in optList: logger.info('Optional Argument - %s: %s' %(key, value))
vars()[key] = value # assign values of arguments into shorthand global variables


Expand Down Expand Up @@ -388,6 +388,6 @@ def compareFunc(seqDict:dict[tuple[str,str,str], list[isoform]], outPre:str, min
compareFunc(seqDict, outPre, minPercent)
compareFunc(seqDict, outPre, minPercent)

logging.info('End of Program\n')
logging.info('End of Program\n')
logger.info('End of Program\n')
logger.info('End of Program\n')

6 changes: 3 additions & 3 deletions scripts/rename_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import sys
import os

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)


__all__ = ['rename_fa_desc','main']
Expand Down Expand Up @@ -76,11 +76,11 @@ def parse_args(args=None):

def main(args=None):

logging.debug('cmd ling arguments: {args}')
logger.debug('cmd ling arguments: {args}')
args = parse_args(args)

# Check inputs
logging.info('checking input...')
logger.info('checking input...')
input_path_list = [args.input]
for input_path in input_path_list:
if not os.path.exists(input_path):
Expand Down
6 changes: 3 additions & 3 deletions src/isocomp/Compare/IsoformLibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# local imports
from isocomp.Coordinates import Window

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['IsoformLibrary']

Expand Down Expand Up @@ -58,7 +58,7 @@ def clustered_gtf_path(self) -> str:

@clustered_gtf_path.setter
def clustered_gtf_path(self, new_path: str) -> None:
logging.debug('trying to set new clustered_gtf_path: {new_path}')
logger.debug('trying to set new clustered_gtf_path: {new_path}')
if not os.path.exists(new_path):
raise FileNotFoundError(f'{new_path} does not exist')
# TODO allow gff and check format, not extension
Expand Down Expand Up @@ -128,7 +128,7 @@ def fasta_dict(self, new_fasta_dict: dict) -> None:
index file does not exist
"""

logging.debug("new dict: %s", new_fasta_dict)
logger.debug("new dict: %s", new_fasta_dict)

# check type
if not isinstance(new_fasta_dict, dict):
Expand Down
2 changes: 1 addition & 1 deletion src/isocomp/Compare/align_isoforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# external dependencies
import edlib

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['align_isoforms']

Expand Down
5 changes: 3 additions & 2 deletions src/isocomp/Compare/compare_isoforms_in_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .align_isoforms import align_isoforms
from .IsoformLibrary import IsoformLibrary

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['compare_isoforms_in_cluster']

Expand Down Expand Up @@ -126,7 +126,8 @@ def compare_isoforms_in_cluster(
# same strand, overlap threshold, different subjects
else:
# group transcripts by coordinates; return unique
cluster_gtf_grouped = cluster_gtf.df.groupby(by=['Start', 'End', 'Strand'], as_index=True)
cluster_gtf_grouped = cluster_gtf.df\
.groupby(by=['Start', 'End', 'Strand'], as_index=True)

for group, cluster_gtf_unique in cluster_gtf_grouped:
if len(cluster_gtf_unique) > 1:
Expand Down
2 changes: 1 addition & 1 deletion src/isocomp/Compare/filter_comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['filter_comparisons']

Expand Down
9 changes: 5 additions & 4 deletions src/isocomp/Compare/find_unique_isoforms.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging

import pandas as pd
from pandas import DataFrame

from .IsoformLibrary import IsoformLibrary
from .compare_isoforms_in_cluster import compare_isoforms_in_cluster
from .filter_comparisons import filter_comparisons

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['find_unique_isoforms']

Expand Down Expand Up @@ -34,11 +34,12 @@ def find_unique_isoforms(clustered_gtf: str,
# iterate over clusters and compare isoforms
for cluster in il.cluster_list:
cluster = str(cluster)
logger.debug(cluster)
# only compare if there is more than one isoform in the window
if il.get_cluster_coord(cluster).score > 1:
all_comparisons\
.extend(compare_isoforms_in_cluster(il, cluster))
# filter the result of the comparisons
compare_df_fltr = filter_comparisons(all_comparisons)
#compare_df_fltr = filter_comparisons(all_comparisons)

return compare_df_fltr
return pd.DataFrame(all_comparisons) #compare_df_fltr
2 changes: 1 addition & 1 deletion src/isocomp/Compare/vector_crosser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['vector_crosser']

Expand Down
12 changes: 6 additions & 6 deletions src/isocomp/Coordinates/Window.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import re

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['Window']

Expand Down Expand Up @@ -48,7 +48,7 @@ def chr(self):
@chr.setter
def chr(self, new_chr: str):
if not isinstance(new_chr, str):
logging.debug(new_chr)
logger.debug(new_chr)
raise ValueError('chr must be a string')
self._chr = new_chr

Expand All @@ -61,7 +61,7 @@ def start(self):
@start.setter
def start(self, new_start: int):
if not isinstance(new_start, int):
logging.debug(new_start)
logger.debug(new_start)
raise ValueError('start must be an integer')
self._start = new_start

Expand All @@ -74,7 +74,7 @@ def end(self):
@end.setter
def end(self, new_end: int):
if not isinstance(new_end, int):
logging.debug(new_end)
logger.debug(new_end)
raise ValueError('end must be an integer')
self._end = new_end

Expand All @@ -87,7 +87,7 @@ def strand(self):
@strand.setter
def strand(self, new_strand: str):
if new_strand not in self._STRAND_VALUES:
logging.debug(new_strand)
logger.debug(new_strand)
raise ValueError('strand value: %s is not one of the recognized '
'strand values: %s'
% (new_strand, ','.join(self._STRAND_VALUES)))
Expand All @@ -113,7 +113,7 @@ def score(self):
@score.setter
def score(self, new_score: int):

logging.debug('new score: %s', new_score)
logger.debug('new score: %s', new_score)

if not isinstance(new_score, int):
raise ValueError('Score must be an integer')
Expand Down
6 changes: 3 additions & 3 deletions src/isocomp/Coordinates/create_comparison_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .update_source import update_source

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['create_comparison_windows']

Expand Down Expand Up @@ -39,7 +39,7 @@ def create_comparison_windows(gtf_list: list,
transcript_id, gene_id, Cluster
"""
# check input
logging.debug(gtf_list)
logger.debug(gtf_list)
if not isinstance(gtf_list, list):
raise IOError('pyranges_list must be type list')
for path in gtf_list:
Expand Down Expand Up @@ -67,7 +67,7 @@ def create_comparison_windows(gtf_list: list,
concat_ranges = concat_ranges[concat_ranges.Feature == feature]
clustered_ranges = concat_ranges.cluster(**kwargs)

logging.debug('number of merged ranges: %s',
logger.debug('number of merged ranges: %s',
str(max(clustered_ranges.Cluster)))

return clustered_ranges
2 changes: 1 addition & 1 deletion src/isocomp/Coordinates/update_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# ext dependencies
import pyranges as pr

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['update_source']

Expand Down
4 changes: 1 addition & 3 deletions src/isocomp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
from . import Coordinates
from . import Compare

__version__ = '0.1.0'
from . import Compare
16 changes: 8 additions & 8 deletions src/isocomp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .Coordinates import create_comparison_windows
from .Compare import find_unique_isoforms

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)


def parse_args() -> Callable[[list], argparse.Namespace]:
Expand Down Expand Up @@ -222,7 +222,7 @@ def __fasta_to_fastq(args=None) -> None:
fasta, a path to a fasta format file. Defaults to None.
"""

logging.debug(args)
logger.debug(args)

for seq in SeqIO.parse(args.fasta, "fasta"):
seq.letter_annotations["solexa_quality"] = [40] * len(seq)
Expand All @@ -242,13 +242,13 @@ def __create_windows_gtfs(args=None) -> None:
FileExistsError: raised if the output path exists and overwrite is
False
"""
logging.debug(args)
logger.debug(args)

# TODO consider stripping extension, if one is passed, from output_prefix
output_filename = args.output_prefix+'.gtf' \
if args.output_prefix \
else 'clustered_regions.gtf'
logging.debug(output_filename)
logger.debug(output_filename)

if os.path.exists(output_filename) and not args.overwrite:
raise FileExistsError(f'file with name {output_filename} already '
Expand Down Expand Up @@ -277,7 +277,7 @@ def __find_unique_isoforms(args=None) -> None:
column names
"""

logging.debug(args)
logger.debug(args)

for path in [args.clustered_gtf, args.fasta_map]:
if not os.path.exists(path):
Expand All @@ -287,7 +287,7 @@ def __find_unique_isoforms(args=None) -> None:
output_filename = args.output_prefix+'.csv' \
if args.output_prefix \
else 'unique_isoforms.csv'
logging.debug(output_filename)
logger.debug(output_filename)

if os.path.exists(output_filename) and not args.overwrite:
raise FileExistsError(f'file with name {output_filename} already '
Expand Down Expand Up @@ -357,8 +357,8 @@ def main(args=None) -> None:
}
dictConfig(log_config)
# log the cmd line arguments at the debug level
logging.debug(sys.argv)
logging.debug(str(args))
logger.debug(sys.argv)
logger.debug(str(args))

# note that this works b/c the subparser set_defaults function attribute
# is set.
Expand Down
4 changes: 2 additions & 2 deletions src/isocomp/utils/fasta_to_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# external dependencies
from Bio import SeqIO

logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

__all__ = ['fasta_to_fastq']

Expand All @@ -16,7 +16,7 @@ def fasta_to_fastq(fasta_file: str) -> None:
fasta_file (str): path to a fasta file
"""

logging.debug(fasta_file)
logger.debug(fasta_file)

for seq in SeqIO.parse(fasta_file, "fasta"):
seq.letter_annotations["solexa_quality"] = [40] * len(seq)
Expand Down
Loading

0 comments on commit 9ddf2d2

Please sign in to comment.