Skip to content

Commit

Permalink
Added exception handling when running multiple syri tasks in a folder…
Browse files Browse the repository at this point in the history
…. Added utilities for better post-processing syri output.
  • Loading branch information
mnshgl0110 committed Jul 18, 2022
1 parent 767f2c3 commit 79a78be
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 3 deletions.
16 changes: 16 additions & 0 deletions syri/scripts/func.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@
from os import remove


def readfaidxbed(f):
"""
Reads .faidx file from a genome assembly and returns a BED file consisting
for entire chromosomes
"""
from collections import deque
import pybedtools as bt
fabed = deque()
with open(f, 'r') as fin:
for line in fin:
line = line.strip().split()
fabed.append([line[0], 1, int(line[1])])
return list(fabed)
# END


def unlist(nestedList):
"""Take a nested-list as input and return a 1d list of all elements in it"""

Expand Down
131 changes: 129 additions & 2 deletions syri/scripts/regAnno → syri/scripts/reganno
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,125 @@ def getSnpAnno(args, cwdPath):
outData = pd.concat(list(outData))
outData["inChr"] = list(chroms)
outData["inPos"] = list(poses)
print(outData.to_csv(sep="\t", index=False,header=False), end = "")
print(outData.to_csv(sep="\t", index=False, header=False), end="")
# END


def syri2bedpe(args):
input = args.input.name
output = args.output.name
f = args.f
# TODO: Complete this function
with open(input, 'r') as fin, open(output, 'w') as fout:
for line in fin:
line = line.strip().split()
# END


def plotref(refidx, syrireg1, syrireg2, varpos, figout, bw=100000):
"""
:param refbed: path to ref.faidx
:param syrireg: syri regions in BEDPE format
:param varpos: variant positions in BED
:param figout: output figure path
:return:
"""
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from syri.scripts.func import readfaidxbed, mergeRanges
import pybedtools as bt # TODO: remove this dependency
from collections import deque, defaultdict
import numpy as np
import logging

logger = logging.getLogger('plotref')

def _readbed(vp, refbed):
_chrs = set([r[0] for r in refbed])
bincnt = defaultdict(deque)
skipchrs = []
curchr = ''
pos = deque()
added_chrs = list()
with open(vp, 'r') as fin:
for line in fin:
line = line.strip().split()
if len(line) < 3:
logger.warning("Incomplete information in BED file at line: {}. Skipping it.".format("\t".join(line)))
continue
if line[0] not in _chrs:
if line[0] not in skipchrs:
logger.info("Chromosome in BED is not present in FASTA or not selected for plotting. Skipping it. BED line: {}".format("\t".join(line)))
skipchrs.append(line[0])
continue
if curchr == '':
curchr = line[0]
pos.append([int(line[1]), int(line[2])])
elif curchr == line[0]:
pos.append([int(line[1]), int(line[2])])
else:
if line[0] in added_chrs:
logger.error("BED file: {} is not sorted. For plotting tracks, sorted bed file is required. Exiting.".format(vp))
sys.exit()
if len(pos) > 1:
rngs = mergeRanges(np.array(pos))
else:
rngs = pos
chrpos = np.array(list(set([i for r in rngs for i in range(r[0], r[1])])))
# Get bin breakpoints for the chromosome
bins = np.concatenate((np.arange(0, [r[2] for r in refbed if r[0] == curchr][0], bw), np.array([r[2] for r in refbed if r[0] == curchr])), axis=0)
binval = np.histogram(chrpos, bins)[0]
bincnt[curchr] = deque([((bins[i] + bins[i+1])/2, binval[i]/bw) for i in range(len(binval))])
added_chrs.append(curchr)
# Set the new chromosome
curchr = line[0]
pos = deque([[int(line[1]), int(line[2])]])
if len(pos) > 1:
rngs = mergeRanges(np.array(pos))
else:
rngs = pos
chrpos = np.array(list(set([i for r in rngs for i in range(r[0], r[1])])))
# Get bin breakpoints for the chromosome
bins = np.concatenate((np.arange(0, [r[2] for r in refbed if r[0] == curchr][0], bw), np.array([r[2] for r in refbed if r[0] == curchr])), axis=0)
binval = np.histogram(chrpos, bins)[0]
bincnt[curchr] = deque([((bins[i] + bins[i+1])/2, binval[i]/bw) for i in range(len(binval))])
return bincnt
# END

chr_height = 0.3
th = 0.6 # Track height for plotting bedfile data

refbed = readfaidxbed(refidx)
# TODO: remove dependency on pybedtools
syrioutbed = bt.BedTool('/srv/netscratch/dep_mercier/grp_schneeberger/projects/read_vs_assembly/results/human/hg002/reads15kb/svcalls/syri/hg002.reads15kb.winnowmap.hap1syri.bedpe').sort()


bedbin = _readbed(varpos, refbed)
fig = plt.figure()
ax = fig.add_subplot()
ax.set_ylim([0, len(refbed)+1])
ax.set_xlim([0, max([r[2] for r in refbed])])
colors = {'SYN': 'lightgrey', 'INV': '#FFA500', 'TRANS': '#9ACD32', 'DUP': '#00BBFF'}
for i in range(len(refbed)):
patches = deque()
r = refbed[i]
y = len(refbed) - i
ax.add_patch(Rectangle((r[1], y), r[2], chr_height, fill=False, linewidth=0.5))
# ax.add_patch(Rectangle((r[1], y), r[2], chr_height, linewidth=0.5, color='black'))
bed = syrioutbed.filter(lambda b: b.chrom == r[0]).saveas()
for b in bed:
patches.append(Rectangle((b.start, y), b.stop-b.start, chr_height, color=colors[b[6]], linewidth=0))
ax.add_collection(PatchCollection(patches, match_original=True))

chrpos = [k[0] for k in bedbin[r[0]]]
tpos = [k[1] for k in bedbin[r[0]]]
tposmax = max(tpos)
y0 = len(refbed) - i + chr_height
ypos = [(t*th/tposmax)+y0 for t in tpos]
ax.fill_between(chrpos, ypos, y0, color='blue', lw=0.1, zorder=2)


# END


Expand All @@ -109,11 +227,20 @@ if __name__ == "__main__":
parser_region = subparsers.add_parser("region", help="get annotation for the region")
parser_getbed = subparsers.add_parser("getbed", help="get bed file for the regions")
parser_snpanno = subparsers.add_parser("snpanno", help="get annotation for SNPs list")

parser_syri2bedpe = subparsers.add_parser("syri2bedpe", help="Converts the syri.out to BEDPE format")

if len(sys.argv[1:]) == 0:
parser.print_help()
sys.exit()

parser_syri2bedpe.set_defaults(func=syri2bedpe)
parser_syri2bedpe.add_argument('input', help="syri.out file", type=argparse.FileType('r'))
parser_syri2bedpe.add_argument('output', help="output BEDPE file", type=argparse.FileType('w'))
parser_syri2bedpe.add_argument('-f', help="Annotation to filter out. Use multiple times to filter more than one annotation types.", type=str, action='append')




parser_region.set_defaults(func=getRegion)
parser_region.add_argument("chr", help="Chromosome ID", type=str)
parser_region.add_argument("start", help="region start", type=int)
Expand Down
5 changes: 4 additions & 1 deletion syri/scripts/syri.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ def syri(args):
import sys

logger = logging.getLogger("Running SyRI")
os.remove("syri.log")
try:
os.remove("syri.log")
except FileNotFoundError:
pass
# Set CWD and check if it exists
if args.dir is None:
args.dir = os.getcwd() + os.sep
Expand Down

0 comments on commit 79a78be

Please sign in to comment.