forked from yal054/snATAC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnATAC.nmf.bam.py
56 lines (47 loc) · 1.66 KB
/
snATAC.nmf.bam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/python
import argparse
from multiprocessing import Pool
parser = argparse.ArgumentParser(description='filter bam based on QNAMES')
parser.add_argument('--bam', type=str, dest="bam", help='bam file')
parser.add_argument('--statH', type=str, dest="statH", help='input statH matrix')
parser.add_argument('-o', '--outPrefix', type=str, dest="outPrefix", help='output prefix')
args = parser.parse_args()
import numpy as np
import pysam
from time import perf_counter as pc
def run():
""" Run standard NMF on rank """
start_time = pc()
""" init input files """
bamf = args.bam
statHf = args.statH
outPrefix = args.outPrefix
print("filter out bam files")
generate_bams(bamf, statHf, outPrefix)
end_time = pc()
print('Used (secs): ', end_time - start_time)
def generate_bams(bamf, statHf, prefix):
o_stat_H = np.genfromtxt(statHf, dtype=None, names=True)
rank = np.max(o_stat_H['class0']).astype(int) + 1
p = Pool(rank)
for idx in range(rank):
p.apply_async(generate_bam_worker, (bamf, o_stat_H, idx, prefix))
p.close()
p.join()
def generate_bam_worker(bamf, o_stat_H, r, prefix):
bamF = pysam.AlignmentFile(bamf)
qnames = list(o_stat_H[np.where(o_stat_H['class0']==r)]['xgi'].astype(str))
qnames_set = set(qnames)
n = r + 1
bam_fname = prefix + "." + "metacell_" + str(n) + "." + "bam"
print("For metaCell =", n, "The filtered bam is writing to:", bam_fname)
obam = pysam.AlignmentFile(bam_fname, "wb", template=bamF)
for b in bamF.fetch(until_eof=True):
if b.query_name.split(':')[0] in qnames_set:
obam.write(b)
obam.close()
bamF.close()
print("metaCell =", n, " witting finished.")
if __name__ == "__main__":
"""filter bam based on QNAMES"""
run()