forked from sreeramkannan/Shannon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter_trans.py
38 lines (31 loc) · 918 Bytes
/
filter_trans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def filter_trans(fin,fout,length):
    '''Copy the sequences of FASTA file `fin` that are long enough to `fout`.

    A line starting with '>' is treated as a header and remembered. Every
    following non-header line holding at least `length` characters (the line
    terminator is not counted) is written to `fout`, preceded by the most
    recent header line.

    NOTE(review): the header is re-emitted before every qualifying line, so
    this presumably expects one sequence line per record - confirm against
    the files this is run on.

    fin    -- path of the FASTA file to read
    fout   -- path of the file to write (created or overwritten)
    length -- minimum number of sequence characters a line needs to be kept
    '''
    header = None
    # Both files are managed by the with-statement so the input handle is
    # closed as well; the input is opened first so that a missing input
    # does not leave a truncated output file behind.
    with open(fin,'r') as file_in, open(fout,'w') as file_out:
        for line in file_in:
            if line.startswith('>'):
                header = line
                continue
            if header is None:
                # Sequence text before any header belongs to no record.
                continue
            # Measure the sequence without its line terminator so that the
            # threshold means the same thing for every line, including a
            # final line that has no trailing newline.
            if len(line.rstrip('\r\n')) >= length:
                file_out.write(header)
                file_out.write(line)
def filter_duplicates(fin,fout):
    '''Remove duplicates of a FASTA file with the same name.

    The name of a record is the first whitespace-separated token of its
    header line with the leading '>' removed. The first record seen under
    each name is copied to `fout` in full (header and all following lines up
    to the next header); every later record with the same name is dropped.
    Lines that appear before the first header are copied unchanged.

    fin  -- path of the FASTA file to read
    fout -- path of the file to write (created or overwritten)
    '''
    seen_names = set()
    keep_record = True
    # Both files are managed by the with-statement so the input handle is
    # closed as well as the output.
    with open(fin,'r') as file_in, open(fout,'w') as file_out:
        for line in file_in:
            if line.startswith('>'):
                tr_name = line.strip().split()[0][1:]
                keep_record = tr_name not in seen_names
                # Record the name so later headers with it are recognised;
                # without this step no duplicate would ever be detected.
                seen_names.add(tr_name)
            if keep_record:
                file_out.write(line)
#filter_trans('./OneHalf_NewSE_L_100_N_1000000_algo_output/reconstructed.fasta','./OneHalf_NewSE_L_100_N_1000000_algo_output/rec_short.fasta',200)