-
Notifications
You must be signed in to change notification settings - Fork 0
/
runMAGs.sh
59 lines (39 loc) · 1.41 KB
/
runMAGs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env bash
# Stage 1: activate the conda environment, record the list of MAG protein
# files and run BUSCO (protein mode, eukaryota lineage) on each of them.
#
# Environment setup (run once):
# mamba create -n phyloeuk -c bioconda -c conda-forge trimal mamba mafft busco=5 fasttree perl-bioperl perl-file-slurp bioawk

# `conda activate` is a shell function that only exists once conda's shell
# hook has been loaded. A non-interactive script does not read ~/.bashrc, so
# load the hook explicitly unless the function is already defined.
if ! declare -F conda >/dev/null; then
  conda_base=$(conda info --base) || {
    echo "error: conda is not available on PATH" >&2
    exit 1
  }
  # shellcheck source=/dev/null
  source "${conda_base}/etc/profile.d/conda.sh" || exit 1
fi
conda activate phyloeuk || {
  echo "error: could not activate conda environment 'phyloeuk'" >&2
  exit 1
}

cd MAGs || {
  echo "error: directory 'MAGs' not found" >&2
  exit 1
}

# Collect the input files with a glob instead of parsing `ls`; nullglob is
# enabled only for this expansion so an empty directory yields an empty array.
shopt -s nullglob
mags=(*faa)
shopt -u nullglob
if ((${#mags[@]} == 0)); then
  echo "error: no *faa files found in MAGs/" >&2
  exit 1
fi

# One file name per line, same format as the previous `ls *faa` output.
printf '%s\n' "${mags[@]}" > ../listMAGs.txt

for i in "${mags[@]}"; do
  # Output directory is <name>_busco; a failure on one MAG does not stop the others.
  busco -m prot -c 8 -i "$i" -o "${i%.faa}_busco" -l eukaryota ||
    echo "warning: busco failed for $i" >&2
done
# Stage 2: create one directory per gene listed in ../listGenes.tsv
# (BuscoGenes/<gene>.faa/) and copy every single-copy BUSCO sequence found
# for each MAG into the directory of its gene as <MAG>_<gene>.faa.
if [[ ! -r ../listGenes.tsv ]]; then
  echo "error: ../listGenes.tsv not found or not readable" >&2
  exit 1
fi

mkdir -p BuscoGenes

# Only the first whitespace-separated field of each line is used as the gene
# id; blank lines are skipped and a missing final newline is tolerated.
while read -r gene _ || [[ -n "$gene" ]]; do
  [[ -n "$gene" ]] || continue
  mkdir -p "BuscoGenes/${gene}.faa"
done < ../listGenes.tsv

for i in *busco; do
  seq_dir="$i/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences"
  for j in "$seq_dir"/*faa; do
    # Unmatched glob (no single-copy sequences for this MAG): nothing to copy.
    [[ -f "$j" ]] || continue
    gene_file=${j##*/}
    # Genes that are not in listGenes.tsv have no target directory; skip them
    # explicitly. The previous `2>&1 >/dev/null` was meant to hide the cp
    # error for this case but, in that order, left stderr on the terminal.
    [[ -d "BuscoGenes/$gene_file" ]] || continue
    cp -- "$j" "BuscoGenes/$gene_file/${i%_busco}_$gene_file"
  done
done
# Stage 3: for every sequence file in every gene directory, write a copy named
# <file>_rename.faa in which each '>' is followed by the source file name
# (e.g. ">MAG_gene.faa..."), so sequences stay traceable after concatenation.
cd BuscoGenes || {
  echo "error: cannot enter BuscoGenes" >&2
  exit 1
}

# The commands are now run directly instead of through a generated helper
# script; remove a stale helper left by earlier runs.
rm -f toRenameFasta.sh

for i in *.faa; do
  for j in "$i"/*; do
    # Skip unmatched globs (empty gene directory) and non-files.
    [[ -f "$j" ]] || continue
    # Skip outputs of a previous run, otherwise they would be renamed again
    # and end up duplicated in the concatenated files.
    [[ "$j" == *_rename.faa ]] && continue
    # The prefix is passed through the environment so that characters such as
    # '@', '$' or '/' in a file name are never interpreted as Perl code.
    FASTA_PREFIX="${j##*/}" perl -pe 's/>/>$ENV{FASTA_PREFIX}/g' "$j" > "${j%.faa}_rename.faa"
  done
done
# Stage 4: concatenate the renamed sequences of each gene directory into a
# single file concatGenes/<gene>.faa.
mkdir -p concatGenes

for i in *faa; do
  # Only gene directories are processed; this also skips an unmatched glob.
  [[ -d "$i" ]] || continue
  renamed=("$i"/*rename.faa)
  if [[ -e "${renamed[0]}" ]]; then
    cat -- "${renamed[@]}" > "concatGenes/$i"
  else
    # No sequence for this gene: still create an empty file, as before, so
    # the gene keeps an entry in concatGenes/.
    : > "concatGenes/$i"
  fi
done
# Stage 5: add the MAG sequences of each gene to the corresponding trimmed
# reference alignment with mafft (--add/--keeplength), writing
# align/<gene>_align_all.faa. When the result is empty, the reference
# alignment is copied in its place.
cd concatGenes || {
  echo "error: cannot enter concatGenes" >&2
  exit 1
}
mkdir -p align

# Directory holding the reference alignments, relative to concatGenes/.
ref_align_dir=../../../reference/BuscoGenes/concatGenes/align

for i in *faa; do
  [[ -f "$i" ]] || continue
  # mafft failures are not fatal here: they leave an empty output file that
  # the fallback loop below replaces with the reference alignment.
  mafft --quiet --add "$i" --keeplength "$ref_align_dir/${i%.faa}_align_trim.faa" > "align/${i%.faa}_align_all.faa"
done

# Glob + size test instead of word-splitting the output of `find`.
for i in align/*align_all.faa; do
  # Only regular files of size zero need the fallback.
  [[ -f "$i" && ! -s "$i" ]] || continue
  base=${i#align/}
  cp -- "$ref_align_dir/${base%_all.faa}_trim.faa" "$i"
done
# Stage 6: go back to the project root (three levels up from
# MAGs/BuscoGenes/concatGenes) and run the project's Perl script on the
# alignment directory together with the reference and MAG lists.
cd ../../.. || {
  echo "error: cannot return to the project root" >&2
  exit 1
}
perl scripts/concatenateFastaReferenceMAGs.pl MAGs/BuscoGenes/concatGenes/align listReference.txt listMAGs.txt