forked from akahanaton/multiz
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_genome_mysql.sh
executable file
·75 lines (73 loc) · 3.66 KB
/
prepare_genome_mysql.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#--------------------------------------------------
# prepare_genome_mysql.sh
#
# For each species directory under batsGenome/, derive the assembly id
# (<directory name> + "1"), make sure the per-assembly working directories
# and the gbdb directory exist, then load the previously generated
# chromInfo / AGP / simple-repeat / RepeatMasker .out files into the
# database named after the id using the kentUtils loaders.
#
# The steps that generate those input files are kept below as commented-out
# commands, in the position where they were originally run.
#--------------------------------------------------

# prepare_species SPECIES_DIR
#   Runs every active step for one species directory.
#   Sets the globals speDir, genomic, id, mskDir, chrDir, trfDir, nibDir,
#   lavDir, pslDir, chainDir and maskedGenomic (the commented-out steps
#   refer to them by these names).
#   Stdout: the species directory, the genome FASTA path, and the
#           hgLoadSqlTab command line that is about to run.
#   Returns: 0 when every loader succeeded; 1 when the species was skipped
#            (missing directory, no or ambiguous *genomic.fa, working
#            directories could not be created) or any loader failed.
prepare_species() {
    speDir=$1
    if [ ! -e "$speDir" ]; then
        echo "prepare_genome_mysql: no such species directory: $speDir" >&2
        return 1
    fi
    echo "$speDir"

    # Exactly one *genomic.fa is expected. An unmatched glob stays literal
    # (caught by the -e test) and several matches would make the file name
    # ambiguous, so both cases skip the species instead of building
    # directory names from a bogus value.
    set -- "$speDir"/*genomic.fa
    if [ "$#" -ne 1 ] || [ ! -e "$1" ]; then
        echo "prepare_genome_mysql: expected exactly one *genomic.fa in $speDir" >&2
        return 1
    fi
    genomic=$(basename "$1")
    ls "$speDir/$genomic"
    id="$(basename "$speDir")1"
    #--------------------------------------------------
    # if [ $id == "eidHel2" ]; then
    # continue
    # fi
    #--------------------------------------------------
    mskDir="$speDir/$genomic.mask"
    chrDir="$speDir/$id.chrs1"
    trfDir="$speDir/$id.trf1"
    nibDir="$speDir/$id.nib1"
    lavDir="$speDir/$id.lav1"
    pslDir="$speDir/$id.psl1"
    chainDir="$speDir/$id.chain1"
    #--------------------------------------------------
    # rm -rf $chrDir
    # rm -rf $trfDir
    #--------------------------------------------------
    # -p makes this a no-op for directories left over from an earlier run.
    mkdir -p "$chrDir" "$trfDir" "$nibDir" "$mskDir" "$lavDir" "$pslDir" "$chainDir" || return 1

    # Only the commented-out generation steps read the masked genome, so a
    # missing file is reported but does not stop the database load.
    maskedGenomic="$mskDir/$genomic.masked"
    if [ ! -e "$maskedGenomic" ]; then
        echo "prepare_genome_mysql: warning: $maskedGenomic not found" >&2
    fi
    #--------------------------------------------------
    # # qsub -N $id -pe smp 16 $HOME/software/src/RepeatMasker/RepeatMasker -pa 16 -s -norna -species mammal -dir $mskDir $fa
    # bioawk -c fastx '{l=length($seq);nNum=gsub("N","N",$seq);if(l>500 && nNum/l<0.8){print ">"$name"\n"$seq}}' $maskedGenomic > $maskedGenomic.filtered
    # faSplit byName $maskedGenomic.filtered $chrDir/
    # python3 ./seprate_fasta.py 100 $maskedGenomic $chrDir
    # faSize $maskedGenomic -detailed > $speDir/$id.chrom.sizes
    # awk -v id=$id '{printf "%s\t%d\t/synology/gbdb/%s/%s.2bit\n", $1, $2, id, id}' $speDir/$id.chrom.sizes > $speDir/$id.chromInfo.tab
    # hgFakeAgp -minContigGap=1 $maskedGenomic $speDir/$id.fake.agp
    # faToTwoBit $maskedGenomic $speDir/$id.2bit
    # for i in `find $chrDir -name "*.fa"`
    # do
    # curChr=`basename $i .fa`
    # trfBig -bedAt=$trfDir/$curChr.bed -tempDir=$trfDir $i $trfDir/$curChr.fa
    # faToNib $trfDir/$curChr.fa $nibDir/$curChr.nib
    # done
    #--------------------------------------------------
    #--------------------------------------------------
    # awk '{print NR%3000, $1}' $speDir/$id.chrom.sizes > $speDir/$id.chrom.index
    #--------------------------------------------------
    #--------------------------------------------------
    # find $trfDir -name "*.bed" | xargs cat > $speDir/$id.simplerepeat.bed
    #--------------------------------------------------
    #--------------------------------------------------
    # grep '^>' $maskedGenomic.filtered | wc -l
    # ls -f $nibDir | wc -l
    #--------------------------------------------------

    # Without -p this failed with "File exists" on every re-run. None of the
    # loaders below is handed this path, so a failure here is only reported.
    mkdir -p "/synology/gbdb/$id" ||
        echo "prepare_genome_mysql: warning: could not create /synology/gbdb/$id" >&2
    #--------------------------------------------------
    # faToTwoBit $speDir/$genomic /synology/gbdb/$id/$id.2bit
    #--------------------------------------------------
    #--------------------------------------------------
    # hgsql -e "create database $id;"
    # hgsql $id < /home/gmswenm/software/src/kentUtils/src/hg/lib/grp.sql
    #--------------------------------------------------

    # Each loader is attempted even when an earlier one failed (as before);
    # the failure is remembered so the caller can report it.
    loadStatus=0
    echo "hgLoadSqlTab $id chromInfo $HOME/software/src/kentUtils/src/hg/lib/chromInfo.sql $speDir/$id.chromInfo.tab"
    hgLoadSqlTab "$id" chromInfo "$HOME/software/src/kentUtils/src/hg/lib/chromInfo.sql" "$speDir/$id.chromInfo.tab" || loadStatus=1
    hgGoldGapGl "$id" "$speDir/$id.fake.agp" || loadStatus=1
    # NOTE(review): this .sql path is relative to the current directory,
    # whereas chromInfo.sql above lives under $HOME/software/src - confirm
    # that ./kentUtils exists wherever this script is started from.
    hgLoadBed "$id" simpleRepeat "$speDir/$id.simplerepeat.bed" -sqlTable=./kentUtils/src/hg/lib/simpleRepeat.sql || loadStatus=1
    hgLoadOut "$id" "$mskDir/$genomic.out" || loadStatus=1
    return "$loadStatus"
}

#--------------------------------------------------
# To process every species instead of a single one, loop over
#   batsGenome/[[:lower:]]*[[:upper:]]*
# An unmatched pattern is passed through literally and rejected by
# prepare_species, so no extra guard is needed here.
#--------------------------------------------------
for speDir in batsGenome/eonSpe
do
    prepare_species "$speDir" ||
        echo "prepare_genome_mysql: $speDir did not complete cleanly" >&2
done