ASE

#Generating allele counts

java -jar GenomeAnalysisTK.jar -T ASEReadCounter -R oryCun2.fa -I Samplel_AA.bam -o Samplel_AA.csv -sites heterozygous_coding_map_1_non_repeat.vcf -U ALLOW_N_CIGAR_READS -minDepth 10 --minMappingQuality 20 --minBaseQuality 20 --countOverlapReadsType COUNT_FRAGMENTS_REQUIRE_SAME_BASE 
java -jar GenomeAnalysisTK.jar -T ASEReadCounter -R oryCun2.fa -I Samplel_BB.bam -o Samplel_BB.csv -sites heterozygous_coding_map_1_non_repeat.vcf -U ALLOW_N_CIGAR_READS -minDepth 10 --minMappingQuality 20 --minBaseQuality 20 --countOverlapReadsType COUNT_FRAGMENTS_REQUIRE_SAME_BASE 

##Rscript
#Calculating effective library size
correction_factors<-read.table("Correction_factor_AA_BB.txt",header=T) #Generated from edgeR

#Here, we sum up the read counts from two parental genomes and normalized based on effective library size generated by edgeR

individual_list<-read.table("Samples_AA.list", stringsAsFactors = F)
for(i in 1:nrow(individual_list))
{
  AA<-get(paste(individual_list[i,1],"_AA",sep=""))#Allele count from parental genome AA
  BB<-get(paste(individual_list[i,1],"_BB",sep=""))#Allele count from parental genome BB
  
  ##Normalize based on library size
  AA[,7:9]<-round(AA[,7:9]/correction_factors[which(correction_factors[,1]==as.character(paste(individual_list[i,1],"_AA",sep=""))),2],0)
  BB[,7:9]<-round(BB[,7:9]/correction_factors[which(correction_factors[,1]==as.character(paste(individual_list[i,1],"_BB",sep=""))),2],0)
  ##merging 
  tmp_merged<-merge(AA,BB,by="variantID")
  tmp_merged$refCount.x=as.numeric(tmp_merged$refCount.x)+as.numeric(tmp_merged$refCount.y)
  tmp_merged$altCount.x=as.numeric(tmp_merged$altCount.x)+as.numeric(tmp_merged$altCount.y)
  tmp_merged$totalCount.x=tmp_merged$totalCount.x+tmp_merged$totalCount.y
  tmp_merged$lowMAPQDepth.x=tmp_merged$lowMAPQDepth.x+tmp_merged$lowMAPQDepth.y
  tmp_merged$lowBaseQDepth.x=tmp_merged$lowBaseQDepth.x+tmp_merged$lowBaseQDepth.y
  tmp_merged$rawDepth.x=tmp_merged$rawDepth.x+tmp_merged$rawDepth.y
  tmp_merged$otherBases.x=tmp_merged$otherBases.x+tmp_merged$otherBases.y
  tmp_merged$improperPairs.x=tmp_merged$improperPairs.x+tmp_merged$improperPairs.y
  tmp_merged=tmp_merged[,c(2:4,1,5:21)]
  colnames(tmp_merged)=c(colnames(header),"Exon_Start","Exon_End","gene_exon_id")
  assign(paste(as.character(individual_list[i,1]),"_AA_BB",sep=""),tmp_merged)
  write.table(get(paste(as.character(individual_list[i,1]),"_AA_BB",sep="")),paste(as.character(individual_list[i,1]),"_AA_BB",sep=""),col.names=T,row.names = F,quote = F,sep="\t")
}

data<-read.table("Sample_AA_BB",header=T)
data<-cbind.data.frame(data[,1],data[,2]-1,data[,2:13],-1,-1,-100,-1)
colnames(data)[c(1:3,15:18)]<-c("Chr","Start","End","PValue","ABS_Imb","MValue","FDR")
cntr=0
for(i in 1:nrow(data))
{
	cntr=cntr+1
       if(data$totalCount[i]>=10)
       {
               data$PValue[i]=binom.test((data$refCount[i]),data$totalCount[i],0.5)$p.value
               data$ABS_Imb[i]=abs(0.5-data$refCount[i]/(data$totalCount[i]))
               data$MValue[i]=log2(data$refCount[i]/(1+data$altCount[i]))
       }
       print(cntr)
}
data$FDR<-p.adjust(data$PValue,method="fdr")
write.table(data,"Sample_AA_BB_summed_binom.bed",sep="\t",quote=F,row.names=F,col.names=F,append=F)

#Intersecting with annotation
intersectBed -wa -wb -a Sample_AA_BB.out -b merged.flat.gff | awk '{OFS="\t";print $1,$2,$3,$1"_"$2"_"$3,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$22,$23,$27,$28,$29,$30,$31,$32}' | sed 's/gene_id	//' | sed 's/;.*num	/_/' | sed 's/;.*//' | sort -k4,4 -u >Sample_AA_BB-merged.flat.gff.bed