-
Notifications
You must be signed in to change notification settings - Fork 0
/
conservation.R
21 lines (18 loc) · 1.05 KB
/
conservation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
ConsScore <- function(seq, eval="1e-4", remote=TRUE, db="refseq_protein"){
if(remote){
blast.xml <- paste(system(paste("blastp -remote -db ", db, " -outfmt 5 -evalue ", eval, " -query ", seq), intern=TRUE), collapse="")
}
tmp <- tempfile()
Blast_fasta_Name <- paste(tmp, "blastFASTA", sep = ".")
ClustalOutName <- paste(tmp, "clustal", sep = ".")
#Writes the information of interest from the blast-query to a fasta-format file
blast.tree <- xmlInternalTreeParse(blast.xml, asText=TRUE)
hit_IDs <- xpathSApply(blast.tree, "//Hit/Hit_id", xmlValue)
hit_seqs <- xpathSApply(blast.tree, "//Hit/Hit_hsps/Hsp/Hsp_hseq", xmlValue)
write(paste(">", hit_IDs, "\n", hit_seqs, "\n", sep=""), file=Blast_fasta_Name)
#Makes a multiple alignment from the fasta file using clustalOmega
system(paste("clustalo -i ", Blast_fasta_Name," -o ", ClustalOutName))
#Computes the conservation scores using mstatx
system(paste("/Users/simon/workspace/SLU/MutImpact_project/MstatX/mstatx -m ", ClustalOutName))
Scores <- read.delim(paste(tmp, "stat", sep="."), header=FALSE)
}