-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfeatures_PaRSnIP_v2.R
executable file
·38 lines (30 loc) · 1.1 KB
/
features_PaRSnIP_v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
library(data.table)
library(doParallel)
library(foreach)
# Command-line driver: computes the PaRSnIP feature vector for every sequence
# in a FASTA file and writes the resulting matrix to "features.csv".
#
# Usage: Rscript features_PaRSnIP_v2.R <sequences.fasta>
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  # Without this guard args[1] is NA and the failure surfaces later as an
  # obscure error from the FASTA reader.
  stop("Usage: Rscript features_PaRSnIP_v2.R <sequences.fasta>",
       call. = FALSE)
}
#setwd('/home/local/QCRI/rmall/Proteins/Protein_Crystallization/scripts/')

# Number of parallel workers registered for %dopar%.
processes <- 4
registerDoParallel(processes)

# Provides PaRSnIP.calc.features.RSAhydro.test (and, presumably, loads the
# package that supplies read.fasta -- TODO confirm).
source('PaRSnIP_v2.R')

#Get list of protein sequences
alns <- read.fasta(args[1])
N <- length(alns$id)
if (N < 1L) {
  # 1:N would silently become c(1, 0) here and index invalid rows.
  stop("No sequences found in FASTA file: ", args[1], call. = FALSE)
}

#Paths to the external SCRATCH and DISOPRED predictor scripts
SCRATCH.path <- './SCRATCH-1D_1.2/bin/run_SCRATCH-1D_predictors.sh'
DISORDER.path <- './DISOPRED/run_disopred.pl'

#Create feature matrix: one foreach task per sequence, results row-bound in
#input order
full_feature_matrix <- foreach(j = seq_len(N), .inorder = TRUE,
                               .combine = 'rbind') %dopar% {
  aln.ali <- alns$ali[j, ]
  # Unique per-sequence prefix so concurrent workers do not share
  # intermediate files.
  output_prefix <- tempfile(pattern = paste0("tmp_", j, "_"), tmpdir = "/tmp",
                            fileext = "")
  # n.cores = 1: parallelism is already applied across sequences.
  PaRSnIP.calc.features.RSAhydro.test(aln.ali, SCRATCH.path, DISORDER.path,
                                      output_prefix, n.cores = 1)
}

# Release the workers created implicitly by registerDoParallel().
stopImplicitCluster()

full_feature_matrix <- as.matrix(full_feature_matrix)
# A single-column result is turned into a single row. Presumably this covers
# the one-sequence case, where the lone result is not row-bound and
# as.matrix() yields a column -- TODO confirm.
if (ncol(full_feature_matrix) == 1) {
  full_feature_matrix <- t(full_feature_matrix)
}
write.csv(full_feature_matrix, "features.csv", row.names = TRUE)