-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
94 lines (72 loc) · 3.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#Importing the necessary libraries
import argparse
import os
import pandas as pd
import datetime
#### Constants for the pipeline
threshHLA = 0.75
#### Recommendation databases
drugVatiants = pd.read_csv('Data/total_var_ann.csv')
HLAVariants = pd.read_csv('Data/HLA_var_ann.csv')
#Implementing arguments importation from the command line type
#python main.py -f filname to import filename into the programm.
parser = argparse.ArgumentParser(description='Import the file name into the programm')
parser.add_argument("-f", "--file", dest="filename",
help="Option to add filename", metavar="FILE")
args = parser.parse_args()
#Define the patient file
patientFile = args.filename
patientName = patientFile.split('.')[0]
#Make a folder for the results of the patient
#We store the reults in separate folders each day there is an analysis
today = str(datetime.datetime.today().date())
path_today = 'Results/'+today
if not os.path.exists(path_today):
os.mkdir(path_today)
#We also create a directory for the patient name
pathPatient = path_today+'/'+patientName
if not os.path.exists(pathPatient):
os.mkdir(pathPatient)
#Converting the file of the patient to plink
os.system('plink --vcf '+patientFile+' --recode bimbam --out '+patientName+' --make-bed --double-id')
#Extracting the chromosome 6
patientChrom6 = patientName+'_chr6'
os.system('plink --bfile '+patientName+' --chr 6 --out '+ patientChrom6 +' --make-bed')
#COmputing HLA imputation with HIBAG method
os.system('Rscript --vanilla HLA_from_patient.R '+patientChrom6+' '+pathPatient)
#move HLA results to the right folder.
hlaPatient = pd.read_csv(patientChrom6+'_results.csv')
os.system('mv '+patientChrom6+'_results.csv '+pathPatient)
#Now we import tne results file so that we modify it to be more readable
hlaPatient.columns = hlaPatient.loc[0]
hlaPatient.drop(0,inplace=True)
HLAFinal = hlaPatient.copy()
hlaPatient.drop(1,axis=1,inplace=True)
hlaPatient.to_csv(pathPatient+'/ResultsHLA.csv')
#Now we filter the locuses to be sure that all locuses are rightly imupted
for locus in hlaPatient.columns:
if float(HLAFinal.loc[3,locus])<threshHLA:
HLAFinal.drop(locus,axis=1,inplace=True)
#Modifying the datafame so that we can
for locus in HLAFinal.columns[1:]:
allele1 = HLAFinal.loc[1, locus].split(':')
allele2 = HLAFinal.loc[2, locus].split(':')
HLAFinal.loc[1, locus] = 'HLA_' + locus + '_' + allele1[0] + allele1[1]
HLAFinal.loc[2, locus] = 'HLA_' + locus + '_' + allele2[0] + allele2[1]
#Matching with the HLA database
arr_HLA = HLAVariants['Variant']
HLAPAtient = []
for index in HLAVariants.index:
locus = HLAVariants.loc[index,'Variant']
for col in HLAFinal.columns[1:]:
if HLAFinal[col].isin([locus]).any()==True:
HLAPAtient.append(HLAVariants[HLAVariants.index==index])
PatientHLA = pd.concat(HLAPAtient)
PatientHLA = PatientHLA[['Variant','Chemical','Note']]
#Saving the result in a csv file
PatientHLA.to_csv(pathPatient+'/Results_HLA_Recommendations.csv')
os.system('mv '+patientFile+' '+pathPatient)
#Clean the directory
for file in os.listdir():
if file.startswith(patientName):
os.remove(file)