-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
31 lines (26 loc) · 1009 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from dataParser import *
from clean_smiles import *
from filter import *
from fetch_descriptors import *
from rdkit.Chem import PandasTools
def main(argv=None):
    """Run the SMILES pipeline on one input file and export the result.

    Steps, in order:
      1. Load the input with ``Parser`` and hand its SMILES to
         ``SmilesCleaner``; the cleaner's output replaces the ``SMILES``
         column of ``reader.data``.
      2. Pass the data through ``Filter`` and build a DataFrame from it.
      3. Compute descriptors with ``FetchDescriptors`` and keep only the
         columns listed in ``descriptor_columns``.
      4. Join the descriptors onto the filtered rows, add an RDKit molecule
         column, drop every column that has a missing value, write
         ``output.xlsx`` and print the final frame.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. Only the first item is used; it is passed
            unchanged to ``Parser``.

    Raises:
        SystemExit: With a usage message when no input argument is given.
    """
    import sys

    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message instead of an IndexError traceback.
        raise SystemExit("usage: {} <input_file>".format(sys.argv[0]))

    # Imported explicitly (and only after the cheap argument check) rather
    # than relying on one of the wildcard imports to leak the name ``pd``.
    import pandas as pd

    descriptor_columns = [
        'ExactMolWt',
        'HeavyAtomCount',
        'NumHAcceptors',
        'NumHDonors',
        'NumAromaticRings',
        'NumRotatableBonds',
        'MolLogP',
    ]
    output_path = 'output.xlsx'

    reader = Parser(args[0])
    cleaner = SmilesCleaner(reader.getSmiles())
    # Method name spelled exactly as the original call site has it (sic);
    # verify against clean_smiles before renaming.
    reader.data['SMILES'] = cleaner.getCanonicaSmiles()

    filtered = Filter(reader.data).getFiltered()
    # DataFrame.join aligns on the index. Resetting it here keeps the
    # filtered rows and the freshly built descriptor frame on the same
    # 0..n-1 labels even if filtering left gaps in the original index.
    # Assumes getDescr() yields one descriptor row per input row, in order
    # -- TODO confirm against fetch_descriptors.
    df = pd.DataFrame(filtered).reset_index(drop=True)

    mol_desc, desc_names = FetchDescriptors(df).getDescr()
    descriptors = pd.DataFrame(mol_desc, columns=desc_names)[descriptor_columns]

    ddf = df.join(descriptors, how='left')
    PandasTools.AddMoleculeColumnToFrame(ddf, smilesCol='SMILES')
    # NOTE(review): axis=1 removes whole columns (not rows) that contain any
    # missing value -- confirm that this is the intended clean-up.
    ddf = ddf.dropna(axis=1, how="any")
    PandasTools.SaveXlsxFromFrame(ddf, output_path, molCol='ROMol')
    print(ddf)
# Script entry point: start the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()