Skip to content

Commit

Permalink
ss-main: added dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
Sulstice committed Apr 5, 2022
1 parent 2510eed commit 579e939
Show file tree
Hide file tree
Showing 9 changed files with 2,352 additions and 839 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "PY-f8bB8jpqi"
},
"outputs": [],
"source": [
"# Environment Setup\n",
"\n",
"# Remove any earlier checkout first so this cell can be re-run safely.\n",
"!rm -rf global-chem\n",
"!git clone -q https://github.com/Sulstice/global-chem\n",
"\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q global-chem-extensions --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "NoYEtzDf1jYF"
},
"outputs": [],
"source": [
"# Header\n",
"\n",
"from global_chem import GlobalChem\n",
"from global_chem_extensions import GlobalChemExtensions\n",
"\n",
"gc = GlobalChem()\n",
"gce = GlobalChemExtensions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "o6q50QS4DEq_",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d06d1fb9-4545-4728-97e7-800c3a8fec62"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['NC(CCCCNC(N)=N)C(NC(CO)C(NC(C(C)([H])O)C(NC(CCC(O)=O)C(NC(CC1=CC=CC=C1)C(NC([H])C(NC(CC1=CNC=N1)C(NC(C(CC)([H])C)C(NC(CCCCN)C(NC(CC(C)C)C(NC(C)C(NC(CC(O)=O)C(NC(C2CCCN2)C(NC(CCC(N)=O)C(NCC(O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O)=O']\n",
"['RSTEFGHIKLADPQ', None]\n"
]
}
],
"source": [
"# Amino Acids\n",
"\n",
"# Round trip: amino-acid sequence -> SMILES -> amino-acid sequence.\n",
"peptide_sequences = ['RSTEFGHIKLADPQ']\n",
"\n",
"peptide_smiles = gce.amino_acids_to_smiles(peptide_sequences)\n",
"print(peptide_smiles)\n",
"\n",
"recovered_sequences = gce.smiles_to_amino_acids(peptide_smiles)\n",
"print(recovered_sequences)"
]
}
],
"metadata": {
"colab": {
"name": "GlobalChemExtensions_Demonstration.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "PY-f8bB8jpqi"
},
"outputs": [],
"source": [
"# Environment Setup\n",
"\n",
"# Remove any earlier checkout first so this cell can be re-run safely.\n",
"!rm -rf global-chem\n",
"!git clone -q https://github.com/Sulstice/global-chem\n",
"\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q global-chem-extensions --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "NoYEtzDf1jYF"
},
"outputs": [],
"source": [
"# Header\n",
"\n",
"from global_chem import GlobalChem\n",
"from global_chem_extensions import GlobalChemExtensions\n",
"\n",
"gc = GlobalChem()\n",
"gce = GlobalChemExtensions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XWGLAGE8TLyH",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "33435a75-cc5d-455d-ebd2-706910030c43"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Method: 'generate' Time: 0.98 seconds\n",
"[b'CCC(N)CC1=CC(=C(OC)C(=C1)OC)OC', b'COC1=CC(=CC(=C1OCC=C)OC)CCN', b'COC1=CC(=C(OC)C=C1CC(C)N)SC', b'CCSC1=C(OC)C=C(CC(C)N)C(=C1)OC', b'COC1=CC(=C(OC)C=C1CC(C)N)SC(C)C', b'COC1=CC(=C(OC)C=C1CC(C)N)SC2=CC=CC=C2', b'CCCSC1=C(OC)C=C(CC(C)N)C(=C1)OC', b'CCC(N)CC1=CC(=C(C)C=C1OC)OC', b'CCOC1=CC(=CC(=C1OCC)OC)CCN', b'CCCCOC1=C(OC)C=C(CCN)C=C1OC', b'CNC(C)CC1=CC(=C(C)C=C1OC)OC', b'CSC1=C(C)C=C(SC)C(=C1)CC(C)N', b'COC(CN)C1=CC(=C(Br)C=C1OC)OC', b'COC(CN)C1=CC(=C(C)C=C1OC)OC', b'COC(CN)C1=CC(=C(C)C=C1OC)OC', b'CC1=C(O)C=C(C(O)CN)C(=C1)Br', b'COC(CN)C1=CC(=C(OC)C(=C1)OC)OC', b'COC1=C(Br)C(=CC(=C1)CC(C)N)OC', b'CC(N)CC1=C(Br)C=C2OCOC2=C1', b'[CH3:20][O:13][C:6]1=[CH:5][C:4](=[C:3]([O:16][CH3:11])[CH:2]=[C:1]1[Br:12])[CH2:8][CH2:9][NH2:10]', b'COC1=C(OCC2=CC=CC=C2)C(=CC(=C1)CC(C)N)OC', b'COC1=CC(=C(OC)C=C1Cl)CCN', b'[CH3:20][O:13][C:6]1=[CH:5][C:4](=[C:3]([O:16][CH3:11])[CH:2]=[C:1]1[CH3:12])[CH2:8][CH2:9][NH2:10]', b'CCC1=CC(=C(CCN)C=C1OC)OC', b'CCOC1=C(OC)C=C(CC(C)N)C=C1OC', b'COC1=CC(=C(OC)C=C1F)CCN', b'COC1=CC(=C(OC)C(=C1C)C)CCN', b'COC1=CC(=C(OC)C2=C1CCC2)CCN', b'COC1=CC(=C(OC)C2=C1CCCC2)CCN', b'COC1=CC(=C(OC)C2=C1C3CCC2C3)CCN', b'COC1=CC(=C(OC)C2=C1C=CC=C2)CCN', b'COC1=CC(=C(OC)C=C1)CCN', b'[CH3:20][O:13][C:6]1=[CH:5][C:4](=[C:3]([O:16][CH3:11])[CH:2]=[C:1]1[I:12])[CH2:8][CH2:9][NH2:10]', b'COC1=C(CCN)C=C(OC)C(=C1)[N](=O)=O', b'COC1=C(CCN)C=C(OC)C(=C1)OC(C)C', b'CCCOC1=CC(=C(CCN)C=C1OC)OC', b'COC1=C(CCN)C=C(OC)C(=C1)OCC2CC2', b'COC1=C(CCN)C=C(OC)C(=C1)[Se]C', b'COC1=C(CCN)C=C(OC)C(=C1)SC', b'[CH3:30][CH2:29][S:12][C:1]1=[CH:2][C:3](=[C:4]([CH2:8][CH2:9][NH2:10])[CH:5]=[C:6]1[O:13][CH3:20])[O:16][CH3:11]', b'COC1=C(CCN)C=C(OC)C(=C1)SC(C)C', b'COC1=C(CCN)C(=CC(=C1)SC(C)C)OC', b'[CH3:33][CH2:30][CH2:29][S:12][C:1]1=[CH:2][C:3](=[C:4]([CH2:8][CH2:9][NH2:10])[CH:5]=[C:6]1[O:13][CH3:20])[O:16][CH3:11]', b'COC1=C(CCN)C=C(OC)C(=C1)SCC2CC2', b'COC1=C(CCN)C=C(OC)C(=C1)SC(C)(C)C', b'COCCSC1=CC(=C(CCN)C=C1OC)OC', b'COC1=C(CCN)C=C(OC)C(=C1)SC2CC2', b'CCC(C)SC1=CC(=C(CCN)C=C1OC)OC', 
b'COC1=C(CCN)C=C(OC)C(=C1)SCCF', b'COC1=CC(=CC(=C1OC)OC)CCN', b'COC1=CC(=CC(=C1OC)OC)CCN', b'COC1=CC(=CC(=C1C)OC)CCN', b'COC1=CC=C(CC(C)N)C(=C1)OC', b'COC1=CC=C(OC)C(=C1)CC(C)N', b'COC1=CC=C(CC(C)N)C=C1OC', b'COC1=CC(=C(OC)C=C1C)C2CC2N', b'COC1=CC=C(C=C1OC)C(O)CN', b'COC1=C2OCOC2=C(OC)C(=C1)CC(C)N', b'COC1=C(CC(C)N)C=C2OCOC2=C1OC', b'COC1=CC=C(CCN)C=C1OC', b'CCCCCC1=CC(=C(CC(C)N)C=C1OC)OC', b'COC1=CC(=C(OC)C=C1Br)CC(C)N', b'CCCCC1=CC(=C(CC(C)N)C=C1OC)OC', b'COC1=CC(=C(OC)C=C1Cl)CC(C)N', b'COC1=CC(=C(OC)C=C1CCF)CC(C)N', b'CCC1=CC(=C(CC(C)N)C=C1OC)OC', b'COC1=CC(=C(OC)C=C1I)CC(C)N', b'COC1=CC(=C(OC)C=C1C)CC(C)N', b'COC1=C(CC(C)N)C(=CC(=C1)C)OC', b'COC1=C(CC(C)N)C=C(OC)C(=C1)[N](=O)=O', b'CCCC1=CC(=C(CC(C)N)C=C1OC)OC', b'CCOC1=C(OC)C=C(CCN)C=C1OC', b'CCOC1=C(CC(C)N)C=C(OCC)C(=C1)OCC', b'CCOC1=C(CCN)C=C(OC)C(=C1)OCC', b'CCOC1=C(CCN)C=C(OCC)C(=C1)OC', b'CCOC1=C(CCN)C=C(OC)C(=C1)OC', b'CCC(N)CC1=CC=C2OCOC2=C1', b'CCCC(CC1=CC=C2OCOC2=C1)NCC', b'COC1=C(CC(C)N)C=C2OC(C)CC2=C1', b'COC1=C(CC(C)N)C=C2OC(C)(C)CC2=C1', b'CC(CC1=CC=C2OCOC2=C1)N(C)O', b'COC1=C2CCCC2=C(OC)C(=C1)CC(C)N', b'COC1=C2CCCCC2=C(OC)C(=C1)CC(C)N', b'COC1=C2C3CCC(C3)C2=C(OC)C(=C1)CC(C)N', b'COC1=CC(=C(OC)C(=C1C)C)CC(C)N', b'COC1=CC(=C(OC)C2=C1C=CC=C2)CC(C)N', b'', b'CCCSC1=CC(=C(CCNO)C=C1OC)OC', b'CCC(C)SC1=CC(=C(CCNO)C=C1OC)OC', b'COC1=CC(=C(OC)C=C1I)CC(C)N(C)C', b'COC1=C(OC)C(=C(CCN)C=C1)OC', b'COC1=CC(=CC(=C1OC(C)C)OC)CCN', b'CCOC1=CC(=C(OC)C=C1C)CC(C)N', b'CCC(N)CC1=CC=C2OCOC2=C1', b'COC1=C2OCOC2=CC(=C1)CCN', b'C[O:7][C:2]1=[CH:1][C:6](=[CH:5][C:4](=[C:3]1[O:9]C)[O:20]C)[CH2:10][CH2:11][NH2:12]', b'CC(CC1=CC=C(C=C1)OC)N', b'CNC(C)CC1=C(C)C=C2OCOC2=C1', b'COC1=CC(=CC(=C1OCC(C)=C)OC)CCN', b'CC(N)CC1=CC=C2OCOC2=C1', b'CC(CC1=CC=C2OCOC2=C1)NCC=C', b'CCCCNC(C)CC1=CC=C2OCOC2=C1', b'CC(CC1=CC=C2OCOC2=C1)NCC3=CC=CC=C3', b'CC(CC1=CC=C2OCOC2=C1)NCC3CC3', b'CC(CC1=CC=C2OCOC2=C1)N(C)C', b'CCNC(C)CC1=CC=C2OCOC2=C1', b'CC(CC1=CC=C2OCOC2=C1)NCCO', b'CC(C)NC(C)CC1=CC=C2OCOC2=C1', b'CNC(C)CC1=CC=C2OCOC2=C1', 
b'CNC(C)CC1=CC=C2OCCOC2=C1', b'CONC(C)CC1=CC=C2OCOC2=C1', b'COCCNC(C)CC1=CC=C2OCOC2=C1', b'CNC(C)(C)CC1=CC=C2OCOC2=C1', b'CC(CC1=CC=C2OCOC2=C1)NO', b'NCCC1=CC=C2OCOC2=C1', b'CC(C)(N)CC1=CC=C2OCOC2=C1', b'CC(CC1=CC=C2OCOC2=C1)NCC#C', b'CCCNC(C)CC1=CC=C2OCOC2=C1', b'CCOC1=CC(=CC(=C1OC)OC)CCN', b'COC1=C2OCCOC2=CC(=C1)CCN', b'CCOC1=CC(=C(OC)C=C1OCC)CC(C)N', b'CCOC1=CC(=C(CC(C)N)C=C1OC)OC', b'CCOC1=CC=C(CCN)C=C1OC', b'COC1=CC(=C(CC(C)N)C=C1Br)OC', b'COC1=C(CC(C)N)C=C(SC)C(=C1)OC', b'CNC(C)CC1=C(OC)C=CC(=C1)OC', b'CNC(C)CC1=C(OC)C=C(Br)C(=C1)OC', b'CCC(CC1=CC=C2OCOC2=C1)NC', b'CCCC(CC1=CC=C2OCOC2=C1)NC', b'CNC(C)CC1=CC=C(OC)C=C1', b'CNC(C)CC1=C(OC)C=C2OCOC2=C1', b'COC1=C2OCOC2=CC(=C1)CC(C)N', b'COC1=C(CC(C)N)C=C2OCOC2=C1', b'COC1=C2OCOC2=CC=C1CC(C)N', b'COC1=CC=C(CC(C)N)C2=C1OCO2', b'CCOC1=CC(=C(OC)C=C1OC)CC(C)N', b'CCCOC1=CC(=CC(=C1OC)OC)CCN', b'COc1cc(OC)c(cc1OCCC)CC(C)N', b'COC1=CC(=C(SC)C=C1OC)CC(C)N', b'CCCOC1=C(OC)C=C(CCN)C=C1OC', b'COC1=CC(=CC(=C1OCCCC2=CC=CC=C2)OC)CCN', b'NCCC1=CC=CC=C1', b'COC1=CC(=CC(=C1OCC=C)OC)CCN', b'CCOC1=CC(=CC(=C1OC)OCC)CCN', b'COC1=C(OC)C(=C(OC)C(=C1)CC(C)N)OC', b'[CH3:16][CH2:15][O:14][C:8]1=[C:1]([S:10][CH2:11][CH3:17])[CH:2]=[C:3]([CH2:5][CH2:6][NH2:7])[CH:4]=[C:9]1[O:12][CH3:13]', b'CCOC1=CC(=CC(=C1SCC)OC)CCN', b'CCOC1=CC(=CC(=C1OCC)SC)CCN', b'CCCCSC1=C(OC)C=C(CCN)C=C1OC', b'CCOC1=C(OC)C=C(CCN)C=C1SC', b'[CH3:16][CH2:11][S:10][C:8]1=[C:1]([O:12][CH3:13])[CH:2]=[C:3]([CH2:5][CH2:6][NH2:7])[CH:4]=[C:9]1[O:14][CH3:15]', b'COC1=C(OC)C(=C(CCN)C=C1)SC', b'COC1=C(SC)C(=C(CCN)C=C1)OC', b'COC1=C(CCN)C=CC(=C1OC)SC', b'COC1=CC(=CC(=C1OC)SC)CCN', b'COC1=CC(=CC(=C1SC)OC)CCN', b'COC1=CC(=CC(=C1OC)OC)CC(C)N', b'COC1=C(CC(C)N)C=C(OC)C(=C1)OC', b'COC1=C(OC)C(=C(CC(C)N)C=C1)OC', b'COC1=CC(=C(OC)C(=C1)OC)CC(C)N', b'COC1=CC=C(OC)C(=C1CC(C)N)OC', b'COC1=CC(=C(CC(C)N)C(=C1)OC)OC', b'CCSC1=CC(=CC(=C1OC)OC)CCN', b'CCOC1=CC(=CC(=C1SC)OC)CCN', b'CCOC1=CC(=CC(=C1OC)SC)CCN', b'CSC1=C2OCOC2=CC=C1CC(C)N', b'COC1=C(CC(C)N)C=C2OCSC2=C1', 
b'COC1=C(CCN)C=C(OC)C(=C1)OC', b'CCC1=CC(=C(CC(C)N)C=C1OC)SC', b'CCC1=CC(=C(CC(C)N)C=C1SC)OC', b'COC1=CC(=C(SC)C=C1C)CC(C)N', b'COC1=C(CC(C)N)C=C(SC)C(=C1)C', b'COC1=C(CC(C)N)C=C(C(=C1)C)[S](C)=O', b'CCCSC1=C(OC)C=C(CCN)C=C1OC', b'CCOC1=CC(=CC(=C1OCC)OCC)CCN', b'CCOC1=C(OC)C(=CC(=C1)CCN)SCC', b'CCOC1=CC(=CC(=C1SC)OCC)CCN', b'CCOC1=C(OCC)C(=CC(=C1)CCN)SCC', b'CCOC1=CC(=CC(=C1SCC)OCC)CCN']\n"
]
}
],
"source": [
"# PDF Generation & Parsing\n",
"\n",
"# The network is built before the 'pihkal' node is queried for its SMILES.\n",
"gc.build_global_chem_network(print_output=False, debugger=False)\n",
"pihkal_entries = gc.get_node_smiles('pihkal')\n",
"smiles_list = list(pihkal_entries.values())\n",
"\n",
"# One name for the file that is written and then read back.\n",
"pdf_path = 'molecules.pdf'\n",
"\n",
"gce.smiles_to_pdf(\n",
"    smiles=smiles_list,\n",
"    labels=[],\n",
"    file_name=pdf_path,\n",
"    include_failed_smiles=True,\n",
"    title='MY MOLECULES',\n",
")\n",
"\n",
"# Parse the SMILES back out of the PDF that was just generated.\n",
"molecules = gce.pdf_to_smiles(pdf_path)\n",
"\n",
"print(molecules)\n"
]
}
],
"metadata": {
"colab": {
"name": "GlobalChemExtensions_Demonstration.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "PY-f8bB8jpqi"
},
"outputs": [],
"source": [
"# Environment Setup\n",
"\n",
"# Remove any earlier checkout first so this cell can be re-run safely.\n",
"!rm -rf global-chem\n",
"!git clone -q https://github.com/Sulstice/global-chem\n",
"\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q global-chem-extensions --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "NoYEtzDf1jYF"
},
"outputs": [],
"source": [
"# Header\n",
"\n",
"from global_chem import GlobalChem\n",
"from global_chem_extensions import GlobalChemExtensions\n",
"\n",
"gc = GlobalChem()\n",
"gce = GlobalChemExtensions()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IMm6kQvpSIQr",
"outputId": "ba25b01c-496e-4cec-b552-275ccbb42cf0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percentage of Accepted SMILES: 100.0\n"
]
}
],
"source": [
"# Check the Validity of the SMILES\n",
"\n",
"smiles_list = gc.get_all_smiles()\n",
"\n",
"# Only the `selfies` check is switched on; every other validator flag is False.\n",
"# return_failures=True makes the call hand back the rejected SMILES as well.\n",
"successes, failures = gce.verify_smiles(\n",
"    smiles_list,\n",
"    rdkit=False,\n",
"    partial_smiles=False,\n",
"    return_failures=True,\n",
"    pysmiles=False,\n",
"    molvs=False,\n",
"    deepsmiles=False,\n",
"    selfies=True,\n",
")\n",
"\n",
"total = len(successes) + len(failures)\n",
"\n",
"# Guard against an empty SMILES list so the cell never divides by zero.\n",
"accepted_percentage = (len(successes) / total) * 100 if total else 0.0\n",
"print(\"Percentage of Accepted SMILES: %s\" % accepted_percentage)"
]
}
],
"metadata": {
"colab": {
"name": "GlobalChemExtensions_Demonstration.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "PY-f8bB8jpqi"
},
"outputs": [],
"source": [
"# Environment Setup\n",
"\n",
"# Remove any earlier checkout first so this cell can be re-run safely.\n",
"!rm -rf global-chem\n",
"!git clone -q https://github.com/Sulstice/global-chem\n",
"\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q global-chem-extensions --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "NoYEtzDf1jYF"
},
"outputs": [],
"source": [
"# Header\n",
"\n",
"from global_chem import GlobalChem\n",
"from global_chem_extensions import GlobalChemExtensions\n",
"\n",
"gc = GlobalChem()\n",
"gce = GlobalChemExtensions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UWRGdx8zStAL",
"outputId": "f916b146-4f1d-4dc1-86fc-d1121719d965"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Zinc 15\n",
" Zinc 20\n",
" PubChem\n",
" NIST Chemistry Webhook\n",
" Chem Exper\n",
" NMR Shift Database\n",
" Drug Bank\n",
" Binding Database\n",
" Spectral Database for Organic Compounds\n",
" Sider\n",
" ChemSpider\n",
" Stitch\n",
" CardPred\n",
" Comparative Toxicogenomics Database\n",
" AMED Cardiotoxicity Database\n",
" Tox21\n",
" Drug Safety Analysis System\n",
" OpenFDA\n",
" Metabolites Biological Role\n",
" MetaboAnalyst\n",
" Adverse Drug Reaction Classification System\n",
" Metabolism and Transport Database\n",
" Ecology Toxicity \n",
" Human and Environment Risk Assessment \n",
" International Toxicity Information for Risk Assesments\n",
" Japan Exisiting Database\n",
" National Pesticide Center\n",
" Pesticide Info\n",
" Kyoto Encyclopedia of Genes and Genomes\n",
" Hetereocycles\n",
" Chemical Resolver\n",
" LookChem\n",
" Lipid Maps\n"
]
}
],
"source": [
"# Database Monitoring\n",
"\n",
"# The check is looked up on the class itself, not on the `gce` instance.\n",
"check_databases = GlobalChemExtensions.check_status_on_open_source_databases\n",
"check_databases()"
]
}
],
"metadata": {
"colab": {
"name": "GlobalChemExtensions_Demonstration.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 579e939

Please sign in to comment.