-
-
Notifications
You must be signed in to change notification settings - Fork 132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Protein_ligand graph support & junction tree feature #164
base: master
Are you sure you want to change the base?
Changes from 45 commits
105a2eb
2554e24
8d55f67
ea8c50e
eb3d634
299d69c
9ef623c
968b9ee
a1cdce5
9a52956
d2ae675
02227d9
b66c418
e3fe2c5
1d657c5
e5a47e1
88fdd88
be1b30c
9d1374a
d2a4277
0a167b8
748cb1b
d1e2bed
ad2ecb1
140954b
cd15a36
bbe9f0a
b5f50a0
b5b793c
fe77069
bb62ea6
828644a
5a52f9c
a65f81b
78aa744
0e9a1ee
a48483d
619957c
2cfd8e8
b7caf16
3470357
7f25b92
7a0d1a5
8a5f4df
2de9712
c83a383
021a7a2
732e691
f97e280
c2a40da
bc9b3db
de66061
a106405
d2433db
d4d6817
cf2b541
fffc28a
f04c0d2
d81fc2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,12 @@ | |
compute_edges, | ||
import_message, | ||
) | ||
from graphein.utils.junction_tree.jt_utils import ( | ||
get_mol, | ||
get_smiles, | ||
tree_decomp, | ||
get_clique_mol, | ||
) | ||
|
||
from .config import MoleculeGraphConfig | ||
|
||
|
@@ -94,7 +100,7 @@ def construct_graph( | |
graph_annotation_funcs: Optional[List[Callable]] = None, | ||
) -> nx.Graph: | ||
""" | ||
Constructs protein structure graph from a ``sdf_path``, ``mol2_path`` or ``smiles``. | ||
Constructs molecule structure graph from a ``sdf_path``, ``mol2_path`` or ``smiles``. | ||
|
||
Users can provide a :class:`~graphein.molecule.config.MoleculeGraphConfig` | ||
object to specify construction parameters. | ||
|
@@ -227,3 +233,37 @@ def construct_graph( | |
g = annotate_edge_metadata(g, config.edge_metadata_functions) | ||
|
||
return g | ||
|
||
def construct_junction_tree(
    smiles: Optional[str] = None,
) -> nx.Graph:
    """
    Constructs molecule structure junction tree graph from a ``smiles``.

    Every clique returned by ``tree_decomp`` becomes one node whose id is
    ``"<clique smiles>:<clique index>"``. Every edge returned by
    ``tree_decomp`` connects two of those clique nodes and carries the edge
    attribute ``kind={"junction_tree"}``.

    :param smiles: smiles string to build graph from. The default of ``None``
        is kept for backwards compatibility only; a value must be supplied.
    :type smiles: str, optional
    :raises ValueError: If ``smiles`` is ``None`` or ``get_mol`` returns
        ``None`` for it.
    :return: Molecule Structure Junction Tree Graph
    :rtype: nx.Graph
    """
    # NOTE(review): reviewers suggested taking an RDKit mol as input instead,
    # so the function also works for mols parsed from SDF, PDB, Mol2 etc.,
    # and said the input should not be optional. The signature is left
    # unchanged here so existing callers keep working.
    if smiles is None:
        raise ValueError("A SMILES string is required to build a junction tree.")

    mol = get_mol(smiles)
    # presumably get_mol returns None for an unparsable SMILES; verify against
    # jt_utils. Failing here gives a clearer error than inside tree_decomp.
    if mol is None:
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    g = nx.Graph(name=smiles, smiles=smiles)

    cliques, edges = tree_decomp(mol)

    # Remember the node id given to each clique, in clique order, so that the
    # edges can be attached to the same nodes.
    node_ids = []
    for i, c in enumerate(cliques):
        cmol = get_clique_mol(mol, c)
        node_id = f"{get_smiles(cmol)}:{i}"
        node_ids.append(node_id)
        g.add_node(node_id)

    # assumes tree_decomp returns edges as pairs of indices into ``cliques``
    # -- TODO confirm against jt_utils. Adding the edge with the raw indices
    # would make networkx create new integer nodes, leaving the clique nodes
    # above unconnected.
    for n1, n2 in edges:
        u, v = node_ids[n1], node_ids[n2]
        if g.has_edge(u, v):
            g.edges[u, v]["kind"].add("junction_tree")
        else:
            g.add_edge(u, v, kind={"junction_tree"})

    return g
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Needs a trailing newline. |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"""Protein graph construction module.""" | ||
from .config import * | ||
from .edges import * | ||
from .graphs import * | ||
from .utils import * | ||
from .visualisation import ( | ||
plot_distance_landscape, | ||
plot_distance_matrix, | ||
plot_protein_structure_graph, | ||
plotly_protein_structure_graph, | ||
) | ||
|
||
try: | ||
from .visualisation import plot_chord_diagram | ||
except ImportError: | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should use the import message util from … |
||
|
||
try: | ||
from .meshes import * | ||
from .visualisation import plot_pointcloud | ||
except ImportError: | ||
pass |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
"""Base Config object for use with Protein Graph Construction.""" | ||
# Graphein | ||
# Author: Arian Jamasb <[email protected]> | ||
# License: MIT | ||
# Project Website: https://github.com/a-r-j/graphein | ||
# Code Repository: https://github.com/a-r-j/graphein | ||
from __future__ import annotations | ||
|
||
from functools import partial | ||
from pathlib import Path | ||
from typing import Any, Callable, List, Optional, Union | ||
|
||
from deepdiff import DeepDiff | ||
from pydantic import BaseModel | ||
from typing_extensions import Literal | ||
|
||
from graphein.protein_ligand.edges.distance import add_peptide_bonds | ||
from graphein.protein_ligand.features.nodes.amino_acid import meiler_embedding | ||
|
||
from graphein.protein_ligand.edges.atomic import add_atom_bonds | ||
from graphein.protein_ligand.edges.distance import ( | ||
add_distance_threshold_ligand, | ||
add_fully_connected_edges_ligand, | ||
add_k_nn_edges_ligand, | ||
add_fully_connected_edges_protein_ligand, | ||
add_k_nn_edges_protein_ligand, | ||
add_distance_threshold_protein_ligand, | ||
) | ||
from graphein.protein_ligand.features.nodes.atom_type import atom_type_one_hot | ||
|
||
from graphein.utils.config import PartialMatchOperator, PathMatchOperator | ||
|
||
|
||
class DSSPConfig(BaseModel):
    """
    Config object holding the reference to the ``DSSP`` executable.

    :param executable: Name or path of the ``DSSP`` executable. Defaults to ``"mkdssp"``.
    :type executable: str
    """

    # NOTE(review): a reviewer flagged this class as unneeded because an
    # existing definition can be imported instead (the module to import from
    # is not shown here).
    executable: str = "mkdssp"
|
||
|
||
class GetContactsConfig(BaseModel):
    """
    Config object for parameters relating to running ``GetContacts``.
    ``GetContacts`` is an optional dependency from which intramolecular interactions can be computed and used as edges in the graph.

    More information about ``GetContacts`` can be found at https://getcontacts.github.io/

    :param get_contacts_path: Path to ``GetContacts`` installation
    :type get_contacts_path: pathlib.Path
    :param contacts_dir: Path to store output of ``GetContacts``
    :type contacts_dir: pathlib.Path
    :param pdb_dir: Path to PDB files to be used to compute intramolecular interactions.
    :type pdb_dir: pathlib.Path
    :param granularity: Specifies the node types of the graph, defaults to ``"CA"`` for alpha-carbons as nodes. Other options are ``"CB"`` (beta-carbon), ``"atom"`` for all-atom graphs, and ``"centroid"`` for nodes positioned as residue centroids.
    :type granularity: str
    """

    # NOTE(review): a reviewer flagged this class as unneeded because an
    # existing definition can be imported instead (the module to import from
    # is not shown here).

    # NOTE(review): the three path defaults are resolved once, when this class
    # body is executed, so the relative ones depend on the working directory
    # at that moment. The first default is a developer-specific absolute path;
    # callers will normally need to override it.
    get_contacts_path: Path = Path(
        "/Users/arianjamasb/github/getcontacts/"
    ).resolve()
    contacts_dir: Path = Path("../examples/contacts/").resolve()
    pdb_dir: Path = Path("../examples/pdbs/").resolve()
    # Plain ``str`` here, so the value is not restricted to the options listed
    # in the docstring.
    granularity: str = "CA"
|
||
|
||
# Atom names accepted by the ``granularity`` field of
# ``ProteinLigandGraphConfig``, which combines this alias with
# ``GranularityOpts`` in a ``Union``.
# NOTE(review): a reviewer flagged this definition as unneeded because an
# existing one can be imported instead (the module to import from is not
# shown here).
GraphAtoms = Literal[
    "N",
    "CA",
    "C",
    "O",
    "CB",
    "OG",
    "CG",
    "CD1",
    "CD2",
    "CE1",
    "CE2",
    "CZ",
    "OD1",
    "ND2",
    "CG1",
    "CG2",
    "CD",
    "CE",
    "NZ",
    "OD2",
    "OE1",
    "NE2",
    "OE2",
    "OH",
    "NE",
    "NH1",
    "NH2",
    "OG1",
    "SD",
    "ND1",
    "SG",
    "NE1",
    "CE3",
    "CZ2",
    "CZ3",
    "CH2",
    "OXT",
]
"""Allowable atom types for nodes in the graph."""
|
||
# Granularity values that are not atom names; combined with ``GraphAtoms`` in
# the ``granularity`` field of ``ProteinLigandGraphConfig``.
# NOTE(review): a reviewer noted this can be imported from an existing module
# instead of being redefined (the module name is not shown here).
GranularityOpts = Literal["atom", "centroids"]
"""Allowable granularity options for nodes in the graph."""
|
||
|
||
class ProteinLigandGraphConfig(BaseModel):
    """
    Config Object for Protein-Ligand Structure Graph Construction.

    If you encounter a problematic structure, perusing https://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm may provide some additional insight.
    PDBs are notoriously troublesome and this is an excellent overview.

    :param granularity: Controls the granularity of the graph construction. ``"atom"`` builds an atomic-scale graph where
        nodes are constituent atoms. Residue-level graphs can be built by specifying which constituent atom should
        represent node positions (see :const:`~graphein.protein.config.GraphAtoms`). Additionally, ``"centroids"`` can be specified to
        compute the centre of gravity for a given atom (Specified in :const:`~graphein.protein.config.GranularityOpts`).
        Defaults to ``"CA"`` (alpha-Carbon).
    :type granularity: str (Union[graphein.protein.config.GraphAtoms, graphein.protein.config.GranularityOpts])
    :param keep_hets: Controls whether or not heteroatoms are removed from the PDB file. These are typically modified
        residues, bound ligands, crystallographic adjuvants, ions or water molecules. Defaults to ``False``.

        For more information, see: https://proteopedia.org/wiki/index.php/Hetero_atoms
    :type keep_hets: bool
    :param insertions: Controls whether or not insertions are allowed. Defaults to ``False``.
    :type insertions: bool
    :param pdb_dir: Specifies path to download protein structures into. Defaults to ``"../examples/pdbs/"``.
    :type pdb_dir: pathlib.Path
    :param verbose: Specifies verbosity of graph creation process. Defaults to ``False``.
    :type verbose: bool
    :param exclude_waters: Specifies whether or not water molecules are excluded from the structure. Defaults to ``True``.
    :type exclude_waters: bool
    :param deprotonate: Specifies whether or not to remove ``H`` atoms from the graph. Defaults to ``False``.
    :type deprotonate: bool
    :param add_hs: Defaults to ``False``. NOTE(review): presumably controls whether hydrogens are added; not
        established by this file, confirm against the graph construction code.
    :type add_hs: bool
    :param protein_df_processing_functions: List of functions that take a ``pd.DataFrame`` and return a ``pd.DataFrame``.
        This allows users to define their own series of processing functions for the protein structure DataFrame and
        override the default sequencing of processing steps provided by Graphein. We refer users to our low-level API
        tutorial for more details. Defaults to ``None``.
    :type protein_df_processing_functions: Optional[List[Callable]]
    :param protein_edge_construction_functions: Edge construction functions for the protein: functions that take an
        ``nx.Graph`` and return an ``nx.Graph`` with desired edges added. Defaults to ``[add_peptide_bonds]``.
    :type protein_edge_construction_functions: List[Union[Callable, str]]
    :param protein_node_metadata_functions: Node metadata functions for the protein. Defaults to ``[meiler_embedding]``.
    :type protein_node_metadata_functions: List[Union[Callable, str]], optional
    :param protein_edge_metadata_functions: Edge metadata functions for the protein. Defaults to ``None``.
    :type protein_edge_metadata_functions: List[Union[Callable, str]], optional
    :param graph_metadata_functions: List of functions that take an ``nx.Graph`` and return an ``nx.Graph`` with added
        graph-level features and metadata. Defaults to ``None``.
    :type graph_metadata_functions: List[Callable], optional
    :param ligand_edge_construction_functions: Edge construction functions for the ligand. Defaults to
        ``[add_fully_connected_edges_ligand, add_k_nn_edges_ligand, add_distance_threshold_ligand]``.
    :type ligand_edge_construction_functions: List[Union[Callable, str]]
    :param ligand_node_metadata_functions: Node metadata functions for the ligand. Defaults to ``[atom_type_one_hot]``.
    :type ligand_node_metadata_functions: List[Union[Callable, str]], optional
    :param ligand_edge_metadata_functions: Edge metadata functions for the ligand. Defaults to ``None``.
    :type ligand_edge_metadata_functions: List[Union[Callable, str]], optional
    :param protein_ligand_edge_construction_functions: Edge construction functions for protein-ligand edges. Defaults to
        ``[add_fully_connected_edges_protein_ligand, add_k_nn_edges_protein_ligand, add_distance_threshold_protein_ligand]``.
    :type protein_ligand_edge_construction_functions: List[Union[Callable, str]]
    :param protein_ligand_edge_metadata_functions: Edge metadata functions for protein-ligand edges. Defaults to ``None``.
    :type protein_ligand_edge_metadata_functions: List[Union[Callable, str]], optional
    :param get_contacts_config: Config object containing parameters for running ``GetContacts`` for computing intramolecular
        contact-based edges. Defaults to None.
    :type get_contacts_config: GetContactsConfig, optional
    :param dssp_config: Config Object containing reference to ``DSSP`` executable. Defaults to None.
        **NB** DSSP must be installed. See installation instructions: https://graphein.ai/getting_started/installation.html#optional-dependencies
    :type dssp_config: DSSPConfig, optional
    """

    granularity: Union[GraphAtoms, GranularityOpts] = "CA"
    keep_hets: bool = False
    insertions: bool = False
    pdb_dir: Path = Path(
        "../examples/pdbs/"
    )  # Also suggest to avoid hard-coding paths if possible!
    verbose: bool = False
    exclude_waters: bool = True
    deprotonate: bool = False
    add_hs: bool = False

    # Graph construction functions for protein
    protein_df_processing_functions: Optional[List[Callable]] = None
    protein_edge_construction_functions: List[Union[Callable, str]] = [
        add_peptide_bonds
    ]
    protein_node_metadata_functions: Optional[List[Union[Callable, str]]] = [
        meiler_embedding
    ]
    protein_edge_metadata_functions: Optional[List[Union[Callable, str]]] = None
    graph_metadata_functions: Optional[List[Callable]] = None

    # Graph construction functions for ligand
    ligand_edge_construction_functions: List[Union[Callable, str]] = [
        add_fully_connected_edges_ligand,
        add_k_nn_edges_ligand,
        add_distance_threshold_ligand,
        # TODO: infer bond from pdb? do we have to save to pdb and infer bond or better way?
        # NOTE(review): the reviewer was unsure and commented only to
        # highlight the TODO above; it is still open.
        # add_atom_bonds,
    ]
    ligand_node_metadata_functions: Optional[List[Union[Callable, str]]] = [
        atom_type_one_hot
    ]
    ligand_edge_metadata_functions: Optional[List[Union[Callable, str]]] = None

    # Graph construction functions for protein-ligand
    protein_ligand_edge_construction_functions: List[Union[Callable, str]] = [
        add_fully_connected_edges_protein_ligand,
        add_k_nn_edges_protein_ligand,
        add_distance_threshold_protein_ligand,
    ]
    protein_ligand_edge_metadata_functions: Optional[List[Union[Callable, str]]] = None

    # External Dependency configs
    get_contacts_config: Optional[GetContactsConfig] = None
    dssp_config: Optional[DSSPConfig] = None

    def __eq__(self, other: Any) -> bool:
        """Overwrites the BaseModel __eq__ function in order to check more specific cases (like partial functions)."""
        # Two configs are equal when DeepDiff, run with the custom operators
        # for ``partial`` and ``Path`` values, reports no differences.
        if isinstance(other, ProteinLigandGraphConfig):
            return (
                DeepDiff(
                    self,
                    other,
                    custom_operators=[
                        PartialMatchOperator(types=[partial]),
                        PathMatchOperator(types=[Path]),
                    ],
                )
                == {}
            )
        # Any other object is compared against this config's dict form.
        return self.dict() == other
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .atomic import * | ||
from .distance import * | ||
from .intramolecular import * |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can move these utils to
graphein.molecule.utils