From d512274780c92dec47dc59394af7ae326982610a Mon Sep 17 00:00:00 2001 From: Rohit Basu Date: Fri, 28 Jun 2024 11:45:07 -0400 Subject: [PATCH 01/19] Added drug mode dgidb.py: Added get_drug_list(), which pulls all drug names present in DGIdb graph_app.py: Reformatted and renamed many variables/functions for consistency Added search mode selection menu, allowing users to choose between gene and drug mode. Selecting a mode will update the terms dropdown menu with genes or drugs respectively, and allow genes or drugs to be graphed. Node selection, the neighbors dropdown menu, and edge selection, all function as expected in drug mode. network_graph.py: Updated code to support drug-search interaction data --- src/dgipy/dgidb.py | 26 ++++++++ src/dgipy/graph_app.py | 123 ++++++++++++++++++++++++------------- src/dgipy/network_graph.py | 31 +++++++--- 3 files changed, 129 insertions(+), 51 deletions(-) diff --git a/src/dgipy/dgidb.py b/src/dgipy/dgidb.py index 820bd52..54ab801 100644 --- a/src/dgipy/dgidb.py +++ b/src/dgipy/dgidb.py @@ -367,6 +367,32 @@ def get_gene_list(api_url: str | None = None) -> list: return genes +def get_drug_list(api_url: str | None = None) -> list: + """Get all drug names present in DGIdb + + :param api_url: API endpoint for GraphQL request + :return: a full list of drugs present in dgidb + """ + query = gql( + """ + { + drugs { + nodes { + name + conceptId + } + } + } + """ + ) + api_url = api_url if api_url else API_ENDPOINT_URL + client = _get_client(api_url) + result = client.execute(query) + drugs = result["drugs"]["nodes"] + drugs.sort(key=lambda i: i["name"]) + return drugs + + def get_drug_applications( terms: list | str, use_pandas: bool = True, api_url: str | None = None ) -> pd.DataFrame | dict: diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index db5d359..6887695 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -12,38 +12,53 @@ def generate_app() -> dash.Dash: :return: a python dash app that can be 
run with run_server() """ - genes = dgidb.get_gene_list() - plot = ng.generate_plotly(None) + genes = [ + {"label": gene["name"], "value": gene["name"]} for gene in dgidb.get_gene_list() + ] + drugs = [ + {"label": drug["name"], "value": drug["name"]} for drug in dgidb.get_drug_list() + ] + app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) - __set_app_layout(app, plot, genes) - __update_plot(app) + __set_app_layout(app) + __update_plotly(app) + __update_terms_dropdown(app, genes, drugs) __update_selected_node(app) - __update_selected_node_display(app) - __update_neighbor_dropdown(app) + __update_selected_node_text(app) + __update_neighbors_dropdown(app) __update_edge_info(app) return app -def __set_app_layout(app: dash.Dash, plot: ng.go.Figure, genes: list) -> None: - graph_display = dcc.Graph( - id="network-graph", figure=plot, style={"width": "100%", "height": "800px"} +def __set_app_layout(app: dash.Dash) -> None: + plotly_figure = dcc.Graph( + id="plotly-figure", style={"width": "100%", "height": "800px"} + ) + + search_mode = dcc.RadioItems( + id="search-mode", + options=[ + {"label": "Gene Search", "value": "genes"}, + {"label": "Drug Search", "value": "drugs"}, + ], + value="genes", ) - genes_dropdown_display = dcc.Dropdown( - id="gene-dropdown", - options=[{"label": gene["name"], "value": gene["name"]} for gene in genes], - multi=True, + terms_dropdown = dcc.Dropdown( + id="terms-dropdown", optionHeight=75, multi=True, value=[] ) - selected_node_display = dcc.Markdown( + selected_node_text = dcc.Markdown( id="selected-node-text", children="No Node Selected" ) - neighbors_dropdown_display = dcc.Dropdown(id="neighbor-dropdown", multi=False) + neighbors_dropdown = dcc.Dropdown(id="neighbors-dropdown", multi=False) - edge_info_display = dcc.Markdown(id="edge-info-text", children="No Edge Selected") + selected_edge_info = dcc.Markdown( + id="selected-edge-info", children="No Edge Selected" + ) app.layout = html.Div( [ @@ -54,22 +69,29 @@ def 
__set_app_layout(app: dash.Dash, plot: ng.go.Figure, genes: list) -> None: dbc.Row( [ dbc.Col( - dbc.Card(graph_display, body=True, style={"margin": "10px"}), + dbc.Card(plotly_figure, body=True, style={"margin": "10px"}), width=8, ), dbc.Col( [ dbc.Card( [ - dbc.CardHeader("Genes Dropdown Display"), - dbc.CardBody(genes_dropdown_display), + dbc.CardHeader("Search Mode"), + dbc.CardBody(search_mode), + ], + style={"margin": "10px"}, + ), + dbc.Card( + [ + dbc.CardHeader("Terms Dropdown"), + dbc.CardBody(terms_dropdown), ], style={"margin": "10px"}, ), dbc.Card( [ - dbc.CardHeader("Neighbors Dropdown Display"), - dbc.CardBody(neighbors_dropdown_display), + dbc.CardHeader("Neighbors Dropdown"), + dbc.CardBody(neighbors_dropdown), ], style={"margin": "10px"}, ), @@ -77,9 +99,9 @@ def __set_app_layout(app: dash.Dash, plot: ng.go.Figure, genes: list) -> None: dbc.CardBody( [ html.H4("Selected Node/Edge:"), - html.P(selected_node_display), + html.P(selected_node_text), html.H4("Selected Edge Info:"), - html.P(edge_info_display), + html.P(selected_edge_info), ] ), style={"margin": "10px"}, @@ -93,39 +115,53 @@ def __set_app_layout(app: dash.Dash, plot: ng.go.Figure, genes: list) -> None: ) -def __update_plot(app: dash.Dash) -> None: +def __update_plotly(app: dash.Dash) -> None: @app.callback( - [Output("graph", "data"), Output("network-graph", "figure")], - Input("gene-dropdown", "value"), + [Output("graph", "data"), Output("plotly-figure", "figure")], + Input("terms-dropdown", "value"), + State("search-mode", "value"), ) def update( - selected_genes: list | None, + terms: list | None, search_mode: str ) -> tuple[dict | None, ng.go.Figure]: - if selected_genes is not None: - gene_interactions = dgidb.get_interactions(selected_genes) - updated_graph = ng.create_network(gene_interactions, selected_genes) - updated_plot = ng.generate_plotly(updated_graph) - return ng.generate_json(updated_graph), updated_plot + if len(terms) != 0: + interactions = 
dgidb.get_interactions(terms, search_mode) + network_graph = ng.create_network(interactions, terms, search_mode) + plotly_figure = ng.generate_plotly(network_graph) + return ng.generate_json(network_graph), plotly_figure return None, ng.generate_plotly(None) +def __update_terms_dropdown(app: dash.Dash, genes: list, drugs: list) -> None: + @app.callback( + Output("terms-dropdown", "options"), + Input("search-mode", "value"), + ) + def update(search_mode: str) -> list: + if search_mode == "genes": + return genes + if search_mode == "drugs": + return drugs + return None + + def __update_selected_node(app: dash.Dash) -> None: @app.callback( Output("selected-node", "data"), - [Input("network-graph", "click_data"), Input("gene-dropdown", "value")], + [Input("plotly-figure", "clickData"), Input("terms-dropdown", "value")], ) - def update(click_data: dict | None, new_gene: list | None) -> str | dict: - if ctx.triggered_id == "gene-dropdown": + def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 + if ctx.triggered_id == "terms-dropdown": return "" - if click_data is not None and "points" in click_data: - selected_node = click_data["points"][0] + if clickData is not None and "points" in clickData: + selected_node = clickData["points"][0] if "text" not in selected_node: return dash.no_update return selected_node return dash.no_update -def __update_selected_node_display(app: dash.Dash) -> None: +def __update_selected_node_text(app: dash.Dash) -> None: @app.callback( Output("selected-node-text", "children"), Input("selected-node", "data") ) @@ -135,9 +171,12 @@ def update(selected_node: str | dict) -> str: return "No Node Selected" -def __update_neighbor_dropdown(app: dash.Dash) -> None: +def __update_neighbors_dropdown(app: dash.Dash) -> None: @app.callback( - [Output("neighbor-dropdown", "options"), Output("neighbor-dropdown", "value")], + [ + Output("neighbors-dropdown", "options"), + Output("neighbors-dropdown", "value"), + ], 
Input("selected-node", "data"), ) def update(selected_node: str | dict) -> tuple[list, None]: @@ -148,8 +187,8 @@ def update(selected_node: str | dict) -> tuple[list, None]: def __update_edge_info(app: dash.Dash) -> None: @app.callback( - Output("edge-info-text", "children"), - [Input("selected-node", "data"), Input("neighbor-dropdown", "value")], + Output("selected-edge-info", "children"), + [Input("selected-node", "data"), Input("neighbors-dropdown", "value")], State("graph", "data"), ) def update( diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 609d3b6..bb40d07 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -7,11 +7,17 @@ PLOTLY_SEED = 7 -def __initalize_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: +def __initalize_network( + interactions: pd.DataFrame, terms: list, search_mode: str +) -> nx.Graph: interactions_graph = nx.Graph() - graphed_genes = set() + graphed_terms = set() + for index in interactions.index: - graphed_genes.add(interactions["gene"][index]) + if search_mode == "genes": + graphed_terms.add(interactions["gene"][index]) + if search_mode == "drugs": + graphed_terms.add(interactions["drug"][index]) interactions_graph.add_node(interactions["gene"][index], isGene=True) interactions_graph.add_node(interactions["drug"][index], isGene=False) interactions_graph.add_edge( @@ -24,9 +30,13 @@ def __initalize_network(interactions: pd.DataFrame, selected_genes: list) -> nx. 
source=interactions["source"][index], pmid=interactions["pmid"][index], ) - ungraphed_genes = set(selected_genes).difference(graphed_genes) - for gene in ungraphed_genes: - interactions_graph.add_node(gene, isGene=True) + + graphed_terms = set(terms).difference(graphed_terms) + for term in graphed_terms: + if search_mode == "genes": + interactions_graph.add_node(term, isGene=True) + if search_mode == "drugs": + interactions_graph.add_node(term, isGene=False) return interactions_graph @@ -48,14 +58,17 @@ def __add_node_attributes(interactions_graph: nx.Graph) -> None: interactions_graph.nodes[node]["node_size"] = set_size -def create_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: +def create_network( + interactions: pd.DataFrame, terms: list, search_mode: str +) -> nx.Graph: """Create a networkx graph representing interactions between genes and drugs :param interactions: DataFrame containing drug-gene interaction data - :param selected_genes: List containing genes used to query interaction data + :param terms: List containing terms used to query interaction data + :param search_mode: String indicating whether query was gene-focused or drug-focused :return: a networkx graph of drug-gene interactions """ - interactions_graph = __initalize_network(interactions, selected_genes) + interactions_graph = __initalize_network(interactions, terms, search_mode) __add_node_attributes(interactions_graph) return interactions_graph From 7ad0244c6cf95ff645fb5caec1cdeb7180031036 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:20:21 -0400 Subject: [PATCH 02/19] Updated __add_node_attributes() Now changes node_color and sizes based on search_mode Whereas before there only existed single/multi degree drugs, genes now are displayed to be single/multi degree In summary: Genes and Drugs are larger when in their respective search mode Blue and Red nodes represent single-degree genes and drugs 
respectively Cyan and Orange nodes represent multi-degree genes and drugs respectively --- src/dgipy/network_graph.py | 55 +++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index bb40d07..c975b6f 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -40,20 +40,40 @@ def __initalize_network( return interactions_graph -def __add_node_attributes(interactions_graph: nx.Graph) -> None: +def __add_node_attributes(interactions_graph: nx.Graph, search_mode: str) -> None: for node in interactions_graph.nodes: is_gene = interactions_graph.nodes[node]["isGene"] - if is_gene: - set_color = "cyan" - set_size = 10 - else: - degree = interactions_graph.degree[node] - if degree > 1: - set_color = "orange" - set_size = 7 + degree = interactions_graph.degree[node] + if search_mode == "genes": + if is_gene: + if degree > 1: + set_color = "cyan" + set_size = 10 + else: + set_color = "blue" + set_size = 10 + else: + if degree > 1: + set_color = "orange" + set_size = 7 + else: + set_color = "red" + set_size = 7 + if search_mode == "drugs": + if is_gene: + if degree > 1: + set_color = "cyan" + set_size = 7 + else: + set_color = "blue" + set_size = 7 else: - set_color = "red" - set_size = 7 + if degree > 1: + set_color = "orange" + set_size = 10 + else: + set_color = "red" + set_size = 10 interactions_graph.nodes[node]["node_color"] = set_color interactions_graph.nodes[node]["node_size"] = set_size @@ -69,7 +89,7 @@ def create_network( :return: a networkx graph of drug-gene interactions """ interactions_graph = __initalize_network(interactions, terms, search_mode) - __add_node_attributes(interactions_graph) + __add_node_attributes(interactions_graph, search_mode) return interactions_graph @@ -110,7 +130,7 @@ def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: "node_color": [], "node_size": [], "neighbors": [], - "legend_name": "genes", + 
"legend_name": "multi-degree genes", }, "orange": { "node_x": [], @@ -130,6 +150,15 @@ def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: "neighbors": [], "legend_name": "single-degree drugs", }, + "blue": { + "node_x": [], + "node_y": [], + "node_text": [], + "node_color": [], + "node_size": [], + "neighbors": [], + "legend_name": "single-degree genes", + } } for node in graph.nodes(): From 7c224e3f1ba902fab03df7e0b6ebe5cac836d205 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:35:11 -0400 Subject: [PATCH 03/19] Quick ruff error fix Removed trailing whitespace on 2 lines. --- src/dgipy/network_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index c975b6f..d8ac591 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -45,7 +45,7 @@ def __add_node_attributes(interactions_graph: nx.Graph, search_mode: str) -> Non is_gene = interactions_graph.nodes[node]["isGene"] degree = interactions_graph.degree[node] if search_mode == "genes": - if is_gene: + if is_gene: if degree > 1: set_color = "cyan" set_size = 10 @@ -60,7 +60,7 @@ def __add_node_attributes(interactions_graph: nx.Graph, search_mode: str) -> Non set_color = "red" set_size = 7 if search_mode == "drugs": - if is_gene: + if is_gene: if degree > 1: set_color = "cyan" set_size = 7 From b66dc2a9fe43d7a88b3b0a0dc28162cc2f1ba036 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:39:18 -0400 Subject: [PATCH 04/19] (Another) ruff error fix --- src/dgipy/network_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index d8ac591..2ca3b86 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -158,7 +158,7 @@ def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: "node_size": 
[], "neighbors": [], "legend_name": "single-degree genes", - } + }, } for node in graph.nodes(): From 390e905f8a8d9762873f153a361a6966f686194d Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:29:35 -0400 Subject: [PATCH 05/19] Merge from main --- .pre-commit-config.yaml | 5 +- pyproject.toml | 22 +- src/dgipy/dgidb.py | 229 ++------------- src/dgipy/graph_app.py | 260 ++++++++++++++++++ src/dgipy/network_graph.py | 210 ++++++++++++++ src/dgipy/queries/__init__.py | 51 ++++ src/dgipy/queries/get_all_genes.graphql | 8 + .../queries/get_drug_applications.graphql | 10 + src/dgipy/queries/get_drugs.graphql | 31 +++ src/dgipy/queries/get_gene_categories.graphql | 12 + src/dgipy/queries/get_genes.graphql | 16 ++ .../queries/get_interactions_by_drug.graphql | 35 +++ .../queries/get_interactions_by_gene.graphql | 36 +++ src/dgipy/queries/get_sources.graphql | 12 + src/dgipy/vcf.py | 9 +- tests/conftest.py | 1 + tests/test_dgidb.py | 91 +++--- 17 files changed, 785 insertions(+), 253 deletions(-) create mode 100644 src/dgipy/queries/__init__.py create mode 100644 src/dgipy/queries/get_all_genes.graphql create mode 100644 src/dgipy/queries/get_drug_applications.graphql create mode 100644 src/dgipy/queries/get_drugs.graphql create mode 100644 src/dgipy/queries/get_gene_categories.graphql create mode 100644 src/dgipy/queries/get_genes.graphql create mode 100644 src/dgipy/queries/get_interactions_by_drug.graphql create mode 100644 src/dgipy/queries/get_interactions_by_gene.graphql create mode 100644 src/dgipy/queries/get_sources.graphql diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dd19de5..a774ac3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,15 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v1.4.0 + rev: v4.6.0 # pre-commit-hooks version hooks: - id: check-added-large-files - id: detect-private-key - id: trailing-whitespace - id: 
end-of-file-fixer - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.5.0 # ruff version hooks: - id: ruff-format - id: ruff args: [ --fix, --exit-non-zero-on-fix ] +minimum_pre_commit_version: 3.7.1 diff --git a/pyproject.toml b/pyproject.toml index 13b62f5..d8c72b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dynamic = ["version"] [project.optional-dependencies] tests = ["pytest", "pytest-cov", "pytest-benchmark", "requests_mock"] -dev = ["pre-commit", "ruff==0.2.0"] +dev = ["pre-commit>=3.7.1", "ruff==0.5.0"] docs = [ "sphinx==6.1.3", "sphinx-autodoc-typehints==1.22.0", @@ -90,16 +90,22 @@ lint.select = [ "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz "T10", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf + "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf ] lint.fixable = [ @@ -110,16 +116,20 @@ lint.fixable = [ "ANN", 
"B", "C4", + "LOG", "G", "PIE", "PT", "RSE", "SIM", + "PERF", + "FURB", "RUF" ] -# ANN101 - missing-type-self # ANN003 - missing-type-kwargs +# ANN101 - missing-type-self +# ANN102 - missing-type-cls # D203 - one-blank-line-before-class # D205 - blank-line-after-summary # D206 - indent-with-spaces* @@ -134,7 +144,7 @@ lint.fixable = [ # W191 - tab-indentation* # *ignored for compatibility with formatter lint.ignore = [ - "ANN101", "ANN003", + "ANN003", "ANN101", "ANN102", "D203", "D205", "D206", "D213", "D300", "D400", "D415", "E111", "E114", "E117", "E501", "W191", @@ -150,4 +160,8 @@ lint.ignore = [ # D103 - undocumented-public-function # S101 - assert # B011 - assert-false -"tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011", "D100", "D101", "D103"] +# INP001 - implicit-namespace-package +"tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011", "D100", "D101", "D103", "INP001"] + +[tool.ruff.format] +docstring-code-format = true diff --git a/src/dgipy/dgidb.py b/src/dgipy/dgidb.py index 54ab801..b3baf1e 100644 --- a/src/dgipy/dgidb.py +++ b/src/dgipy/dgidb.py @@ -4,9 +4,11 @@ import pandas as pd import requests -from gql import Client, gql +from gql import Client from gql.transport.requests import RequestsHTTPTransport +import dgipy.queries as queries + API_ENDPOINT_URL = os.environ.get("DGIDB_API_URL", "https://dgidb.org/api/graphql") @@ -45,44 +47,9 @@ def get_drug( if antineoplastic is not None: params["antineoplastic"] = antineoplastic - query = gql( - """ - query getDrugs($names: [String!], $immunotherapy: Boolean, $antiNeoplastic: Boolean) { - drugs( - names: $names - immunotherapy: $immunotherapy - antiNeoplastic: $antiNeoplastic - ) { - nodes { - name - conceptId - drugAliases { - alias - } - drugAttributes { - name - value - } - antiNeoplastic - immunotherapy - approved - drugApprovalRatings { - rating - source { - sourceDbName - } - } - drugApplications { - appNo - } - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = 
_get_client(api_url) - result = client.execute(query, variable_values=params) + result = client.execute(queries.get_drugs.query, variable_values=params) if use_pandas is True: return __process_drug(result) @@ -102,29 +69,9 @@ def get_gene( if isinstance(terms, str): terms = [terms] - query = gql( - """ - query getGenes($names: [String!]) { - genes(names: $names) { - nodes { - name - longName - conceptId - geneAliases { - alias - } - geneAttributes { - name - value - } - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - result = client.execute(query, variable_values={"names": terms}) + result = client.execute(queries.get_genes.query, variable_values={"names": terms}) if use_pandas is True: return __process_gene(result) @@ -174,86 +121,9 @@ def get_interactions( params["approved"] = approved if search == "genes": - query = gql( - """ - query getInteractionsByGene($names: [String!], $sourceDbName: String, $pmid: Int, $interactionType: String) { - genes( - names: $names - sourceDbName: $sourceDbName - pmid: $pmid - interactionType: $interactionType - ) { - nodes { - name - longName - geneCategories { - name - } - interactions { - interactionAttributes { - name - value - } - drug { - name - approved - } - interactionScore - interactionClaims { - publications { - citation - pmid - } - source { - sourceDbName - } - } - } - } - } - } - """ - ) + query = queries.get_interactions_by_gene.query elif search == "drugs": - query = gql( - """ - query getInteractionsByDrug($names: [String!], $immunotherapy: Boolean, $antineoplastic: Boolean, $sourceDbName: String, $pmid: Int, $interactionType: String, $approved: Boolean) { - drugs( - names: $names - immunotherapy: $immunotherapy - antiNeoplastic: $antineoplastic - sourceDbName: $sourceDbName - pmid: $pmid - interactionType: $interactionType - approved: $approved - ) { - nodes { - name - approved - interactions { - interactionAttributes { - name - value - } - gene { - name - } - 
interactionScore - interactionClaims { - publications { - citation - pmid - } - source { - sourceDbName - } - } - } - } - } - } - """ - ) + query = queries.get_interactions_by_drug.query else: msg = "Search type must be specified using: search='drugs' or search='genes'" raise Exception(msg) @@ -282,25 +152,11 @@ def get_categories( if isinstance(terms, str): terms = [terms] - query = gql( - """ - query getGeneCategories($names: [String!]) { - genes(names: $names) { - nodes { - name - longName - geneCategoriesWithSources { - name - sourceNames - } - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - result = client.execute(query, variable_values={"names": terms}) + result = client.execute( + queries.get_gene_categories.query, variable_values={"names": terms} + ) if use_pandas is True: return __process_gene_categories(result) @@ -319,26 +175,10 @@ def get_source(search: str = "all", api_url: str | None = None) -> dict: msg = "Type must be a valid source type: drug, gene, interaction, potentially_druggable" raise Exception(msg) - query = gql( - """ - query getSources($sourceType: SourceTypeFilter) { - sources(sourceType: $sourceType) { - nodes { - fullName - sourceDbName - sourceDbVersion - drugClaimsCount - geneClaimsCount - interactionClaimsCount - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) params = {} if search.lower() == "all" else {"sourceType": search} - return client.execute(query, variable_values=params) + return client.execute(queries.get_sources.query, variable_values=params) def get_gene_list(api_url: str | None = None) -> list: @@ -347,21 +187,9 @@ def get_gene_list(api_url: str | None = None) -> list: :param api_url: API endpoint for GraphQL request :return: a full list of genes present in dgidb """ - query = gql( - """ - { - genes { - nodes { - name - conceptId - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = 
_get_client(api_url) - result = client.execute(query) + result = client.execute(queries.get_all_genes.query) genes = result["genes"]["nodes"] genes.sort(key=lambda i: i["name"]) return genes @@ -406,23 +234,11 @@ def get_drug_applications( if isinstance(terms, str): terms = [terms] - query = gql( - """ - query getDrugApplications($names: [String!]) { - drugs(names: $names) { - nodes { - name - drugApplications { - appNo - } - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - result = client.execute(query, variable_values={"names": terms}) + result = client.execute( + queries.get_drug_applications.query, variable_values={"names": terms} + ) if use_pandas is True: data = __process_drug_applications(result) @@ -528,9 +344,10 @@ def __process_gene_search(results: dict) -> pd.DataFrame: approval_list.append(str(interaction["drug"]["approved"])) interactionscore_list.append(interaction["interactionScore"]) - list_string = [] - for attribute in interaction["interactionAttributes"]: - list_string.append(f"{attribute['name']}: {attribute['value']}") + list_string = [ + f"{attribute['name']}: {attribute['value']}" + for attribute in interaction["interactionAttributes"] + ] interactionattributes_list.append(" | ".join(list_string)) list_string = [] @@ -599,10 +416,10 @@ def __process_drug_search(results: dict) -> pd.DataFrame: interactionscore_list.append(interaction["interactionScore"]) approval_list.append(current_approval) - list_string = [] - for attribute in interaction["interactionAttributes"]: - list_string.append(f"{attribute['name']}: {attribute['value']}") - + list_string = [ + f"{attribute['name']}: {attribute['value']}" + for attribute in interaction["interactionAttributes"] + ] interactionattributes_list.append("| ".join(list_string)) list_string = [] diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 6887695..23c6a30 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -1,3 +1,4 
@@ +<<<<<<< HEAD """Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" import dash_bootstrap_components as dbc @@ -255,3 +256,262 @@ def __get_node_data_from_id(nodes: list, node_id: str) -> dict | None: if node["id"] == node_id: return node return None +======= +"""Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" + +import dash_bootstrap_components as dbc +from dash import Input, Output, State, ctx, dash, dcc, html + +from dgipy import dgidb +from dgipy import network_graph as ng + + +def generate_app() -> dash.Dash: + """Initialize a Dash application object with a layout designed for visualizing: drug-gene interactions, options for user interactivity, and other visual elements. + + :return: a python dash app that can be run with run_server() + """ + genes = [ + {"label": gene["name"], "value": gene["name"]} for gene in dgidb.get_gene_list() + ] + drugs = [ + {"label": drug["name"], "value": drug["name"]} for drug in dgidb.get_drug_list() + ] + + app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) + + __set_app_layout(app) + __update_plotly(app) + __update_terms_dropdown(app, genes, drugs) + __update_selected_node(app) + __update_selected_node_text(app) + __update_neighbors_dropdown(app) + __update_edge_info(app) + + return app + + +def __set_app_layout(app: dash.Dash) -> None: + plotly_figure = dcc.Graph( + id="plotly-figure", style={"width": "100%", "height": "800px"} + ) + + search_mode = dcc.RadioItems( + id="search-mode", + options=[ + {"label": "Gene Search", "value": "genes"}, + {"label": "Drug Search", "value": "drugs"}, + ], + value="genes", + ) + + terms_dropdown = dcc.Dropdown( + id="terms-dropdown", optionHeight=75, multi=True, value=[] + ) + + selected_node_text = dcc.Markdown( + id="selected-node-text", children="No Node Selected" + ) + + neighbors_dropdown = dcc.Dropdown(id="neighbors-dropdown", multi=False) + + 
selected_edge_info = dcc.Markdown( + id="selected-edge-info", children="No Edge Selected" + ) + + app.layout = html.Div( + [ + # Variables + dcc.Store(id="selected-node", data=""), + dcc.Store(id="graph"), + # Layout + dbc.Row( + [ + dbc.Col( + dbc.Card(plotly_figure, body=True, style={"margin": "10px"}), + width=8, + ), + dbc.Col( + [ + dbc.Card( + [ + dbc.CardHeader("Search Mode"), + dbc.CardBody(search_mode), + ], + style={"margin": "10px"}, + ), + dbc.Card( + [ + dbc.CardHeader("Terms Dropdown"), + dbc.CardBody(terms_dropdown), + ], + style={"margin": "10px"}, + ), + dbc.Card( + [ + dbc.CardHeader("Neighbors Dropdown"), + dbc.CardBody(neighbors_dropdown), + ], + style={"margin": "10px"}, + ), + dbc.Card( + dbc.CardBody( + [ + html.H4("Selected Node/Edge:"), + html.P(selected_node_text), + html.H4("Selected Edge Info:"), + html.P(selected_edge_info), + ] + ), + style={"margin": "10px"}, + ), + ], + width=4, + ), + ] + ), + ] + ) + + +def __update_plotly(app: dash.Dash) -> None: + @app.callback( + [Output("graph", "data"), Output("plotly-figure", "figure")], + Input("terms-dropdown", "value"), + State("search-mode", "value"), + ) + def update( + terms: list | None, search_mode: str + ) -> tuple[dict | None, ng.go.Figure]: + if len(terms) != 0: + interactions = dgidb.get_interactions(terms, search_mode) + network_graph = ng.create_network(interactions, terms, search_mode) + plotly_figure = ng.generate_plotly(network_graph) + return ng.generate_json(network_graph), plotly_figure + return None, ng.generate_plotly(None) + + +def __update_terms_dropdown(app: dash.Dash, genes: list, drugs: list) -> None: + @app.callback( + Output("terms-dropdown", "options"), + Input("search-mode", "value"), + ) + def update(search_mode: str) -> list: + if search_mode == "genes": + return genes + if search_mode == "drugs": + return drugs + return None + + +def __update_selected_node(app: dash.Dash) -> None: + @app.callback( + Output("selected-node", "data"), + [Input("plotly-figure", 
"clickData"), Input("terms-dropdown", "value")], + ) + def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 # noqa: ARG001 + if ctx.triggered_id == "terms-dropdown": + return "" + if clickData is not None and "points" in clickData: + selected_node = clickData["points"][0] + if "text" not in selected_node: + return dash.no_update + return selected_node + return dash.no_update + + +def __update_selected_node_text(app: dash.Dash) -> None: + @app.callback( + Output("selected-node-text", "children"), Input("selected-node", "data") + ) + def update(selected_node: str | dict) -> str: + if selected_node != "": + return selected_node["text"] + return "No Node Selected" + + +def __update_neighbors_dropdown(app: dash.Dash) -> None: + @app.callback( + [ + Output("neighbors-dropdown", "options"), + Output("neighbors-dropdown", "value"), + ], + Input("selected-node", "data"), + ) + def update(selected_node: str | dict) -> tuple[list, None]: + if selected_node != "" and selected_node["curveNumber"] != 1: + return selected_node["customdata"], None + return [], None + + +def __update_edge_info(app: dash.Dash) -> None: + @app.callback( + Output("selected-edge-info", "children"), + [Input("selected-node", "data"), Input("neighbors-dropdown", "value")], + State("graph", "data"), + ) + def update( + selected_node: str | dict, + selected_neighbor: str | None, + graph: dict | None, + ) -> str: + if selected_node == "": + return "No Edge Selected" + if selected_node["curveNumber"] == 1: + selected_data = __get_node_data_from_id( + graph["links"], selected_node["text"] + ) + return ( + "ID: " + + str(selected_data["id"]) + + "\n\nApproval: " + + str(selected_data["approval"]) + + "\n\nScore: " + + str(selected_data["score"]) + + "\n\nAttributes: " + + str(selected_data["attributes"]) + + "\n\nSource: " + + str(selected_data["source"]) + + "\n\nPmid: " + + str(selected_data["pmid"]) + ) + if selected_neighbor is not None: + edge_node_id = None + 
selected_node_is_gene = __get_node_data_from_id( + graph["nodes"], selected_node["text"] + )["isGene"] + selected_neighbor_is_gene = __get_node_data_from_id( + graph["nodes"], selected_neighbor + )["isGene"] + if selected_node_is_gene == selected_neighbor_is_gene: + return dash.no_update + if selected_node_is_gene: + edge_node_id = selected_node["text"] + " - " + selected_neighbor + elif selected_neighbor_is_gene: + edge_node_id = selected_neighbor + " - " + selected_node["text"] + selected_data = __get_node_data_from_id(graph["links"], edge_node_id) + if selected_data is None: + return dash.no_update + return ( + "ID: " + + str(selected_data["id"]) + + "\n\nApproval: " + + str(selected_data["approval"]) + + "\n\nScore: " + + str(selected_data["score"]) + + "\n\nAttributes: " + + str(selected_data["attributes"]) + + "\n\nSource: " + + str(selected_data["source"]) + + "\n\nPmid: " + + str(selected_data["pmid"]) + ) + return "No Edge Selected" + + +def __get_node_data_from_id(nodes: list, node_id: str) -> dict | None: + for node in nodes: + if node["id"] == node_id: + return node + return None +>>>>>>> origin/main diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 2ca3b86..51afb47 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -1,3 +1,4 @@ +<<<<<<< HEAD """Provides functionality to create networkx graphs and pltoly figures for network visualization""" import networkx as nx @@ -247,3 +248,212 @@ def generate_json(graph: nx.Graph) -> dict: :return: a dictionary representing the JSON data of the graph """ return nx.node_link_data(graph) +======= +"""Provides functionality to create networkx graphs and pltoly figures for network visualization""" + +import networkx as nx +import pandas as pd +import plotly.graph_objects as go + +PLOTLY_SEED = 7 + + +def __initalize_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: + interactions_graph = nx.Graph() + graphed_genes = set() + for index in 
interactions.index: + graphed_genes.add(interactions["gene"][index]) + interactions_graph.add_node(interactions["gene"][index], isGene=True) + interactions_graph.add_node(interactions["drug"][index], isGene=False) + interactions_graph.add_edge( + interactions["gene"][index], + interactions["drug"][index], + id=interactions["gene"][index] + " - " + interactions["drug"][index], + approval=interactions["approval"][index], + score=interactions["score"][index], + attributes=interactions["interaction_attributes"][index], + source=interactions["source"][index], + pmid=interactions["pmid"][index], + ) + ungraphed_genes = set(selected_genes).difference(graphed_genes) + for gene in ungraphed_genes: + interactions_graph.add_node(gene, isGene=True) + return interactions_graph + + +def __add_node_attributes(interactions_graph: nx.Graph) -> None: + for node in interactions_graph.nodes: + is_gene = interactions_graph.nodes[node]["isGene"] + if is_gene: + set_color = "cyan" + set_size = 10 + else: + degree = interactions_graph.degree[node] + if degree > 1: + set_color = "orange" + set_size = 7 + else: + set_color = "red" + set_size = 7 + interactions_graph.nodes[node]["node_color"] = set_color + interactions_graph.nodes[node]["node_size"] = set_size + + +def create_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: + """Create a networkx graph representing interactions between genes and drugs + + :param interactions: DataFrame containing drug-gene interaction data + :param selected_genes: List containing genes used to query interaction data + :return: a networkx graph of drug-gene interactions + """ + interactions_graph = __initalize_network(interactions, selected_genes) + __add_node_attributes(interactions_graph) + return interactions_graph + + +def generate_plotly(graph: nx.Graph) -> go.Figure: + """Create a plotly graph representing interactions between genes and drugs + + :param graph: networkx graph to be formatted as a plotly graph + :return: a plotly 
graph of drug-gene interactions + """ + layout = go.Layout( + hovermode="closest", + xaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, + yaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, + showlegend=True, + ) + fig = go.Figure(layout=layout) + + if graph is not None: + pos = nx.spring_layout(graph, seed=PLOTLY_SEED) + + trace_nodes = __create_trace_nodes(graph, pos) + trace_edges = __create_trace_edges(graph, pos) + + fig.add_trace(trace_edges[0]) + fig.add_trace(trace_edges[1]) + for trace_group in trace_nodes: + fig.add_trace(trace_group) + + return fig + + +def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: + nodes_by_group = { + "cyan": { + "node_x": [], + "node_y": [], + "node_text": [], + "node_color": [], + "node_size": [], + "neighbors": [], + "legend_name": "genes", + }, + "orange": { + "node_x": [], + "node_y": [], + "node_text": [], + "node_color": [], + "node_size": [], + "neighbors": [], + "legend_name": "multi-degree drugs", + }, + "red": { + "node_x": [], + "node_y": [], + "node_text": [], + "node_color": [], + "node_size": [], + "neighbors": [], + "legend_name": "single-degree drugs", + }, + } + + for node in graph.nodes(): + node_color = graph.nodes[node]["node_color"] + node_size = graph.nodes[node]["node_size"] + x, y = pos[node] + nodes_by_group[node_color]["node_x"].append(x) + nodes_by_group[node_color]["node_y"].append(y) + nodes_by_group[node_color]["node_text"].append(str(node)) + nodes_by_group[node_color]["node_color"].append(node_color) + nodes_by_group[node_color]["node_size"].append(node_size) + nodes_by_group[node_color]["neighbors"].append(list(graph.neighbors(node))) + + trace_nodes = [] + + for node in nodes_by_group.values(): + trace_group = go.Scatter( + x=node["node_x"], + y=node["node_y"], + mode="markers", + marker={ + "symbol": "circle", + "size": node["node_size"], + "color": node["node_color"], + }, + text=node["node_text"], + name=node["legend_name"], + 
customdata=node["neighbors"], + hoverinfo="text", + visible=True, + showlegend=True, + ) + trace_nodes.append(trace_group) + + return trace_nodes + + +def __create_trace_edges(graph: nx.Graph, pos: dict) -> go.Scatter: + edge_x = [] + edge_y = [] + + i_edge_x = [] + i_edge_y = [] + i_edge_id = [] + + for edge in graph.edges(): + x0, y0 = pos[edge[0]] + x1, y1 = pos[edge[1]] + edge_x.append(x0) + edge_x.append(x1) + edge_x.append(None) + edge_y.append(y0) + edge_y.append(y1) + edge_y.append(None) + + i_edge_x.append((x0 + x1) / 2) + i_edge_y.append((y0 + y1) / 2) + i_edge_id.append(graph.edges[edge]["id"]) + + trace_edges = go.Scatter( + x=edge_x, + y=edge_y, + mode="lines", + line={"width": 0.5, "color": "gray"}, + hoverinfo="none", + showlegend=False, + ) + + i_trace_edges = go.Scatter( + x=i_edge_x, + y=i_edge_y, + mode="markers", + marker_size=0.5, + text=i_edge_id, + hoverinfo="text", + showlegend=False, + ) + + return trace_edges, i_trace_edges + + +def generate_json(graph: nx.Graph) -> dict: + """Generate a JSON representation of a networkx graph + + :param graph: networkx graph to be formatted as a JSON + :return: a dictionary representing the JSON data of the graph + """ + return nx.node_link_data(graph) +>>>>>>> origin/main diff --git a/src/dgipy/queries/__init__.py b/src/dgipy/queries/__init__.py new file mode 100644 index 0000000..3a84550 --- /dev/null +++ b/src/dgipy/queries/__init__.py @@ -0,0 +1,51 @@ +"""Provide queries and lazy accessors to queries. 
+ + +Individual query loader classes provide ``gql`` queries via a ``.query`` property: + +>>> from dgipy.queries import get_drugs +>>> get_drugs.query +DocumentNode at 0:545 +""" + +from importlib import resources + +from gql import gql +from graphql import DocumentNode + + +class _LazyQueryLoader: + """Provide lazy loading functionality for query access.""" + + def __init__(self, query_name: str) -> None: + self.query_name = query_name + self._query = None + + @property + def query(self) -> DocumentNode: + if self._query is None: + with resources.open_text(__name__, f"{self.query_name}.graphql") as f: + self._query = gql(f.read()) + return self._query + + +get_all_genes = _LazyQueryLoader("get_all_genes") +get_drug_applications = _LazyQueryLoader("get_drug_applications") +get_drugs = _LazyQueryLoader("get_drugs") +get_gene_categories = _LazyQueryLoader("get_gene_categories") +get_genes = _LazyQueryLoader("get_genes") +get_interactions_by_drug = _LazyQueryLoader("get_interactions_by_drug") +get_interactions_by_gene = _LazyQueryLoader("get_interactions_by_gene") +get_sources = _LazyQueryLoader("get_sources") + + +__all__ = [ + "get_all_genes", + "get_drug_applications", + "get_drugs", + "get_gene_categories", + "get_genes", + "get_interactions_by_drug", + "get_interactions_by_gene", + "get_sources", +] diff --git a/src/dgipy/queries/get_all_genes.graphql b/src/dgipy/queries/get_all_genes.graphql new file mode 100644 index 0000000..134f521 --- /dev/null +++ b/src/dgipy/queries/get_all_genes.graphql @@ -0,0 +1,8 @@ +{ + genes { + nodes { + name + conceptId + } + } +} diff --git a/src/dgipy/queries/get_drug_applications.graphql b/src/dgipy/queries/get_drug_applications.graphql new file mode 100644 index 0000000..bf450e6 --- /dev/null +++ b/src/dgipy/queries/get_drug_applications.graphql @@ -0,0 +1,10 @@ +query getDrugApplications($names: [String!]) { + drugs(names: $names) { + nodes { + name + drugApplications { + appNo + } + } + } +} diff --git 
a/src/dgipy/queries/get_drugs.graphql b/src/dgipy/queries/get_drugs.graphql new file mode 100644 index 0000000..7cb5475 --- /dev/null +++ b/src/dgipy/queries/get_drugs.graphql @@ -0,0 +1,31 @@ +query getDrugs($names: [String!], $immunotherapy: Boolean, $antiNeoplastic: Boolean) { + drugs( + names: $names + immunotherapy: $immunotherapy + antiNeoplastic: $antiNeoplastic + ) { + nodes { + name + conceptId + drugAliases { + alias + } + drugAttributes { + name + value + } + antiNeoplastic + immunotherapy + approved + drugApprovalRatings { + rating + source { + sourceDbName + } + } + drugApplications { + appNo + } + } + } +} diff --git a/src/dgipy/queries/get_gene_categories.graphql b/src/dgipy/queries/get_gene_categories.graphql new file mode 100644 index 0000000..014dc6f --- /dev/null +++ b/src/dgipy/queries/get_gene_categories.graphql @@ -0,0 +1,12 @@ +query getGeneCategories($names: [String!]) { + genes(names: $names) { + nodes { + name + longName + geneCategoriesWithSources { + name + sourceNames + } + } + } +} diff --git a/src/dgipy/queries/get_genes.graphql b/src/dgipy/queries/get_genes.graphql new file mode 100644 index 0000000..49c0f2c --- /dev/null +++ b/src/dgipy/queries/get_genes.graphql @@ -0,0 +1,16 @@ +query getGenes($names: [String!]) { + genes(names: $names) { + nodes { + name + longName + conceptId + geneAliases { + alias + } + geneAttributes { + name + value + } + } + } +} diff --git a/src/dgipy/queries/get_interactions_by_drug.graphql b/src/dgipy/queries/get_interactions_by_drug.graphql new file mode 100644 index 0000000..d387f95 --- /dev/null +++ b/src/dgipy/queries/get_interactions_by_drug.graphql @@ -0,0 +1,35 @@ +query getInteractionsByDrug($names: [String!], $immunotherapy: Boolean, $antineoplastic: Boolean, $sourceDbName: String, $pmid: Int, $interactionType: String, $approved: Boolean) { + drugs( + names: $names + immunotherapy: $immunotherapy + antiNeoplastic: $antineoplastic + sourceDbName: $sourceDbName + pmid: $pmid + interactionType: 
$interactionType + approved: $approved + ) { + nodes { + name + approved + interactions { + interactionAttributes { + name + value + } + gene { + name + } + interactionScore + interactionClaims { + publications { + citation + pmid + } + source { + sourceDbName + } + } + } + } + } +} diff --git a/src/dgipy/queries/get_interactions_by_gene.graphql b/src/dgipy/queries/get_interactions_by_gene.graphql new file mode 100644 index 0000000..9008150 --- /dev/null +++ b/src/dgipy/queries/get_interactions_by_gene.graphql @@ -0,0 +1,36 @@ +query getInteractionsByGene($names: [String!], $sourceDbName: String, $pmid: Int, $interactionType: String) { + genes( + names: $names + sourceDbName: $sourceDbName + pmid: $pmid + interactionType: $interactionType + ) { + nodes { + name + longName + geneCategories { + name + } + interactions { + interactionAttributes { + name + value + } + drug { + name + approved + } + interactionScore + interactionClaims { + publications { + citation + pmid + } + source { + sourceDbName + } + } + } + } + } +} diff --git a/src/dgipy/queries/get_sources.graphql b/src/dgipy/queries/get_sources.graphql new file mode 100644 index 0000000..ae962dc --- /dev/null +++ b/src/dgipy/queries/get_sources.graphql @@ -0,0 +1,12 @@ +query getSources($sourceType: SourceTypeFilter) { + sources(sourceType: $sourceType) { + nodes { + fullName + sourceDbName + sourceDbVersion + drugClaimsCount + geneClaimsCount + interactionClaimsCount + } + } +} diff --git a/src/dgipy/vcf.py b/src/dgipy/vcf.py index 289516e..2dabc0b 100644 --- a/src/dgipy/vcf.py +++ b/src/dgipy/vcf.py @@ -18,8 +18,9 @@ class GeneResult: """A gene result from original VCF .. 
code-block:: python import vcf + # Provide path to VCF file and specify chromosome - data = vcf.annotate('link/to/file',chr='chr#') + data = vcf.annotate("link/to/file", chr="chr#") """ def __init__(self, data: list) -> None: @@ -63,11 +64,7 @@ def annotate(filepath: Path, contig: str) -> pd.DataFrame: # Group records with like-genes grouped = _group_by_name(mapped) # Instance each gene set as a class - vcf_results = [] - for gene in grouped: - vcf_results.append(GeneResult(grouped[gene])) - - return vcf_results + return [GeneResult(grouped[gene]) for gene in grouped] def _process_vcf(filepath: Path, contig: str) -> list: diff --git a/tests/conftest.py b/tests/conftest.py index 26cac60..62f5137 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ """Provide basic test configuration and fixture root.""" + from io import TextIOWrapper from pathlib import Path diff --git a/tests/test_dgidb.py b/tests/test_dgidb.py index e1d6ac5..ee8eddf 100644 --- a/tests/test_dgidb.py +++ b/tests/test_dgidb.py @@ -18,11 +18,13 @@ def test_get_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_drug_api_response.json" - ).open() as json_response, ( - fixtures_dir / "get_drug_filtered_api_response.json" - ).open() as filtered_json_response: + with ( + requests_mock.Mocker() as m, + (fixtures_dir / "get_drug_api_response.json").open() as json_response, + ( + fixtures_dir / "get_drug_filtered_api_response.json" + ).open() as filtered_json_response, + ): set_up_graphql_mock(m, json_response) results = get_drug(["Imatinib"]) @@ -54,9 +56,10 @@ def test_get_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): def test_get_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_gene_api_response.json" - ).open() as json_response: + with ( + requests_mock.Mocker() as m, + (fixtures_dir / "get_gene_api_response.json").open() as json_response, + 
): set_up_graphql_mock(m, json_response) results = get_gene(["ereg"]) @@ -75,11 +78,15 @@ def test_get_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): def test_get_interactions_by_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_interactions_by_genes_response.json" - ).open() as genes_response, ( - fixtures_dir / "get_interactions_by_multiple_genes_response.json" - ).open() as multiple_genes_response: + with ( + requests_mock.Mocker() as m, + ( + fixtures_dir / "get_interactions_by_genes_response.json" + ).open() as genes_response, + ( + fixtures_dir / "get_interactions_by_multiple_genes_response.json" + ).open() as multiple_genes_response, + ): set_up_graphql_mock(m, genes_response) results = get_interactions(["ereg"]) assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" @@ -102,11 +109,15 @@ def test_get_interactions_by_genes(fixtures_dir: Path, set_up_graphql_mock: Call def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_interactions_by_drugs_response.json" - ).open() as drugs_response, ( - fixtures_dir / "get_interactions_by_multiple_drugs_response.json" - ).open() as multiple_drugs_response: + with ( + requests_mock.Mocker() as m, + ( + fixtures_dir / "get_interactions_by_drugs_response.json" + ).open() as drugs_response, + ( + fixtures_dir / "get_interactions_by_multiple_drugs_response.json" + ).open() as multiple_drugs_response, + ): set_up_graphql_mock(m, drugs_response) results = get_interactions(["sunitinib"], search="drugs") assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" @@ -131,9 +142,10 @@ def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Call def test_get_categories(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_categories_response.json" - 
).open() as categories_response: + with ( + requests_mock.Mocker() as m, + (fixtures_dir / "get_categories_response.json").open() as categories_response, + ): set_up_graphql_mock(m, categories_response) results = get_categories("BRAF") assert len(results), "Results are non-empty" @@ -143,11 +155,13 @@ def test_get_categories(fixtures_dir: Path, set_up_graphql_mock: Callable): def test_get_sources(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_sources_response.json" - ).open() as sources_response, ( - fixtures_dir / "get_sources_filtered_response.json" - ).open() as filtered_sources_response: + with ( + requests_mock.Mocker() as m, + (fixtures_dir / "get_sources_response.json").open() as sources_response, + ( + fixtures_dir / "get_sources_filtered_response.json" + ).open() as filtered_sources_response, + ): set_up_graphql_mock(m, sources_response) results = get_source() sources = results["sources"]["nodes"] @@ -165,10 +179,13 @@ def test_get_sources(fixtures_dir: Path, set_up_graphql_mock: Callable): def test_get_gene_list(fixtures_dir: Path, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir - / "get_gene_list_response.json" # this fixture is truncated from the real response - ).open() as gene_list_response: + with ( + requests_mock.Mocker() as m, + ( + fixtures_dir + / "get_gene_list_response.json" # this fixture is truncated from the real response + ).open() as gene_list_response, + ): set_up_graphql_mock(m, gene_list_response) results = get_gene_list() @@ -176,11 +193,15 @@ def test_get_gene_list(fixtures_dir: Path, set_up_graphql_mock: Callable): def test_get_drug_applications(fixtures_dir, set_up_graphql_mock: Callable): - with requests_mock.Mocker() as m, ( - fixtures_dir / "get_drug_applications_response.json" - ).open() as drug_applications_response, ( - fixtures_dir / "get_drug_applications_drugsatfda_response.json" - ).open() as drugsatfda_response: + with ( 
+ requests_mock.Mocker() as m, + ( + fixtures_dir / "get_drug_applications_response.json" + ).open() as drug_applications_response, + ( + fixtures_dir / "get_drug_applications_drugsatfda_response.json" + ).open() as drugsatfda_response, + ): set_up_graphql_mock(m, drug_applications_response) m.get( "https://api.fda.gov/drug/drugsfda.json?search=openfda.application_number:%22NDA212099%22", From 64776a546e823d8d4f183962a1aeb19cdb4417f7 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:56:29 -0400 Subject: [PATCH 06/19] Fix merge from main --- src/dgipy/dgidb.py | 14 +- src/dgipy/graph_app.py | 262 +----------------------- src/dgipy/network_graph.py | 212 +------------------ src/dgipy/queries/__init__.py | 2 + src/dgipy/queries/get_all_drugs.graphql | 8 + 5 files changed, 13 insertions(+), 485 deletions(-) create mode 100644 src/dgipy/queries/get_all_drugs.graphql diff --git a/src/dgipy/dgidb.py b/src/dgipy/dgidb.py index b3baf1e..f2bfd50 100644 --- a/src/dgipy/dgidb.py +++ b/src/dgipy/dgidb.py @@ -201,21 +201,9 @@ def get_drug_list(api_url: str | None = None) -> list: :param api_url: API endpoint for GraphQL request :return: a full list of drugs present in dgidb """ - query = gql( - """ - { - drugs { - nodes { - name - conceptId - } - } - } - """ - ) api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - result = client.execute(query) + result = client.execute(queries.get_all_drugs.query) drugs = result["drugs"]["nodes"] drugs.sort(key=lambda i: i["name"]) return drugs diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 23c6a30..8738791 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -1,4 +1,3 @@ -<<<<<<< HEAD """Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" import dash_bootstrap_components as dbc @@ -151,7 +150,7 @@ def __update_selected_node(app: dash.Dash) -> None: 
Output("selected-node", "data"), [Input("plotly-figure", "clickData"), Input("terms-dropdown", "value")], ) - def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 + def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803, ARG001 if ctx.triggered_id == "terms-dropdown": return "" if clickData is not None and "points" in clickData: @@ -256,262 +255,3 @@ def __get_node_data_from_id(nodes: list, node_id: str) -> dict | None: if node["id"] == node_id: return node return None -======= -"""Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" - -import dash_bootstrap_components as dbc -from dash import Input, Output, State, ctx, dash, dcc, html - -from dgipy import dgidb -from dgipy import network_graph as ng - - -def generate_app() -> dash.Dash: - """Initialize a Dash application object with a layout designed for visualizing: drug-gene interactions, options for user interactivity, and other visual elements. 
- - :return: a python dash app that can be run with run_server() - """ - genes = [ - {"label": gene["name"], "value": gene["name"]} for gene in dgidb.get_gene_list() - ] - drugs = [ - {"label": drug["name"], "value": drug["name"]} for drug in dgidb.get_drug_list() - ] - - app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) - - __set_app_layout(app) - __update_plotly(app) - __update_terms_dropdown(app, genes, drugs) - __update_selected_node(app) - __update_selected_node_text(app) - __update_neighbors_dropdown(app) - __update_edge_info(app) - - return app - - -def __set_app_layout(app: dash.Dash) -> None: - plotly_figure = dcc.Graph( - id="plotly-figure", style={"width": "100%", "height": "800px"} - ) - - search_mode = dcc.RadioItems( - id="search-mode", - options=[ - {"label": "Gene Search", "value": "genes"}, - {"label": "Drug Search", "value": "drugs"}, - ], - value="genes", - ) - - terms_dropdown = dcc.Dropdown( - id="terms-dropdown", optionHeight=75, multi=True, value=[] - ) - - selected_node_text = dcc.Markdown( - id="selected-node-text", children="No Node Selected" - ) - - neighbors_dropdown = dcc.Dropdown(id="neighbors-dropdown", multi=False) - - selected_edge_info = dcc.Markdown( - id="selected-edge-info", children="No Edge Selected" - ) - - app.layout = html.Div( - [ - # Variables - dcc.Store(id="selected-node", data=""), - dcc.Store(id="graph"), - # Layout - dbc.Row( - [ - dbc.Col( - dbc.Card(plotly_figure, body=True, style={"margin": "10px"}), - width=8, - ), - dbc.Col( - [ - dbc.Card( - [ - dbc.CardHeader("Search Mode"), - dbc.CardBody(search_mode), - ], - style={"margin": "10px"}, - ), - dbc.Card( - [ - dbc.CardHeader("Terms Dropdown"), - dbc.CardBody(terms_dropdown), - ], - style={"margin": "10px"}, - ), - dbc.Card( - [ - dbc.CardHeader("Neighbors Dropdown"), - dbc.CardBody(neighbors_dropdown), - ], - style={"margin": "10px"}, - ), - dbc.Card( - dbc.CardBody( - [ - html.H4("Selected Node/Edge:"), - html.P(selected_node_text), - 
html.H4("Selected Edge Info:"), - html.P(selected_edge_info), - ] - ), - style={"margin": "10px"}, - ), - ], - width=4, - ), - ] - ), - ] - ) - - -def __update_plotly(app: dash.Dash) -> None: - @app.callback( - [Output("graph", "data"), Output("plotly-figure", "figure")], - Input("terms-dropdown", "value"), - State("search-mode", "value"), - ) - def update( - terms: list | None, search_mode: str - ) -> tuple[dict | None, ng.go.Figure]: - if len(terms) != 0: - interactions = dgidb.get_interactions(terms, search_mode) - network_graph = ng.create_network(interactions, terms, search_mode) - plotly_figure = ng.generate_plotly(network_graph) - return ng.generate_json(network_graph), plotly_figure - return None, ng.generate_plotly(None) - - -def __update_terms_dropdown(app: dash.Dash, genes: list, drugs: list) -> None: - @app.callback( - Output("terms-dropdown", "options"), - Input("search-mode", "value"), - ) - def update(search_mode: str) -> list: - if search_mode == "genes": - return genes - if search_mode == "drugs": - return drugs - return None - - -def __update_selected_node(app: dash.Dash) -> None: - @app.callback( - Output("selected-node", "data"), - [Input("plotly-figure", "clickData"), Input("terms-dropdown", "value")], - ) - def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 # noqa: ARG001 - if ctx.triggered_id == "terms-dropdown": - return "" - if clickData is not None and "points" in clickData: - selected_node = clickData["points"][0] - if "text" not in selected_node: - return dash.no_update - return selected_node - return dash.no_update - - -def __update_selected_node_text(app: dash.Dash) -> None: - @app.callback( - Output("selected-node-text", "children"), Input("selected-node", "data") - ) - def update(selected_node: str | dict) -> str: - if selected_node != "": - return selected_node["text"] - return "No Node Selected" - - -def __update_neighbors_dropdown(app: dash.Dash) -> None: - @app.callback( - [ - 
Output("neighbors-dropdown", "options"), - Output("neighbors-dropdown", "value"), - ], - Input("selected-node", "data"), - ) - def update(selected_node: str | dict) -> tuple[list, None]: - if selected_node != "" and selected_node["curveNumber"] != 1: - return selected_node["customdata"], None - return [], None - - -def __update_edge_info(app: dash.Dash) -> None: - @app.callback( - Output("selected-edge-info", "children"), - [Input("selected-node", "data"), Input("neighbors-dropdown", "value")], - State("graph", "data"), - ) - def update( - selected_node: str | dict, - selected_neighbor: str | None, - graph: dict | None, - ) -> str: - if selected_node == "": - return "No Edge Selected" - if selected_node["curveNumber"] == 1: - selected_data = __get_node_data_from_id( - graph["links"], selected_node["text"] - ) - return ( - "ID: " - + str(selected_data["id"]) - + "\n\nApproval: " - + str(selected_data["approval"]) - + "\n\nScore: " - + str(selected_data["score"]) - + "\n\nAttributes: " - + str(selected_data["attributes"]) - + "\n\nSource: " - + str(selected_data["source"]) - + "\n\nPmid: " - + str(selected_data["pmid"]) - ) - if selected_neighbor is not None: - edge_node_id = None - selected_node_is_gene = __get_node_data_from_id( - graph["nodes"], selected_node["text"] - )["isGene"] - selected_neighbor_is_gene = __get_node_data_from_id( - graph["nodes"], selected_neighbor - )["isGene"] - if selected_node_is_gene == selected_neighbor_is_gene: - return dash.no_update - if selected_node_is_gene: - edge_node_id = selected_node["text"] + " - " + selected_neighbor - elif selected_neighbor_is_gene: - edge_node_id = selected_neighbor + " - " + selected_node["text"] - selected_data = __get_node_data_from_id(graph["links"], edge_node_id) - if selected_data is None: - return dash.no_update - return ( - "ID: " - + str(selected_data["id"]) - + "\n\nApproval: " - + str(selected_data["approval"]) - + "\n\nScore: " - + str(selected_data["score"]) - + "\n\nAttributes: " - + 
str(selected_data["attributes"]) - + "\n\nSource: " - + str(selected_data["source"]) - + "\n\nPmid: " - + str(selected_data["pmid"]) - ) - return "No Edge Selected" - - -def __get_node_data_from_id(nodes: list, node_id: str) -> dict | None: - for node in nodes: - if node["id"] == node_id: - return node - return None ->>>>>>> origin/main diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 51afb47..fab7796 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -1,4 +1,3 @@ -<<<<<<< HEAD """Provides functionality to create networkx graphs and pltoly figures for network visualization""" import networkx as nx @@ -175,7 +174,7 @@ def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: trace_nodes = [] - for _, node in nodes_by_group.items(): + for _, node in nodes_by_group.items(): # noqa: PERF102 trace_group = go.Scatter( x=node["node_x"], y=node["node_y"], @@ -248,212 +247,3 @@ def generate_json(graph: nx.Graph) -> dict: :return: a dictionary representing the JSON data of the graph """ return nx.node_link_data(graph) -======= -"""Provides functionality to create networkx graphs and pltoly figures for network visualization""" - -import networkx as nx -import pandas as pd -import plotly.graph_objects as go - -PLOTLY_SEED = 7 - - -def __initalize_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: - interactions_graph = nx.Graph() - graphed_genes = set() - for index in interactions.index: - graphed_genes.add(interactions["gene"][index]) - interactions_graph.add_node(interactions["gene"][index], isGene=True) - interactions_graph.add_node(interactions["drug"][index], isGene=False) - interactions_graph.add_edge( - interactions["gene"][index], - interactions["drug"][index], - id=interactions["gene"][index] + " - " + interactions["drug"][index], - approval=interactions["approval"][index], - score=interactions["score"][index], - attributes=interactions["interaction_attributes"][index], - 
source=interactions["source"][index], - pmid=interactions["pmid"][index], - ) - ungraphed_genes = set(selected_genes).difference(graphed_genes) - for gene in ungraphed_genes: - interactions_graph.add_node(gene, isGene=True) - return interactions_graph - - -def __add_node_attributes(interactions_graph: nx.Graph) -> None: - for node in interactions_graph.nodes: - is_gene = interactions_graph.nodes[node]["isGene"] - if is_gene: - set_color = "cyan" - set_size = 10 - else: - degree = interactions_graph.degree[node] - if degree > 1: - set_color = "orange" - set_size = 7 - else: - set_color = "red" - set_size = 7 - interactions_graph.nodes[node]["node_color"] = set_color - interactions_graph.nodes[node]["node_size"] = set_size - - -def create_network(interactions: pd.DataFrame, selected_genes: list) -> nx.Graph: - """Create a networkx graph representing interactions between genes and drugs - - :param interactions: DataFrame containing drug-gene interaction data - :param selected_genes: List containing genes used to query interaction data - :return: a networkx graph of drug-gene interactions - """ - interactions_graph = __initalize_network(interactions, selected_genes) - __add_node_attributes(interactions_graph) - return interactions_graph - - -def generate_plotly(graph: nx.Graph) -> go.Figure: - """Create a plotly graph representing interactions between genes and drugs - - :param graph: networkx graph to be formatted as a plotly graph - :return: a plotly graph of drug-gene interactions - """ - layout = go.Layout( - hovermode="closest", - xaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, - yaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, - showlegend=True, - ) - fig = go.Figure(layout=layout) - - if graph is not None: - pos = nx.spring_layout(graph, seed=PLOTLY_SEED) - - trace_nodes = __create_trace_nodes(graph, pos) - trace_edges = __create_trace_edges(graph, pos) - - fig.add_trace(trace_edges[0]) - fig.add_trace(trace_edges[1]) 
- for trace_group in trace_nodes: - fig.add_trace(trace_group) - - return fig - - -def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: - nodes_by_group = { - "cyan": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "genes", - }, - "orange": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "multi-degree drugs", - }, - "red": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "single-degree drugs", - }, - } - - for node in graph.nodes(): - node_color = graph.nodes[node]["node_color"] - node_size = graph.nodes[node]["node_size"] - x, y = pos[node] - nodes_by_group[node_color]["node_x"].append(x) - nodes_by_group[node_color]["node_y"].append(y) - nodes_by_group[node_color]["node_text"].append(str(node)) - nodes_by_group[node_color]["node_color"].append(node_color) - nodes_by_group[node_color]["node_size"].append(node_size) - nodes_by_group[node_color]["neighbors"].append(list(graph.neighbors(node))) - - trace_nodes = [] - - for node in nodes_by_group.values(): - trace_group = go.Scatter( - x=node["node_x"], - y=node["node_y"], - mode="markers", - marker={ - "symbol": "circle", - "size": node["node_size"], - "color": node["node_color"], - }, - text=node["node_text"], - name=node["legend_name"], - customdata=node["neighbors"], - hoverinfo="text", - visible=True, - showlegend=True, - ) - trace_nodes.append(trace_group) - - return trace_nodes - - -def __create_trace_edges(graph: nx.Graph, pos: dict) -> go.Scatter: - edge_x = [] - edge_y = [] - - i_edge_x = [] - i_edge_y = [] - i_edge_id = [] - - for edge in graph.edges(): - x0, y0 = pos[edge[0]] - x1, y1 = pos[edge[1]] - edge_x.append(x0) - edge_x.append(x1) - edge_x.append(None) - edge_y.append(y0) - edge_y.append(y1) - edge_y.append(None) - - i_edge_x.append((x0 + x1) / 2) - 
i_edge_y.append((y0 + y1) / 2) - i_edge_id.append(graph.edges[edge]["id"]) - - trace_edges = go.Scatter( - x=edge_x, - y=edge_y, - mode="lines", - line={"width": 0.5, "color": "gray"}, - hoverinfo="none", - showlegend=False, - ) - - i_trace_edges = go.Scatter( - x=i_edge_x, - y=i_edge_y, - mode="markers", - marker_size=0.5, - text=i_edge_id, - hoverinfo="text", - showlegend=False, - ) - - return trace_edges, i_trace_edges - - -def generate_json(graph: nx.Graph) -> dict: - """Generate a JSON representation of a networkx graph - - :param graph: networkx graph to be formatted as a JSON - :return: a dictionary representing the JSON data of the graph - """ - return nx.node_link_data(graph) ->>>>>>> origin/main diff --git a/src/dgipy/queries/__init__.py b/src/dgipy/queries/__init__.py index 3a84550..6d19f81 100644 --- a/src/dgipy/queries/__init__.py +++ b/src/dgipy/queries/__init__.py @@ -30,6 +30,7 @@ def query(self) -> DocumentNode: get_all_genes = _LazyQueryLoader("get_all_genes") +get_all_drugs = _LazyQueryLoader("get_all_drugs") get_drug_applications = _LazyQueryLoader("get_drug_applications") get_drugs = _LazyQueryLoader("get_drugs") get_gene_categories = _LazyQueryLoader("get_gene_categories") @@ -41,6 +42,7 @@ def query(self) -> DocumentNode: __all__ = [ "get_all_genes", + "get_all_drugs", "get_drug_applications", "get_drugs", "get_gene_categories", diff --git a/src/dgipy/queries/get_all_drugs.graphql b/src/dgipy/queries/get_all_drugs.graphql new file mode 100644 index 0000000..54906f8 --- /dev/null +++ b/src/dgipy/queries/get_all_drugs.graphql @@ -0,0 +1,8 @@ +{ + drugs { + nodes { + name + conceptId + } + } +} \ No newline at end of file From 84e8f0623b5160c97acc9170e784c5160ec8311e Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Tue, 6 Aug 2024 15:43:51 -0400 Subject: [PATCH 07/19] Implemented graph visualization with cytoscape - Previously implementation of using plotly to visualize the the networkx graph has now 
been replaced with cytoscape - All previously used plotly functions have been removed - Functionality of __add_node_attributes has been moved to the stylesheet of cytoscape_figure in __set_app_layout() in graph_app.py - generate_cytoscape() handles creation of a cytoscape graph from a networkx graph - Basic implementation of cytoscape - Nodes are categorized by whether or not they are genes and their degree - Further stylistic distinction TBD - Many elements on right sidebar may not currently work as intended TBD --- pyproject.toml | 3 +- src/dgipy/graph_app.py | 89 ++++++++++---- src/dgipy/network_graph.py | 240 +++++-------------------------------- 3 files changed, 94 insertions(+), 238 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 13b62f5..953e01e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ dependencies = [ "plotly", "networkx[default]", "requests", - "gql[requests]" + "gql[requests]", + "dash_cytoscape" ] dynamic = ["version"] diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 6887695..03efc26 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -1,6 +1,7 @@ """Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" import dash_bootstrap_components as dbc +import dash_cytoscape as cyto from dash import Input, Output, State, ctx, dash, dcc, html from dgipy import dgidb @@ -22,19 +23,60 @@ def generate_app() -> dash.Dash: app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) __set_app_layout(app) - __update_plotly(app) + __update_cytoscape(app) __update_terms_dropdown(app, genes, drugs) __update_selected_node(app) __update_selected_node_text(app) __update_neighbors_dropdown(app) - __update_edge_info(app) + #__update_edge_info(app) return app def __set_app_layout(app: dash.Dash) -> None: - plotly_figure = dcc.Graph( - id="plotly-figure", style={"width": "100%", "height": "800px"} + cytoscape_figure = cyto.Cytoscape( + 
id="cytoscape-figure", + layout={"name": "preset"}, + style={"width": "100%", "height": "800px"}, + stylesheet=[ + # Group selectors + { + "selector": "node", + "style": { + "content": "data(label)" + }, + }, + { + "selector": "edge", + "style": { + "width": 0.75 + } + }, + { + "selector": "[node_degree > 1][isGene]", + "style": { + "background-color": "cyan", + } + }, + { + "selector": "[node_degree <= 1][isGene]", + "style": { + "background-color": "blue", + } + }, + { + "selector": "[node_degree > 1][!isGene]", + "style": { + "background-color": "orange", + } + }, + { + "selector": "[node_degree <= 1][!isGene]", + "style": { + "background-color": "red", + } + } + ] ) search_mode = dcc.RadioItems( @@ -69,7 +111,7 @@ def __set_app_layout(app: dash.Dash) -> None: dbc.Row( [ dbc.Col( - dbc.Card(plotly_figure, body=True, style={"margin": "10px"}), + dbc.Card(cytoscape_figure, body=True, style={"margin": "10px"}), width=8, ), dbc.Col( @@ -115,21 +157,18 @@ def __set_app_layout(app: dash.Dash) -> None: ) -def __update_plotly(app: dash.Dash) -> None: +def __update_cytoscape(app: dash.Dash) -> None: @app.callback( - [Output("graph", "data"), Output("plotly-figure", "figure")], + Output("cytoscape-figure", "elements"), Input("terms-dropdown", "value"), State("search-mode", "value"), ) - def update( - terms: list | None, search_mode: str - ) -> tuple[dict | None, ng.go.Figure]: + def update(terms: list | None, search_mode: str) -> dict: if len(terms) != 0: interactions = dgidb.get_interactions(terms, search_mode) - network_graph = ng.create_network(interactions, terms, search_mode) - plotly_figure = ng.generate_plotly(network_graph) - return ng.generate_json(network_graph), plotly_figure - return None, ng.generate_plotly(None) + network_graph = ng.initalize_network(interactions, terms, search_mode) + return ng.generate_cytoscape(network_graph) + return {} def __update_terms_dropdown(app: dash.Dash, genes: list, drugs: list) -> None: @@ -148,16 +187,14 @@ def 
update(search_mode: str) -> list: def __update_selected_node(app: dash.Dash) -> None: @app.callback( Output("selected-node", "data"), - [Input("plotly-figure", "clickData"), Input("terms-dropdown", "value")], + [Input("cytoscape-figure", "tapNode"), Input("terms-dropdown", "value")], ) - def update(clickData: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 + def update(tapNode: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 if ctx.triggered_id == "terms-dropdown": return "" - if clickData is not None and "points" in clickData: - selected_node = clickData["points"][0] - if "text" not in selected_node: - return dash.no_update - return selected_node + if tapNode is not None: + print(tapNode) + return tapNode return dash.no_update @@ -167,7 +204,7 @@ def __update_selected_node_text(app: dash.Dash) -> None: ) def update(selected_node: str | dict) -> str: if selected_node != "": - return selected_node["text"] + return selected_node["data"]["id"] return "No Node Selected" @@ -180,8 +217,12 @@ def __update_neighbors_dropdown(app: dash.Dash) -> None: Input("selected-node", "data"), ) def update(selected_node: str | dict) -> tuple[list, None]: - if selected_node != "" and selected_node["curveNumber"] != 1: - return selected_node["customdata"], None + if selected_node != "" and selected_node["data"]["node_degree"] != 1: + neighbor_list = [] + for edge in selected_node["edgesData"]: + neighbor_list.append(edge["source"]) + print(neighbor_list) + return neighbor_list, None return [], None diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 2ca3b86..38c0d77 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -2,14 +2,18 @@ import networkx as nx import pandas as pd -import plotly.graph_objects as go -PLOTLY_SEED = 7 +LAYOUT_SEED = 7 -def __initalize_network( - interactions: pd.DataFrame, terms: list, search_mode: str -) -> nx.Graph: +def initalize_network(interactions: pd.DataFrame, terms: list, 
search_mode: str) -> nx.Graph: + """Create a networkx graph representing interactions between genes and drugs + + :param interactions: DataFrame containing drug-gene interaction data + :param terms: List containing terms used to query interaction data + :param search_mode: String indicating whether query was gene-focused or drug-focused + :return: a networkx graph of drug-gene interactions + """ interactions_graph = nx.Graph() graphed_terms = set() @@ -18,8 +22,8 @@ def __initalize_network( graphed_terms.add(interactions["gene"][index]) if search_mode == "drugs": graphed_terms.add(interactions["drug"][index]) - interactions_graph.add_node(interactions["gene"][index], isGene=True) - interactions_graph.add_node(interactions["drug"][index], isGene=False) + interactions_graph.add_node(interactions["gene"][index], label=interactions["gene"][index], isGene=True) + interactions_graph.add_node(interactions["drug"][index], label=interactions["drug"][index], isGene=False) interactions_graph.add_edge( interactions["gene"][index], interactions["drug"][index], @@ -34,216 +38,26 @@ def __initalize_network( graphed_terms = set(terms).difference(graphed_terms) for term in graphed_terms: if search_mode == "genes": - interactions_graph.add_node(term, isGene=True) - if search_mode == "drugs": - interactions_graph.add_node(term, isGene=False) - return interactions_graph - - -def __add_node_attributes(interactions_graph: nx.Graph, search_mode: str) -> None: - for node in interactions_graph.nodes: - is_gene = interactions_graph.nodes[node]["isGene"] - degree = interactions_graph.degree[node] - if search_mode == "genes": - if is_gene: - if degree > 1: - set_color = "cyan" - set_size = 10 - else: - set_color = "blue" - set_size = 10 - else: - if degree > 1: - set_color = "orange" - set_size = 7 - else: - set_color = "red" - set_size = 7 + interactions_graph.add_node(term, label=term, isGene=True) if search_mode == "drugs": - if is_gene: - if degree > 1: - set_color = "cyan" - set_size = 7 
- else: - set_color = "blue" - set_size = 7 - else: - if degree > 1: - set_color = "orange" - set_size = 10 - else: - set_color = "red" - set_size = 10 - interactions_graph.nodes[node]["node_color"] = set_color - interactions_graph.nodes[node]["node_size"] = set_size - - -def create_network( - interactions: pd.DataFrame, terms: list, search_mode: str -) -> nx.Graph: - """Create a networkx graph representing interactions between genes and drugs + interactions_graph.add_node(term, label=term, isGene=False) - :param interactions: DataFrame containing drug-gene interaction data - :param terms: List containing terms used to query interaction data - :param search_mode: String indicating whether query was gene-focused or drug-focused - :return: a networkx graph of drug-gene interactions - """ - interactions_graph = __initalize_network(interactions, terms, search_mode) - __add_node_attributes(interactions_graph, search_mode) + nx.set_node_attributes(interactions_graph, dict(interactions_graph.degree()), "node_degree") return interactions_graph -def generate_plotly(graph: nx.Graph) -> go.Figure: - """Create a plotly graph representing interactions between genes and drugs - - :param graph: networkx graph to be formatted as a plotly graph - :return: a plotly graph of drug-gene interactions - """ - layout = go.Layout( - hovermode="closest", - xaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, - yaxis={"showgrid": False, "zeroline": False, "showticklabels": False}, - showlegend=True, - ) - fig = go.Figure(layout=layout) - - if graph is not None: - pos = nx.spring_layout(graph, seed=PLOTLY_SEED) - - trace_nodes = __create_trace_nodes(graph, pos) - trace_edges = __create_trace_edges(graph, pos) - - fig.add_trace(trace_edges[0]) - fig.add_trace(trace_edges[1]) - for trace_group in trace_nodes: - fig.add_trace(trace_group) - - return fig - - -def __create_trace_nodes(graph: nx.Graph, pos: dict) -> list: - nodes_by_group = { - "cyan": { - "node_x": [], - 
"node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "multi-degree genes", - }, - "orange": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "multi-degree drugs", - }, - "red": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "single-degree drugs", - }, - "blue": { - "node_x": [], - "node_y": [], - "node_text": [], - "node_color": [], - "node_size": [], - "neighbors": [], - "legend_name": "single-degree genes", - }, - } - - for node in graph.nodes(): - node_color = graph.nodes[node]["node_color"] - node_size = graph.nodes[node]["node_size"] - x, y = pos[node] - nodes_by_group[node_color]["node_x"].append(x) - nodes_by_group[node_color]["node_y"].append(y) - nodes_by_group[node_color]["node_text"].append(str(node)) - nodes_by_group[node_color]["node_color"].append(node_color) - nodes_by_group[node_color]["node_size"].append(node_size) - nodes_by_group[node_color]["neighbors"].append(list(graph.neighbors(node))) - - trace_nodes = [] - - for _, node in nodes_by_group.items(): - trace_group = go.Scatter( - x=node["node_x"], - y=node["node_y"], - mode="markers", - marker={ - "symbol": "circle", - "size": node["node_size"], - "color": node["node_color"], - }, - text=node["node_text"], - name=node["legend_name"], - customdata=node["neighbors"], - hoverinfo="text", - visible=True, - showlegend=True, - ) - trace_nodes.append(trace_group) - - return trace_nodes - - -def __create_trace_edges(graph: nx.Graph, pos: dict) -> go.Scatter: - edge_x = [] - edge_y = [] - - i_edge_x = [] - i_edge_y = [] - i_edge_id = [] - - for edge in graph.edges(): - x0, y0 = pos[edge[0]] - x1, y1 = pos[edge[1]] - edge_x.append(x0) - edge_x.append(x1) - edge_x.append(None) - edge_y.append(y0) - edge_y.append(y1) - edge_y.append(None) - - i_edge_x.append((x0 + x1) / 2) - i_edge_y.append((y0 
+ y1) / 2) - i_edge_id.append(graph.edges[edge]["id"]) - - trace_edges = go.Scatter( - x=edge_x, - y=edge_y, - mode="lines", - line={"width": 0.5, "color": "gray"}, - hoverinfo="none", - showlegend=False, - ) - - i_trace_edges = go.Scatter( - x=i_edge_x, - y=i_edge_y, - mode="markers", - marker_size=0.5, - text=i_edge_id, - hoverinfo="text", - showlegend=False, - ) - - return trace_edges, i_trace_edges - - -def generate_json(graph: nx.Graph) -> dict: - """Generate a JSON representation of a networkx graph +def generate_cytoscape(graph: nx.Graph) -> dict: + """Create a cytoscape graph representing interactions between genes and drugs - :param graph: networkx graph to be formatted as a JSON - :return: a dictionary representing the JSON data of the graph + :param graph: networkx graph to be formatted as a cytoscape graph + :return: a cytoscape graph of drug-gene interactions """ - return nx.node_link_data(graph) + pos = nx.spring_layout(graph, seed=LAYOUT_SEED, scale=4000) + cytoscape_data = nx.cytoscape_data(graph)["elements"] + cytoscape_node_data = cytoscape_data["nodes"] + cytoscape_edge_data = cytoscape_data["edges"] + for node in range(len(cytoscape_node_data)): + node_pos = pos[cytoscape_node_data[node]["data"]["id"]] + node_pos = {"position" : {"x": int(node_pos[0].item()), "y": int(node_pos[1].item())}} + cytoscape_node_data[node].update(node_pos) + return cytoscape_node_data + cytoscape_edge_data From b828390b94775bd3baea9e83d3d0fe363b747400 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:08:55 -0400 Subject: [PATCH 08/19] Full-feature integration with cytoscape All info-boxes and user-interaction features (found on the right side of graph app) are now functional with the cytoscape: Selecting nodes AND edges works as intended Neighbor Dropdown properly updates with selected nodes Edge info properly updates when an edge is selected OR when a node is selected and a neighbor from the dropdown 
menu is selected Various variables and functions have been renamed for better consistency --- src/dgipy/graph_app.py | 141 ++++++++++++++++--------------------- src/dgipy/network_graph.py | 2 +- 2 files changed, 60 insertions(+), 83 deletions(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 03efc26..612cc22 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -25,10 +25,10 @@ def generate_app() -> dash.Dash: __set_app_layout(app) __update_cytoscape(app) __update_terms_dropdown(app, genes, drugs) - __update_selected_node(app) - __update_selected_node_text(app) + __update_selected_element(app) + __update_selected_element_text(app) __update_neighbors_dropdown(app) - #__update_edge_info(app) + __update_edge_info(app) return app @@ -92,8 +92,8 @@ def __set_app_layout(app: dash.Dash) -> None: id="terms-dropdown", optionHeight=75, multi=True, value=[] ) - selected_node_text = dcc.Markdown( - id="selected-node-text", children="No Node Selected" + selected_element_text = dcc.Markdown( + id="selected-element-text", children="No Element Selected" ) neighbors_dropdown = dcc.Dropdown(id="neighbors-dropdown", multi=False) @@ -105,7 +105,7 @@ def __set_app_layout(app: dash.Dash) -> None: app.layout = html.Div( [ # Variables - dcc.Store(id="selected-node", data=""), + dcc.Store(id="selected-element", data=""), dcc.Store(id="graph"), # Layout dbc.Row( @@ -141,7 +141,7 @@ def __set_app_layout(app: dash.Dash) -> None: dbc.CardBody( [ html.H4("Selected Node/Edge:"), - html.P(selected_node_text), + html.P(selected_element_text), html.H4("Selected Edge Info:"), html.P(selected_edge_info), ] @@ -184,27 +184,30 @@ def update(search_mode: str) -> list: return None -def __update_selected_node(app: dash.Dash) -> None: +def __update_selected_element(app: dash.Dash) -> None: @app.callback( - Output("selected-node", "data"), - [Input("cytoscape-figure", "tapNode"), Input("terms-dropdown", "value")], + Output("selected-element", "data"), + 
[Input("cytoscape-figure", "tapNode"), Input("cytoscape-figure", "tapEdge"), Input("terms-dropdown", "value")], ) - def update(tapNode: dict | None, new_gene: list | None) -> str | dict: # noqa: N803 - if ctx.triggered_id == "terms-dropdown": - return "" - if tapNode is not None: - print(tapNode) - return tapNode + def update(tapNode: dict | None, tapEdge: dict | None, termsDropdown: list | None) -> str | dict: # noqa: N803 + if ctx.triggered_prop_ids: + dash_trigger = next(iter(ctx.triggered_prop_ids.keys())) + if dash_trigger == "terms-dropdown.value": + return "" + if dash_trigger == "cytoscape-figure.tapNode" and tapNode is not None: + return tapNode + if dash_trigger == "cytoscape-figure.tapEdge" and tapEdge is not None: + return tapEdge return dash.no_update -def __update_selected_node_text(app: dash.Dash) -> None: +def __update_selected_element_text(app: dash.Dash) -> None: @app.callback( - Output("selected-node-text", "children"), Input("selected-node", "data") + Output("selected-element-text", "children"), Input("selected-element", "data") ) - def update(selected_node: str | dict) -> str: - if selected_node != "": - return selected_node["data"]["id"] + def update(selected_element: str | dict) -> str: + if selected_element != "": + return selected_element["data"]["id"] return "No Node Selected" @@ -214,14 +217,16 @@ def __update_neighbors_dropdown(app: dash.Dash) -> None: Output("neighbors-dropdown", "options"), Output("neighbors-dropdown", "value"), ], - Input("selected-node", "data"), + Input("selected-element", "data") ) - def update(selected_node: str | dict) -> tuple[list, None]: - if selected_node != "" and selected_node["data"]["node_degree"] != 1: - neighbor_list = [] - for edge in selected_node["edgesData"]: - neighbor_list.append(edge["source"]) - print(neighbor_list) + def update(selected_element: str | dict) -> tuple[list, None]: + if selected_element != "" and selected_element["group"] == "nodes" and selected_element["data"]["node_degree"] != 
1: + neighbor_set = set() + for edge in selected_element["edgesData"]: + neighbor_set.add(edge["target"]) + neighbor_set.add(edge["source"]) + neighbor_set.remove(selected_element["data"]["id"]) + neighbor_list = list(neighbor_set) return neighbor_list, None return [], None @@ -229,70 +234,42 @@ def update(selected_node: str | dict) -> tuple[list, None]: def __update_edge_info(app: dash.Dash) -> None: @app.callback( Output("selected-edge-info", "children"), - [Input("selected-node", "data"), Input("neighbors-dropdown", "value")], - State("graph", "data"), + [Input("selected-element", "data"), Input("neighbors-dropdown", "value")] ) def update( - selected_node: str | dict, - selected_neighbor: str | None, - graph: dict | None, + selected_element: str | dict, + selected_neighbor: str | None ) -> str: - if selected_node == "": + if selected_element == "": return "No Edge Selected" - if selected_node["curveNumber"] == 1: - selected_data = __get_node_data_from_id( - graph["links"], selected_node["text"] - ) - return ( - "ID: " - + str(selected_data["id"]) - + "\n\nApproval: " - + str(selected_data["approval"]) - + "\n\nScore: " - + str(selected_data["score"]) - + "\n\nAttributes: " - + str(selected_data["attributes"]) - + "\n\nSource: " - + str(selected_data["source"]) - + "\n\nPmid: " - + str(selected_data["pmid"]) - ) - if selected_neighbor is not None: - edge_node_id = None - selected_node_is_gene = __get_node_data_from_id( - graph["nodes"], selected_node["text"] - )["isGene"] - selected_neighbor_is_gene = __get_node_data_from_id( - graph["nodes"], selected_neighbor - )["isGene"] - if selected_node_is_gene == selected_neighbor_is_gene: - return dash.no_update - if selected_node_is_gene: - edge_node_id = selected_node["text"] + " - " + selected_neighbor - elif selected_neighbor_is_gene: - edge_node_id = selected_neighbor + " - " + selected_node["text"] - selected_data = __get_node_data_from_id(graph["links"], edge_node_id) - if selected_data is None: - return 
dash.no_update + + edge_info = None + if selected_element["group"] == "nodes" and selected_neighbor is not None: + edge_name = None + if selected_element["data"]["isGene"]: + edge_name = selected_element["data"]["id"] + " - " + selected_neighbor + else: + edge_name = selected_neighbor + " - " + selected_element["data"]["id"] + for edge in selected_element["edgesData"]: + if edge["id"] == edge_name: + edge_info = edge + break + if selected_element["group"] == "edges": + edge_info = selected_element["data"] + + if (selected_element["group"] == "nodes" and selected_neighbor is not None) or selected_element["group"] == "edges": return ( "ID: " - + str(selected_data["id"]) + + str(edge_info["id"]) + "\n\nApproval: " - + str(selected_data["approval"]) + + str(edge_info["approval"]) + "\n\nScore: " - + str(selected_data["score"]) + + str(edge_info["score"]) + "\n\nAttributes: " - + str(selected_data["attributes"]) + + str(edge_info["attributes"]) + "\n\nSource: " - + str(selected_data["source"]) + + str(edge_info["source"]) + "\n\nPmid: " - + str(selected_data["pmid"]) + + str(edge_info["pmid"]) ) return "No Edge Selected" - - -def __get_node_data_from_id(nodes: list, node_id: str) -> dict | None: - for node in nodes: - if node["id"] == node_id: - return node - return None diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 38c0d77..e808589 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -31,7 +31,7 @@ def initalize_network(interactions: pd.DataFrame, terms: list, search_mode: str) approval=interactions["approval"][index], score=interactions["score"][index], attributes=interactions["interaction_attributes"][index], - source=interactions["source"][index], + sourcedata=interactions["source"][index], pmid=interactions["pmid"][index], ) From 69a5a187c3aaf394b6b94dfd3fe54a4e8b36fcb2 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:46:08 -0400 Subject: [PATCH 
09/19] refactor(ruff): make ruff fixes --- src/dgipy/graph_app.py | 62 ++++++++++++++++++++------------------ src/dgipy/network_graph.py | 20 +++++++++--- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 612cc22..14cebcf 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -39,44 +39,37 @@ def __set_app_layout(app: dash.Dash) -> None: layout={"name": "preset"}, style={"width": "100%", "height": "800px"}, stylesheet=[ - # Group selectors + # Group Selectors { "selector": "node", - "style": { - "content": "data(label)" - }, - }, - { - "selector": "edge", - "style": { - "width": 0.75 - } + "style": {"content": "data(label)"}, }, + {"selector": "edge", "style": {"width": 0.75}}, { "selector": "[node_degree > 1][isGene]", "style": { "background-color": "cyan", - } + }, }, { "selector": "[node_degree <= 1][isGene]", "style": { "background-color": "blue", - } + }, }, { "selector": "[node_degree > 1][!isGene]", "style": { "background-color": "orange", - } + }, }, { "selector": "[node_degree <= 1][!isGene]", "style": { "background-color": "red", - } - } - ] + }, + }, + ], ) search_mode = dcc.RadioItems( @@ -187,17 +180,23 @@ def update(search_mode: str) -> list: def __update_selected_element(app: dash.Dash) -> None: @app.callback( Output("selected-element", "data"), - [Input("cytoscape-figure", "tapNode"), Input("cytoscape-figure", "tapEdge"), Input("terms-dropdown", "value")], + [ + Input("cytoscape-figure", "tapNode"), + Input("cytoscape-figure", "tapEdge"), + Input("terms-dropdown", "value"), + ], ) - def update(tapNode: dict | None, tapEdge: dict | None, termsDropdown: list | None) -> str | dict: # noqa: N803 + def update( + tap_node: dict | None, tap_edge: dict | None, terms_dropdown: list | None # noqa: ARG001 + ) -> str | dict: if ctx.triggered_prop_ids: dash_trigger = next(iter(ctx.triggered_prop_ids.keys())) if dash_trigger == "terms-dropdown.value": return "" - if dash_trigger == 
"cytoscape-figure.tapNode" and tapNode is not None: - return tapNode - if dash_trigger == "cytoscape-figure.tapEdge" and tapEdge is not None: - return tapEdge + if dash_trigger == "cytoscape-figure.tapNode" and tap_node is not None: + return tap_node + if dash_trigger == "cytoscape-figure.tapEdge" and tap_edge is not None: + return tap_edge return dash.no_update @@ -217,10 +216,14 @@ def __update_neighbors_dropdown(app: dash.Dash) -> None: Output("neighbors-dropdown", "options"), Output("neighbors-dropdown", "value"), ], - Input("selected-element", "data") + Input("selected-element", "data"), ) def update(selected_element: str | dict) -> tuple[list, None]: - if selected_element != "" and selected_element["group"] == "nodes" and selected_element["data"]["node_degree"] != 1: + if ( + selected_element != "" + and selected_element["group"] == "nodes" + and selected_element["data"]["node_degree"] != 1 + ): neighbor_set = set() for edge in selected_element["edgesData"]: neighbor_set.add(edge["target"]) @@ -234,12 +237,9 @@ def update(selected_element: str | dict) -> tuple[list, None]: def __update_edge_info(app: dash.Dash) -> None: @app.callback( Output("selected-edge-info", "children"), - [Input("selected-element", "data"), Input("neighbors-dropdown", "value")] + [Input("selected-element", "data"), Input("neighbors-dropdown", "value")], ) - def update( - selected_element: str | dict, - selected_neighbor: str | None - ) -> str: + def update(selected_element: str | dict, selected_neighbor: str | None) -> str: if selected_element == "": return "No Edge Selected" @@ -257,7 +257,9 @@ def update( if selected_element["group"] == "edges": edge_info = selected_element["data"] - if (selected_element["group"] == "nodes" and selected_neighbor is not None) or selected_element["group"] == "edges": + if ( + selected_element["group"] == "nodes" and selected_neighbor is not None + ) or selected_element["group"] == "edges": return ( "ID: " + str(edge_info["id"]) diff --git 
a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index e808589..2eaa80d 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -6,7 +6,9 @@ LAYOUT_SEED = 7 -def initalize_network(interactions: pd.DataFrame, terms: list, search_mode: str) -> nx.Graph: +def initalize_network( + interactions: pd.DataFrame, terms: list, search_mode: str +) -> nx.Graph: """Create a networkx graph representing interactions between genes and drugs :param interactions: DataFrame containing drug-gene interaction data @@ -22,8 +24,12 @@ def initalize_network(interactions: pd.DataFrame, terms: list, search_mode: str) graphed_terms.add(interactions["gene"][index]) if search_mode == "drugs": graphed_terms.add(interactions["drug"][index]) - interactions_graph.add_node(interactions["gene"][index], label=interactions["gene"][index], isGene=True) - interactions_graph.add_node(interactions["drug"][index], label=interactions["drug"][index], isGene=False) + interactions_graph.add_node( + interactions["gene"][index], label=interactions["gene"][index], isGene=True + ) + interactions_graph.add_node( + interactions["drug"][index], label=interactions["drug"][index], isGene=False + ) interactions_graph.add_edge( interactions["gene"][index], interactions["drug"][index], @@ -42,7 +48,9 @@ def initalize_network(interactions: pd.DataFrame, terms: list, search_mode: str) if search_mode == "drugs": interactions_graph.add_node(term, label=term, isGene=False) - nx.set_node_attributes(interactions_graph, dict(interactions_graph.degree()), "node_degree") + nx.set_node_attributes( + interactions_graph, dict(interactions_graph.degree()), "node_degree" + ) return interactions_graph @@ -58,6 +66,8 @@ def generate_cytoscape(graph: nx.Graph) -> dict: cytoscape_edge_data = cytoscape_data["edges"] for node in range(len(cytoscape_node_data)): node_pos = pos[cytoscape_node_data[node]["data"]["id"]] - node_pos = {"position" : {"x": int(node_pos[0].item()), "y": int(node_pos[1].item())}} + node_pos 
= { + "position": {"x": int(node_pos[0].item()), "y": int(node_pos[1].item())} + } cytoscape_node_data[node].update(node_pos) return cytoscape_node_data + cytoscape_edge_data From 7ca0695ac3e49df67fb3caa074dd20fae741aafb Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:35:39 -0400 Subject: [PATCH 10/19] refactor(ruff): Make ruff fix --- src/dgipy/graph_app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 14cebcf..70164b3 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -187,7 +187,9 @@ def __update_selected_element(app: dash.Dash) -> None: ], ) def update( - tap_node: dict | None, tap_edge: dict | None, terms_dropdown: list | None # noqa: ARG001 + tap_node: dict | None, + tap_edge: dict | None, + terms_dropdown: list | None, # noqa: ARG001 ) -> str | dict: if ctx.triggered_prop_ids: dash_trigger = next(iter(ctx.triggered_prop_ids.keys())) From 2e2d505023b0f8c02d1ec9210ba2dd03b3abfcd8 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:23:16 -0400 Subject: [PATCH 11/19] Revert "Merge branch 'main' into dash-cytoscape-integration" This reverts commit 585c1fb3cf488445bd9b37d3ac82e2d39728e83c, reversing changes made to 7ca0695ac3e49df67fb3caa074dd20fae741aafb. 
--- README.md | 40 +- src/dgipy/data_utils.py | 57 --- src/dgipy/dgidb.py | 649 +++++++++++++----------- src/dgipy/graph_app.py | 10 +- src/dgipy/network_graph.py | 152 +++--- src/dgipy/queries/get_all_drugs.graphql | 2 +- tests/test_dgidb.py | 83 +-- 7 files changed, 485 insertions(+), 508 deletions(-) delete mode 100644 src/dgipy/data_utils.py diff --git a/README.md b/README.md index ca95c6e..d71c629 100644 --- a/README.md +++ b/README.md @@ -19,34 +19,22 @@ python3 -m pip install dgipy ## Usage -DGIpy is built around query methods that wrap a GraphQL client and fetch data from the public DGIdb API endpoint. By default, data returned in a columnar format (i.e., as a dictionary where keys are column names and values are lists representing column data). - -```pycon ->>> from dgipy import get_drug ->>> results = get_gene(["BRAF"]) ->>> results["name"][0], results["concept_id"][0], results["aliases"][0][:5] -('BRAF', 'hgnc:1097', ['B-RAF PROTO-ONCOGENE, SERINE/THREONINE KINASE', 'BRAF1', 'BRAF-1', 'UCSC:UC003VWC.5', 'VEGA:OTTHUMG00000157457']) +Methods in `dgipy.dgidb` send pre-defined queries with user-supplied parameters to the DGIdb GraphQL API endpoint. Response objects can optionally be returned as Pandas dataframes for readability and ease of use, or retained as the raw GraphQL responses by setting the `use_pandas` argument to `False`. 
+ +```python +from dgipy.dgidb import get_drug + +# get a dataframe including drug name, identifier/aliases, molecular attributes, and regulatory data +response = get_drug(["sunitinib", "trastuzumab", "not-a-real-drug"]) +print(list(response["drug"].unique())) +# ['BROMPHENIRAMINE MALEATE', 'SUNITINIB', 'BROMPHENIRAMINE'] +print(dict(response[["drug", "concept_id", "approved"]].iloc[0])) +# {'drug': 'BROMPHENIRAMINE MALEATE', +# 'concept_id': 'rxcui:142427', +# 'approved': 'True'} ``` -This orientation enables easy use within the dataframe library of your choosing: - -```pycon ->>> import pandas as pd ->>> pd.DataFrame(results) - name concept_id aliases attributes -0 BRAF hgnc:1097 [B-RAF PROTO-ONCOGENE, SERINE/THREONINE KINASE... {'BRAF MUT': ['Reported Genome Event Targeted'... ->>> ->>> import polars as pl # not included in DGIpy dependencies ->>> pl.DataFrame(results) -shape: (1, 4) -┌──────┬────────────┬─────────────────────────────────┬─────────────────────────────────┐ -│ name ┆ concept_id ┆ aliases ┆ attributes │ -│ --- ┆ --- ┆ --- ┆ --- │ -│ str ┆ str ┆ list[str] ┆ struct[14] │ -╞══════╪════════════╪═════════════════════════════════╪═════════════════════════════════╡ -│ BRAF ┆ hgnc:1097 ┆ ["B-RAF PROTO-ONCOGENE, SERINE… ┆ {["0"],["Swiss-Prot"],["Report… │ -└──────┴────────────┴─────────────────────────────────┴─────────────────────────────────┘ -``` +Similar methods are provided for looking up genes and drug-gene interactions. ## Graph App diff --git a/src/dgipy/data_utils.py b/src/dgipy/data_utils.py deleted file mode 100644 index 5924b13..0000000 --- a/src/dgipy/data_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Provide utilities relating to data types.""" - -import csv -from pathlib import Path - - -def make_tabular(columnar_dict: dict) -> list[dict]: - """Convert DGIpy query method output to a tabular format. 
- - :param columnar_dict: column-oriented dict as returned by DGIpy query methods - :return: list of table rows, where each row keys the column name to the value at - that column and row. - """ - return [ - dict(zip(columnar_dict.keys(), row, strict=False)) - for row in zip(*columnar_dict.values(), strict=False) - ] - - -def dump_columnar_to_tsv(columnar_dict: dict, output_file: Path) -> None: - """Dump DGIpy query method output to a TSV file. - - :param columnar_dict: column-oriented dict as returned by DGIpy query methods - :param output_file: path to save location - """ - rows = zip(*columnar_dict.values(), strict=False) - with output_file.open("w", newline="") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t") - writer.writerow(columnar_dict.keys()) - writer.writerows(rows) - - -def drop_none_attrs(column: list[dict]) -> list[dict]: - """For an attributes column (i.e., a list of dicts), drop all entries with `None` - values. - - In DGIdb (and consequently DGIpy), there's no semantic information intended by - giving attributes `None` values. They are, however, included to ensure compatibility - with strongly-structured dataframe libraries like Polars. Otherwise, these - properties are unnecessary, and can be safely dropped without loss of information. - - >>> from dgipy import get_interactions - >>> from dgipy.data_utils import drop_none_attrs - >>> results = get_interactions(["braf"]) - >>> results["interaction_attributes"][2] - {'Response Type': None, 'Combination Therapy': None, 'Novel Drug Target': ['Established target'], 'Variant Effect': None, 'Cancer Type': None, 'Direct Interaction': None, 'Indication': None, 'Clinical Trial Name': ['XL281'], 'Pathway': None, 'Clinical Trial ID': None, 'Mechanism of Action': None, 'Evidence Type': None, 'Alteration': None, 'Approval Status': None} - >>> results["interaction_attributes"] = drop_none_attrs( - ... results["interaction_attributes"] - ... 
) - >>> results["interaction_attributes"][2] - {'Novel Drug Target': ['Established target'], 'Clinical Trial Name': ['XL281']} - - :param column: an individual column value from the columnar output of a - :py:module:`dgipy.dgidb` query function - :return: the same column, but with `None` attributes removed from all cells - """ - return [{k: v for k, v in d.items() if v is not None} for d in column] diff --git a/src/dgipy/dgidb.py b/src/dgipy/dgidb.py index bf66c9b..f316bce 100644 --- a/src/dgipy/dgidb.py +++ b/src/dgipy/dgidb.py @@ -4,6 +4,7 @@ import os from enum import Enum +import pandas as pd import requests from gql import Client from gql.transport.requests import RequestsHTTPTransport @@ -28,36 +29,21 @@ def _get_client(api_url: str) -> Client: return Client(transport=transport, fetch_schema_from_transport=True) -def _group_attributes(row: list[dict]) -> dict: - grouped_dict = {} - for attr in row: - if attr["value"] is None: - continue - if attr["name"] in grouped_dict: - grouped_dict[attr["name"]].append(attr["value"]) - else: - grouped_dict[attr["name"]] = [attr["value"]] - return grouped_dict - - -def _backfill_dicts(col: list[dict]) -> list[dict]: - keys = {key for cell in col for key in cell} - return [{key: cell.get(key) for key in keys} for cell in col] - - def get_drug( terms: list | str, + use_pandas: bool = True, immunotherapy: bool | None = None, antineoplastic: bool | None = None, api_url: str | None = None, -) -> dict: +) -> pd.DataFrame | dict: """Perform a record look up in DGIdb for a drug of interest :param terms: drug or drugs for record lookup + :param use_pandas: boolean for whether pandas should be used to format response :param immunotherapy: filter option for results that are only immunotherapy :param antineoplastic: filter option for results that see antineoplastic use :param api_url: API endpoint for GraphQL request - :return: drug data + :return: record page results for drug in either a dataframe or json object """ if 
isinstance(terms, str): terms = [terms] @@ -72,44 +58,20 @@ def get_drug( client = _get_client(api_url) result = client.execute(queries.get_drugs.query, variable_values=params) - output = { - "name": [], - "concept_id": [], - "aliases": [], - "attributes": [], - "antineoplastic": [], - "immunotherapy": [], - "approved": [], - "approval_ratings": [], - "fda_applications": [], - } - for match in result["drugs"]["nodes"]: - output["name"].append(match["name"]) - output["concept_id"].append(match["conceptId"]) - output["aliases"].append([a["alias"] for a in match["drugAliases"]]) - output["attributes"].append(_group_attributes(match["drugAttributes"])) - output["antineoplastic"].append(match["antiNeoplastic"]) - output["immunotherapy"].append(match["immunotherapy"]) - output["approved"].append(match["approved"]) - output["approval_ratings"].append( - [ - {"rating": r["rating"], "source": r["source"]["sourceDbName"]} - for r in match["drugApprovalRatings"] - ] - ) - output["fda_applications"].append( - [app["appNo"] for app in match["drugApplications"]] - ) - output["attributes"] = _backfill_dicts(output["attributes"]) - return output + if use_pandas is True: + return _process_drug(result) + return result -def get_gene(terms: list | str, api_url: str | None = None) -> dict: +def get_gene( + terms: list | str, use_pandas: bool = True, api_url: str | None = None +) -> pd.DataFrame | dict: """Perform a record look up in DGIdb for a gene of interest :param terms: gene or genes for record lookup + :param use_pandas: boolean for whether pandas should be used to format response :param api_url: API endpoint for GraphQL request - :return: gene data + :return: record page results for gene in either a dataframe or json object """ if isinstance(terms, str): terms = [terms] @@ -118,24 +80,15 @@ def get_gene(terms: list | str, api_url: str | None = None) -> dict: client = _get_client(api_url) result = client.execute(queries.get_genes.query, variable_values={"names": terms}) - output 
= { - "name": [], - "concept_id": [], - "aliases": [], - "attributes": [], - } - for match in result["genes"]["nodes"]: - output["name"].append(match["name"]) - output["concept_id"].append(match["conceptId"]) - output["aliases"].append([a["alias"] for a in match["geneAliases"]]) - output["attributes"].append(_group_attributes(match["geneAttributes"])) - output["attributes"] = _backfill_dicts(output["attributes"]) - return output + if use_pandas is True: + return _process_gene(result) + return result def get_interactions( terms: list | str, search: str = "genes", + use_pandas: bool = True, immunotherapy: bool | None = None, antineoplastic: bool | None = None, source: str | None = None, @@ -143,11 +96,12 @@ def get_interactions( interaction_type: str | None = None, approved: str | None = None, api_url: str | None = None, -) -> dict: +) -> pd.DataFrame | dict: """Perform an interaction look up for drugs or genes of interest :param terms: drugs or genes for interaction look up :param search: interaction search type. 
valid types are "drugs" or "genes" + :param use_pandas: boolean for whether pandas should be used to format response :param immunotherapy: filter option for results that are used in immunotherapy :param antineoplastic: filter option for results that are part of antineoplastic regimens :param source: filter option for specific database of interest @@ -155,7 +109,7 @@ def get_interactions( :param interaction_type: filter option for specific interaction types :param approved: filter option for approved interactions :param api_url: API endpoint for GraphQL request - :return: interaction results for terms + :return: interaction results for terms in either a dataframe or a json object """ if isinstance(terms, str): terms = [terms] @@ -173,122 +127,47 @@ def get_interactions( if approved is not None: params["approved"] = approved - api_url = api_url if api_url else API_ENDPOINT_URL - client = _get_client(api_url) - if search == "genes": - return _get_interactions_by_genes(params, client) - if search == "drugs": - return _get_interactions_by_drugs(params, client) - msg = "Search type must be specified using: search='drugs' or search='genes'" - raise Exception(msg) - - -def _get_interactions_by_genes( - params: dict, - client: Client, -) -> dict: - results = client.execute(queries.get_interactions_by_gene.query, params) - output = { - "gene_name": [], - "gene_long_name": [], - "drug_name": [], - "approved": [], - "interaction_score": [], - "interaction_attributes": [], - "sources": [], - "pmids": [], - } - for result in results["genes"]["nodes"]: - gene_name = result["name"] - long_name = result["longName"] - for interaction in result["interactions"]: - output["gene_name"].append(gene_name) - output["gene_long_name"].append(long_name) - output["drug_name"].append(interaction["drug"]["name"]) - output["approved"].append(interaction["drug"]["approved"]) - output["interaction_score"].append(interaction["interactionScore"]) - output["interaction_attributes"].append( - 
_group_attributes(interaction["interactionAttributes"]) - ) - - pubs = [] - sources = [] - for claim in interaction["interactionClaims"]: - sources.append(claim["source"]["sourceDbName"]) - pubs += [p["pmid"] for p in claim["publications"]] - output["pmids"].append(pubs) - output["sources"].append(sources) - output["interaction_attributes"] = _backfill_dicts(output["interaction_attributes"]) - return output + query = queries.get_interactions_by_gene.query + elif search == "drugs": + query = queries.get_interactions_by_drug.query + else: + msg = "Search type must be specified using: search='drugs' or search='genes'" + raise Exception(msg) + api_url = api_url if api_url else API_ENDPOINT_URL + client = _get_client(api_url) + result = client.execute(query, variable_values=params) -def _get_interactions_by_drugs( - params: dict, - client: Client, -) -> dict: - results = client.execute(queries.get_interactions_by_drug.query, params) - output = { - "drug_name": [], - "gene_name": [], - "interaction_score": [], - "approved": [], - "interaction_attributes": [], - "sources": [], - "pmids": [], - } - for result in results["drugs"]["nodes"]: - drug_name = result["name"] - approval = result["approved"] - for interaction in result["interactions"]: - output["drug_name"].append(drug_name) - output["gene_name"].append(interaction["gene"]["name"]) - output["interaction_score"].append(interaction["interactionScore"]) - output["approved"].append(approval) - output["interaction_attributes"].append( - _group_attributes(interaction["interactionAttributes"]) - ) - pubs = [] - sources = [] - for claim in interaction["interactionClaims"]: - sources.append(claim["source"]["sourceDbName"]) - pubs += [p["pmid"] for p in claim["publications"]] - output["pmids"].append(pubs) - output["sources"].append(sources) - output["interaction_attributes"] = _backfill_dicts(output["interaction_attributes"]) - return output + if use_pandas is True: + if search == "genes": + return 
_process_gene_search(result) + return _process_drug_search(result) + return result -def get_categories(terms: list | str, api_url: str | None = None) -> dict: +def get_categories( + terms: list | str, use_pandas: bool = True, api_url: str | None = None +) -> pd.DataFrame | dict: """Perform a category annotation lookup for genes of interest :param terms: Genes of interest for annotations + :param use_pandas: boolean for whether pandas should be used to format a response :param api_url: API endpoint for GraphQL request - :return: category annotation results for genes + :return: category annotation results for genes formatted in a dataframe or a json object """ if isinstance(terms, str): terms = [terms] api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - results = client.execute( + result = client.execute( queries.get_gene_categories.query, variable_values={"names": terms} ) - output = { - "gene": [], - "full_name": [], - "category": [], - "sources": [], - } - for result in results["genes"]["nodes"]: - name = result["name"] - long_name = result["longName"] - for cat in result["geneCategoriesWithSources"]: - output["gene"].append(name) - output["full_name"].append(long_name) - output["category"].append(cat["name"]) - output["sources"].append(cat["sourceNames"]) - return output + + if use_pandas is True: + return _process_gene_categories(result) + return result class SourceType(str, Enum): @@ -317,42 +196,24 @@ def get_source( api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) params = {} if source_type is None else {"sourceType": source_param} - results = client.execute(queries.get_sources.query, variable_values=params) - output = { - "name": [], - "short_name": [], - "version": [], - "drug_claims": [], - "gene_claims": [], - "interaction_claims": [], - } - for result in results["sources"]["nodes"]: - output["name"].append(result["fullName"]) - output["short_name"].append(result["sourceDbName"]) - 
output["version"].append(result["sourceDbVersion"]) - output["drug_claims"].append(result["drugClaimsCount"]) - output["gene_claims"].append(result["geneClaimsCount"]) - output["interaction_claims"].append(result["interactionClaimsCount"]) - return output - - -def get_gene_list(api_url: str | None = None) -> dict: + return client.execute(queries.get_sources.query, variable_values=params) + + +def get_gene_list(api_url: str | None = None) -> list: """Get all gene names present in DGIdb :param api_url: API endpoint for GraphQL request - :return: list of genes in DGIdb + :return: a full list of genes present in dgidb """ api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - results = client.execute(queries.get_all_genes.query) - genes = {"name": [], "concept_id": []} - for result in results["genes"]["nodes"]: - genes["name"].append(result["name"]) - genes["concept_id"].append(result["conceptId"]) + result = client.execute(queries.get_all_genes.query) + genes = result["genes"]["nodes"] + genes.sort(key=lambda i: i["name"]) return genes -def get_drug_list(api_url: str | None = None) -> dict: +def get_drug_list(api_url: str | None = None) -> list: """Get all drug names present in DGIdb :param api_url: API endpoint for GraphQL request @@ -360,150 +221,344 @@ def get_drug_list(api_url: str | None = None) -> dict: """ api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - results = client.execute(queries.get_all_drugs.query) - drugs = {"name": [], "concept_id": []} - for result in results["drugs"]["nodes"]: - drugs["name"].append(result["name"]) - drugs["concept_id"].append(result["conceptId"]) + result = client.execute(queries.get_all_drugs.query) + drugs = result["drugs"]["nodes"] + drugs.sort(key=lambda i: i["name"]) return drugs -def _get_openfda_data(app_no: str) -> list[tuple]: - url = f'https://api.fda.gov/drug/drugsfda.json?search=openfda.application_number:"{app_no}"' - response = requests.get(url, 
headers={"User-Agent": "Custom"}, timeout=20) - try: - response.raise_for_status() - except requests.exceptions.RequestException as e: - _logger.error("Request to %s failed: %s", url, e) - raise e - data = response.json() - return [ - ( - product["brand_name"], - product["marketing_status"], - product["dosage_form"], - product["active_ingredients"][0]["strength"], - ) - for product in data["results"][0]["products"] - ] - - -def get_drug_applications(terms: list | str, api_url: str | None = None) -> dict: +def get_drug_applications( + terms: list | str, use_pandas: bool = True, api_url: str | None = None +) -> pd.DataFrame | dict: """Perform a look up for ANDA/NDA applications for drug or drugs of interest :param terms: drug or drugs of interest + :param use_pandas: boolean for whether to format response in DataFrame :param api_url: API endpoint for GraphQL request - :return: all ANDA/NDA applications for drugs of interest + :return: all ANDA/NDA applications for drugs of interest in json or DataFrame """ if isinstance(terms, str): terms = [terms] api_url = api_url if api_url else API_ENDPOINT_URL client = _get_client(api_url) - results = client.execute( + result = client.execute( queries.get_drug_applications.query, variable_values={"names": terms} ) - output = { - "name": [], - "application": [], - "brand_name": [], - "marketing_status": [], - "dosage_form": [], - "dosage_strength": [], - } - - for result in results["drugs"]["nodes"]: - name = result["name"] - for app in result["drugApplications"]: - application_number = app["appNo"].split(".")[1].replace(":", "").upper() - for ( - brand_name, - marketing_status, - dosage_form, - dosage_strength, - ) in _get_openfda_data(application_number): - output["name"].append(name) - output["application"].append(application_number) - output["brand_name"].append(brand_name) - output["marketing_status"].append(marketing_status) - output["dosage_form"].append(dosage_form) - output["dosage_strength"].append(dosage_strength) - - 
return output + + if use_pandas is True: + data = _process_drug_applications(result) + return _openfda_data(data) + return result def get_clinical_trials( terms: str | list, -) -> dict: +) -> pd.DataFrame: # TODO: Better error handling for new_row?, use_pandas=False """Perform a look up for clinical trials data for drug or drugs of interest :param terms: drug or drugs of interest :return: all clinical trials data for drugs of interest in a DataFrame """ base_url = "https://clinicaltrials.gov/api/v2/studies?format=json" + rows_list = [] if isinstance(terms, str): terms = [terms] - output = { - "search_term": [], - "trial_id": [], - "brief": [], - "study_type": [], - "min_age": [], - "age_groups": [], - "pediatric": [], - "conditions": [], - "interventions": [], - } - for drug in terms: intr_url = f"&query.intr={drug}" full_uri = base_url + intr_url # TODO: + cond_url + term_url - response = requests.get(full_uri, timeout=20) try: - response.raise_for_status() + r = requests.get(full_uri, timeout=20) except requests.exceptions.RequestException as e: _logger.error("Clinical trials lookup to URL %s failed: %s", full_uri, e) raise e - if response.status_code != 200: + if r.status_code == 200: + data = r.json() + + for study in data["studies"]: + new_row = {} + new_row["search_term"] = drug + new_row["trial_id"] = study["protocolSection"]["identificationModule"][ + "nctId" + ] + new_row["brief"] = study["protocolSection"]["identificationModule"][ + "briefTitle" + ] + new_row["study_type"] = study["protocolSection"]["designModule"][ + "studyType" + ] + try: + new_row["min_age"] = study["protocolSection"]["eligibilityModule"][ + "minimumAge" + ] + except: + new_row["min_age"] = None + + new_row["age_groups"] = study["protocolSection"]["eligibilityModule"][ + "stdAges" + ] + new_row["Pediatric?"] = "CHILD" in new_row["age_groups"] + + new_row["conditions"] = study["protocolSection"]["conditionsModule"][ + "conditions" + ] + try: + new_row["interventions"] = 
study["protocolSection"][ + "armsInterventionsModule" + ] + except: + new_row["interventions"] = None + + rows_list.append(new_row) + else: _logger.error( "Received status code %s from request to %s -- returning empty dataframe", - response.status_code, + r.status_code, full_uri, ) - else: - data = response.json() + return pd.DataFrame(rows_list) + + +def _process_drug(results: dict) -> pd.DataFrame: + drug_list = [] + concept_list = [] + alias_list = [] + attribute_list = [] + antineoplastic_list = [] + immunotherapy_list = [] + approved_list = [] + rating_list = [] + application_list = [] + + for match in results["drugs"]["nodes"]: + drug_list.append(match["name"]) + concept_list.append(match["conceptId"]) + alias_list.append("|".join([alias["alias"] for alias in match["drugAliases"]])) + current_attributes = [ + ": ".join([attribute["name"], attribute["value"]]) + for attribute in match["drugAttributes"] + ] + attribute_list.append(" | ".join(current_attributes)) + antineoplastic_list.append(str(match["antiNeoplastic"])) + immunotherapy_list.append(str(match["immunotherapy"])) + approved_list.append(str(match["approved"])) + application_list.append( + "|".join(app["appNo"] for app in match["drugApplications"]) + ) + current_ratings = [ + ": ".join([rating["source"]["sourceDbName"], rating["rating"]]) + for rating in match["drugApprovalRatings"] + ] + rating_list.append(" | ".join(current_ratings)) + + return pd.DataFrame().assign( + drug=drug_list, + concept_id=concept_list, + aliases=alias_list, + attributes=attribute_list, + antineoplastic=antineoplastic_list, + immunotherapy=immunotherapy_list, + approved=approved_list, + approval_ratings=rating_list, + applications=application_list, + ) - for study in data["studies"]: - output["search_term"].append(drug) - output["trial_id"].append( - study["protocolSection"]["identificationModule"]["nctId"] - ) - output["brief"].append( - study["protocolSection"]["identificationModule"]["briefTitle"] - ) - 
output["study_type"].append( - study["protocolSection"]["designModule"]["studyType"] - ) - try: - output["min_age"].append( - study["protocolSection"]["eligibilityModule"]["minimumAge"] - ) - except KeyError: - output["min_age"].append(None) - - age_groups = study["protocolSection"]["eligibilityModule"]["stdAges"] - - output["age_groups"].append(age_groups) - output["pediatric"].append("CHILD" in age_groups) - output["conditions"].append( - study["protocolSection"]["conditionsModule"]["conditions"] + +def _process_gene(results: dict) -> pd.DataFrame: + gene_list = [] + alias_list = [] + concept_list = [] + attribute_list = [] + + for match in results["genes"]["nodes"]: + gene_list.append(match["name"]) + alias_list.append("|".join([alias["alias"] for alias in match["geneAliases"]])) + current_attributes = [ + ": ".join([attribute["name"], attribute["value"]]) + for attribute in match["geneAttributes"] + ] + attribute_list.append(" | ".join(current_attributes)) + concept_list.append(match["conceptId"]) + + return pd.DataFrame().assign( + gene=gene_list, + concept_id=concept_list, + aliases=alias_list, + attributes=attribute_list, + ) + + +def _process_gene_search(results: dict) -> pd.DataFrame: + interactionscore_list = [] + drugname_list = [] + approval_list = [] + interactionattributes_list = [] + gene_list = [] + longname_list = [] + sources_list = [] + pmids_list = [] + # genecategories_list = [] + + for match in results["genes"]["nodes"]: + current_gene = match["name"] + current_longname = match["longName"] + + # TO DO: Evaluate if categories should be returned as part of interactions search. Seems useful but also redundant? 
+ # list_string = [] + # for category in match['geneCategories']: + # list_string.append(f"{category['name']}") + # current_genecategories = " | ".join(list_string) + + for interaction in match["interactions"]: + gene_list.append(current_gene) + # genecategories_list.append(current_genecategories) + longname_list.append(current_longname) + drugname_list.append(interaction["drug"]["name"]) + approval_list.append(str(interaction["drug"]["approved"])) + interactionscore_list.append(interaction["interactionScore"]) + + list_string = [ + f"{attribute['name']}: {attribute['value']}" + for attribute in interaction["interactionAttributes"] + ] + interactionattributes_list.append(" | ".join(list_string)) + + list_string = [] + sub_list_string = [] + for claim in interaction["interactionClaims"]: + list_string.append(f"{claim['source']['sourceDbName']}") + sub_list_string = [] + for publication in claim["publications"]: + sub_list_string.append(f"{publication['pmid']}") + sources_list.append(" | ".join(list_string)) + pmids_list.append(" | ".join(sub_list_string)) + + return pd.DataFrame().assign( + gene=gene_list, + drug=drugname_list, + longname=longname_list, + # categories=genecategories_list, + approval=approval_list, + score=interactionscore_list, + interaction_attributes=interactionattributes_list, + source=sources_list, + pmid=pmids_list, + ) + + +def _process_gene_categories(results: dict) -> pd.DataFrame: + gene_list = [] + categories_list = [] + sources_list = [] + longname_list = [] + + for match in results["genes"]["nodes"]: + current_gene = match["name"] + current_longname = match["longName"] + + for category in match["geneCategoriesWithSources"]: + gene_list.append(current_gene) + longname_list.append(current_longname) + categories_list.append(category["name"]) + sources_list.append(" | ".join(category["sourceNames"])) + + return pd.DataFrame().assign( + gene=gene_list, + longname=longname_list, + categories=categories_list, + sources=sources_list, + ) + + 
+def _process_drug_search(results: dict) -> pd.DataFrame: + interactionscore_list = [] + genename_list = [] + approval_list = [] + interactionattributes_list = [] + drug_list = [] + sources_list = [] + pmids_list = [] + + for match in results["drugs"]["nodes"]: + current_drug = match["name"] + current_approval = str(match["approved"]) + + for interaction in match["interactions"]: + drug_list.append(current_drug) + genename_list.append(interaction["gene"]["name"]) + interactionscore_list.append(interaction["interactionScore"]) + approval_list.append(current_approval) + + list_string = [ + f"{attribute['name']}: {attribute['value']}" + for attribute in interaction["interactionAttributes"] + ] + interactionattributes_list.append("| ".join(list_string)) + + list_string = [] + sub_list_string = [] + for claim in interaction["interactionClaims"]: + list_string.append(f"{claim['source']['sourceDbName']}") + sub_list_string = [] + for publication in claim["publications"]: + sub_list_string.append(f"{publication['pmid']}") + + sources_list.append(" | ".join(list_string)) + pmids_list.append(" | ".join(sub_list_string)) + + return pd.DataFrame().assign( + drug=drug_list, + gene=genename_list, + approval=approval_list, + score=interactionscore_list, + interaction_attributes=interactionattributes_list, + source=sources_list, + pmid=pmids_list, + ) + + +def _process_drug_applications(data: dict) -> pd.DataFrame: + drug_list = [] + application_list = [] + + for node in data["drugs"]["nodes"]: + current_drug = node["name"] + for application in node["drugApplications"]: + drug_list.append(current_drug) + application = application["appNo"].split(".")[1].replace(":", "").upper() + application_list.append(application) + + return pd.DataFrame().assign(drug=drug_list, application=application_list) + + +def _openfda_data(dataframe: pd.DataFrame) -> pd.DataFrame: + openfda_base_url = ( + "https://api.fda.gov/drug/drugsfda.json?search=openfda.application_number:" + ) + terms = 
list(dataframe["application"]) + descriptions = [] + for term in terms: + r = requests.get( + f'{openfda_base_url}"{term}"', headers={"User-Agent": "Custom"}, timeout=20 + ) + try: + r.json()["results"][0]["products"] + + f = [] + for product in r.json()["results"][0]["products"]: + brand_name = product["brand_name"] + marketing_status = product["marketing_status"] + dosage_form = product["dosage_form"] + # active_ingredient = product["active_ingredients"][0]["name"] + dosage_strength = product["active_ingredients"][0]["strength"] + f.append( + f"{brand_name}: {dosage_strength} {marketing_status} {dosage_form}" ) - try: - output["interventions"].append( - study["protocolSection"]["armsInterventionsModule"] - ) - except: - output["interventions"].append(None) - return output + + descriptions.append(" | ".join(f)) + except: + descriptions.append("none") + + return dataframe.assign(description=descriptions) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 678c902..70164b3 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -2,12 +2,10 @@ import dash_bootstrap_components as dbc import dash_cytoscape as cyto -import pandas as pd from dash import Input, Output, State, ctx, dash, dcc, html from dgipy import dgidb from dgipy import network_graph as ng -from dgipy.data_utils import make_tabular def generate_app() -> dash.Dash: @@ -16,12 +14,10 @@ def generate_app() -> dash.Dash: :return: a python dash app that can be run with run_server() """ genes = [ - {"label": gene["name"], "value": gene["name"]} - for gene in make_tabular(dgidb.get_gene_list()) + {"label": gene["name"], "value": gene["name"]} for gene in dgidb.get_gene_list() ] drugs = [ - {"label": drug["name"], "value": drug["name"]} - for drug in make_tabular(dgidb.get_drug_list()) + {"label": drug["name"], "value": drug["name"]} for drug in dgidb.get_drug_list() ] app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) @@ -162,7 +158,7 @@ def __update_cytoscape(app: 
dash.Dash) -> None: ) def update(terms: list | None, search_mode: str) -> dict: if len(terms) != 0: - interactions = pd.DataFrame(dgidb.get_interactions(terms, search_mode)) + interactions = dgidb.get_interactions(terms, search_mode) network_graph = ng.initalize_network(interactions, terms, search_mode) return ng.generate_cytoscape(network_graph) return {} diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 1d327f4..2eaa80d 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -1,79 +1,73 @@ -"""Provides functionality to create networkx graphs and pltoly figures for network visualization""" - -import networkx as nx -import pandas as pd - -LAYOUT_SEED = 7 - - -def initalize_network( - interactions: pd.DataFrame, terms: list, search_mode: str -) -> nx.Graph: - """Create a networkx graph representing interactions between genes and drugs - - :param interactions: DataFrame containing drug-gene interaction data - :param terms: List containing terms used to query interaction data - :param search_mode: String indicating whether query was gene-focused or drug-focused - :return: a networkx graph of drug-gene interactions - """ - interactions_graph = nx.Graph() - graphed_terms = set() - - for index in interactions.index: - if search_mode == "genes": - graphed_terms.add(interactions["gene_name"][index]) - if search_mode == "drugs": - graphed_terms.add(interactions["drug_name"][index]) - interactions_graph.add_node( - interactions["gene"][index], - label=interactions["gene_name"][index], - isGene=True, - ) - interactions_graph.add_node( - interactions["drug"][index], - label=interactions["drug_name"][index], - isGene=False, - ) - interactions_graph.add_edge( - interactions["gene_name"][index], - interactions["drug_name"][index], - id=interactions["gene_name"][index] - + " - " - + interactions["drug_name"][index], - approval=interactions["approved"][index], - score=interactions["interaction_score"][index], - 
attributes=interactions["interaction_attributes"][index], - sourcedata=interactions["sources"][index], - pmid=interactions["pmids"][index], - ) - - graphed_terms = set(terms).difference(graphed_terms) - for term in graphed_terms: - if search_mode == "genes": - interactions_graph.add_node(term, label=term, isGene=True) - if search_mode == "drugs": - interactions_graph.add_node(term, label=term, isGene=False) - - nx.set_node_attributes( - interactions_graph, dict(interactions_graph.degree()), "node_degree" - ) - return interactions_graph - - -def generate_cytoscape(graph: nx.Graph) -> dict: - """Create a cytoscape graph representing interactions between genes and drugs - - :param graph: networkx graph to be formatted as a cytoscape graph - :return: a cytoscape graph of drug-gene interactions - """ - pos = nx.spring_layout(graph, seed=LAYOUT_SEED, scale=4000) - cytoscape_data = nx.cytoscape_data(graph)["elements"] - cytoscape_node_data = cytoscape_data["nodes"] - cytoscape_edge_data = cytoscape_data["edges"] - for node in range(len(cytoscape_node_data)): - node_pos = pos[cytoscape_node_data[node]["data"]["id"]] - node_pos = { - "position": {"x": int(node_pos[0].item()), "y": int(node_pos[1].item())} - } - cytoscape_node_data[node].update(node_pos) - return cytoscape_node_data + cytoscape_edge_data +"""Provides functionality to create networkx graphs and pltoly figures for network visualization""" + +import networkx as nx +import pandas as pd + +LAYOUT_SEED = 7 + + +def initalize_network( + interactions: pd.DataFrame, terms: list, search_mode: str +) -> nx.Graph: + """Create a networkx graph representing interactions between genes and drugs + + :param interactions: DataFrame containing drug-gene interaction data + :param terms: List containing terms used to query interaction data + :param search_mode: String indicating whether query was gene-focused or drug-focused + :return: a networkx graph of drug-gene interactions + """ + interactions_graph = nx.Graph() + 
graphed_terms = set() + + for index in interactions.index: + if search_mode == "genes": + graphed_terms.add(interactions["gene"][index]) + if search_mode == "drugs": + graphed_terms.add(interactions["drug"][index]) + interactions_graph.add_node( + interactions["gene"][index], label=interactions["gene"][index], isGene=True + ) + interactions_graph.add_node( + interactions["drug"][index], label=interactions["drug"][index], isGene=False + ) + interactions_graph.add_edge( + interactions["gene"][index], + interactions["drug"][index], + id=interactions["gene"][index] + " - " + interactions["drug"][index], + approval=interactions["approval"][index], + score=interactions["score"][index], + attributes=interactions["interaction_attributes"][index], + sourcedata=interactions["source"][index], + pmid=interactions["pmid"][index], + ) + + graphed_terms = set(terms).difference(graphed_terms) + for term in graphed_terms: + if search_mode == "genes": + interactions_graph.add_node(term, label=term, isGene=True) + if search_mode == "drugs": + interactions_graph.add_node(term, label=term, isGene=False) + + nx.set_node_attributes( + interactions_graph, dict(interactions_graph.degree()), "node_degree" + ) + return interactions_graph + + +def generate_cytoscape(graph: nx.Graph) -> dict: + """Create a cytoscape graph representing interactions between genes and drugs + + :param graph: networkx graph to be formatted as a cytoscape graph + :return: a cytoscape graph of drug-gene interactions + """ + pos = nx.spring_layout(graph, seed=LAYOUT_SEED, scale=4000) + cytoscape_data = nx.cytoscape_data(graph)["elements"] + cytoscape_node_data = cytoscape_data["nodes"] + cytoscape_edge_data = cytoscape_data["edges"] + for node in range(len(cytoscape_node_data)): + node_pos = pos[cytoscape_node_data[node]["data"]["id"]] + node_pos = { + "position": {"x": int(node_pos[0].item()), "y": int(node_pos[1].item())} + } + cytoscape_node_data[node].update(node_pos) + return cytoscape_node_data + 
cytoscape_edge_data diff --git a/src/dgipy/queries/get_all_drugs.graphql b/src/dgipy/queries/get_all_drugs.graphql index 5505125..54906f8 100644 --- a/src/dgipy/queries/get_all_drugs.graphql +++ b/src/dgipy/queries/get_all_drugs.graphql @@ -5,4 +5,4 @@ conceptId } } -} +} \ No newline at end of file diff --git a/tests/test_dgidb.py b/tests/test_dgidb.py index 648a658..26db842 100644 --- a/tests/test_dgidb.py +++ b/tests/test_dgidb.py @@ -2,6 +2,7 @@ from io import StringIO from pathlib import Path +import pandas as pd import pytest import requests_mock @@ -28,30 +29,31 @@ def test_get_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): set_up_graphql_mock(m, json_response) results = get_drug(["Imatinib"]) - assert len(results["name"]), "DataFrame is non-empty" + assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" + assert len(results), "DataFrame is non-empty" results_with_added_fake = get_drug(["imatinib", "not-real"]) - assert len(results_with_added_fake["name"]) == len( - results["name"] + assert len(results_with_added_fake) == len( + results ), "Gracefully ignore non-existent search terms" # handling filters filtered_results = get_drug(["imatinib", "metronidazole"], antineoplastic=True) - assert len(filtered_results["name"]) == 1, "Metronidazole is filtered out" + assert len(filtered_results) == 1, "Metronidazole is filtered out" assert ( - filtered_results["name"][0] == "IMATINIB" + filtered_results["drug"][0] == "IMATINIB" ), "Imatinib is retained by the filter" - assert all(results["antineoplastic"]), "All results are antineoplastics" + assert results["antineoplastic"].all(), "All results are antineoplastics" set_up_graphql_mock(m, filtered_json_response) filtered_results = get_drug(["imatinib", "metronidazole"], antineoplastic=False) - assert len(filtered_results["name"]), "DataFrame is non-empty" - assert "METRONIDAZOLE" in filtered_results["name"] + assert len(filtered_results), "DataFrame is non-empty" + assert "METRONIDAZOLE" 
in filtered_results["drug"].values # empty response set_up_graphql_mock(m, StringIO('{"data": {"drugs": {"nodes": []}}}')) empty_results = get_drug("not-real") - assert len(empty_results["name"]) == 0, "Handles empty response" + assert len(empty_results) == 0, "Handles empty response" def test_get_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -62,17 +64,18 @@ def test_get_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): set_up_graphql_mock(m, json_response) results = get_gene(["ereg"]) - assert len(results["name"]), "DataFrame is non-empty" + assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" + assert len(results), "DataFrame is non-empty" results_with_added_fake = get_gene(["ereg", "not-real"]) - assert len(results_with_added_fake["name"]) == len( - results["name"] + assert len(results_with_added_fake) == len( + results ), "Gracefully ignore non-existent search terms" # empty response set_up_graphql_mock(m, StringIO('{"data": {"genes": {"nodes": []}}}')) empty_results = get_gene("not-real") - assert len(empty_results["name"]) == 0, "Handles empty response" + assert len(empty_results) == 0, "Handles empty response" def test_get_interactions_by_genes(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -87,22 +90,23 @@ def test_get_interactions_by_genes(fixtures_dir: Path, set_up_graphql_mock: Call ): set_up_graphql_mock(m, genes_response) results = get_interactions(["ereg"]) - assert len(results["gene_name"]), "Results are non-empty" + assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" + assert len(results), "Results are non-empty" results = get_interactions(["ereg", "not-real"]) - assert len(results["gene_name"]), "Handles additional not-real terms gracefully" + assert len(results), "Handles additional not-real terms gracefully" # multiple terms set_up_graphql_mock(m, multiple_genes_response) multiple_gene_results = get_interactions(["ereg", "braf"]) - assert 
len(multiple_gene_results["gene_name"]) > len( - results["gene_name"] + assert len(multiple_gene_results) > len( + results ), "Handles multiple genes at once" # empty response set_up_graphql_mock(m, StringIO('{"data": {"genes": {"nodes": []}}}')) empty_results = get_interactions(["not-real"]) - assert len(empty_results["gene_name"]) == 0, "Handles empty response" + assert len(empty_results) == 0, "Handles empty response" def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -117,24 +121,25 @@ def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Call ): set_up_graphql_mock(m, drugs_response) results = get_interactions(["sunitinib"], search="drugs") - assert len(results["drug_name"]), "Results are non-empty" + assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" + assert len(results), "Results are non-empty" results = get_interactions(["sunitinib", "not-real"], search="drugs") - assert len(results["drug_name"]), "Handles additional not-real terms gracefully" + assert len(results), "Handles additional not-real terms gracefully" # multiple terms set_up_graphql_mock(m, multiple_drugs_response) multiple_gene_results = get_interactions( ["sunitinib", "clonazepam"], search="drugs" ) - assert len(multiple_gene_results["drug_name"]) > len( - results["drug_name"] + assert len(multiple_gene_results) > len( + results ), "Handles multiple drugs at once" # empty response set_up_graphql_mock(m, StringIO('{"data": {"drugs": {"nodes": []}}}')) empty_results = get_interactions(["not-real"], search="drugs") - assert len(empty_results["drug_name"]) == 0, "Handles empty response" + assert len(empty_results) == 0, "Handles empty response" def test_get_categories(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -143,11 +148,11 @@ def test_get_categories(fixtures_dir: Path, set_up_graphql_mock: Callable): (fixtures_dir / "get_categories_response.json").open() as categories_response, ): 
set_up_graphql_mock(m, categories_response) - results = get_categories(["BRAF"]) - assert len(results["gene"]), "Results are non-empty" - assert "DRUG RESISTANCE" in results["category"] - assert "DRUGGABLE GENOME" in results["category"] - assert "CLINICALLY ACTIONABLE" in results["category"] + results = get_categories("BRAF") + assert len(results), "Results are non-empty" + assert "DRUG RESISTANCE" in results["categories"].values + assert "DRUGGABLE GENOME" in results["categories"].values + assert "CLINICALLY ACTIONABLE" in results["categories"].values def test_get_sources(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -160,17 +165,16 @@ def test_get_sources(fixtures_dir: Path, set_up_graphql_mock: Callable): ): set_up_graphql_mock(m, sources_response) results = get_source() - assert ( - len(results["name"]) == 45 - ), f"Incorrect # of sources: {len(results['name'])}" + sources = results["sources"]["nodes"] + assert len(sources) == 45, f"Incorrect # of sources: {len(sources)}" set_up_graphql_mock(m, filtered_sources_response) results = get_source(SourceType.GENE) - sources = results["name"] + sources = results["sources"]["nodes"] assert len(sources) == 3, f"Incorrect # of sources: {len(sources)}" - assert set(sources) == { - "NCBI Gene", - "HUGO Gene Nomenclature Committee", + assert {s["sourceDbName"] for s in sources} == { + "NCBI", + "HGNC", "Ensembl", }, "Contains correct sources" @@ -186,7 +190,7 @@ def test_get_gene_list(fixtures_dir: Path, set_up_graphql_mock: Callable): set_up_graphql_mock(m, gene_list_response) results = get_gene_list() - assert len(results["name"]) == 9 + assert len(results) == 9 def test_get_drug_applications(fixtures_dir, set_up_graphql_mock: Callable): @@ -205,11 +209,8 @@ def test_get_drug_applications(fixtures_dir, set_up_graphql_mock: Callable): text=drugsatfda_response.read(), ) results = get_drug_applications(["DAROLUTAMIDE"]) - assert len(results["name"]) == 1 - assert results["brand_name"][0] == "NUBEQA" - assert 
results["dosage_strength"][0] == "300MG" - assert results["marketing_status"][0] == "Prescription" - assert results["dosage_form"][0] == "TABLET" + assert len(results) == 1 + assert results.iloc[0]["description"] == "NUBEQA: 300MG Prescription TABLET" @pytest.mark.performance() From 374f836fb2f413330c910df4fe2ccfa1ed77ee8d Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 20 Sep 2024 15:58:07 -0400 Subject: [PATCH 12/19] Merge branch Part 1: Merge branch 'master' into "cash-cytoscape-integration" --- src/dgipy/network_graph.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index d8a4837..5f42443 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -6,7 +6,7 @@ LAYOUT_SEED = 7 -def _initalize_network( +def initalize_network( interactions: pd.DataFrame, terms: list, search_mode: str ) -> nx.Graph: """Create a networkx graph representing interactions between genes and drugs @@ -21,14 +21,14 @@ def _initalize_network( for index in interactions.index: if search_mode == "genes": - graphed_terms.add(interactions["gene"][index]) + graphed_terms.add(interactions["gene_name"][index]) if search_mode == "drugs": - graphed_terms.add(interactions["drug"][index]) + graphed_terms.add(interactions["drug_name"][index]) interactions_graph.add_node( - interactions["gene"][index], label=interactions["gene"][index], isGene=True + interactions["gene_name"][index], label=interactions["gene_name"][index], isGene=True ) interactions_graph.add_node( - interactions["drug"][index], label=interactions["drug"][index], isGene=False + interactions["drug_name"][index], label=interactions["drug_name"][index], isGene=False ) interactions_graph.add_edge( interactions["gene_name"][index], @@ -48,7 +48,11 @@ def _initalize_network( if search_mode == "genes": interactions_graph.add_node(term, label=term, isGene=True) if search_mode == 
"drugs": - interactions_graph.add_node(term, isGene=False) + interactions_graph.add_node(term, label=term, isGene=False) + + nx.set_node_attributes( + interactions_graph, dict(interactions_graph.degree()), "node_degree" + ) return interactions_graph @@ -100,7 +104,7 @@ def create_network( :param search_mode: String indicating whether query was gene-focused or drug-focused :return: a networkx graph of drug-gene interactions """ - interactions_graph = _initalize_network(interactions, terms, search_mode) + interactions_graph = initalize_network(interactions, terms, search_mode) _add_node_attributes(interactions_graph, search_mode) return interactions_graph From 8563b42f6ddbb3e44fb27a7f3e41f982a736defd Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 20 Sep 2024 16:53:09 -0400 Subject: [PATCH 13/19] fix(merge): Fix interactions data in network_graph.py Due to interaction data no longer being a pandas DataFrame, code alterations were made to fix iteration. Naming differences were also amended for sources and pmid. 
--- src/dgipy/network_graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 5f42443..4fbac6c 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -19,7 +19,7 @@ def initalize_network( interactions_graph = nx.Graph() graphed_terms = set() - for index in interactions.index: + for index in range(len(interactions["gene_name"])-1): if search_mode == "genes": graphed_terms.add(interactions["gene_name"][index]) if search_mode == "drugs": @@ -39,8 +39,8 @@ def initalize_network( approval=interactions["drug_approved"][index], score=interactions["interaction_score"][index], attributes=interactions["interaction_attributes"][index], - sourcedata=interactions["source"][index], - pmid=interactions["pmid"][index], + sourcedata=interactions["interaction_sources"][index], + pmid=interactions["interaction_pmids"][index], ) graphed_terms = set(terms).difference(graphed_terms) From 5e9ee0690bbfed73fb59ded45384ba62175bdb6c Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 20 Sep 2024 16:59:08 -0400 Subject: [PATCH 14/19] style(ruff): Make ruff fixes --- src/dgipy/network_graph.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dgipy/network_graph.py b/src/dgipy/network_graph.py index 4fbac6c..43d5a4a 100644 --- a/src/dgipy/network_graph.py +++ b/src/dgipy/network_graph.py @@ -19,16 +19,20 @@ def initalize_network( interactions_graph = nx.Graph() graphed_terms = set() - for index in range(len(interactions["gene_name"])-1): + for index in range(len(interactions["gene_name"]) - 1): if search_mode == "genes": graphed_terms.add(interactions["gene_name"][index]) if search_mode == "drugs": graphed_terms.add(interactions["drug_name"][index]) interactions_graph.add_node( - interactions["gene_name"][index], label=interactions["gene_name"][index], isGene=True + interactions["gene_name"][index], + 
label=interactions["gene_name"][index], + isGene=True, ) interactions_graph.add_node( - interactions["drug_name"][index], label=interactions["drug_name"][index], isGene=False + interactions["drug_name"][index], + label=interactions["drug_name"][index], + isGene=False, ) interactions_graph.add_edge( interactions["gene_name"][index], From 72e317ca0255b9a482160257414c9212ffe1594a Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 20 Sep 2024 17:09:05 -0400 Subject: [PATCH 15/19] test: :bug: Fix merged test Merging main into this branch resulted in the updated test being overridden by the old one. The updated test has now been restored. --- tests/test_dgidb.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/test_dgidb.py b/tests/test_dgidb.py index f50bb7f..0aafc2c 100644 --- a/tests/test_dgidb.py +++ b/tests/test_dgidb.py @@ -2,7 +2,6 @@ from io import StringIO from pathlib import Path -import pandas as pd import pytest import requests_mock @@ -90,23 +89,22 @@ def test_get_interactions_by_genes(fixtures_dir: Path, set_up_graphql_mock: Call ): set_up_graphql_mock(m, genes_response) results = get_interactions(["ereg"]) - assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" - assert len(results), "Results are non-empty" + assert len(results["gene_name"]), "Results are non-empty" results = get_interactions(["ereg", "not-real"]) - assert len(results), "Handles additional not-real terms gracefully" + assert len(results["gene_name"]), "Handles additional not-real terms gracefully" # multiple terms set_up_graphql_mock(m, multiple_genes_response) multiple_gene_results = get_interactions(["ereg", "braf"]) - assert len(multiple_gene_results) > len( - results + assert len(multiple_gene_results["gene_name"]) > len( + results["gene_name"] ), "Handles multiple genes at once" # empty response set_up_graphql_mock(m, StringIO('{"data": {"genes": {"nodes": 
[]}}}')) empty_results = get_interactions(["not-real"]) - assert len(empty_results) == 0, "Handles empty response" + assert len(empty_results["gene_name"]) == 0, "Handles empty response" def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -121,25 +119,24 @@ def test_get_interactions_by_drugs(fixtures_dir: Path, set_up_graphql_mock: Call ): set_up_graphql_mock(m, drugs_response) results = get_interactions(["sunitinib"], search="drugs") - assert isinstance(results, pd.DataFrame), "Results object is a DataFrame" - assert len(results), "Results are non-empty" + assert len(results["drug_name"]), "Results are non-empty" results = get_interactions(["sunitinib", "not-real"], search="drugs") - assert len(results), "Handles additional not-real terms gracefully" + assert len(results["drug_name"]), "Handles additional not-real terms gracefully" # multiple terms set_up_graphql_mock(m, multiple_drugs_response) multiple_gene_results = get_interactions( ["sunitinib", "clonazepam"], search="drugs" ) - assert len(multiple_gene_results) > len( - results + assert len(multiple_gene_results["drug_name"]) > len( + results["drug_name"] ), "Handles multiple drugs at once" # empty response set_up_graphql_mock(m, StringIO('{"data": {"drugs": {"nodes": []}}}')) empty_results = get_interactions(["not-real"], search="drugs") - assert len(empty_results) == 0, "Handles empty response" + assert len(empty_results["drug_name"]) == 0, "Handles empty response" def test_get_categories(fixtures_dir: Path, set_up_graphql_mock: Callable): @@ -173,9 +170,9 @@ def test_get_sources(fixtures_dir: Path, set_up_graphql_mock: Callable): results = get_sources(SourceType.GENE) sources = results["source_name"] assert len(sources) == 3, f"Incorrect # of sources: {len(sources)}" - assert {s["sourceDbName"] for s in sources} == { - "NCBI", - "HGNC", + assert set(sources) == { + "NCBI Gene", + "HUGO Gene Nomenclature Committee", "Ensembl", }, "Contains correct sources" From 
8215c66ef5828fa787b4a09c634fc05bd4345caa Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 20 Sep 2024 17:15:18 -0400 Subject: [PATCH 16/19] fix(merge): Fix improperly merged files README and a query did not get correctly merged. This commit ammends that --- README.md | 2 +- src/dgipy/queries/get_all_drugs.graphql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7a84dcd..4e5d727 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ python3 -m pip install dgipy ## Usage -Methods in `dgipy.dgidb` send pre-defined queries with user-supplied parameters to the DGIdb GraphQL API endpoint. Response objects can optionally be returned as Pandas dataframes for readability and ease of use, or retained as the raw GraphQL responses by setting the `use_pandas` argument to `False`. +DGIpy is built around query methods that wrap a GraphQL client and fetch data from the public DGIdb API endpoint. By default, data returned in a columnar format (i.e., as a dictionary where keys are column names and values are lists representing column data). ```pycon >>> from dgipy import get_gene diff --git a/src/dgipy/queries/get_all_drugs.graphql b/src/dgipy/queries/get_all_drugs.graphql index 54906f8..5505125 100644 --- a/src/dgipy/queries/get_all_drugs.graphql +++ b/src/dgipy/queries/get_all_drugs.graphql @@ -5,4 +5,4 @@ conceptId } } -} \ No newline at end of file +} From 77f065c0843e738a1442c87ddda4cbf017bbd5ce Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:48:35 -0400 Subject: [PATCH 17/19] feat: Add Image/JSON exporting to cytoscape Imported JSON to allow a cytoscape-figure to exported as a JSON. Added card header to 'Selection Info' for consistency. Added New Card with two buttons allowing the user to download the current cytoscape figure as a .png or a .json. Removed '_get_node_data_from_id' as it is unused. 
Added '_generate_png' and 'generate_json' to handle png and json generation. --- src/dgipy/graph_app.py | 61 ++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 5786fa4..31f3097 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -1,5 +1,7 @@ """Provides functionality to create a Dash web application for interacting with drug-gene data from DGIdb""" +import json + import dash_bootstrap_components as dbc import dash_cytoscape as cyto from dash import Input, Output, State, ctx, dash, dcc, html @@ -32,6 +34,8 @@ def generate_app() -> dash.Dash: _update_selected_element_text(app) _update_neighbors_dropdown(app) _update_edge_info(app) + _generate_png(app) + _generate_json(app) return app @@ -134,16 +138,32 @@ def _set_app_layout(app: dash.Dash) -> None: style={"margin": "10px"}, ), dbc.Card( - dbc.CardBody( - [ - html.H4("Selected Node/Edge:"), - html.P(selected_element_text), - html.H4("Selected Edge Info:"), - html.P(selected_edge_info), - ] - ), + [ + dbc.CardHeader("Selection Info"), + dbc.CardBody( + [ + html.H4("Selected Node/Edge:"), + html.P(selected_element_text), + html.H4("Selected Edge Info:"), + html.P(selected_edge_info), + ] + ), + ], style={"margin": "10px"}, ), + dbc.Card( + [ + dbc.CardHeader("Export Graph"), + dbc.CardBody( + [ + dbc.Button("Export Graph as .png", id="export-png-graph"), + dbc.Button("Export Graph as .json", id="export-json-graph"), + dcc.Download(id="json-download") + ] + ), + ], + style={"margin": "10px"}, + ) ], width=4, ), @@ -152,7 +172,6 @@ def _set_app_layout(app: dash.Dash) -> None: ] ) - def _update_cytoscape(app: dash.Dash) -> None: @app.callback( Output("cytoscape-figure", "elements"), @@ -281,9 +300,23 @@ def update(selected_element: str | dict, selected_neighbor: str | None) -> str: ) return "No Edge Selected" +def _generate_png(app: dash.Dash) -> None: + @app.callback( + Output("cytoscape-figure", 
"generateImage"), + Input("export-png-graph", "n_clicks") + ) + def update(export_png_graph: int) -> dict: # noqa: ARG001 + if ctx.triggered_id is None: + return dash.no_update + return {"type": "png", "action": "download"} -def _get_node_data_from_id(nodes: list, node_id: str) -> dict | None: - for node in nodes: - if node["id"] == node_id: - return node - return None +def _generate_json(app: dash.Dash) -> None: + @app.callback( + Output("json-download", "data"), + Input("export-json-graph", "n_clicks"), + State("cytoscape-figure","elements"), + ) + def update(export_png_graph: int, cytoscape_figure: dict) -> dict: # noqa: ARG001 + if ctx.triggered_id is None: + return dash.no_update + return dcc.send_string(json.dumps(cytoscape_figure, indent=4), "cyto.json") From 9867c75c3e18476d6cd2ac6fbd98898c95e36e43 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:06:17 -0400 Subject: [PATCH 18/19] feat: Add margin class to Export buttons Added the 'class_name="m-1"' attribute to the 'export graph' buttons to visually space them apart. 
--- src/dgipy/graph_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index 31f3097..1ea1db2 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -156,8 +156,8 @@ def _set_app_layout(app: dash.Dash) -> None: dbc.CardHeader("Export Graph"), dbc.CardBody( [ - dbc.Button("Export Graph as .png", id="export-png-graph"), - dbc.Button("Export Graph as .json", id="export-json-graph"), + dbc.Button("Export Graph as .png", id="export-png-graph", class_name="m-1"), + dbc.Button("Export Graph as .json", id="export-json-graph", class_name="m-1"), dcc.Download(id="json-download") ] ), From c222385877d1ead0f9b83c77d802622760f155a5 Mon Sep 17 00:00:00 2001 From: Rohit Basu <107427918+rbasu101@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:36:04 -0400 Subject: [PATCH 19/19] feat: Add .svg cytoscape export Added functionality to export .svg images from the cytoscape graph. A new button exists to export the graph as an .svg. --- src/dgipy/graph_app.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/dgipy/graph_app.py b/src/dgipy/graph_app.py index e0bec74..b94836f 100644 --- a/src/dgipy/graph_app.py +++ b/src/dgipy/graph_app.py @@ -10,6 +10,8 @@ from dgipy import network_graph as ng from dgipy.data_utils import make_tabular +cyto.load_extra_layouts() + def generate_app() -> dash.Dash: """Initialize a Dash application object with a layout designed for visualizing: drug-gene interactions, options for user interactivity, and other visual elements. 
@@ -34,7 +36,7 @@ def generate_app() -> dash.Dash: _update_selected_element_text(app) _update_neighbors_dropdown(app) _update_edge_info(app) - _generate_png(app) + _generate_image(app) _generate_json(app) return app @@ -161,6 +163,11 @@ def _set_app_layout(app: dash.Dash) -> None: id="export-png-graph", class_name="m-1", ), + dbc.Button( + "Export Graph as .svg", + id="export-svg-graph", + class_name="m-1", + ), dbc.Button( "Export Graph as .json", id="export-json-graph", @@ -310,15 +317,17 @@ def update(selected_element: str | dict, selected_neighbor: str | None) -> str: return "No Edge Selected" -def _generate_png(app: dash.Dash) -> None: +def _generate_image(app: dash.Dash) -> None: @app.callback( Output("cytoscape-figure", "generateImage"), - Input("export-png-graph", "n_clicks"), + [Input("export-png-graph", "n_clicks"), Input("export-svg-graph", "n_clicks")], ) - def update(export_png_graph: int) -> dict: # noqa: ARG001 - if ctx.triggered_id is None: - return dash.no_update - return {"type": "png", "action": "download"} + def update(export_png_graph: int, export_svg_graph: int) -> dict: # noqa: ARG001 + if ctx.triggered_id == "export-png-graph": + return {"type": "png", "action": "download"} + if ctx.triggered_id == "export-svg-graph": + return {"type": "svg", "action": "download"} + return dash.no_update def _generate_json(app: dash.Dash) -> None: