Skip to content

Commit

Permalink
Enable weights for MG similarity algorithms (#3879)
Browse files Browse the repository at this point in the history
This is a follow-up PR to #3828, which enabled weights for the Python SG similarity algorithms.
This PR also updates the tests and docstrings, and removes the experimental calls.

Authors:
  - Joseph Nke (https://github.com/jnke2016)

Approvers:
  - Alex Barghi (https://github.com/alexbarghi-nv)

URL: #3879
  • Loading branch information
jnke2016 authored Sep 26, 2023
1 parent 8b02e24 commit a9f4297
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 147 deletions.
10 changes: 3 additions & 7 deletions python/cugraph/cugraph/dask/link_prediction/jaccard.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
Currently not supported
Flag to indicate whether to compute weighted jaccard (if use_weight==True)
or un-weighted jaccard (if use_weight==False).
'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
Expand All @@ -144,12 +146,6 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):

vertex_pair_col_name = vertex_pair.columns

if use_weight:
raise ValueError("'use_weight' is currently not supported.")

if input_graph.is_weighted():
raise ValueError("Weighted graphs are currently not supported.")

if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)

Expand Down
10 changes: 3 additions & 7 deletions python/cugraph/cugraph/dask/link_prediction/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
Currently not supported
Flag to indicate whether to compute weighted overlap (if use_weight==True)
or un-weighted overlap (if use_weight==False).
'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
Expand All @@ -122,12 +124,6 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):

vertex_pair_col_name = vertex_pair.columns

if use_weight:
raise ValueError("'use_weight' is currently not supported.")

if input_graph.is_weighted():
raise ValueError("Weighted graphs are currently not supported.")

if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)

Expand Down
10 changes: 3 additions & 7 deletions python/cugraph/cugraph/dask/link_prediction/sorensen.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
Currently not supported
Flag to indicate whether to compute weighted sorensen (if use_weight==True)
or un-weighted sorensen (if use_weight==False).
'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
Expand All @@ -118,12 +120,6 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):

vertex_pair_col_name = vertex_pair.columns

if use_weight:
raise ValueError("'use_weight' is currently not supported.")

if input_graph.is_weighted():
raise ValueError("Weighted graphs are currently not supported.")

if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)

Expand Down
59 changes: 17 additions & 42 deletions python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def setup_function():

IS_DIRECTED = [False]
HAS_VERTEX_PAIR = [True, False]
IS_WEIGHTED = [True, False]


# =============================================================================
Expand All @@ -48,6 +49,7 @@ def setup_function():
(datasets, "graph_file"),
(IS_DIRECTED, "directed"),
(HAS_VERTEX_PAIR, "has_vertex_pair"),
(IS_WEIGHTED, "is_weighted"),
)


Expand All @@ -57,7 +59,9 @@ def input_combo(request):
Simply return the current combination of params as a dictionary for use in
tests or other parameterized fixtures.
"""
parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
parameters = dict(
zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
)

return parameters

Expand All @@ -72,7 +76,10 @@ def input_expected_output(input_combo):
input_data_path = input_combo["graph_file"]
directed = input_combo["directed"]
has_vertex_pair = input_combo["has_vertex_pair"]
G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
is_weighted = input_combo["is_weighted"]
G = utils.generate_cugraph_graph_from_file(
input_data_path, directed=directed, edgevals=is_weighted
)
if has_vertex_pair:
# Sample random vertices from the graph and compute the two_hop_neighbors
# with those seeds
Expand All @@ -84,7 +91,9 @@ def input_expected_output(input_combo):
vertex_pair = None

input_combo["vertex_pair"] = vertex_pair
sg_cugraph_jaccard = cugraph.experimental.jaccard(G, input_combo["vertex_pair"])
sg_cugraph_jaccard = cugraph.jaccard(
G, input_combo["vertex_pair"], use_weight=is_weighted
)
# Save the results back to the input_combo dictionary to prevent redundant
# cuGraph runs. Other tests using the input_combo fixture will look for
# them, and if not present they will have to re-run the same cuGraph call.
Expand All @@ -104,6 +113,7 @@ def input_expected_output(input_combo):
ddf,
source="src",
destination="dst",
edge_attr="value" if is_weighted else None,
renumber=True,
store_transposed=True,
)
Expand All @@ -122,8 +132,11 @@ def input_expected_output(input_combo):
def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]
use_weight = input_expected_output["is_weighted"]

result_jaccard = benchmark(dcg.jaccard, dg, input_expected_output["vertex_pair"])
result_jaccard = benchmark(
dcg.jaccard, dg, input_expected_output["vertex_pair"], use_weight=use_weight
)

result_jaccard = (
result_jaccard.compute()
Expand Down Expand Up @@ -151,41 +164,3 @@ def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):

assert len(jaccard_coeff_diffs1) == 0
assert len(jaccard_coeff_diffs2) == 0


@pytest.mark.mg
def test_dask_mg_weighted_jaccard(dask_client):
input_data_path = datasets[0]
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
)

dg = cugraph.Graph(directed=False)
dg.from_dask_cudf_edgelist(
ddf,
source="src",
destination="dst",
edge_attr="value",
renumber=True,
store_transposed=True,
)
with pytest.raises(ValueError):
dcg.jaccard(dg)

dg = cugraph.Graph(directed=False)
dg.from_dask_cudf_edgelist(
ddf,
source="src",
destination="dst",
edge_attr="value",
store_transposed=True,
)

use_weight = True
with pytest.raises(ValueError):
dcg.jaccard(dg, use_weight=use_weight)
59 changes: 17 additions & 42 deletions python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def setup_function():

IS_DIRECTED = [False]
HAS_VERTEX_PAIR = [True, False]
IS_WEIGHTED = [True, False]


# =============================================================================
Expand All @@ -48,6 +49,7 @@ def setup_function():
(datasets, "graph_file"),
(IS_DIRECTED, "directed"),
(HAS_VERTEX_PAIR, "has_vertex_pair"),
(IS_WEIGHTED, "is_weighted"),
)


Expand All @@ -57,7 +59,9 @@ def input_combo(request):
Simply return the current combination of params as a dictionary for use in
tests or other parameterized fixtures.
"""
parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
parameters = dict(
zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
)

return parameters

Expand All @@ -72,7 +76,10 @@ def input_expected_output(input_combo):
input_data_path = input_combo["graph_file"]
directed = input_combo["directed"]
has_vertex_pair = input_combo["has_vertex_pair"]
G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
is_weighted = input_combo["is_weighted"]
G = utils.generate_cugraph_graph_from_file(
input_data_path, directed=directed, edgevals=is_weighted
)
if has_vertex_pair:
# Sample random vertices from the graph and compute the two_hop_neighbors
# with those seeds
Expand All @@ -84,7 +91,9 @@ def input_expected_output(input_combo):
vertex_pair = None

input_combo["vertex_pair"] = vertex_pair
sg_cugraph_overlap = cugraph.experimental.overlap(G, input_combo["vertex_pair"])
sg_cugraph_overlap = cugraph.overlap(
G, input_combo["vertex_pair"], use_weight=is_weighted
)
# Save the results back to the input_combo dictionary to prevent redundant
# cuGraph runs. Other tests using the input_combo fixture will look for
# them, and if not present they will have to re-run the same cuGraph call.
Expand All @@ -104,6 +113,7 @@ def input_expected_output(input_combo):
ddf,
source="src",
destination="dst",
edge_attr="value" if is_weighted else None,
renumber=True,
store_transposed=True,
)
Expand All @@ -125,8 +135,11 @@ def input_expected_output(input_combo):
def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):

dg = input_expected_output["MGGraph"]
use_weight = input_expected_output["is_weighted"]

result_overlap = benchmark(dcg.overlap, dg, input_expected_output["vertex_pair"])
result_overlap = benchmark(
dcg.overlap, dg, input_expected_output["vertex_pair"], use_weight=use_weight
)

result_overlap = (
result_overlap.compute()
Expand Down Expand Up @@ -154,41 +167,3 @@ def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):

assert len(overlap_coeff_diffs1) == 0
assert len(overlap_coeff_diffs2) == 0


@pytest.mark.mg
def test_dask_mg_weighted_overlap():
input_data_path = datasets[0]
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
)

dg = cugraph.Graph(directed=False)
dg.from_dask_cudf_edgelist(
ddf,
source="src",
destination="dst",
edge_attr="value",
renumber=True,
store_transposed=True,
)
with pytest.raises(ValueError):
dcg.overlap(dg)

dg = cugraph.Graph(directed=False)
dg.from_dask_cudf_edgelist(
ddf,
source="src",
destination="dst",
edge_attr="value",
store_transposed=True,
)

use_weight = True
with pytest.raises(ValueError):
dcg.overlap(dg, use_weight=use_weight)
Loading

0 comments on commit a9f4297

Please sign in to comment.