Enable weights for MG similarity algorithms (#3879)

This is a follow up PR to #3828 which enabled weighted for the python SG similarity algorithms. This PR also updates the tests, docstrings and remove experimental calls Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) URL: #3879
rapidsai · Sep 26, 2023 · a9f4297 · a9f4297
1 parent 8b02e24
commit a9f4297
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 147 deletions.
diff --git a/python/cugraph/cugraph/dask/link_prediction/jaccard.py b/python/cugraph/cugraph/dask/link_prediction/jaccard.py
@@ -118,7 +118,9 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):
         adjacent vertices in the graph.
 
     use_weight : bool, optional (default=False)
-        Currently not supported
+        Flag to indicate whether to compute weighted jaccard (if use_weight==True)
+        or un-weighted jaccard (if use_weight==False).
+        'input_graph' must be weighted if 'use_weight=True'.
 
     Returns
     -------
@@ -144,12 +146,6 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):
 
     vertex_pair_col_name = vertex_pair.columns
 
-    if use_weight:
-        raise ValueError("'use_weight' is currently not supported.")
-
-    if input_graph.is_weighted():
-        raise ValueError("Weighted graphs are currently not supported.")
-
     if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
 

diff --git a/python/cugraph/cugraph/dask/link_prediction/overlap.py b/python/cugraph/cugraph/dask/link_prediction/overlap.py
@@ -96,7 +96,9 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):
         adjacent vertices in the graph.
 
     use_weight : bool, optional (default=False)
-        Currently not supported
+        Flag to indicate whether to compute weighted overlap (if use_weight==True)
+        or un-weighted overlap (if use_weight==False).
+        'input_graph' must be weighted if 'use_weight=True'.
 
     Returns
     -------
@@ -122,12 +124,6 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):
 
     vertex_pair_col_name = vertex_pair.columns
 
-    if use_weight:
-        raise ValueError("'use_weight' is currently not supported.")
-
-    if input_graph.is_weighted():
-        raise ValueError("Weighted graphs are currently not supported.")
-
     if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
 

diff --git a/python/cugraph/cugraph/dask/link_prediction/sorensen.py b/python/cugraph/cugraph/dask/link_prediction/sorensen.py
@@ -92,7 +92,9 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):
         adjacent vertices in the graph.
 
     use_weight : bool, optional (default=False)
-        Currently not supported
+        Flag to indicate whether to compute weighted sorensen (if use_weight==True)
+        or un-weighted sorensen (if use_weight==False).
+        'input_graph' must be weighted if 'use_weight=True'.
 
     Returns
     -------
@@ -118,12 +120,6 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):
 
     vertex_pair_col_name = vertex_pair.columns
 
-    if use_weight:
-        raise ValueError("'use_weight' is currently not supported.")
-
-    if input_graph.is_weighted():
-        raise ValueError("Weighted graphs are currently not supported.")
-
     if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
 

diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py
@@ -34,6 +34,7 @@ def setup_function():
 
 IS_DIRECTED = [False]
 HAS_VERTEX_PAIR = [True, False]
+IS_WEIGHTED = [True, False]
 
 
 # =============================================================================
@@ -48,6 +49,7 @@ def setup_function():
     (datasets, "graph_file"),
     (IS_DIRECTED, "directed"),
     (HAS_VERTEX_PAIR, "has_vertex_pair"),
+    (IS_WEIGHTED, "is_weighted"),
 )
 
 
@@ -57,7 +59,9 @@ def input_combo(request):
     Simply return the current combination of params as a dictionary for use in
     tests or other parameterized fixtures.
     """
-    parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
+    parameters = dict(
+        zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
+    )
 
     return parameters
 
@@ -72,7 +76,10 @@ def input_expected_output(input_combo):
     input_data_path = input_combo["graph_file"]
     directed = input_combo["directed"]
     has_vertex_pair = input_combo["has_vertex_pair"]
-    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+    is_weighted = input_combo["is_weighted"]
+    G = utils.generate_cugraph_graph_from_file(
+        input_data_path, directed=directed, edgevals=is_weighted
+    )
     if has_vertex_pair:
         # Sample random vertices from the graph and compute the two_hop_neighbors
         # with those seeds
@@ -84,7 +91,9 @@ def input_expected_output(input_combo):
         vertex_pair = None
 
     input_combo["vertex_pair"] = vertex_pair
-    sg_cugraph_jaccard = cugraph.experimental.jaccard(G, input_combo["vertex_pair"])
+    sg_cugraph_jaccard = cugraph.jaccard(
+        G, input_combo["vertex_pair"], use_weight=is_weighted
+    )
     # Save the results back to the input_combo dictionary to prevent redundant
     # cuGraph runs. Other tests using the input_combo fixture will look for
     # them, and if not present they will have to re-run the same cuGraph call.
@@ -104,6 +113,7 @@ def input_expected_output(input_combo):
         ddf,
         source="src",
         destination="dst",
+        edge_attr="value" if is_weighted else None,
         renumber=True,
         store_transposed=True,
     )
@@ -122,8 +132,11 @@ def input_expected_output(input_combo):
 def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):
 
     dg = input_expected_output["MGGraph"]
+    use_weight = input_expected_output["is_weighted"]
 
-    result_jaccard = benchmark(dcg.jaccard, dg, input_expected_output["vertex_pair"])
+    result_jaccard = benchmark(
+        dcg.jaccard, dg, input_expected_output["vertex_pair"], use_weight=use_weight
+    )
 
     result_jaccard = (
         result_jaccard.compute()
@@ -151,41 +164,3 @@ def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):
 
     assert len(jaccard_coeff_diffs1) == 0
     assert len(jaccard_coeff_diffs2) == 0
-
-
-@pytest.mark.mg
-def test_dask_mg_weighted_jaccard(dask_client):
-    input_data_path = datasets[0]
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
-    with pytest.raises(ValueError):
-        dcg.jaccard(dg)
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        store_transposed=True,
-    )
-
-    use_weight = True
-    with pytest.raises(ValueError):
-        dcg.jaccard(dg, use_weight=use_weight)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py
@@ -34,6 +34,7 @@ def setup_function():
 
 IS_DIRECTED = [False]
 HAS_VERTEX_PAIR = [True, False]
+IS_WEIGHTED = [True, False]
 
 
 # =============================================================================
@@ -48,6 +49,7 @@ def setup_function():
     (datasets, "graph_file"),
     (IS_DIRECTED, "directed"),
     (HAS_VERTEX_PAIR, "has_vertex_pair"),
+    (IS_WEIGHTED, "is_weighted"),
 )
 
 
@@ -57,7 +59,9 @@ def input_combo(request):
     Simply return the current combination of params as a dictionary for use in
     tests or other parameterized fixtures.
     """
-    parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
+    parameters = dict(
+        zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
+    )
 
     return parameters
 
@@ -72,7 +76,10 @@ def input_expected_output(input_combo):
     input_data_path = input_combo["graph_file"]
     directed = input_combo["directed"]
     has_vertex_pair = input_combo["has_vertex_pair"]
-    G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+    is_weighted = input_combo["is_weighted"]
+    G = utils.generate_cugraph_graph_from_file(
+        input_data_path, directed=directed, edgevals=is_weighted
+    )
     if has_vertex_pair:
         # Sample random vertices from the graph and compute the two_hop_neighbors
         # with those seeds
@@ -84,7 +91,9 @@ def input_expected_output(input_combo):
         vertex_pair = None
 
     input_combo["vertex_pair"] = vertex_pair
-    sg_cugraph_overlap = cugraph.experimental.overlap(G, input_combo["vertex_pair"])
+    sg_cugraph_overlap = cugraph.overlap(
+        G, input_combo["vertex_pair"], use_weight=is_weighted
+    )
     # Save the results back to the input_combo dictionary to prevent redundant
     # cuGraph runs. Other tests using the input_combo fixture will look for
     # them, and if not present they will have to re-run the same cuGraph call.
@@ -104,6 +113,7 @@ def input_expected_output(input_combo):
         ddf,
         source="src",
         destination="dst",
+        edge_attr="value" if is_weighted else None,
         renumber=True,
         store_transposed=True,
     )
@@ -125,8 +135,11 @@ def input_expected_output(input_combo):
 def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):
 
     dg = input_expected_output["MGGraph"]
+    use_weight = input_expected_output["is_weighted"]
 
-    result_overlap = benchmark(dcg.overlap, dg, input_expected_output["vertex_pair"])
+    result_overlap = benchmark(
+        dcg.overlap, dg, input_expected_output["vertex_pair"], use_weight=use_weight
+    )
 
     result_overlap = (
         result_overlap.compute()
@@ -154,41 +167,3 @@ def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):
 
     assert len(overlap_coeff_diffs1) == 0
     assert len(overlap_coeff_diffs2) == 0
-
-
-@pytest.mark.mg
-def test_dask_mg_weighted_overlap():
-    input_data_path = datasets[0]
-    chunksize = dcg.get_chunksize(input_data_path)
-    ddf = dask_cudf.read_csv(
-        input_data_path,
-        chunksize=chunksize,
-        delimiter=" ",
-        names=["src", "dst", "value"],
-        dtype=["int32", "int32", "float32"],
-    )
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        renumber=True,
-        store_transposed=True,
-    )
-    with pytest.raises(ValueError):
-        dcg.overlap(dg)
-
-    dg = cugraph.Graph(directed=False)
-    dg.from_dask_cudf_edgelist(
-        ddf,
-        source="src",
-        destination="dst",
-        edge_attr="value",
-        store_transposed=True,
-    )
-
-    use_weight = True
-    with pytest.raises(ValueError):
-        dcg.overlap(dg, use_weight=use_weight)