Skip to content

Commit

Permalink
update tests to use dataset metadata when getting golden networkx results
Browse files Browse the repository at this point in the history
  • Loading branch information
Ralph Liu committed Aug 10, 2023
1 parent c55151c commit 1be0201
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions python/cugraph/cugraph/tests/community/test_ecg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# limitations under the License.

import gc
from pathlib import PurePath

import pytest
import networkx as nx
Expand All @@ -32,12 +31,12 @@ def cugraph_call(G, min_weight, ensemble_size):
return score, num_parts


def golden_call(graph_file):
if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "dolphins.csv":
def golden_call(filename):
if filename == "dolphins":
return 0.4962422251701355
if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "karate.csv":
if filename == "karate":
return 0.38428664207458496
if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "netscience.csv":
if filename == "netscience":
return 0.9279554486274719


Expand All @@ -49,38 +48,37 @@ def golden_call(graph_file):


@pytest.mark.sg
@pytest.mark.parametrize("graph_file", DATASETS)
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("min_weight", MIN_WEIGHTS)
@pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES)
def test_ecg_clustering(graph_file, min_weight, ensemble_size):
def test_ecg_clustering(dataset, min_weight, ensemble_size):
gc.collect()

# Read in the graph and get a cugraph object

G = graph_file.get_graph()
dataset_path = graph_file.get_path()
G = dataset.get_graph()
# read_weights_in_sp=False => value column dtype is float64
G.edgelist.edgelist_df["weights"] = G.edgelist.edgelist_df["weights"].astype(
"float64"
)

# Get the modularity score for partitioning versus random assignment
cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size)
golden_score = golden_call(dataset_path)
filename = dataset.metadata["name"]
golden_score = golden_call(filename)

# Assert that the partitioning has better modularity than the random
# assignment
assert cu_score > (0.95 * golden_score)


@pytest.mark.sg
@pytest.mark.parametrize("graph_file", DATASETS)
@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("min_weight", MIN_WEIGHTS)
@pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES)
def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size):
def test_ecg_clustering_nx(dataset, min_weight, ensemble_size):

gc.collect()
dataset_path = graph_file.get_path()
dataset_path = dataset.get_path()
# Read in the graph and get a NetworkX graph
M = utils.read_csv_for_nx(dataset_path, read_weights_in_sp=True)
G = nx.from_pandas_edgelist(
Expand Down

0 comments on commit 1be0201

Please sign in to comment.