From c22b9703eb5e6180cf2175b8578f3d2c45c26379 Mon Sep 17 00:00:00 2001 From: acostadon Date: Mon, 31 Jul 2023 11:43:28 -0400 Subject: [PATCH 1/6] fix to force atlas to allow graphs with string labels and a test for that by adding dining_prefs data set --- datasets/dining_prefs.csv | 52 ++++++++++++++++++ datasets/dining_prefs.mtx | 54 +++++++++++++++++++ .../cugraph/experimental/datasets/__init__.py | 2 +- .../datasets/metadata/dining_prefs.yaml | 23 ++++++++ .../cugraph/layout/force_atlas2_wrapper.pyx | 5 +- .../cugraph/tests/layout/test_force_atlas2.py | 12 +++-- 6 files changed, 142 insertions(+), 6 deletions(-) create mode 100644 datasets/dining_prefs.csv create mode 100644 datasets/dining_prefs.mtx create mode 100644 python/cugraph/cugraph/experimental/datasets/metadata/dining_prefs.yaml diff --git a/datasets/dining_prefs.csv b/datasets/dining_prefs.csv new file mode 100644 index 00000000000..5c25b4d87e5 --- /dev/null +++ b/datasets/dining_prefs.csv @@ -0,0 +1,52 @@ +Ada Cora 1 +Ada Louise 2 +Cora Ada 1 +Cora Jean 2 +Louise Marion 1 +Louise Lena 2 +Jean Helen 1 +Jean Robin 2 +Helen Jean 1 +Helen Eva 2 +Martha Marion 2 +Martha Anna 1 +Alice Martha 2 +Alice Eva 1 +Robin Helen 2 +Robin Eva 1 +Marion Martha 1 +Marion Frances 2 +Maxine Eva 2 +Maxine Adele 1 +Lena Louise 2 +Lena Marion 1 +Hazel Hilda 1 +Hazel Anna 2 +Hilda Hazel 2 +Hilda Betty 1 +Frances Marion 2 +Frances Eva 1 +Eva Marion 2 +Eva Maxine 1 +Ruth Hilda 2 +Ruth Jane 1 +Edna Adele 2 +Edna Mary 1 +Adele Marion 2 +Adele Frances 1 +Jane Adele 1 +Jane Mary 2 +Anna Maxine 1 +Anna Lena 2 +Mary Edna 1 +Mary Jane 2 +Betty Hilda 2 +Betty Edna 1 +Ella Helen 2 +Ella Ellen 1 +Ellen Edna 2 +Ellen Anna 1 +Laura Eva 1 +Laura Edna 2 +Irene Hilda 1 +Irene Ellen 2 \ No newline at end of file diff --git a/datasets/dining_prefs.mtx b/datasets/dining_prefs.mtx new file mode 100644 index 00000000000..191d370eb45 --- /dev/null +++ b/datasets/dining_prefs.mtx @@ -0,0 +1,54 @@ +%%MatrixMarket matrix coordinate pattern symmetric 
+26 26 52 +1 2 1 +1 3 2 +2 1 1 +2 4 2 +3 9 1 +3 11 2 +4 5 1 +4 8 2 +5 4 1 +5 15 2 +6 9 2 +6 20 1 +7 6 2 +7 15 1 +8 5 2 +8 15 1 +9 6 1 +9 14 2 +10 15 2 +10 18 1 +11 3 2 +11 9 1 +12 13 1 +12 20 2 +13 12 2 +13 22 1 +14 9 2 +14 15 1 +15 9 2 +15 10 1 +16 13 2 +16 19 1 +17 18 2 +17 21 1 +18 9 2 +18 14 1 +19 18 1 +19 21 2 +20 10 1 +20 11 2 +21 17 1 +21 19 2 +22 13 2 +22 17 1 +23 5 2 +23 24 1 +24 17 2 +24 20 1 +25 15 1 +25 17 2 +26 13 1 +26 24 2 \ No newline at end of file diff --git a/python/cugraph/cugraph/experimental/datasets/__init__.py b/python/cugraph/cugraph/experimental/datasets/__init__.py index a1dd45b3d9f..0f9d8b5962b 100644 --- a/python/cugraph/cugraph/experimental/datasets/__init__.py +++ b/python/cugraph/cugraph/experimental/datasets/__init__.py @@ -24,7 +24,7 @@ meta_path = Path(__file__).parent / "metadata" - +dining_prefs = Dataset(meta_path / "dining_prefs.yaml") karate = Dataset(meta_path / "karate.yaml") karate_data = Dataset(meta_path / "karate_data.yaml") karate_undirected = Dataset(meta_path / "karate_undirected.yaml") diff --git a/python/cugraph/cugraph/experimental/datasets/metadata/dining_prefs.yaml b/python/cugraph/cugraph/experimental/datasets/metadata/dining_prefs.yaml new file mode 100644 index 00000000000..f9d34c11414 --- /dev/null +++ b/python/cugraph/cugraph/experimental/datasets/metadata/dining_prefs.yaml @@ -0,0 +1,23 @@ +name: dining_prefs +file_type: .csv +author: J.L. Moreno +url: https://data.rapids.ai/cugraph/datasets/dining_prefs.csv +refs: + J. L. Moreno (1960). The Sociometry Reader. 
The Free Press, Glencoe, Illinois, pg.35 +delim: " " +header: None +col_names: + - src + - dst + - wgt +col_types: + - string + - string + - int +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: true +number_of_edges: 52 +number_of_nodes: 26 +number_of_lines: 52 diff --git a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx index 4258be3ef71..0ba890c0927 100644 --- a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx @@ -58,7 +58,10 @@ def force_atlas2(input_graph, # FIXME: This implementation assumes that the number of vertices # is the max vertex ID + 1 which is not always the case. - num_verts = input_graph.nodes().max() + 1 + if input_graph.is_renumbered(): + num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1 + else: + num_verts = input_graph.nodes().max() + 1 num_edges = len(input_graph.edgelist.edgelist_df['src']) cdef GraphCOOView[int,int,float] graph_float diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py index 12d0a4e3aa6..df46a0e6818 100644 --- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py +++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py @@ -19,7 +19,7 @@ from cugraph.internals import GraphBasedDimRedCallback from sklearn.manifold import trustworthiness import scipy.io -from cugraph.experimental.datasets import karate, polbooks, dolphins, netscience +from cugraph.experimental.datasets import karate, polbooks, dolphins, netscience, dining_prefs # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -43,11 +43,15 @@ def cugraph_call( strong_gravity_mode, gravity, callback=None, + renumber=False ): - G = cugraph.Graph() + if cu_M['src'] is not int or cu_M['dst'] is not int: + renumber = True + else: 
+ renumber = False G.from_cudf_edgelist( - cu_M, source="src", destination="dst", edge_attr="wgt", renumber=False + cu_M, source="src", destination="dst", edge_attr="wgt", renumber=renumber ) t1 = time.time() @@ -72,7 +76,7 @@ def cugraph_call( return pos -DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66)] +DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66), (dining_prefs, 0.50)] MAX_ITERATIONS = [500] From 9a806613b64dd0a0a35507c347747b39d6a732e5 Mon Sep 17 00:00:00 2001 From: acostadon Date: Mon, 31 Jul 2023 14:29:46 -0400 Subject: [PATCH 2/6] removed fixme note and added comment --- python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx index 0ba890c0927..41a7a807000 100644 --- a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx @@ -56,8 +56,7 @@ def force_atlas2(input_graph, if not input_graph.edgelist: input_graph.view_edge_list() - # FIXME: This implementation assumes that the number of vertices - # is the max vertex ID + 1 which is not always the case. 
+ # this code allows handling of graphs that have been renumbered if input_graph.is_renumbered(): num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1 else: From 3a5fb2504ec1f38a6e2a5fee082245a4ab1c301c Mon Sep 17 00:00:00 2001 From: acostadon Date: Mon, 31 Jul 2023 15:39:02 -0400 Subject: [PATCH 3/6] fixed style issue --- .../cugraph/tests/layout/test_force_atlas2.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py index df46a0e6818..9336b71fa94 100644 --- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py +++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py @@ -19,7 +19,13 @@ from cugraph.internals import GraphBasedDimRedCallback from sklearn.manifold import trustworthiness import scipy.io -from cugraph.experimental.datasets import karate, polbooks, dolphins, netscience, dining_prefs +from cugraph.experimental.datasets import ( + karate, + polbooks, + dolphins, + netscience, + dining_prefs, +) # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -43,10 +49,10 @@ def cugraph_call( strong_gravity_mode, gravity, callback=None, - renumber=False + renumber=False, ): G = cugraph.Graph() - if cu_M['src'] is not int or cu_M['dst'] is not int: + if cu_M["src"] is not int or cu_M["dst"] is not int: renumber = True else: renumber = False @@ -76,7 +82,13 @@ def cugraph_call( return pos -DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66), (dining_prefs, 0.50)] +DATASETS = [ + (karate, 0.70), + (polbooks, 0.75), + (dolphins, 0.66), + (netscience, 0.66), + (dining_prefs, 0.50), +] MAX_ITERATIONS = [500] From db5ff0c1361918807a2ba22003643b8a9ebbb0d7 Mon Sep 17 00:00:00 2001 From: acostadon Date: Tue, 1 Aug 2023 12:41:27 -0400 Subject: [PATCH 4/6] trying verify From 
c5bfb16996a69dec8287653ae4eec24c98ce5a1b Mon Sep 17 00:00:00 2001 From: acostadon Date: Tue, 1 Aug 2023 14:06:37 -0400 Subject: [PATCH 5/6] fixed comment --- python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx index 41a7a807000..5a2784e2363 100644 --- a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx @@ -56,7 +56,7 @@ def force_atlas2(input_graph, if not input_graph.edgelist: input_graph.view_edge_list() - # this code allows handling of graphs that have been renumbered + # this code allows handling of renumbered graphs if input_graph.is_renumbered(): num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1 else: From 6d2f694b3ef43be306d4a0a544c600e1a65b17bf Mon Sep 17 00:00:00 2001 From: acostadon Date: Wed, 2 Aug 2023 07:59:29 -0400 Subject: [PATCH 6/6] added new dining preferences dataset to docs --- datasets/README.md | 3 +++ docs/cugraph/source/references/datasets.md | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/datasets/README.md b/datasets/README.md index e42413fc996..ba855df0fcf 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -9,6 +9,7 @@ This directory contains small public datasets in `mtx` and `csv` format used by | karate | 34 | 156 | No | No | | dolphin | 62 | 318 | No | No | | netscience | 1,589 | 5,484 | No | Yes | +| dining_prefs | 26 | 52 | Yes | Yes | **karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. @@ -16,6 +17,8 @@ This directory contains small public datasets in `mtx` and `csv` format used by **netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. 
Newman in May 2006. +**dining_prefs** : The graph dining_prefs contains dining-partner preferences, a classic social network dataset originated by J. L. Moreno; see J. L. Moreno (1960), *The Sociometry Reader*, The Free Press, Glencoe, Illinois, pg. 35. + ### Modified datasets diff --git a/docs/cugraph/source/references/datasets.md b/docs/cugraph/source/references/datasets.md index 3d45dec188a..44a277912f5 100644 --- a/docs/cugraph/source/references/datasets.md +++ b/docs/cugraph/source/references/datasets.md @@ -2,14 +2,20 @@ karate - W. W. Zachary, *An information flow model for conflict and fission in small groups*, Journal of Anthropological Research 33, 452-473 (1977). + +dining_prefs + - J. L. Moreno (1960). *The Sociometry Reader*, The Free Press, Glencoe, Illinois, pg.35 + dolphins - D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and S. M. Dawson, *The bottlenose dolphin community of Doubtful Sound features a large proportion of long-lasting associations*, Behavioral Ecology and Sociobiology 54, 396-405 (2003). + netscience - M. E. J. Newman, *Finding community structure in networks using the eigenvectors of matrices*, Preprint physics/0605087 (2006). + email-Eu-core - Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. *Local Higher-order Graph Clustering.* @@ -17,5 +23,6 @@ email-Eu-core - J. Leskovec, J. Kleinberg and C. Faloutsos. *Graph Evolution: Densification and Shrinking Diameters*. ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007. http://www.cs.cmu.edu/~jure/pubs/powergrowth-tkdd.pdf + polbooks - V. Krebs, unpublished, http://www.orgnet.com/. \ No newline at end of file