diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py index 35ca864824f..a995b1b7bac 100644 --- a/python/cugraph/cugraph/community/louvain.py +++ b/python/cugraph/cugraph/community/louvain.py @@ -21,7 +21,8 @@ from pylibcugraph import ResourceHandle -def louvain(G, max_iter=100, resolution=1.0): +# FIXME: max_iter should be renamed to max_level +def louvain(G, max_iter=100, resolution=1.0, threshold=1e-7): """ Compute the modularity optimizing partition of the input graph using the Louvain method @@ -41,17 +42,24 @@ def louvain(G, max_iter=100, resolution=1.0): The current implementation only supports undirected graphs. max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. No error occurs when the algorithm terminates early in this manner. - resolution: float/double, optional (default=1.0) + resolution: float, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. Defaults to 1. + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + Defaults to 1e-7. + Returns ------- parts : cudf.DataFrame @@ -84,6 +92,7 @@ def louvain(G, max_iter=100, resolution=1.0): resource_handle=ResourceHandle(), graph=G._plc_graph, max_level=max_iter, + threshold=threshold, resolution=resolution, do_expensive_check=False, ) diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py index c003939f5eb..36f625068e5 100644 --- a/python/cugraph/cugraph/dask/community/louvain.py +++ b/python/cugraph/cugraph/dask/community/louvain.py @@ -45,19 +45,29 @@ def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]: def _call_plc_louvain( - sID: bytes, mg_graph_x, max_iter: int, resolution: int, do_expensive_check: bool + sID: bytes, + mg_graph_x, + max_iter: int, + threshold: float, + resolution: float, + do_expensive_check: bool, ) -> Tuple[cp.ndarray, cp.ndarray, float]: return pylibcugraph_louvain( resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), graph=mg_graph_x, max_level=max_iter, + threshold=threshold, resolution=resolution, do_expensive_check=do_expensive_check, ) +# FIXME: max_iter should be renamed to max_level def louvain( - input_graph: Graph, max_iter: int = 100, resolution: int = 1.0 + input_graph: Graph, + max_iter: int = 100, + resolution: float = 1.0, + threshold: float = 1e-7, ) -> Tuple[dask_cudf.DataFrame, float]: """ Compute the modularity optimizing partition of the input graph using the @@ -78,16 +88,21 @@ def louvain( The current implementation only supports undirected graphs. max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. No error occurs when the algorithm terminates early in this manner. resolution: float/double, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. + + threshold: float/double, optional (default=1e-7) + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. Returns ------- @@ -126,6 +141,7 @@ def louvain( Comms.get_session_id(), input_graph._plc_graph[w], max_iter, + threshold, resolution, do_expensive_check, workers=[w], diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd index 67ba43bf611..af065cd9dc8 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd @@ -104,6 +104,18 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_louvain_with_threshold( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + size_t max_level, + double threshold, + double resolution, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error + ) + # extract_ego cdef cugraph_error_code_t \ cugraph_extract_ego( diff --git a/python/pylibcugraph/pylibcugraph/louvain.pyx b/python/pylibcugraph/pylibcugraph/louvain.pyx index ecae7e700b4..926fb7e777f 100644 --- a/python/pylibcugraph/pylibcugraph/louvain.pyx +++ b/python/pylibcugraph/pylibcugraph/louvain.pyx @@ -31,6 +31,7 @@ from pylibcugraph._cugraph_c.graph cimport ( from pylibcugraph._cugraph_c.community_algorithms cimport ( cugraph_hierarchical_clustering_result_t, cugraph_louvain, + cugraph_louvain_with_threshold, cugraph_hierarchical_clustering_result_get_vertices, cugraph_hierarchical_clustering_result_get_clusters, cugraph_hierarchical_clustering_result_get_modularity, @@ -51,6 +52,7 @@ from pylibcugraph.utils cimport ( def louvain(ResourceHandle resource_handle, _GPUGraph graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check): """ @@ -72,11 +74,16 @@ def louvain(ResourceHandle resource_handle, than the specified number of iterations. No error occurs when the algorithm terminates early in this manner. + threshold: double + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + resolution: double Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure @@ -100,7 +107,7 @@ def louvain(ResourceHandle resource_handle, ... resource_handle, graph_props, srcs, dsts, weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters, modularity) = pylibcugraph.louvain( - resource_handle, G, 100, 1., False) + resource_handle, G, 100, 1e-7, 1., False) >>> vertices [0, 1, 2] >>> clusters @@ -116,13 +123,14 @@ def louvain(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - error_code = cugraph_louvain(c_resource_handle_ptr, - c_graph_ptr, - max_level, - resolution, - do_expensive_check, - &result_ptr, - &error_ptr) + error_code = cugraph_louvain_with_threshold(c_resource_handle_ptr, + c_graph_ptr, + max_level, + threshold, + resolution, + do_expensive_check, + &result_ptr, + &error_ptr) assert_success(error_code, error_ptr, "cugraph_louvain") # Extract individual device array pointers from result and copy to cupy