
Allow models to use a lightweight sparse structure #3782

Merged · 7 commits · Aug 17, 2023
87 changes: 85 additions & 2 deletions python/cugraph-dgl/cugraph_dgl/nn/conv/base.py
@@ -11,14 +11,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional, Tuple

from cugraph.utilities.utils import import_optional

torch = import_optional("torch")
nn = import_optional("torch.nn")
ops_torch = import_optional("pylibcugraphops.pytorch")


-class BaseConv(nn.Module):
+class BaseConv(torch.nn.Module):
Member Author:
This is not a user-facing class. It is only used to handle the case where we fall back to the full-graph variant. In addition, with the recent cugraph-ops refactoring disabling the MFG variant, we might remove this class entirely.

Member:
Oh, I am sorry, I left the review at the wrong line. I meant the SparseGraph class.

Member:
Resolved

r"""An abstract base class for cugraph-ops nn module."""

def __init__(self):
@@ -48,3 +49,85 @@ def pad_offsets(self, offsets: torch.Tensor, size: int) -> torch.Tensor:
self._cached_offsets_fg[offsets.numel() : size] = offsets[-1]

return self._cached_offsets_fg[:size]


class SparseGraph(object):
r"""A god-class to store different sparse formats needed by cugraph-ops
and facilitate sparse format conversions.

Parameters
----------
size: tuple of int
Size of the adjacency matrix: (num_src_nodes, num_dst_nodes).

src_ids: torch.Tensor
Source indices of the edges.

dst_ids: torch.Tensor, optional
Destination indices of the edges.

csrc_ids: torch.Tensor, optional
        Compressed source indices. It is a monotonically increasing array of
        size (num_src_nodes + 1,). For the k-th source node, its neighborhood
        consists of the destinations between `dst_ids[csrc_ids[k]]` and
        `dst_ids[csrc_ids[k+1]]`.
Comment on lines +80 to +84
Member:
I have a question regarding the case where num_dst_nodes > len(cdst_ids) - 1.

Let's look at the case below:

cdst_ids (compressed destination indices): 0, 2, 5, 7
src_ids: 1, 2, 2, 3, 4, 4, 5

I believe the following will work (please correct me):

num_src_nodes = 6
num_dst_nodes = 3

And I guess the following will fail (please correct me):

num_src_nodes = 6
num_dst_nodes = 5  # raised to a higher value to account for output nodes that are missing

Question: this will have to be handled by ensuring correct construction, because we want to handle the alignment problem between blocks.

Member Author:
It should be illegal when num_dst_nodes != len(cdst_ids) - 1. I will improve the error handling in this case; for example, PyG does lots of assertions to check sizes, and we should throw proper exceptions.

Given:

cdst_ids (compressed destination indices): 0, 2, 5, 7
src_ids: 1, 2, 2, 3, 4, 4, 5

your example with num_src_nodes = 6 and num_dst_nodes = 3 translates to a COO of

src_ids: (1, 2, 2, 3, 4, 4, 5)
dst_ids: (0, 0, 1, 1, 1, 2, 2)

With num_src_nodes = 6 and num_dst_nodes = 5, the constructor should fail, unless cdst_ids is augmented (cdst_ids = 0, 2, 5, 7, 7, 7).
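To make the invariant concrete, here is a minimal sketch of the kind of size check being discussed (illustrative only; the constructor in this diff does not yet enforce it):

```python
import torch

def check_csc(cdst_ids: torch.Tensor, src_ids: torch.Tensor, num_dst_nodes: int) -> None:
    # A CSC offsets array needs exactly one entry per destination node plus
    # one, and its last entry must equal the number of edges.
    if cdst_ids.numel() != num_dst_nodes + 1:
        raise ValueError(
            f"len(cdst_ids) must be num_dst_nodes + 1 = {num_dst_nodes + 1}, "
            f"got {cdst_ids.numel()}."
        )
    if cdst_ids[-1].item() != src_ids.numel():
        raise ValueError("cdst_ids[-1] must equal the number of edges.")

cdst_ids = torch.tensor([0, 2, 5, 7])
src_ids = torch.tensor([1, 2, 2, 3, 4, 4, 5])
check_csc(cdst_ids, src_ids, num_dst_nodes=3)    # passes
# check_csc(cdst_ids, src_ids, num_dst_nodes=5)  # raises, unless cdst_ids
#                                                # is augmented to [0, 2, 5, 7, 7, 7]
```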

Member:
Thanks, yup, this is what I was expecting. We will just make sure that @seunghwak's changes to cugraph sampling ensure that all the MFGs line up.


cdst_ids: torch.Tensor, optional
        Compressed destination indices. It is a monotonically increasing array
        of size (num_dst_nodes + 1,). For the k-th destination node, its
        neighborhood consists of the sources between `src_ids[cdst_ids[k]]` and
        `src_ids[cdst_ids[k+1]]`.

dst_ids_is_sorted: bool
        Whether `dst_ids` has been sorted in ascending order.

Notes
-----
COO-format requires `src_ids` and `dst_ids`.
CSC-format requires `cdst_ids` and `src_ids`.
CSR-format requires `csrc_ids` and `dst_ids`.
Member:
I think we should force the user to state the format explicitly to prevent confusion, e.g., add a format variable, something like input_format, which takes the values coo, csc, and csr.

Then we can raise errors according to the input the user provided.

Also, I don't like the input_format variable name, but you get the idea.


For MFGs (sampled graphs), the node ids must have been renumbered.
"""

def __init__(
self,
size: Tuple[int, int],
src_ids: torch.Tensor,
dst_ids: Optional[torch.Tensor] = None,
csrc_ids: Optional[torch.Tensor] = None,
cdst_ids: Optional[torch.Tensor] = None,
dst_ids_is_sorted: bool = False,
):
if dst_ids is None and cdst_ids is None:
raise ValueError("One of 'dst_ids' and 'cdst_ids' must be given.")

if src_ids is not None:
src_ids = src_ids.contiguous()
if dst_ids is not None:
dst_ids = dst_ids.contiguous()
if csrc_ids is not None:
csrc_ids = csrc_ids.contiguous()
if cdst_ids is not None:
cdst_ids = cdst_ids.contiguous()

self._src_ids = src_ids
self._dst_ids = dst_ids
self._csrc_ids = csrc_ids
self._cdst_ids = cdst_ids
self.num_src_nodes, self.num_dst_nodes = size

# Force create CSC format.
if self._cdst_ids is None:
if not dst_ids_is_sorted:
self._dst_ids, self._perm = torch.sort(self._dst_ids)
self._src_ids = self._src_ids[self._perm]
self._cdst_ids = torch._convert_indices_from_coo_to_csr(
self._dst_ids,
self.num_dst_nodes,
out_int32=self._dst_ids.dtype == torch.int32,
)

Member:
Do you think we should remove dst_ids if we are forcing the CSC conversion? Keeping it around always would mean a memory overhead.

Member:
We are forcing CSC conversion for now, but in the future we may want to expand to other formats, so I think we might want to make this configurable via a class variable.

We can probably borrow the convention from DGL's formats API. We won't follow their default of 'coo' -> 'csr' -> 'csc', but have our own version. See the formats docs.

Member Author:
As we discussed via Slack, we will provide input_format and output_format to help specify which tensors are needed.
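A sketch of what such validation could look like (hypothetical helper, not part of this change; the names mirror the discussion above):

```python
def validate_input_format(input_format, src_ids, dst_ids, csrc_ids, cdst_ids):
    """Raise if the tensors required by `input_format` are missing."""
    required = {
        "coo": ("src_ids", "dst_ids"),
        "csc": ("cdst_ids", "src_ids"),
        "csr": ("csrc_ids", "dst_ids"),
    }
    if input_format not in required:
        raise ValueError(f"Invalid input_format: '{input_format}'")
    provided = {
        "src_ids": src_ids,
        "dst_ids": dst_ids,
        "csrc_ids": csrc_ids,
        "cdst_ids": cdst_ids,
    }
    missing = [name for name in required[input_format] if provided[name] is None]
    if missing:
        raise ValueError(f"Input format '{input_format}' requires {missing}.")
```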

def csc(self) -> Tuple[torch.Tensor, torch.Tensor]:
r"""Return CSC format."""
return (self._cdst_ids, self._src_ids)
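A minimal usage sketch of the class as it stands in this revision (assumes a CUDA device), built from the example in the review thread above:

```python
import torch
from cugraph_dgl.nn.conv.base import SparseGraph

src_ids = torch.tensor([1, 2, 2, 3, 4, 4, 5], device="cuda")
dst_ids = torch.tensor([0, 0, 1, 1, 1, 2, 2], device="cuda")

# COO input: the constructor sorts by destination and eagerly builds CSC.
sg = SparseGraph(size=(6, 3), src_ids=src_ids, dst_ids=dst_ids)

offsets, indices = sg.csc()
# offsets == tensor([0, 2, 5, 7]); indices holds the source ids grouped
# by destination node.
```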
60 changes: 34 additions & 26 deletions python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py
@@ -14,9 +14,9 @@
cugraph-ops"""
# pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments
from __future__ import annotations
-from typing import Optional
+from typing import Optional, Union

-from cugraph_dgl.nn.conv.base import BaseConv
+from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph
from cugraph.utilities.utils import import_optional

dgl = import_optional("dgl")
@@ -98,50 +98,58 @@ def reset_parameters(self):

def forward(
self,
-        g: dgl.DGLHeteroGraph,
+        g: Union[SparseGraph, dgl.DGLHeteroGraph],
feat: torch.Tensor,
max_in_degree: Optional[int] = None,
) -> torch.Tensor:
r"""Forward computation.

Parameters
----------
-        g : DGLGraph
+        g : DGLGraph or SparseGraph
The graph.
feat : torch.Tensor
Node features. Shape: :math:`(|V|, D_{in})`.
         max_in_degree : int
-            Maximum in-degree of destination nodes. It is only effective when
-            :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When
-            :attr:`g` is generated from a neighbor sampler, the value should be
-            set to the corresponding :attr:`fanout`. If not given,
-            :attr:`max_in_degree` will be calculated on-the-fly.
+            Maximum in-degree of destination nodes. When :attr:`g` is generated
+            from a neighbor sampler, the value should be set to the corresponding
+            :attr:`fanout`. This option is used to invoke the MFG variant of the
+            cugraph-ops kernel.

Returns
-------
torch.Tensor
Output node features. Shape: :math:`(|V|, D_{out})`.
"""
-        offsets, indices, _ = g.adj_tensors("csc")
-
-        if g.is_block:
-            if max_in_degree is None:
-                max_in_degree = g.in_degrees().max().item()
-
-            if max_in_degree < self.MAX_IN_DEGREE_MFG:
-                _graph = ops_torch.SampledCSC(
-                    offsets, indices, max_in_degree, g.num_src_nodes()
-                )
-            else:
-                offsets_fg = self.pad_offsets(offsets, g.num_src_nodes() + 1)
-                _graph = ops_torch.StaticCSC(offsets_fg, indices)
+        if max_in_degree is None:
+            max_in_degree = -1
+
+        if isinstance(g, SparseGraph):
+            offsets, indices = g.csc()
+            _graph = ops_torch.CSC(
+                offsets=offsets,
+                indices=indices,
+                num_src_nodes=g.num_src_nodes,
+                dst_max_in_degree=max_in_degree,
+            )
+            num_dst_nodes = g.num_dst_nodes
+        elif isinstance(g, dgl.DGLHeteroGraph):
+            offsets, indices, _ = g.adj_tensors("csc")
+            _graph = ops_torch.CSC(
+                offsets=offsets,
+                indices=indices,
+                num_src_nodes=g.num_src_nodes(),
+                dst_max_in_degree=max_in_degree,
+            )
+            num_dst_nodes = g.num_dst_nodes()
         else:
-            _graph = ops_torch.StaticCSC(offsets, indices)
+            raise TypeError(
+                f"The graph has to be either a 'SparseGraph' or "
+                f"'dgl.DGLHeteroGraph', but got '{type(g)}'."
+            )

         feat = self.feat_drop(feat)
-        h = ops_torch.operators.agg_concat_n2n(feat, _graph, self.aggr)[
-            : g.num_dst_nodes()
-        ]
+        h = ops_torch.operators.agg_concat_n2n(feat, _graph, self.aggr)[:num_dst_nodes]
         h = self.linear(h)

         return h
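For reference, a sketch of how `max_in_degree` is typically supplied when blocks come from a DGL neighbor sampler; `g`, `feat`, and `conv` are assumed to be a GPU graph, its node features, and the CuGraphSAGEConv module above:

```python
import dgl
import torch

fanout = 10
sampler = dgl.dataloading.NeighborSampler([fanout])
loader = dgl.dataloading.DataLoader(
    g,                                           # assumed: DGLGraph on "cuda"
    torch.arange(g.num_nodes(), device="cuda"),  # seed nodes
    sampler,
    batch_size=128,
)
for input_nodes, output_nodes, blocks in loader:
    x = feat[input_nodes]
    # The fanout bounds every destination node's in-degree, so it is the
    # natural value to pass as max_in_degree for the MFG-variant kernel.
    h = conv(blocks[0], x, max_in_degree=fanout)
```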
25 changes: 17 additions & 8 deletions python/cugraph-dgl/tests/nn/test_sageconv.py
@@ -14,12 +14,9 @@

import pytest

-try:
-    import cugraph_dgl
-except ModuleNotFoundError:
-    pytest.skip("cugraph_dgl not available", allow_module_level=True)
-
 from cugraph.utilities.utils import import_optional
+from cugraph_dgl.nn.conv.base import SparseGraph
+from cugraph_dgl.nn import SAGEConv as CuGraphSAGEConv
from .common import create_graph1

torch = import_optional("torch")
@@ -30,20 +27,29 @@
@pytest.mark.parametrize("idtype_int", [False, True])
@pytest.mark.parametrize("max_in_degree", [None, 8])
@pytest.mark.parametrize("to_block", [False, True])
def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block):
@pytest.mark.parametrize("sparse_graph", ["coo", "csc", None])
def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block, sparse_graph):
SAGEConv = dgl.nn.SAGEConv
CuGraphSAGEConv = cugraph_dgl.nn.SAGEConv
device = "cuda"

in_feat, out_feat = 5, 2
kwargs = {"aggregator_type": "mean", "bias": bias}
g = create_graph1().to(device)

if idtype_int:
g = g.int()
if to_block:
g = dgl.to_block(g)

size = (g.num_src_nodes(), g.num_dst_nodes())
feat = torch.rand(g.num_src_nodes(), in_feat).to(device)

if sparse_graph == "coo":
sg = SparseGraph(size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1])
elif sparse_graph == "csc":
offsets, indices, _ = g.adj_tensors("csc")
sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets)

torch.manual_seed(0)
conv1 = SAGEConv(in_feat, out_feat, **kwargs).to(device)

Expand All @@ -57,7 +63,10 @@ def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block):
conv2.linear.bias.data[:] = conv1.fc_self.bias.data

out1 = conv1(g, feat)
-    out2 = conv2(g, feat, max_in_degree=max_in_degree)
+    if sparse_graph is not None:
+        out2 = conv2(sg, feat, max_in_degree=max_in_degree)
+    else:
+        out2 = conv2(g, feat, max_in_degree=max_in_degree)
assert torch.allclose(out1, out2, atol=1e-06)

grad_out = torch.rand_like(out1)