Skip to content

Commit

Permalink
Confirmed that pytests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
VibhuJawa committed Jul 27, 2023
1 parent f62d773 commit 42ebfda
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 5 deletions.
6 changes: 6 additions & 0 deletions python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,12 @@ def __iter__(self):
self._sampling_output_dir, "epoch_" + str(self.epoch_number)
)
if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset):
deduplicate_sources = True
prior_sources_behavior = "carryover"
renumber = True
else:
deduplicate_sources = False
prior_sources_behavior = None
renumber = False

bs = BulkSampler(
Expand All @@ -223,6 +227,8 @@ def __iter__(self):
seeds_per_call=self._seeds_per_call,
fanout_vals=self.graph_sampler._reversed_fanout_vals,
with_replacement=self.graph_sampler.replace,
deduplicate_sources=deduplicate_sources,
prior_sources_behavior=prior_sources_behavior,
renumber=renumber,
)
if self.shuffle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,14 @@ def _create_homogeneous_sampled_graphs_from_tensors_perhop(tensors_batch_d, edge
if edge_dir == "out":
raise ValueError("Outwards edges not supported yet")
graph_per_hop_ls = []
seednodes = None
for hop_id, tensor_per_hop_d in tensors_batch_d.items():
if hop_id != "map":
block = _create_homogeneous_dgl_block_from_tensor_d(
tensor_per_hop_d, tensors_batch_d["map"]
tensor_per_hop_d, tensors_batch_d["map"], seednodes
)
seednodes = torch.concat(
[tensor_per_hop_d["sources"], tensor_per_hop_d["destinations"]]
)
graph_per_hop_ls.append(block)

Expand All @@ -184,10 +188,28 @@ def _create_homogeneous_sampled_graphs_from_tensors_perhop(tensors_batch_d, edge
return input_nodes, output_nodes, graph_per_hop_ls


def _create_homogeneous_dgl_block_from_tensor_d(tensor_d, renumber_map):
def _create_homogeneous_dgl_block_from_tensor_d(tensor_d, renumber_map, seednodes=None):
rs = tensor_d["sources"]
rd = tensor_d["destinations"]
block = dgl.create_block((rs, rd))

max_src_nodes = rs.max()
max_dst_nodes = rd.max()
if seednodes is not None:
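# If there are isolated vertices, some seed nodes carried over from
# the previously processed hop may not appear among this hop's
# destinations, so we widen the destination node count to cover them
# and ensure all the blocks are lined up correctly.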
max_dst_nodes = max(max_dst_nodes, seednodes.max())

data_dict = {("_N", "_E", "_N"): (rs, rd)}
num_src_nodes = {"_N": max_src_nodes.item() + 1}
num_dst_nodes = {"_N": max_dst_nodes.item() + 1}
print("num_src_nodes", num_src_nodes)
print("num_dst_nodes", num_dst_nodes)
block = dgl.create_block(
data_dict=data_dict, num_src_nodes=num_src_nodes, num_dst_nodes=num_dst_nodes
)
if "edge_id" in tensor_d:
block.edata[dgl.EID] = tensor_d["edge_id"]
block.srcdata[dgl.NID] = renumber_map[block.srcnodes()]
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"7.32 s ± 778 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
"7.08 s ± 596 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
Expand Down Expand Up @@ -217,7 +217,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"7.39 s ± 524 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
"7.34 s ± 353 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
Expand Down

0 comments on commit 42ebfda

Please sign in to comment.