-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_node_classification.py
94 lines (82 loc) · 3.6 KB
/
run_node_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os

import numpy as np
import pandas as pd
import torch
from attrdict import AttrDict
from torch_geometric.datasets import WebKB, WikipediaNetwork, Actor, Planetoid
from torch_geometric.transforms import LargestConnectedComponents, ToUndirected
from torch_geometric.utils import to_networkx, from_networkx, to_undirected

from experiments.node_classification import Experiment
from hyperparams import get_args_from_input, get_args_from_input_node
from preprocessing import rewiring, sdrf, fosr, digl
# Transform instance; it is constructed here but not applied anywhere in the
# visible part of this script.
largest_cc = LargestConnectedComponents()

# Every benchmark is instantiated with the local "data" directory as its root.
cornell = WebKB(root="data", name="Cornell")
wisconsin = WebKB(root="data", name="Wisconsin")
texas = WebKB(root="data", name="Texas")

chameleon = WikipediaNetwork(root="data", name="chameleon")
squirrel = WikipediaNetwork(root="data", name="squirrel")

actor = Actor(root="data")

cora = Planetoid(root="data", name="cora")
citeseer = Planetoid(root="data", name="citeseer")
pubmed = Planetoid(root="data", name="pubmed")

# Registry of the datasets the experiment loop iterates over, keyed by the
# name accepted on the command line.
# NOTE(review): `chameleon` and `pubmed` are loaded above but are not
# registered here -- confirm whether that omission is deliberate.
datasets = {
    "cornell": cornell,
    "wisconsin": wisconsin,
    "texas": texas,
    "squirrel": squirrel,
    "actor": actor,
    "cora": cora,
    "citeseer": citeseer,
}

# Replace each registered dataset's edge list with its undirected version.
for dataset in datasets.values():
    dataset.data.edge_index = to_undirected(dataset.data.edge_index)
def log_to_file(message: str, filename: str = "results/node_classification.txt") -> None:
    """Print ``message`` and append it verbatim to ``filename``.

    Args:
        message: Text to emit. It is written exactly as given, so the caller
            is responsible for any trailing newline.
        filename: Path of the log file, opened in append mode. Missing parent
            directories are created before the file is opened.
    """
    print(message)

    # Make sure the target folder exists; a bare file name has no parent to create.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # The context manager closes the handle even if the write raises.
    with open(filename, "a") as log_file:
        log_file.write(message)
# Baseline hyperparameter values, exposed with attribute-style access.
# NOTE(review): nothing in the visible part of this script reads
# `default_args`; the run below uses whatever get_args_from_input_node()
# returns -- confirm whether these values are meant to be merged in.
default_args = AttrDict(
    dict(
        dropout=0.5,
        num_layers=3,
        hidden_dim=128,
        learning_rate=1e-3,
        layer_type="R-GCN",
        display=False,
        num_trials=100,
        eval_every=1,
        rewiring="none",
        num_iterations=50,
        num_relations=2,
        patience=100,
        dataset=None,
    )
)
# One summary row per evaluated dataset is accumulated here.
results = []

# Hyperparameters for this run come straight from the argument helper.
# NOTE(review): `default_args` is not merged in here -- confirm that
# get_args_from_input_node() supplies its own defaults.
args = get_args_from_input_node()

if args.dataset:
    # Restrict the run to the single requested dataset.
    name = args.dataset
    if name not in datasets:
        raise KeyError(
            f"unknown dataset {name!r}; available: {', '.join(sorted(datasets))}"
        )
    datasets = {name: datasets[name]}

# Create the output directory before any training starts, so a missing folder
# cannot discard results at the very end of a long run.
os.makedirs("results", exist_ok=True)

for key, dataset in datasets.items():
    accuracies = []
    print(f"TESTING: {key} ({args.rewiring})")

    # Optionally rewire the graph in place. Any other value of args.rewiring
    # leaves the dataset's edges untouched.
    if args.rewiring == "fosr":
        edge_index, edge_type, _ = fosr.edge_rewire(
            dataset.data.edge_index.numpy(),
            num_iterations=args.num_iterations,
        )
        dataset.data.edge_index = torch.tensor(edge_index)
        dataset.data.edge_type = torch.tensor(edge_type)
    elif args.rewiring == "sdrf":
        dataset.data.edge_index, dataset.data.edge_type = sdrf.sdrf(
            dataset.data,
            loops=args.num_iterations,
            remove_edges=False,
            is_undirected=True,
        )
    elif args.rewiring == "digl":
        # NOTE(review): an SDRF pass runs before DIGL, so DIGL rewires the
        # SDRF-modified graph. This looks like a copy-paste from the branch
        # above -- confirm it is intentional before trusting "digl" numbers.
        dataset.data.edge_index, dataset.data.edge_type = sdrf.sdrf(
            dataset.data,
            loops=args.num_iterations,
            remove_edges=False,
            is_undirected=True,
        )
        dataset.data.edge_index = digl.rewire(dataset.data, alpha=0.1, eps=0.05)
        # After DIGL every edge is assigned relation type 0.
        num_edges = dataset.data.edge_index.shape[1]
        dataset.data.edge_type = torch.zeros(num_edges, dtype=torch.int64)

    # Repeat the experiment and keep only the test accuracy of each trial.
    for trial in range(args.num_trials):
        print(f"TRIAL {trial+1}")
        _, _, test_acc = Experiment(args=args, dataset=dataset).run()
        accuracies.append(test_acc)

    # Compute the summary statistics once so the log and the CSV always agree.
    # The interval is twice the standard error of the mean.
    avg_accuracy = np.mean(accuracies)
    ci = 2 * np.std(accuracies) / (args.num_trials ** 0.5)

    log_to_file(f"RESULTS FOR {key} ({args.rewiring}):\n")
    log_to_file(f"average acc: {avg_accuracy}\n")
    log_to_file(f"plus/minus: {ci}\n\n")

    results.append({
        "dataset": key,
        "rewiring": args.rewiring,
        "num_iterations": args.num_iterations,
        "avg_accuracy": avg_accuracy,
        "ci": ci,
    })
    results_df = pd.DataFrame(results)

    # Append only the newest row after each dataset: finished results survive
    # a crash in a later dataset and no row is ever written twice. Slicing the
    # accumulated frame keeps the running row index. The header is emitted
    # only when the file is still empty. newline='' follows the pandas
    # recommendation for text-mode handles.
    with open('results/node_classification.csv', 'a', newline='') as f:
        results_df.iloc[[-1]].to_csv(f, mode='a', header=f.tell() == 0)