- """Modeling Relational Data with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1703.06103
- Reference Code: https://github.com/tkipf/relational-gcn
- """
import os

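# Make CUDA kernel launches synchronous so errors surface at the failing call
# site. This is a debugging aid and can slow training; remove it for speed.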
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import argparse
import json
import time

import numpy as np
import torch as th
import torch.nn.functional as F
from sklearn.metrics import average_precision_score

import dgl
from model import EntityClassify
from icdm2022_dataset import ICDM2022Dataset


def extract_embed(node_embed, input_nodes):
    """Slice out the features of the sampled input nodes, per node type."""
    emb = {}
    for ntype, nid in input_nodes.items():
        emb[ntype] = node_embed[ntype][nid]
    return emb


@th.no_grad()
def evaluate(model, loader, node_embed, labels, category, device):
    """Evaluate on ``loader``; return mean loss, accuracy and average precision."""
    model.eval()
    y_predicts = []
    y_trues = []
    total_acc = 0
    total_loss = 0
    count = 0
    for input_nodes, seeds, blocks in loader:
        blocks = [blk.to(device) for blk in blocks]
        seeds = seeds[category]
        emb = extract_embed(node_embed, input_nodes)
        emb = {k: e.to(device) for k, e in emb.items()}
        lbl = labels[seeds].to(device)
        logits = model(emb, blocks)[category]
        loss = F.cross_entropy(logits, lbl)
        acc = th.sum(logits.argmax(dim=1) == lbl).item()
        y_trues.append(lbl.detach().cpu())
        y_predicts.append(logits.detach().cpu())
        total_acc += acc
        total_loss += loss.item() * len(seeds)
        count += len(seeds)

    y_predicts = th.cat(y_predicts, dim=0)
    # binary task: the score is the probability of the positive class (index 1)
    y_predicts = F.softmax(y_predicts, dim=1)[:, 1]
    y_trues = th.cat(y_trues, dim=0)
    return total_loss / count, total_acc / count, average_precision_score(y_trues.numpy(), y_predicts.numpy())


@th.no_grad()
def test_inference(model, loader, node_embed, category, device):
    """Predict positive-class scores for the test nodes; return (node ids, scores)."""
    model.eval()
    y_predicts = []
    test_ids = []
    for input_nodes, seeds, blocks in loader:
        blocks = [blk.to(device) for blk in blocks]
        seeds = seeds[category]
        test_ids.append(seeds.detach().cpu())
        emb = extract_embed(node_embed, input_nodes)
        emb = {k: e.to(device) for k, e in emb.items()}
        logits = model(emb, blocks)[category]
        y_predicts.append(logits.detach().cpu())
    y_predicts = th.cat(y_predicts, dim=0)
    y_predicts = F.softmax(y_predicts, dim=1)[:, 1]
    test_ids = th.cat(test_ids, dim=0)
    return test_ids, y_predicts


def main(args):
    # check cuda
    device = 'cpu'
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        device = 'cuda:%d' % args.gpu

    # if we only load a trained model for inference, labels are not needed
    load_labels = not args.load_model
    dataset = ICDM2022Dataset(session=args.session, load_labels=load_labels, raw_dir=args.data_dir, verbose=True)
    g = dataset[0]
    category = dataset.category  # the node type we classify
    num_classes = dataset.num_classes

    # detach the input features from the graph so they can be gathered per batch
    node_embed = dict()
    for ntype in g.ntypes:
        node_embed[ntype] = g.nodes[ntype].data.pop('h')

    # create model
    model = EntityClassify(g,
                           args.n_hidden,
                           num_classes,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_self_loop=args.use_self_loop)
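    # Assuming ``EntityClassify`` follows the usual DGL example structure
    # (input layer + hidden layers + output layer), ``num_hidden_layers =
    # args.n_layers - 2`` yields ``args.n_layers`` layers in total.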

    if use_cuda:
        model.cuda()

    if not args.load_model:  # train and save the model
        train_mask = g.nodes[category].data.pop('train_mask')
        val_mask = g.nodes[category].data.pop('val_mask')
        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        labels = g.nodes[category].data.pop('label')
        if not args.data_cpu:
            labels = labels.to(device)
            for ntype in g.ntypes:
                node_embed[ntype] = node_embed[ntype].to(device)

        # train sampler: sample ``args.fanout`` neighbors per edge type for each layer
        sampler = dgl.dataloading.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
        loader = dgl.dataloading.DataLoader(
            g, {category: train_idx}, sampler,
            batch_size=args.batch_size, shuffle=True, num_workers=0)
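        # Each minibatch is a triple (input_nodes, seeds, blocks): ``input_nodes``
        # maps node types to the ids whose features are needed, ``seeds`` holds the
        # nodes to predict, and ``blocks`` has one message-flow graph per layer.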

        # validation sampler (no shuffling needed for evaluation)
        val_sampler = dgl.dataloading.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
        val_loader = dgl.dataloading.DataLoader(
            g, {category: val_idx}, val_sampler,
            batch_size=args.batch_size, shuffle=False, num_workers=0)

        # optimizer
        optimizer = th.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2norm)

        # training loop
        print("start training...")
        dur = []
        best_score = 0
        best_epoch = -1
        for epoch in range(args.n_epochs):
            model.train()
            t0 = time.time()

            for i, (input_nodes, seeds, blocks) in enumerate(loader):
                blocks = [blk.to(device) for blk in blocks]
                seeds = seeds[category]  # we only predict the nodes of type ``category``
                batch_tic = time.time()
                emb = extract_embed(node_embed, input_nodes)
                lbl = labels[seeds]
                if use_cuda:
                    emb = {k: e.cuda() for k, e in emb.items()}
                    lbl = lbl.cuda()
                logits = model(emb, blocks)[category]
                loss = F.cross_entropy(logits, lbl)
                # zero the gradients per batch, not per epoch, so they do not
                # accumulate across batches
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_acc = th.sum(logits.argmax(dim=1) == lbl).item() / len(seeds)
                y_preds = F.softmax(logits, dim=1)[:, 1].detach().cpu()
                y_trues = lbl.detach().cpu()
                train_ap = average_precision_score(y_trues.numpy(), y_preds.numpy())
                print(
                    "Epoch {:03d} | Batch {:03d} | Train AP: {:.4f} | Train Acc: {:.4f} | Train Loss: {:.4f} | Time: {:.4f}".
                    format(epoch, i, train_ap, train_acc, loss.item(), time.time() - batch_tic))

            dur.append(time.time() - t0)
            val_loss, val_acc, val_ap = evaluate(model, val_loader, node_embed, labels, category, device)
            print("Epoch {:03d} | Valid AP: {:.4f} | Valid Acc: {:.4f} | Valid loss: {:.4f} | Time: {:.4f}".
                  format(epoch, val_ap, val_acc, val_loss, np.average(dur)))
            # checkpoint the best-scoring epoch
            if val_ap > best_score:
                best_score = val_ap
                best_epoch = epoch
                th.save(model.state_dict(), args.model_path)
        print('loading model from best epoch {} with best validation AP {:.4f}'.format(best_epoch, best_score))

    # load model and evaluate on the test set
    model.load_state_dict(th.load(args.model_path))
    test_mask = g.nodes[category].data.pop('test_mask')
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    test_sampler = dgl.dataloading.MultiLayerNeighborSampler([args.fanout] * args.n_layers)
    test_loader = dgl.dataloading.DataLoader(
        g, {category: test_idx}, test_sampler,
        batch_size=args.batch_size, shuffle=False, num_workers=0)
    test_ids, y_predicts = test_inference(model, test_loader, node_embed, category, device)
    with open(args.result_path, 'w+') as f:
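        # one JSON record per line, e.g. (illustrative values):
        #   {"item_id": 4096, "score": 0.9317}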
        for i in range(len(test_ids)):
            y_dict = {}
            # map the graph-local node id back to the original item id
            y_dict["item_id"] = int(dataset.rev_item_map[int(test_ids[i])])
            y_dict["score"] = float(y_predicts[i])
            json.dump(y_dict, f)
            f.write('\n')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='RGCN')
    parser.add_argument("--dropout", type=float, default=0,
                        help="dropout probability")
    parser.add_argument("--n-hidden", type=int, default=256,
                        help="number of hidden units")
    parser.add_argument("--gpu", type=int, default=0,
                        help="GPU device id; set to -1 to run on CPU")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--n-bases", type=int, default=-1,
                        help="number of filter weight matrices, default: -1 [use all]")
    parser.add_argument("--n-layers", type=int, default=2,
                        help="number of propagation rounds")
    parser.add_argument("-e", "--n-epochs", type=int, default=3,
                        help="number of training epochs")
    parser.add_argument("--l2norm", type=float, default=0,
                        help="L2 regularization coefficient (weight decay)")
    parser.add_argument("--use-self-loop", default=False, action='store_true',
                        help="include self feature as a special relation")
    parser.add_argument("--batch-size", type=int, default=1024,
                        help="mini-batch size")
    parser.add_argument("--fanout", type=int, default=1,
                        help="fan-out of neighbor sampling")
    parser.add_argument('--data-cpu', default=False, action='store_true',
                        help="By default the script puts all node features and labels "
                             "on GPU to save time on data copies. This may be "
                             "undesirable if they cannot fit in GPU memory at once. "
                             "This flag disables that behavior.")
    parser.add_argument("--session", type=str, default='small',
                        help='dataset session')
    parser.add_argument("--model-path", type=str, default=None,
                        help='path where the model checkpoint is saved')
    parser.add_argument('--load-model', default=False, action='store_true',
                        help='skip training and load the model from --model-path')
    parser.add_argument('--result-path', type=str, default=None,
                        help='path to save the predictions')
    parser.add_argument('--data-dir', type=str, default=None,
                        help='directory from which the dataset is loaded')

    args = parser.parse_args()
    print(args)
    main(args)