import os.path
import random
import argparse
import pickle
import sys, math, time, pdb, warnings, traceback
from shutil import copy, rmtree, copytree

import torch
import numpy as np
import scipy.io as sio
import scipy.sparse as ssp
from torch.optim.lr_scheduler import ReduceLROnPlateau

from util_functions import *
from preprocessing import *
from train_eval import *
from model import *
parser = argparse.ArgumentParser(description='Inductive Graph-based Matrix Completion')
parser.add_argument('--testing', action='store_true', default=False,
                    help='if set, use testing mode which splits all ratings into train/test; \
                          otherwise, use validation mode which splits all ratings into \
                          train/val/test and evaluates on val only')
parser.add_argument('--no-train', action='store_true', default=False,
                    help='if set, skip training and directly perform the \
                          transfer/ensemble/visualization')
parser.add_argument('--debug', action='store_true', default=False,
                    help='turn on debugging mode which uses a small amount of data')
parser.add_argument('--data-name', default='ml_100k', help='dataset name')
parser.add_argument('--own', action='store_true', default=False,
                    help='if set, use your own dataset')
parser.add_argument('--data-appendix', default='',
                    help='what to append to save-names when saving datasets')
parser.add_argument('--save-appendix', default='',
                    help='what to append to save-names when saving results')
parser.add_argument('--max-train-num', type=int, default=None,
                    help='set maximum number of train data to use')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--data-seed', type=int, default=1234, metavar='S',
                    help='seed to shuffle data (1234,2341,3412,4123,1324 are used), \
                          valid only for ml_1m and ml_10m')
parser.add_argument('--reprocess', action='store_true', default=False,
                    help='if True, reprocess data instead of using prestored .pkl data')
parser.add_argument('--dynamic-dataset', action='store_true', default=False,
                    help='if True, extract enclosing subgraphs on the fly instead of \
                          storing them on disk; useful for large datasets that cannot fit into memory')
parser.add_argument('--keep-old', action='store_true', default=False,
                    help='if True, do not overwrite old .py files in the result folder')
parser.add_argument('--save-interval', type=int, default=10,
                    help='save model states every this many epochs')
# subgraph extraction settings
parser.add_argument('--hop', type=int, default=1, metavar='S',
                    help='enclosing subgraph hop number')
parser.add_argument('--sample-ratio', type=float, default=1.0,
                    help='if < 1, subsample nodes per hop according to this ratio')
parser.add_argument('--max-nodes-per-hop', type=int, default=10000,
                    help='if > 0, upper bound the number of nodes per hop by another subsampling')
parser.add_argument('--use-features', action='store_true', default=False,
                    help='whether to use node features (side information)')
# edge dropout settings
parser.add_argument('--adj-dropout', type=float, default=0.2,
                    help='if not 0, randomly drop edges from the adjacency matrix with this probability')
parser.add_argument('--force-undirected', action='store_true', default=False,
                    help='in edge dropout, force (x, y) and (y, x) to be dropped together')
# optimization settings
parser.add_argument('--continue-from', type=int, default=None,
                    help="from which epoch's checkpoint to continue training")
parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
                    help='learning rate (default: 1e-3)')
parser.add_argument('--lr-decay-step-size', type=int, default=50,
                    help='decay the learning rate every this many steps')
parser.add_argument('--lr-decay-factor', type=float, default=0.1,
                    help='multiply the learning rate by this factor at each decay step')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train')
parser.add_argument('--batch-size', type=int, default=50, metavar='N',
                    help='batch size during training')
parser.add_argument('--ARR', type=float, default=0.001,
                    help='the adjacent rating regularizer; if not 0, regularize the \
                          differences between graph convolution parameters W associated with \
                          adjacent ratings')
# transfer learning, ensemble, and visualization settings
parser.add_argument('--transfer', default='',
                    help='if not empty, load the pretrained models in this path')
parser.add_argument('--num-relations', type=int, default=5,
                    help='if transfer, specify num_relations in the transferred model')
parser.add_argument('--multiply-by', type=int, default=1,
                    help='if transfer, specify the factor by which to multiply the predictions')
parser.add_argument('--visualize', action='store_true', default=False,
                    help='if True, load a pretrained model and run visualization experiments')
parser.add_argument('--ensemble', action='store_true', default=False,
                    help='if True, load a series of model checkpoints and ensemble the results')
parser.add_argument('--standard-rating', action='store_true', default=False,
                    help='if True, map all ratings to standard 1, 2, 3, 4, 5 before training')
# sparsity experiment settings
parser.add_argument('--ratio', type=float, default=1.0,
                    help='for ml datasets, if ratio < 1, downsample training data to \
                          the target ratio')
args = parser.parse_args()
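
# Example invocations (flags are those defined above; the values are illustrative):
#   python main.py --data-name ml_100k --epochs 50             # validation mode
#   python main.py --data-name ml_100k --epochs 50 --testing   # train/test mode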
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
args.file_dir = os.path.dirname(os.path.realpath(__file__))

if args.testing:
    val_test_appendix = 'testmode'
else:
    val_test_appendix = 'valmode'
args.res_dir = os.path.join(
    args.file_dir, 'results/{}{}_{}'.format(args.data_name, args.save_appendix, val_test_appendix)
)
if not os.path.exists(args.res_dir):
    os.makedirs(args.res_dir)

rating_map = None
post_rating_map = None
if args.data_name in ['flixster', 'douban', 'yahoo_music']:
    (
        u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices,
        val_labels, val_u_indices, val_v_indices, test_labels, test_u_indices,
        test_v_indices, class_values, num_user
    ) = load_data_monti(args.data_name, args.testing, rating_map, post_rating_map)
elif args.data_name == 'ml_100k':
    print("Using official MovieLens split u1.base/u1.test with 20% validation...")
    (
        u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices,
        val_labels, val_u_indices, val_v_indices, test_labels, test_u_indices,
        test_v_indices, class_values, num_user
    ) = load_official_trainvaltest_split(
        args.data_name, args.testing, rating_map, post_rating_map, args.ratio
    )
else:
    # Cache path for the random split; the naming convention is an assumption
    # here, since the path is consumed by the preprocessing code.
    datasplit_path = 'raw_data/' + args.data_name + '/split_seed' + str(args.data_seed) + '.pickle'
    (
        u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices,
        val_labels, val_u_indices, val_v_indices, test_labels, test_u_indices,
        test_v_indices, class_values, num_user
    ) = create_trainvaltest_split(
        args.data_name, args.data_seed, args.testing, datasplit_path, True, True, rating_map,
        post_rating_map, args.ratio
    )
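
# All three loaders return the same tuple: optional user/item side features, the
# training adjacency matrix, (labels, user indices, item indices) for the
# train/val/test splits, the list of distinct rating values (class_values), and
# the number of users (num_user), used below when building the joint user-item graph.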
adj_trains = [train_u_indices, train_v_indices, train_labels]
train_indices = (train_u_indices, train_v_indices)
test_indices = (test_u_indices, test_v_indices)
G = build_all_graph(adj_trains, num_user, class_values)
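# Datasets are built in two passes: the *_save_graph objects extract the
# enclosing subgraph around each (user, item) pair once and write it to disk
# (save_subgraphs); train_graphs/test_graphs are then the datasets that
# training actually iterates over.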
train_save_graph = LocalGraphDataset(G, train_indices, train_labels, class_values, num_user,
                                     pre_save=False, testing=False, dataset=args.data_name, parallel=False)
train_save_graph.save_subgraphs()
test_save_graph = LocalGraphDataset(G, test_indices, test_labels, class_values, num_user,
                                    pre_save=False, testing=True, dataset=args.data_name, parallel=False)
test_save_graph.save_subgraphs()
train_graphs = LocalGraphDataset(G, train_indices, train_labels, class_values, num_user,
                                 testing=False, dataset=args.data_name)
test_graphs = LocalGraphDataset(G, test_indices, test_labels, class_values, num_user,
                                testing=True, dataset=args.data_name)
num_rels = len(class_values)
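# Node labels in an h-hop enclosing subgraph take 2*h + 2 distinct values (the
# target user, the target item, plus one user role and one item role per hop),
# which is why the model's input feature dimension below is args.hop * 2 + 2.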
model = IGMC(args.hop * 2 + 2,
             latent_dim=[32, 32, 32, 32],
             num_rels=num_rels,
             num_bases=4)
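
# Per-epoch callback handed to train_multiple_epochs: it appends the epoch's
# metrics to log.txt and checkpoints the model/optimizer whenever the test MRR
# improves on the best value seen so far.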
def logger(info, model, optimizer, best_mrr=0.0):
    epoch, train_loss, test_rmse, test_mrr = (
        info['epoch'], info['train_loss'], info['test_rmse'], info['test_mrr']
    )
    with open(os.path.join(args.res_dir, 'log.txt'), 'a') as f:
        f.write('Epoch {}, train loss {:.4f}, test rmse {:.6f}, test mrr {:.6f}\n'.format(
            epoch, train_loss, test_rmse, test_mrr
        ))
    if isinstance(epoch, int) and best_mrr < test_mrr:
        best_mrr = test_mrr
        print('Saving model states...')
        model_name = os.path.join(args.res_dir, 'model_checkpoint{}.pth'.format(epoch))
        optimizer_name = os.path.join(args.res_dir, 'optimizer_checkpoint{}.pth'.format(epoch))
        if model is not None:
            torch.save(model.state_dict(), model_name)
        if optimizer is not None:
            torch.save(optimizer.state_dict(), optimizer_name)
    return best_mrr
train_multiple_epochs(train_graphs,
                      test_graphs,
                      model,
                      args.epochs,
                      args.batch_size,
                      args.lr,
                      lr_decay_factor=args.lr_decay_factor,
                      lr_decay_step_size=args.lr_decay_step_size,
                      weight_decay=0,
                      ARR=args.ARR,
                      logger=logger,
                      continue_from=args.continue_from,
                      res_dir=args.res_dir)
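
# A minimal sketch of restoring a checkpoint saved by logger() above for
# inference; the epoch number is hypothetical and should be taken from the
# best entry in log.txt:
#
#   best_epoch = 10  # hypothetical
#   model.load_state_dict(torch.load(
#       os.path.join(args.res_dir, 'model_checkpoint{}.pth'.format(best_epoch))))
#   model.eval()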