NeuMF
In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch.nn as nn
import torch.nn.functional as F
from dataclasses import dataclass
import os
import time
import torch
import torch.optim as optim
import torch.utils.data as data
import torch.backends.cudnn as cudnn
@dataclass
class Config:
    model: str
    train_rating: str = '/kaggle/input/ratings-test/ml-1m.train.rating'
    test_negative: str = '/kaggle/input/ratings-test/ml-1m.test.negative'
    model_path: str = '/kaggle/working/model/'
    # checkpoint paths for warm-starting; only required when model == 'NeuMF-pre'
    GMF_model_path: str = ''
    MLP_model_path: str = ''
    out: bool = True
    batch_size: int = 256
    factor_num: int = 32
    num_layers: int = 3
    test_num_ng: int = 99
    num_ng: int = 4
    dropout: float = 0.0
    lr: float = 0.001
    epochs: int = 20
    top_k: int = 10
    gpu: str = "0"

config = Config(model='NeuMF-end')
os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu
cudnn.benchmark = True
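Because Config is a dataclass, any of these hyperparameters can be overridden at construction time. A minimal sketch (the name mlp_config and the alternative values are illustrative, not part of this run):

mlp_config = Config(model='MLP', factor_num=16, num_layers=2)
print(mlp_config.batch_size)  # 256: unspecified fields keep their defaults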
In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class NCF(nn.Module):
def __init__(self, user_num, item_num, factor_num, num_layers,
dropout, model, GMF_model=None, MLP_model=None):
        """
        user_num: number of users;
        item_num: number of items;
        factor_num: number of predictive factors;
        num_layers: the number of layers in the MLP model;
        dropout: dropout rate between fully connected layers;
        model: one of 'MLP', 'GMF', 'NeuMF-end', 'NeuMF-pre';
        GMF_model: pre-trained GMF model (only used for 'NeuMF-pre');
        MLP_model: pre-trained MLP model (only used for 'NeuMF-pre').
        """
        super(NCF, self).__init__()
self.dropout = dropout
self.model = model
self.GMF_model = GMF_model
self.MLP_model = MLP_model
self.embed_user_GMF = nn.Embedding(user_num, factor_num)
self.embed_item_GMF = nn.Embedding(item_num, factor_num)
self.embed_user_MLP = nn.Embedding(
user_num, factor_num * (2 ** (num_layers - 1)))
self.embed_item_MLP = nn.Embedding(
item_num, factor_num * (2 ** (num_layers - 1)))
MLP_modules = []
for i in range(num_layers):
input_size = factor_num * (2 ** (num_layers - i))
MLP_modules.append(nn.Dropout(p=self.dropout))
MLP_modules.append(nn.Linear(input_size, input_size//2))
MLP_modules.append(nn.ReLU())
self.MLP_layers = nn.Sequential(*MLP_modules)
if self.model in ['MLP', 'GMF']:
predict_size = factor_num
else:
predict_size = factor_num * 2
self.predict_layer = nn.Linear(predict_size, 1)
self._init_weight_()
def _init_weight_(self):
        if self.model != 'NeuMF-pre':
nn.init.normal_(self.embed_user_GMF.weight, std=0.01)
nn.init.normal_(self.embed_user_MLP.weight, std=0.01)
nn.init.normal_(self.embed_item_GMF.weight, std=0.01)
nn.init.normal_(self.embed_item_MLP.weight, std=0.01)
for m in self.MLP_layers:
if isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight)
nn.init.kaiming_uniform_(self.predict_layer.weight,
a=1, nonlinearity='sigmoid')
for m in self.modules():
if isinstance(m, nn.Linear) and m.bias is not None:
m.bias.data.zero_()
else:
# embedding layers
self.embed_user_GMF.weight.data.copy_(
self.GMF_model.embed_user_GMF.weight)
self.embed_item_GMF.weight.data.copy_(
self.GMF_model.embed_item_GMF.weight)
self.embed_user_MLP.weight.data.copy_(
self.MLP_model.embed_user_MLP.weight)
self.embed_item_MLP.weight.data.copy_(
self.MLP_model.embed_item_MLP.weight)
# mlp layers
for (m1, m2) in zip(
self.MLP_layers, self.MLP_model.MLP_layers):
if isinstance(m1, nn.Linear) and isinstance(m2, nn.Linear):
m1.weight.data.copy_(m2.weight)
m1.bias.data.copy_(m2.bias)
# predict layers
predict_weight = torch.cat([
self.GMF_model.predict_layer.weight,
self.MLP_model.predict_layer.weight], dim=1)
            predict_bias = self.GMF_model.predict_layer.bias + \
                self.MLP_model.predict_layer.bias
            self.predict_layer.weight.data.copy_(0.5 * predict_weight)
            self.predict_layer.bias.data.copy_(0.5 * predict_bias)
def forward(self, user, item):
        if self.model != 'MLP':
embed_user_GMF = self.embed_user_GMF(user)
embed_item_GMF = self.embed_item_GMF(item)
output_GMF = embed_user_GMF * embed_item_GMF
        if self.model != 'GMF':
embed_user_MLP = self.embed_user_MLP(user)
embed_item_MLP = self.embed_item_MLP(item)
interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1)
output_MLP = self.MLP_layers(interaction)
if self.model == 'GMF':
concat = output_GMF
elif self.model == 'MLP':
concat = output_MLP
else:
concat = torch.cat((output_GMF, output_MLP), -1)
prediction = self.predict_layer(concat)
return prediction.view(-1)
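The MLP tower starts from the concatenated user and item MLP embeddings and halves its width at every layer, ending at factor_num so its output matches the GMF branch. A quick sketch of that arithmetic for the defaults used here (factor_num=32, num_layers=3):

factor_num, num_layers = 32, 3
size = factor_num * (2 ** num_layers)  # 256: two 128-dim MLP embeddings concatenated
for i in range(num_layers):
    print('linear {}: {} -> {}'.format(i, size, size // 2))
    size //= 2
# linear 0: 256 -> 128
# linear 1: 128 -> 64
# linear 2: 64 -> 32, so NeuMF's predict layer sees 32 (MLP) + 32 (GMF) = 64 inputs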
In [3]:
def hit(gt_item, pred_items):
if gt_item in pred_items:
return 1
return 0
def ndcg(gt_item, pred_items):
if gt_item in pred_items:
index = pred_items.index(gt_item)
return np.reciprocal(np.log2(index+2))
return 0
@torch.no_grad()
def metrics(model, test_loader, top_k):
    HR, NDCG = [], []
    for user, item, label in test_loader:
        user = user.cuda()
        item = item.cuda()
        predictions = model(user, item)
        _, indices = torch.topk(predictions, top_k)
        recommends = torch.take(
            item, indices).cpu().numpy().tolist()
        # each test batch holds one user's positive item first, followed by
        # 99 sampled negatives, so item[0] is always the ground truth
        gt_item = item[0].item()
        HR.append(hit(gt_item, recommends))
        NDCG.append(ndcg(gt_item, recommends))
    return np.mean(HR), np.mean(NDCG)
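To make the metrics concrete, a toy check against the functions above: a hit counts whenever the held-out item appears anywhere in the top-k list, while NDCG discounts it by log2 of its rank. The item ids below are made up:

preds = [7, 42, 3, 9, 1, 8, 5, 6, 2, 0]  # hypothetical top-10 recommendations
print(hit(3, preds))                      # 1: item 3 is in the list
print(ndcg(3, preds))                     # 0.5 = 1/log2(2 + 2), item 3 sits at index 2
print(hit(99, preds), ndcg(99, preds))    # 0 0: item 99 was never recommended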
In [4]:
import ast

def load_all():
    # load training data (positive samples only)
    train_data = pd.read_csv(
        config.train_rating,
        sep='\t', header=None, names=['user', 'item'],
        usecols=[0, 1], dtype={0: np.int32, 1: np.int32})

    user_num = train_data['user'].max() + 1
    item_num = train_data['item'].max() + 1
    train_data = train_data.values.tolist()

    # store the observed user/item interactions as a sparse dok matrix
    train_mat = sp.dok_matrix((user_num, item_num), dtype=np.float32)
    for x in train_data:
        train_mat[x[0], x[1]] = 1.0

    # load test data: one positive (user, item) pair per line,
    # followed by 99 sampled negative item ids
    test_data = []
    with open(config.test_negative, 'r') as fd:
        for line in fd:
            if not line.strip():
                continue
            arr = line.split('\t')
            u, pos_item = ast.literal_eval(arr[0])
            test_data.append([u, pos_item])
            for i in arr[1:]:
                test_data.append([u, int(i)])
    return train_data, test_data, user_num, item_num, train_mat
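load_all assumes each line of the test_negative file begins with a '(user, positive_item)' tuple followed by tab-separated negative item ids. A minimal parsing sketch on a synthetic line (the ids are invented):

import ast
line = '(0,25)\t1064\t174\t2791'       # synthetic example line
arr = line.split('\t')
u, pos = ast.literal_eval(arr[0])      # (0, 25)
negatives = [int(i) for i in arr[1:]]  # [1064, 174, 2791]
print(u, pos, negatives)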
In [5]:
class NCFData(data.Dataset):
def __init__(self,
features,
num_item,
train_mat=None,
num_ng=0,
is_training=None):
super(NCFData, self).__init__()
self.features_ps = features
self.num_item = num_item
self.train_mat = train_mat
self.num_ng = num_ng
self.is_training = is_training
self.labels = [0 for _ in range(len(features))]
# add negative samples to the positive samples (train)
def ng_sample(self):
        assert self.is_training, 'sampling is only needed during training'
self.features_ng = []
for x in self.features_ps:
u = x[0]
for t in range(self.num_ng):
j = np.random.randint(self.num_item)
while (u, j) in self.train_mat:
j = np.random.randint(self.num_item)
self.features_ng.append([u, j])
labels_ps = [1 for _ in range(len(self.features_ps))]
labels_ng = [0 for _ in range(len(self.features_ng))]
self.features_fill = self.features_ps + self.features_ng
self.labels_fill = labels_ps + labels_ng
def __len__(self):
return (self.num_ng + 1) * len(self.labels)
# get items during training
def __getitem__(self, idx):
features = self.features_fill if self.is_training \
else self.features_ps
labels = self.labels_fill if self.is_training \
else self.labels
user = features[idx][0]
item = features[idx][1]
label = labels[idx]
        return user, item, label
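A tiny check of the negative sampler, using the class above with a made-up interaction matrix of 2 users and 5 items; every sampled negative is guaranteed to avoid the observed (user, item) pairs:

toy_ps = [[0, 1], [1, 3]]                          # two observed interactions
toy_mat = sp.dok_matrix((2, 5), dtype=np.float32)
for u, i in toy_ps:
    toy_mat[u, i] = 1.0
toy_ds = NCFData(toy_ps, num_item=5, train_mat=toy_mat, num_ng=2, is_training=True)
toy_ds.ng_sample()
print(len(toy_ds))           # 6 = (num_ng + 1) * 2 positives
print(toy_ds.labels_fill)    # [1, 1, 0, 0, 0, 0]: positives first, then negatives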
In [6]:
train_data, test_data, user_num, item_num, train_mat = load_all()
# construct the train and test datasets
train_dataset = NCFData(train_data,
item_num,
train_mat,
config.num_ng,
True)
test_dataset = NCFData(test_data,
item_num,
train_mat,
0,
False)
train_loader = data.DataLoader(train_dataset,
batch_size=config.batch_size,
shuffle=True,
num_workers=4)
test_loader = data.DataLoader(test_dataset,
batch_size=config.test_num_ng+1,
shuffle=False,
num_workers=0)
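The test batch size of config.test_num_ng + 1 = 100 is what makes the metrics function work: with shuffle=False, each batch is exactly one user's candidate list, the held-out positive first followed by its 99 negatives, so item[0] is the ground truth. A quick sanity check:

users, items, _ = next(iter(test_loader))
print(users.shape, items.shape)  # both torch.Size([100])
print(users.unique())            # a single user id per test batch
print(items[0].item())           # that user's held-out positive item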
In [7]:
if config.model == 'NeuMF-pre':
    # NeuMF-pre warm-starts from pre-trained GMF and MLP checkpoints
    assert os.path.exists(config.GMF_model_path), 'GMF model not found'
    assert os.path.exists(config.MLP_model_path), 'MLP model not found'
    GMF_model = torch.load(config.GMF_model_path)
    MLP_model = torch.load(config.MLP_model_path)
else:
GMF_model = None
MLP_model = None
model = NCF(user_num,
item_num,
config.factor_num,
config.num_layers,
config.dropout,
config.model,
GMF_model,
MLP_model)
model.cuda()
loss_function = nn.BCEWithLogitsLoss()
if config.model == 'NeuMF-pre':
optimizer = optim.SGD(model.parameters(), lr=config.lr)
else:
optimizer = optim.Adam(model.parameters(), lr=config.lr)
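Before training it can help to confirm the model is on the GPU and to gauge its size; nearly all parameters live in the four embedding tables. A small sketch:

total = sum(p.numel() for p in model.parameters())
print('{:,} trainable parameters'.format(total))
print(next(model.parameters()).device)  # should report cuda:0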
In [8]:
best_hr = 0
for epoch in range(config.epochs):
    model.train()
    start_time = time.time()
    # draw a fresh set of negative samples each epoch
    train_loader.dataset.ng_sample()

    for user, item, label in train_loader:
        user = user.cuda()
        item = item.cuda()
        label = label.float().cuda()

        optimizer.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()

    model.eval()
    HR, NDCG = metrics(model, test_loader, config.top_k)

    elapsed_time = time.time() - start_time
    print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
          time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    print("HR: {:.3f}\tNDCG: {:.3f}".format(HR, NDCG))

    if HR > best_hr:
        best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
        if config.out:
            os.makedirs(config.model_path, exist_ok=True)
            torch.save(model,
                       '{}{}.pth'.format(config.model_path, config.model))

print("End. Best epoch {:03d}: HR = {:.3f}, NDCG = {:.3f}".format(
    best_epoch, best_hr, best_ndcg))
The time elapse of epoch 000 is: 00: 02: 01
HR: 0.629  NDCG: 0.366
The time elapse of epoch 001 is: 00: 02: 00
HR: 0.670  NDCG: 0.397
The time elapse of epoch 002 is: 00: 02: 01
HR: 0.686  NDCG: 0.411
The time elapse of epoch 003 is: 00: 02: 00
HR: 0.685  NDCG: 0.410
The time elapse of epoch 004 is: 00: 01: 59
HR: 0.698  NDCG: 0.419
The time elapse of epoch 005 is: 00: 02: 01
HR: 0.702  NDCG: 0.425
The time elapse of epoch 006 is: 00: 01: 59
HR: 0.700  NDCG: 0.423
The time elapse of epoch 007 is: 00: 02: 01
HR: 0.698  NDCG: 0.418
The time elapse of epoch 008 is: 00: 01: 59
HR: 0.699  NDCG: 0.422
The time elapse of epoch 009 is: 00: 02: 01
HR: 0.699  NDCG: 0.423
The time elapse of epoch 010 is: 00: 01: 59
HR: 0.698  NDCG: 0.422
The time elapse of epoch 011 is: 00: 02: 01
HR: 0.691  NDCG: 0.420
The time elapse of epoch 012 is: 00: 01: 59
HR: 0.695  NDCG: 0.420
The time elapse of epoch 013 is: 00: 02: 00
HR: 0.691  NDCG: 0.416
The time elapse of epoch 014 is: 00: 01: 58
HR: 0.693  NDCG: 0.418
The time elapse of epoch 015 is: 00: 01: 59
HR: 0.688  NDCG: 0.417
The time elapse of epoch 016 is: 00: 01: 58
HR: 0.683  NDCG: 0.413
The time elapse of epoch 017 is: 00: 02: 00
HR: 0.678  NDCG: 0.410
The time elapse of epoch 018 is: 00: 01: 59
HR: 0.681  NDCG: 0.414
The time elapse of epoch 019 is: 00: 01: 59
HR: 0.676  NDCG: 0.407
End. Best epoch 005: HR = 0.702, NDCG = 0.425
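Because the loop saves the entire module with torch.save(model, ...), reloading it for inference only requires the NCF class to be defined in the session. A minimal sketch (the user/item ids are illustrative):

best = torch.load('{}{}.pth'.format(config.model_path, config.model))
best.eval()
with torch.no_grad():
    score = best(torch.tensor([0]).cuda(), torch.tensor([25]).cuda())
print(torch.sigmoid(score))  # probability-like score for (user 0, item 25)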