BPR TripletLoss Recommender System
-
11-12-2020 - |
Question
I am trying to modify the code of this repo to build a recommender system based on BPR triplet loss.
In particular I modified the TripletLoss layer class like this
class TripletLossLayer(Layer):
"""
Layer object to minimise the triplet loss.
Here we implement the Bayesian Personal Ranking triplet loss.
"""
def __init__(self, **kwargs):
super(TripletLossLayer, self).__init__(**kwargs)
def bpr_triplet_loss(self, inputs):
"""
Bayesian Personal Ranking triplet loss.
"""
anchor, positive, negative = inputs
p_score = K.dot(anchor,K.transpose(positive))
n_score = K.dot(anchor,K.transpose(negative))
return (1.0 - K.sigmoid(p_score - n_score))
def call(self, inputs):
loss = self.bpr_triplet_loss(inputs)
self.add_loss(loss)
return loss
class ScoreLayer(Layer):
"""
Layer object to predict positive matches.
"""
def __init__(self, **kwargs):
super(ScoreLayer, self).__init__(**kwargs)
def rec_similarity(self, inputs):
"""
rec_similarity function
"""
anchor, item = inputs
score = K.dot(anchor,K.transpose(item))
return score
def call(self, inputs):
pred = self.rec_similarity(inputs)
return pred
and the model is defined through a function
def build_model(n_users, n_items, emb_dim = 30):
'''
Define the Keras Model for training
Parameters
----------
n_users : int
number of users
n_items : int
number of items
user_features : list of str
list of categorical features (columns of df_users)
item_features : list of str
list of categorical features (columns of df_items)
emb_dim : int
dimension of the embedding space
'''
n_user_features = 3
n_item_features = 18
### Input Layers
user_input = Input((n_user_features,), name='user_input')
positive_item_input = Input((n_item_features,), name='pos_item_input')
negative_item_input = Input((n_item_features,), name='neg_item_input')
inputs = [user_input, positive_item_input, negative_item_input]
### Embedding Layers
user_emb = Embedding(n_users, emb_dim, input_length=n_user_features, name='user_emb')
# Positive and negative items will share the same embedding
item_emb = Embedding(n_items, emb_dim, input_length=n_item_features, name='item_emb')
# Layer to convert embedding vectors in the same dimensional vectors
vec_conv64 = Dense(64, name = 'dense_vec64', activation = 'relu')
vec_conv32 = Dense(32, name = 'dense_vec32', activation = 'relu')
vec_conv = Dense(emb_dim, name = 'dense_vec', activation = 'softmax')
# Anchor
a = Flatten(name = 'flatten_usr_emb')(user_emb(user_input))
a = Dense(emb_dim, name = 'dense_user', activation = 'softmax')(a)
# Positive
p = Flatten(name = 'flatten_pos_emb')(item_emb(positive_item_input))
p = vec_conv64(p)
p = vec_conv32(p)
p = Dropout(0.5)(p)
p = vec_conv(p)
# Negative
n = Flatten(name = 'flatten_neg_emb')(item_emb(negative_item_input))
n = vec_conv64(n)
n = vec_conv32(n)
n = Dropout(0.5)(n)
n = vec_conv(n)
# Score layers
p_rec_score = ScoreLayer(name='pos_recommendation_score')([a, p])
n_rec_score = ScoreLayer(name='neg_recommendation_score')([a, n])
# TripletLoss Layer
loss_layer = TripletLossLayer(name='triplet_loss_layer')([a, p, n])
# Connect the inputs with the outputs
network_train = Model(inputs=inputs, outputs=loss_layer, name = 'training_model')
network_predict = Model(inputs=inputs[:-1], outputs=p_rec_score, name = 'inference_model')
# return the model
return network_train, network_predict
by printing network_train.layers
and network_predict.layers
one can check they share the layers as they should.
My problem comes at training.
I build the batch as follows
def get_triplets_hard(batch_size, X_usr, X_item, df, return_cache = False):
"""
Returns the list of three arrays to feed the model.
Parameters
----------
batch_size : int
size of the batch.
X_usr : numpy array of shape (n_users, n_user_features)
array of user metadata.
X_item : numpy array of shape (n_items, n_item_features)
array of item metadata.
df : Pandas DataFrame
dataframe containing user-item ratings.
return_cache : bool
parameter to triggere whether we want the list of ids corresponding to
triplets.
default: False
Returns
-------
triplets : list of numpy arrays
list containing 3 tensors A,P,N corresponding to:
- Anchor A : (batch_size, n_user_features)
- Positive P : (batch_size, n_item_features)
- Negative N : (batch_size, n_item_features)
"""
# constant values
n_user_features = X_usr.shape[1]
n_item_features = X_item.shape[1]
# define user_list
user_list = list(df.index.values)
# initialise result
triplets = [np.zeros((batch_size, n_user_features)), # anchor
np.zeros((batch_size, n_item_features)), # pos
np.zeros((batch_size, n_item_features)) # neg
]
user_ids = []
p_ids = []
n_ids = []
for i in range(batch_size):
# pick one random user for anchor
anchor_id = random.choice(user_list)
user_ids.append(anchor_id)
# all possible positive/negative samples for selected anchor
p_item_ids = get_pos(df, anchor_id)
n_item_ids = get_neg(df, anchor_id)
# pick one of the positve ids
try:
positive_id = random.choice(p_item_ids)
except IndexError:
positive_id = 0
p_ids.append(positive_id)
# pick the most similar negative id
try:
n_min = np.argmin([(cosine_dist(X_item[positive_id-1], X_item[k-1])) for k in n_item_ids])
negative_id = n_item_ids[n_min]
except:
try:
negative_id = random.choice(n_item_ids)
except IndexError:
negative_id = 0
n_ids.append(negative_id)
# define triplet
triplets[0][i,:] = X_usr[anchor_id-1][:]
if positive_id == 0:
triplets[1][i,:] = np.zeros((n_item_features,))
else:
triplets[1][i,:] = X_item[positive_id-1][:]
if negative_id == 0:
triplets[2][i,:] = np.zeros((n_item_features,))
else:
triplets[2][i,:] = X_item[negative_id-1][:]
if return_cache:
cache = {'users': user_ids, 'positive': p_ids, 'negative': n_ids}
return triplets, cache
return triplets
and then my hyperparameters are
# Hyper parameters
evaluate_every = 100 # interval for evaluating on one-shot tasks
batch_size = 64
n_iter = 100000 # No. of training iterations
n_val = 100 # how many one-shot tasks to validate on
I make use of network_train.train_on_batch(get_triplets_hard(batch_size, X_usr, X_item, df))
to train my model, but the loss never goes down.
Any Idea?
I am getting frustrated as I do not see where to improve/change the model or the hyperparameter choice.
Solution
Interestingly the problem was a vanishing gradient one: I substituted loss with logloss and (even if slowly) I managed to train the model. I leave it here in case someone may have the same problem with other implementations.