-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRecommender.py
65 lines (57 loc) · 3.02 KB
/
Recommender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import sys
from numpy.linalg import solve
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from time import perf_counter
import sqlite3
import pandas as pd
import csr
class Recommender:
    """Load (user, item, score) samples and build a CSR rating matrix.

    Samples come either from a SQLite database (``link_db`` followed by
    ``load_samples_from_sql``) or from a ``.npy`` array
    (``load_samples_from_npy``). Both loaders hold out a random tenth of the
    samples as ``self.test_samples`` and build ``self.ratings`` from the
    remaining ones with ``csr.CSR.from_coo``.
    """

    def __init__(self) -> None:
        """Create an empty recommender; attributes are set by the loaders."""

    def link_db(self, path):
        """Open a SQLite connection to ``path`` and keep it as ``self.con``."""
        self.con = sqlite3.connect(path)

    @staticmethod
    def renumber_column(df: pd.DataFrame, column: str):
        """Replace the values of ``df[column]`` with dense 0-based codes.

        The distinct values of the column are sorted and each one is mapped
        to its rank (0 .. k-1). The column is overwritten in place on ``df``.

        Args:
            df: Frame holding the column to renumber (modified in place).
            column: Name of the column to renumber.

        Returns:
            The same ``df`` object, for call chaining.
        """
        distinct = df[column].unique()
        distinct.sort()
        codes = dict(zip(distinct, range(distinct.shape[0])))
        df[column] = df[column].map(codes)
        return df

    def load_samples_from_sql(self, n):
        """Load up to ``n`` rating rows from ``user_records`` and split them.

        Requires ``link_db`` to have been called first. Sets ``self.samples``
        (training rows), ``self.test_samples`` (random tenth of the rows),
        ``self.n_users``, ``self.n_items`` and ``self.ratings``.

        Args:
            n: Maximum number of rows to read (converted with ``int``).
        """
        start = perf_counter()
        # Bind the limit as a parameter instead of formatting it into the
        # SQL text; int() also makes NumPy integers acceptable to sqlite3.
        self.samples = pd.read_sql(
            "select username,anime_id,score from user_records "
            "order by username limit ?",
            self.con,
            params=(int(n),),
        )
        print(f"Done loading samples in {perf_counter()-start} s.")

        # Map usernames and anime ids onto contiguous 0-based indices so
        # they can serve as row/column coordinates of the rating matrix.
        self.samples = Recommender.renumber_column(self.samples, "username")
        self.samples = Recommender.renumber_column(self.samples, "anime_id")
        self.n_users = self.samples["username"].max() + 1
        self.n_items = self.samples["anime_id"].max() + 1

        # Hold out a random 10% of the rows (without replacement) for testing.
        drop_indices = np.random.choice(
            self.samples.index,
            size=self.samples.shape[0] // 10,
            replace=False,
        )
        # drop_indices are index labels, so select by label, consistent with
        # the isin() mask used for the training rows below.
        self.test_samples = self.samples.loc[drop_indices, :]
        self.samples = self.samples[~self.samples.index.isin(drop_indices)]
        print(f"# of train samples: {self.samples.shape[0]}, # of test samples: {self.test_samples.shape[0]}")

        self.ratings = csr.CSR.from_coo(
            self.samples["username"].to_numpy(),
            self.samples["anime_id"].to_numpy(),
            self.samples["score"].to_numpy(),
        )
        # NOTE: message text kept as-is; no npz file is involved here and the
        # elapsed time covers the whole method.
        print(f"Done loading samples from npz file in {perf_counter()-start} s.")

    def load_samples_from_npy(self, path, n):
        """Load rating samples from a ``.npy`` file and split them.

        The array is indexed as column 0 = user index, column 1 = item index,
        column 2 = rating. Sets ``self.samples`` (training rows),
        ``self.test_samples`` (random tenth of the rows), ``self.n_users``,
        ``self.n_items`` and ``self.ratings``.

        Args:
            path: File to read with ``np.load``.
            n: Number of leading rows to keep, or the string ``"all"`` to
                keep every row.
        """
        start = perf_counter()
        a = np.load(path)
        # Ratings stored on a 5-point scale could be doubled at this point
        # (a[:, 2] *= 2) to become ints on a 10-point scale; currently unused.
        a = a.astype(np.int32)  # integer dtype so the data is convertible to CSR
        if n != "all":
            a = a[:n]
        self.n_users = a[:, 0].max() + 1
        self.n_items = a[:, 1].max() + 1

        # Hold out a random 10% of the rows (without replacement) for testing.
        drop_indices = np.random.choice(
            a.shape[0], size=a.shape[0] // 10, replace=False
        )
        self.test_samples = a[drop_indices, :]
        self.samples = np.delete(a, drop_indices, axis=0)
        print(f"# of train samples: {self.samples.shape[0]}, # of test samples: {self.test_samples.shape[0]}")

        self.ratings = csr.CSR.from_coo(
            self.samples[:, 0], self.samples[:, 1], self.samples[:, 2]
        )
        # NOTE: message text kept as-is; the input is a .npy file.
        print(f"Done loading samples from npz file in {perf_counter()-start} s.")