-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathabmodelr.r
165 lines (121 loc) · 5.35 KB
/
abmodelr.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
library(tidyverse)
library(DT)
library(data.table)
library(yaml)
library(rstudioapi)
#file <- selectFile(caption = "Select File", label = "Select",
# path = getActiveProject(), filter = "All YAML Files (*.yml)",
# existing = TRUE)
#config <- read_yaml(file)
config <- read_yaml("config.yml")
# User generation ----
user_ids <- 1:config$n_users
# initialize interest_resource with 3 time the amount of topics. So users have haed_room to increase their interest
interest_ressource <- rep(config$n_topics * 3, config$n_users)
# setup a data.frame for all users
topic_interests <- data.frame(user_ids)
# Generate interests for the users by topic
for (i in 1:config$n_topics) {
# randomly assign interest to topics using the uniform distribution
topic_interest_level <- data.frame(runif(config$n_users))
# assign sensible column names
names(topic_interest_level) <- paste0("topic_", i)
# bind all topic interest to the user
topic_interests <- topic_interests %>% bind_cols(topic_interest_level)
}
# create the actual user data frame
user <- data.frame(topic_interests, interest_ressource)
# posts generation ----
# create ids
news_ids <- 1:config$n_newsposts
topic_relevances <- data.frame(news_ids)
# create news posts with certain topics
for (i in 1:config$n_topics) {
# assign topic relevante by uniform distribution
topic_relevance <- data.frame(runif(config$n_newsposts))
names(topic_relevance) <- paste0("topic_", i)
topic_relevances <- topic_relevances %>% bind_cols(topic_relevance)
}
news_posts <- data.frame(topic_relevances)
# calculate the sum of topic interests, to measure the "likeability"(?) of post
news_posts %>% select(starts_with("topic")) %>% mutate(sumtopics = rowSums(.)) %>% select(sumtopics) -> sumcol
news_posts %>% bind_cols(sumcol) -> news_posts
# Define a random likebility score for all news posts (uniform distribution, between 0 and topic-limit)
news_scores <- runif(config$n_newsposts, min = 1, max = config$topic_limit)
# get all topic values into a matrix for normalization
news_posts %>% select(starts_with("topic")) -> matrix_of_initial_values
# normalize all topic-value by the rowwise sum (generated above) - now sums should be 1
updated_topics <- (matrix_of_initial_values / t(sumcol))
# update the news_posts
news_posts <- bind_cols(data.frame(news_ids), updated_topics, data.frame(news_scores))
# initilize recommender
user %>% select(starts_with("topic")) -> mat_user
news_posts %>% select(starts_with("topic")) -> mat_posts
cosine_matrix <- matrix(c(0), nrow = config$n_users, ncol = config$n_newsposts)
for(i in 1:config$n_users) {
for(j in 1:config$n_newsposts) {
cosine_matrix[i,j] <- lsa::cosine(unlist(mat_user[i,]), unlist(mat_posts[j,]))
}
}
#scosine_matrix
#' Generate recommendation for a user_id from a cosine similariy matrix
#'
#' @param user_id the user id (i.e. the row number in the cosine matrix)
#' @param cosine_matrix a cosine similarity matrix where rownumber is the number of users, and colnumer is the number of items
#' @param n how many recommendations to generate
#'
#' @return The ordered recommendations
#'
generate_topn_rec <- function(user_id, cosine_matrix, n = 1) {
df <- as_tibble(data.frame(t(cosine_matrix)))
df <- df %>% mutate(id = 1:dim(df)[1]) %>% select(id, user_id)
suppressMessages(
res <- top_n(df, n)
)
names(res) <- c("id", "match")
res %>% arrange(desc(match))
}
# generate an empty exposre matrix with rownumbers = posts , colnumbers = simulationsteps
exposure <- matrix(c(0), nrow = config$n_newsposts, ncol = config$n_steps)
# run all simulation steps
pb <- txtProgressBar(min = 0, max = config$n_steps, initial = 0, char = "=",
width = NA, title="Simulation Run", label, style = 3, file = "")
for (steps in 1:config$n_steps) {
# for all users
for(user_id in 1:config$n_users){
# generate top 10 recommendations
recs <- generate_topn_rec(user_id, cosine_matrix = cosine_matrix, 10)
# update user interests ----
# draw one random sample from recommendations to consume
i <- sample(1:dim(recs)[1], 1)
# get position of newspost in news post data frame
position <- unlist(recs[i,1] )
# get scores from posts
news_posts[position,] %>% select(starts_with("topic")) -> post_scores
# get user interests
user[user_id, ] %>% select(starts_with("topic")) -> user_interest
# update user_interest from posts
user_interest <- user_interest + post_scores
#noramlize if interest_ressource is maxed out
if(sum(user_interest) > user[user_id,]$interest_ressource){
user_interest <- user_interest / sum(user_interest) * user[user_id,]$interest_ressource
}
# Update only top topics?
# update user interest
user[user_id,] <- c(user_ids = user_id, user_interest, user[user_id,]$interest_ressource)
# update exposure counts in each step for all recommendations
position <- unlist(recs[i,1] )
exposure[position, steps] + 1 -> temp
exposure[position, steps] <- temp
}
if(steps > 1){
exposure[,steps] <- exposure[, steps] + exposure[, steps -1]
}
setTxtProgressBar(pb, steps)
}
close(pb)
#View(exposure)
# save results
results_data <- list(user = user, news_posts = news_posts, exposure = exposure)
rds_filename <- config$outputfilename
write_rds(results_data, rds_filename)