-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlib-BG.R
176 lines (152 loc) · 4.53 KB
/
lib-BG.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Author: Koba Khitalishvili
#
# I use Google's R style guide
# https://google.github.io/styleguide/Rguide.xml#functiondefinition
#
# A set of functions based on Peter Fader's and Bruce Hardie's research in using shifted beta
# geometric distribution (sBG) to project customer retention and calculate customer lifetime
# value
# Paper 1. "How to project customer retention" Fader and Hardie (2007)
# https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_jim_07.pdf
churnBG <- Vectorize(function(alpha,beta,period){
# Computes churn probabilities based on sBG distribution
# Equation (7) in Paper 1
#
# Args:
# alpha: numeric
# beta: numeric
# period: integer or vector of integers
#
# Returns:
# Vector of churn probabilities for period(s)
t1 = alpha/(alpha+beta)
result = t1
if (period > 1) {
result = churnBG(alpha,beta,period-1)*(beta+period-2)/(alpha+beta+period-1)
}
return(result)
}, vectorize.args = c("period"))
survivalBG = Vectorize(function(alpha, beta, period){
# Computes survival probabilites based on a sBG distribution
#
# Args:
# alpha: numeric
# beta: numeric
# period: integer or vector of integers
#
# Returns:
# Vector of survival probabilities for period(s)
t1 = 1-churnBG(alpha,beta,1)
result = t1
if(period>1){
result = survivalBG(alpha,beta,period-1)-churnBG(alpha,beta,period)
}
return(result)
}, vectorize.args = c("period"))
estimateMLL <- function(active.cust, lost.cust) {
# Estimates MLL to fit the sBG distribution curve
#
# Args:
#
MLL = function(alphabeta){
# Computes likelihood. Equation (B3) in Paper 1
#
# Args:
# alphabeta: vector with alpha being the first and beta being the second elements, c(a,b)
#
# Returns:
# Vector of churn probabilities for period(s)
#
# Error handling
if(length(activeCust) != length(lostCust)){
stop("Variables activeCust and lostCust have different lengths: ",
length(activeCust), " and ", length(lostCust), ".")
}
# Example data for seven periods
# activeCust = c(869,743,653,593,551,517,491)
# lostCust = c(131,126,90,60,42,34,26)
t=length(activeCust) # number of periods
alpha = alphabeta[1]
beta = alphabeta[2]
return(-as.numeric(
sum(lostCust*log(churnBG(alpha,beta,1:t))) +
activeCust[t]*log(survivalBG(alpha,beta,t))
))
}
activeCust = active.cust
lostCust = lost.cust
return(optim(c(1,1), MLL))
}
estimateMLL2 <- function(active.cust, lost.cust) {
# Alternative function that estimates MLL to fit the sBG distribution curve
#
# Args:
# alphabeta: vector with alpha being the first and beta being the second elements, c(a,b)
#
# Taken from
# http://stats.stackexchange.com/questions/76678/how-to-write-log-likelihood-for-beta-geometric-with-optim-in-r
active = active.cust
lost = lost.cust
loop.lik<-function(params) {
a<-params[1]
b<-params[2]
ll<-0
for (i in 1:length(lost)) {
ll<-ll+lost[i]*log(beta(a+1,b+i-1)/beta(a,b))
}
ll<-ll+active[i]*log(beta(a,b+i)/beta(a,b))
return(-ll) #return the negative of the function to maximize likelihood
}
return(optim(c(1,1), loop.lik))
}
retentionRates = Vectorize(function(alpha,beta,period){
# Computes retention rates for period(s) t given alpha and beta
#
# Args:
# alpha: numeric
# beta: numeric
# period: integer or vector of integers
#
# Returns:
# Vector of retention rates
#
return((beta+period-1)/(alpha+beta+period-1))
}, vectorize.args = c("period"))
DEL <- function(alpha,beta,discount=0.025,periods = 70){
# Computes the discounted expected lifetime
#
# Args:
# alpha: numeric
# beta: numeric
# discount: discount rate
# periods: integer
#
# Returns:
# numeric DEL value
return(sum(c(1,survivalBG(alpha,beta,seq(1,periods))) *
sapply(seq(0,periods),function(x) 1/(1+discount)^x))
)
}
DERL <- function(alpha,beta,renewals = 2,discount=0.025,periods = 70){
# Computes the discounted expected residual lifetime
#
# Args:
# alpha: numeric
# beta: numeric
# discount: numeric
# renewals: integer
# periods: integer
#
# Returns:
# numeric DERL value
#
# Error handling
if(renewals<2){
stop("Renewals value should be greater than one: renewals used is ",
renewals)
}
return(sum(survivalBG(alpha,beta,seq(renewals+1,periods)) /
survivalBG(alpha,beta,renewals) *
sapply(seq(0,periods-renewals-1),function(x) 1/(1+discount)^x))
)
}