-
Notifications
You must be signed in to change notification settings - Fork 1
/
webapp.py
333 lines (273 loc) · 13.6 KB
/
webapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 2 08:40:16 2021
@author: pooja
"""
import streamlit as st
import warnings
warnings.filterwarnings('ignore')
st.set_option('deprecation.showPyplotGlobalUse', False)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as plty
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import init_notebook_mode, iplot, plot
import pyttsx3
def data_prep(data_file):
    """Clean the uploaded sessions DataFrame in place and return it.

    The following steps are applied:
      * drop the ``"Unnamed: 0"`` column when it exists;
      * replace missing values with 0 in ``totals.sessionQualityDim``,
        ``totals.timeOnSite``, ``totals.transactions`` and
        ``totals.transactionRevenue``;
      * map the keywords ``'nan'``, ``'NaN'``, ``'(not provided)'`` and
        missing values in ``trafficSource.keyword`` to ``'others'``;
      * replace the literal string ``" None"`` anywhere in the frame with 0.

    Args:
        data_file: pandas DataFrame holding the raw data.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        KeyError: if one of the ``totals.*`` columns or
            ``trafficSource.keyword`` is missing.
    """
    # "Unnamed: 0" is presumably a saved index column; tolerate CSVs
    # that do not carry it instead of failing with KeyError.
    data_file.drop(columns=["Unnamed: 0"], inplace=True, errors="ignore")

    # Assign the filled column back instead of calling
    # ``data_file[col].fillna(..., inplace=True)``: the chained in-place form
    # works on a temporary under pandas Copy-on-Write and would leave the
    # frame unchanged.
    zero_filled_columns = (
        'totals.sessionQualityDim',
        'totals.timeOnSite',
        'totals.transactions',
        'totals.transactionRevenue',
    )
    for column in zero_filled_columns:
        data_file[column] = data_file[column].fillna(0)

    keyword = data_file['trafficSource.keyword'].replace(
        ['nan', 'NaN', '(not provided)'], 'others')
    data_file['trafficSource.keyword'] = keyword.fillna('others')

    data_file.replace(to_replace=" None", value=0, inplace=True)
    return data_file
def explore_revenue(df_train):
    """Render the revenue distribution plots and read out the takeaway.

    Left panel: distribution of the natural log of the strictly positive
    ``totals.transactionRevenue`` values. Right panel: every revenue value,
    sorted ascending, plotted against its rank. The figure is written to
    the Streamlit page, then a fixed sentence is spoken through pyttsx3.

    Args:
        df_train: DataFrame with a ``totals.transactionRevenue`` column.
    """
    revenue = df_train['totals.transactionRevenue']

    fig = plt.figure(figsize=(20, 5))
    fig.suptitle('Distribuition of Revenue', fontsize=30)

    # Zero revenue is filtered out first because log(0) is undefined.
    ax1 = fig.add_subplot(121)
    sns.distplot(np.log(revenue[revenue > 0]), bins=40, color='#e56b6f', ax=ax1)
    ax1.set_ylabel('Distribution', fontsize=20)
    ax1.set_xlabel('Transaction Revenue Log', fontsize=20)

    # Draw on ax2 explicitly instead of relying on pyplot's "current axes".
    ax2 = fig.add_subplot(122)
    ax2.scatter(range(df_train.shape[0]), np.sort(revenue.values))
    ax2.set_ylabel('Distribution', fontsize=20)
    ax2.set_xlabel('Revenue', fontsize=20)

    # Hand the figure to Streamlit explicitly; the argument-less form relies
    # on matplotlib's global figure and is deprecated.
    st.pyplot(fig)

    # runAndWait() blocks until the queued sentence has been processed.
    observations = pyttsx3.init()
    observations.say("The above graph shows that only a small percentage of customers produce most of the revenue.")
    observations.runAndWait()
def horizontal_bar_chart(cnt_srs, color):
    """Build a horizontal plotly bar trace from a labelled series.

    Args:
        cnt_srs: series whose index supplies the bar labels and whose values
            supply the bar lengths.
        color: marker colour applied to every bar.

    Returns:
        A ``go.Bar`` trace with the legend entry disabled.
    """
    # Both sequences are reversed together, presumably so that the first
    # entry of the series ends up as the top bar of the chart.
    labels_reversed = cnt_srs.index[::-1]
    lengths_reversed = cnt_srs.values[::-1]
    bar_marker = dict(color=color)
    return go.Bar(
        x=lengths_reversed,
        y=labels_reversed,
        orientation='h',
        marker=bar_marker,
        showlegend=False,
    )
def device_revenue(df_train):
    """Render a 3x3 grid of revenue bar charts by device attribute.

    Each row covers one attribute (browser, device category, operating
    system). The columns show, for that attribute's values, the row count,
    the total ``totals.transactionRevenue`` and the mean revenue. Values are
    ranked by row count and only the ten most frequent are drawn. The figure
    is embedded in the Streamlit page, then a fixed sentence is spoken
    through pyttsx3.

    Args:
        df_train: DataFrame with ``device.browser``, ``device.deviceCategory``,
            ``device.operatingSystem`` and ``totals.transactionRevenue``.
    """
    # plotly.tools.make_subplots is deprecated in favour of this one.
    from plotly.subplots import make_subplots

    # One grid row per attribute: (column to group by, bar colour).
    attributes = [
        ('device.browser', '#073b4c'),
        ('device.deviceCategory', '#118ab2'),
        ('device.operatingSystem', '#ef476f'),
    ]
    metrics = ["count", "total revenue", "mean"]

    # Nine subplots; titles are listed row by row.
    fig = make_subplots(
        rows=3, cols=3, vertical_spacing=0.04,
        subplot_titles=["Device Browser - Count", "Device Browser - Total Revenue", "Device Browser - Mean Revenue",
                        "Device Category - Count", "Device Category - Total Revenue ", "Device Category - Mean Revenue",
                        "Device OS - Count", "Device OS - Total Revenue", "Device OS - Mean Revenue"])

    for row, (column, color) in enumerate(attributes, start=1):
        stats = df_train.groupby(column)['totals.transactionRevenue'].agg(['size', 'sum', 'mean'])
        stats.columns = metrics
        # Every metric of a row is ranked by frequency, not by its own value.
        stats = stats.sort_values(by="count", ascending=False)
        for col, metric in enumerate(metrics, start=1):
            fig.add_trace(horizontal_bar_chart(stats[metric].head(10), color), row=row, col=col)

    fig.update_layout(height=1200, width=1500, template='plotly_white',
                      paper_bgcolor='#ffffff', title="Device Plots")

    # Embed the chart in the app; fig.show() would open it in a separate
    # renderer on the machine running the script instead of the page.
    st.plotly_chart(fig)

    observations = pyttsx3.init()
    observations.say("The above graph shows that customers with desktop, operating with windows and using chrome are genrating highest revenue.")
    observations.runAndWait()
def channel_revenue(df_train):
    """Render per-channel row count, total revenue and mean revenue.

    Groups ``totals.transactionRevenue`` by ``channelGrouping`` and draws
    three stacked bar charts (count, sum, mean). The figure is written to
    the Streamlit page, then a fixed sentence is spoken through pyttsx3.

    Args:
        df_train: DataFrame with ``channelGrouping`` and
            ``totals.transactionRevenue`` columns.
    """
    group = (df_train.groupby('channelGrouping')['totals.transactionRevenue']
             .agg(['count', 'sum', 'mean'])
             .reset_index())

    # Build the palette directly. sns.set_palette() returns None, so using
    # its result meant ``palette=None`` was passed to every barplot while the
    # global seaborn theme was changed as a side effect.
    color = ["#ffa69e", "#faf3dd", "#b8f2e6", "#aed9e0", "#5e6472", '#f6bd60', '#84a59d', '#f8edeb']
    customPalette = sns.color_palette(color)

    xlabels = group['channelGrouping'].to_list()

    # (aggregated column, text used for both the panel title and the y label)
    panels = [
        ('count', 'Number of Users'),
        ('sum', 'Total Revenue'),
        ('mean', 'Average Revenue'),
    ]

    fig = plt.figure(figsize=(10, 10))
    for position, (metric, caption) in enumerate(panels, start=1):
        ax = fig.add_subplot(3, 1, position)
        sns.barplot(data=group, x='channelGrouping', y=metric, palette=customPalette, ax=ax)
        ax.set_title(caption, fontsize=20)
        ax.set_ylabel(caption, fontsize=14)
        ax.set_xlabel('')
        ax.set_xticklabels(xlabels, rotation=30, fontsize=10)
    fig.tight_layout(pad=0.5)

    # Pass the figure explicitly; the argument-less form is deprecated.
    st.pyplot(fig)

    observations = pyttsx3.init()
    observations.say("These plots tells us that most people are doing their search via organic search whereas highest revenue is generated by referral.")
    observations.runAndWait()
def browser_devices(df_train):
    """Show the pre-rendered browser/device image and read out the takeaway.

    Displays the file ``img.png`` on the Streamlit page, then speaks a fixed
    sentence through pyttsx3.

    Args:
        df_train: accepted for a uniform signature; not used by this function.
    """
    narration = "The above plot shows that Chrome is quite popular in most of the devices and Safari is mostly used on Mac OS and IOS devices"

    st.image("img.png", use_column_width=True)

    speaker = pyttsx3.init()
    speaker.say(narration)
    speaker.runAndWait()
def os_revenue(df_train):
    """Render revenue density curves for the most common operating systems.

    Keeps only rows with strictly positive ``totals.transactionRevenue``,
    restricts them to the six operating systems that occur most often among
    those rows, and draws one KDE curve of revenue per operating system.
    The figure is written to the Streamlit page, then a fixed sentence is
    spoken through pyttsx3.

    Args:
        df_train: DataFrame with ``device.operatingSystem`` and
            ``totals.transactionRevenue`` columns.
    """
    paying = df_train[df_train['totals.transactionRevenue'] > 0]

    # value_counts() is sorted by frequency, so the first six index labels
    # are the six most common systems. Reading them from the index avoids
    # ``reset_index()['index']``, which raises KeyError on pandas >= 2.0
    # where that column is named after the series instead of 'index'.
    top_systems = paying['device.operatingSystem'].value_counts().index[:6]
    group = paying[paying['device.operatingSystem'].isin(top_systems)]

    grid = sns.FacetGrid(group, hue='device.operatingSystem', height=3, aspect=1)
    grid.map(sns.kdeplot, 'totals.transactionRevenue', shade=True)
    grid.add_legend()

    # Pass the grid's figure explicitly; the argument-less form is deprecated.
    st.pyplot(grid.fig)

    observations = pyttsx3.init()
    observations.say("Windows has the highest widespread of transaction revenue whereas linux has the highest peak of transaction revenue.")
    observations.runAndWait()
def device_dist(df_train):
    """Render revenue density curves per device category.

    Uses only rows with strictly positive ``totals.transactionRevenue`` and
    draws one KDE curve of revenue per ``device.deviceCategory`` value.
    The figure is written to the Streamlit page, then a fixed sentence is
    spoken through pyttsx3.

    Args:
        df_train: DataFrame with ``device.deviceCategory`` and
            ``totals.transactionRevenue`` columns.
    """
    # Build the palette directly. sns.set_palette() returns None, so using
    # its result meant ``palette=None`` reached FacetGrid while the global
    # seaborn theme was changed as a side effect.
    color = ['#14213d', '#fca311', '#d90429']
    customPalette = sns.color_palette(color)

    paying = df_train[df_train['totals.transactionRevenue'] > 0]
    grid = sns.FacetGrid(paying, hue='device.deviceCategory', height=3, aspect=1,
                         palette=customPalette)
    grid.map(sns.kdeplot, 'totals.transactionRevenue', shade=True)
    grid.add_legend()

    # Pass the grid's figure explicitly; the argument-less form is deprecated.
    st.pyplot(grid.fig)

    observations = pyttsx3.init()
    observations.say("Desktop has the highest widespread of transaction revenue whereas Tablet has the highest peak of transaction revenue.")
    observations.runAndWait()
def day_info(df_train):
    """Render per-date visits, revenue and sale counts as a line chart.

    Three series are drawn against ``date``:
      * number of rows per date ("Visits by day");
      * sum of ``totals.transactionRevenue`` per date ("RevenueLog by day";
        the plain sum is plotted, no logarithm is applied here);
      * number of rows with strictly positive revenue per date
        ("Sellings by day").
    The x axis carries 1/3/6-month range buttons and a range slider. The
    chart is embedded in the Streamlit page.

    Args:
        df_train: DataFrame with ``date`` and ``totals.transactionRevenue``.
    """
    revenue_col = 'totals.transactionRevenue'

    # Visits per date, ordered by date. Sorting the index replaces
    # ``reset_index().sort_values('index')``, which raises KeyError on
    # pandas >= 2.0 where the column is no longer called 'index'.
    visits = df_train['date'].value_counts().sort_index()
    trace = go.Scatter(x=visits.index.astype(str), y=visits.values,
                       opacity=0.8, line=dict(color='#38C788'), name='Visits by day')

    # Total revenue per date (groupby returns the dates in sorted order).
    revenue_sum = df_train.groupby('date')[revenue_col].sum()
    trace1 = go.Scatter(x=revenue_sum.index.astype(str), y=revenue_sum.values,
                        opacity=0.8, line=dict(color='#C73877'), name="RevenueLog by day")

    # Number of revenue-generating rows per date.
    sales = df_train[df_train[revenue_col] > 0].groupby('date')[revenue_col].count()
    trace2 = go.Scatter(x=sales.index.astype(str), y=sales.values,
                        opacity=0.8, line=dict(color='#7425FF'), name="Sellings by day")

    layout = dict(
        title="Informations by Date",
        paper_bgcolor='#ffffff',
        template='plotly_white',
        xaxis=dict(
            rangeselector=dict(buttons=[
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=3, label='3m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(step='all'),
            ]),
            rangeslider=dict(visible=True),
            type='date',
        ),
    )

    fig = go.Figure(data=[trace, trace1, trace2], layout=layout)

    # Embed the plotly figure in the page. iplot() targets notebooks, and
    # the st.pyplot() that followed it rendered an unrelated matplotlib
    # figure rather than this chart.
    st.plotly_chart(fig)
def country_visits(df_train):
    """Render a world choropleth of the number of rows per country.

    Counts the occurrences of each ``geoNetwork.country`` value and colours
    the countries (matched by name) with the ``YlGnBu`` scale. The map is
    embedded in the Streamlit page.

    Args:
        df_train: DataFrame with a ``geoNetwork.country`` column.
    """
    countMaps = df_train['geoNetwork.country'].value_counts().reset_index()
    countMaps.columns = ['country', 'counts']  # positional rename: label, count

    data = [dict(
        type='choropleth',
        locations=countMaps['country'],
        locationmode='country names',
        z=countMaps['counts'],
        text=countMaps['country'],
        colorscale='YlGnBu',
        autocolorscale=False,
        marker=dict(
            line=dict(
                color='rgb(180,180,180)',
                width=0.5,
            ),
        ),
        # 'autotick' was dropped: it is not a valid colorbar property and
        # only got through before because validation was switched off.
        colorbar=dict(
            tickprefix='',
            title='Number of Visits',
        ),
    )]

    layout = dict(
        title='Couting Visits Per Country',
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection=dict(type='natural earth'),
        ),
    )

    figure = go.Figure(data=data, layout=layout)

    # Embed the plotly figure in the page. iplot() targets notebooks, and
    # the st.pyplot() that followed it rendered an unrelated matplotlib
    # figure rather than this map.
    st.plotly_chart(figure)
def viz(df):
    """Offer the available plots as checkboxes and render the first ticked one.

    The checkboxes are created one after another, in the order listed
    below. As soon as a ticked checkbox is found, its plot is rendered and
    the remaining checkboxes are not created for this run.

    Args:
        df: prepared DataFrame handed unchanged to the selected plot function.
    """
    # (checkbox label, plot function) in display order.
    plot_menu = (
        ("Revenue Exploration", explore_revenue),
        ("Revenue based on Browsers/Device/Operating System", device_revenue),
        ("Channel Groups and their Revenue", channel_revenue),
        ("Browsers used on Different Devices", browser_devices),
        ("Distribution of transaction Revenue of OS's", os_revenue),
        ("Distribution of transaction Revenue of Device Category", device_dist),
        ("Vistis/Revenue/Sellings by Day", day_info),
        ("Counting Visits Per Country", country_visits),
    )
    for label, render_plot in plot_menu:
        if st.checkbox(label):
            render_plot(df)
            # Stop here: later checkboxes are skipped once one is ticked.
            break
def main():
    """Streamlit entry point: upload a CSV, clean it and offer the plots.

    Shows the page title and a CSV uploader. Once a file is uploaded, its
    type and basic details are echoed, the CSV is loaded and passed through
    ``data_prep``, and a select box chooses what to show. Only 'Plots' does
    anything; 'Data Description' is currently a placeholder.
    """
    st.title("Multichannel Marketing")
    uploaded = st.file_uploader("Upload CSV", type=["csv"])

    # Nothing else to show until the user has uploaded a file.
    if uploaded is None:
        return

    st.write(type(uploaded))
    st.write({
        "filename": uploaded.name,
        "filetype": uploaded.type,
        "filesize": uploaded.size,
    })

    frame = data_prep(pd.read_csv(uploaded))

    choice = st.selectbox('', ('Choose an option', 'Data Description', 'Plots'))
    if choice == 'Plots':
        viz(frame)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()