-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
178 lines (159 loc) · 5.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import collections
import pandas as pd
import matplotlib.pyplot as plt
def init_func(df):
    """Collect the distinct user ids present in the data.

    Prints the number of distinct users as a side effect.

    Args:
        df: DataFrame with a ``user_id`` column.

    Returns:
        A list of the distinct ``user_id`` values, in order of first
        appearance in ``df``.
    """
    # Read the column as a whole instead of row by row via df['user_id'][i]:
    # that form is a label lookup and raises KeyError as soon as the index is
    # not the default 0..n-1 (e.g. after filtering or sorting).
    user_list = df['user_id'].unique().tolist()
    print('用户数量:', len(user_list))
    return user_list
def find_top_shopping(df, user_list, top_num):
    """Print the users with the highest share of purchase actions.

    For every user in ``user_list`` the share is the number of that user's
    rows whose ``behavior_type`` equals 4, divided by the number of all of
    that user's rows, expressed as a percentage.

    Args:
        df: DataFrame with ``user_id`` and ``behavior_type`` columns.
        user_list: Ids of the users to rank. Rows of ``df`` belonging to
            other users are ignored.
        top_num: How many users to print. Values <= 0 print no users.

    Returns:
        None. The ranking is written to standard output.
    """
    total_rows = collections.Counter()
    purchase_rows = collections.Counter()
    # Iterate positionally over the column values; df[col][i] would be a
    # label lookup and break on a non-default index.
    columns = zip(df['user_id'].tolist(), df['behavior_type'].tolist())
    for uid, behavior in columns:
        key = str(uid)
        total_rows[key] += 1
        if behavior == 4:
            purchase_rows[key] += 1

    # Keyed by str(id) in user_list order so that ties keep that order after
    # the stable sort, and a repeated id is only computed once.
    ratios = collections.OrderedDict()
    for uid in user_list:
        key = str(uid)
        rows = total_rows[key]
        # A listed user with no rows has no purchases either: report 0.0
        # instead of dividing by zero.
        ratios[key] = purchase_rows[key] / rows * 100 if rows else 0.0

    ranked = sorted(ratios.items(), key=lambda pair: pair[1], reverse=True)
    print('购买效率达人:')
    for key, val in ranked[:max(top_num, 0)]:
        print('user_id:{}\t购买行为占比:{}%'.format(key, val))
def find_similar(user_id, user_list, df, top_num):
    """Print the users who share the most item categories with ``user_id``.

    Similarity between two users is the number of distinct
    ``item_category`` values that appear in rows of both users.

    Args:
        user_id: Id of the reference user. It is compared by its string
            form, so ``10001082`` and ``'10001082'`` are equivalent.
        user_list: Ids of the candidate users. The reference user is
            skipped; rows of ``df`` for users not listed are ignored.
        df: DataFrame with ``user_id`` and ``item_category`` columns.
        top_num: How many users to print. Values <= 0 print no users.

    Returns:
        None. The ranking is written to standard output.
    """
    target = str(user_id)

    # Map str(user id) -> set of category strings seen for that user.
    # Values are read positionally; df[col][i] would be a label lookup and
    # break on a non-default index.
    categories = collections.defaultdict(set)
    columns = zip(df['user_id'].tolist(), df['item_category'].tolist())
    for uid, category in columns:
        categories[str(uid)].add(str(category))

    # .get() keeps users without any rows at an empty set rather than
    # raising KeyError (and avoids inserting them into the defaultdict).
    target_items = categories.get(target, set())

    # Built in user_list order so ties keep that order after the stable sort.
    # The reference user is deliberately left out: a user is not their own
    # peer.
    overlap = collections.OrderedDict()
    for uid in user_list:
        key = str(uid)
        if key == target:
            continue
        overlap[key] = len(target_items & categories.get(key, set()))

    ranked = sorted(overlap.items(), key=lambda pair: pair[1], reverse=True)
    print('用户{}的同道中人:'.format(user_id))
    for key, val in ranked[:max(top_num, 0)]:
        print('user_id:{},相似种类数:{}'.format(key, val))
def draw_pie(df):
    """Save a pie chart of the five most frequent item categories.

    Rows are counted per ``item_category``. The five categories with the
    most rows get their own slice; all remaining categories, if any, are
    summed into a single extra slice. The chart is written to
    ``用户关注的商品分类前5.svg`` in the current working directory.

    Args:
        df: DataFrame with an ``item_category`` column.

    Returns:
        None.
    """
    # Count positionally over the column values; df[col][i] would be a label
    # lookup and break on a non-default index.
    counts = collections.Counter(
        str(category) for category in df['item_category'].tolist()
    )
    # most_common() sorts by count, descending; equal counts stay in
    # first-seen order.
    ranked = counts.most_common()
    top, rest = ranked[:5], ranked[5:]

    labels = ['商品分类:' + key for key, _ in top]
    numbers = [val for _, val in top]
    if rest:
        labels.append('商品分类:其他')
        numbers.append(sum(val for _, val in rest))

    # Overrides the global matplotlib font family; presumably chosen so the
    # Chinese labels render - the font must be installed on the machine.
    plt.rcParams['font.family'] = 'FangSong'
    plt.pie(numbers, labels=labels, autopct='%0.1f%%')
    plt.title('用户关注的商品分类饼图(前5)')
    plt.savefig('用户关注的商品分类前5.svg')
    plt.close()
def draw_plot(df, user_id):
    """Save a line chart of one user's number of actions per date.

    Every row of ``df`` belonging to ``user_id`` counts as one action on the
    date taken from its ``time`` value. Dates are plotted in ascending
    string order. The chart is written to
    ``用户<user_id>的购物活动时间图.svg`` in the current working directory.

    Args:
        df: DataFrame with ``user_id`` and ``time`` columns. Each ``time``
            value must be a string whose first whitespace-separated token
            is the date.
        user_id: Id of the user to plot. It is compared by its string form,
            so an int and its string spelling are equivalent.

    Returns:
        None.
    """
    target = str(user_id)
    per_date = collections.Counter()
    # Iterate positionally over the column values; df[col][i] would be a
    # label lookup and break on a non-default index.
    for uid, stamp in zip(df['user_id'].tolist(), df['time'].tolist()):
        if str(uid) != target:
            continue
        # Only the date part is needed; whatever follows it is dropped.
        date_info = stamp.split()[0]
        per_date[str(date_info)] += 1

    ordered = sorted(per_date.items(), key=lambda item: item[0])
    x = [date for date, _ in ordered]
    y = [count for _, count in ordered]

    # Overrides the global matplotlib font family; presumably chosen so the
    # Chinese labels render - the font must be installed on the machine.
    plt.rcParams['font.family'] = 'FangSong'
    plt.title('用户{}的购物活动时间折线图'.format(user_id))
    plt.tick_params(axis='x', labelsize=4.5)
    plt.xlabel('日期')
    plt.ylabel('4种购物活动次数')
    plt.plot(x, y)
    plt.savefig('用户{}的购物活动时间图.svg'.format(user_id))
    plt.close()
def draw_bar(df, user_list):
    """Save a bar chart of the ten most active users.

    Activity is the number of rows in ``df`` for a user. The chart is
    written to ``用户活跃度前10.svg`` in the current working directory.

    Args:
        df: DataFrame with a ``user_id`` column.
        user_list: Ids of the users to rank. Rows of ``df`` belonging to
            other users are ignored; listed users without rows count as 0.

    Returns:
        None.
    """
    # Count positionally over the column values; df[col][i] would be a label
    # lookup and break on a non-default index.
    row_counts = collections.Counter(
        str(uid) for uid in df['user_id'].tolist()
    )

    # Built in user_list order so ties keep that order after the stable sort.
    frequency = collections.OrderedDict()
    for uid in user_list:
        key = str(uid)
        frequency[key] = row_counts[key]

    ranked = sorted(frequency.items(), key=lambda pair: pair[1], reverse=True)
    top = ranked[:10]
    id_list = [key for key, _ in top]
    frequency_list = [val for _, val in top]

    x = range(len(id_list))
    # Overrides the global matplotlib font family; presumably chosen so the
    # Chinese labels render - the font must be installed on the machine.
    plt.rcParams['font.family'] = 'STSong'
    plt.bar(x, height=frequency_list)
    plt.xticks(x, id_list)
    plt.xlabel('user_id')
    plt.ylabel('活跃度')
    plt.title('用户活跃度(前10)')
    plt.tick_params(axis='x', labelsize=6)
    plt.savefig('用户活跃度前10.svg')
    plt.close()
if __name__ == '__main__':
    # User whose peers and activity timeline are reported, and the length
    # of the printed rankings.
    target_user = '10001082'
    ranking_size = 10

    # Load the behaviour records (first CSV row is the header) and
    # enumerate the users they contain.
    records = pd.read_csv('small_user.csv', header=0)
    users = init_func(records)

    # Ranking of users by share of purchase actions.
    find_top_shopping(records, users, ranking_size)

    # Users sharing the most item categories with the target user.
    find_similar(target_user, users, records, ranking_size)

    # The three charts: category pie, activity bars, per-date line plot.
    draw_pie(records)
    draw_bar(records, users)
    draw_plot(records, target_user)