-
Notifications
You must be signed in to change notification settings - Fork 0
/
05_dict_performance.py
90 lines (77 loc) · 3.15 KB
/
05_dict_performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# dict 性能远高于list
# list 随着list数据增大,查找时间增大
# dict 随着dict数据增大,查找时间几乎不变,因为数组偏移量,连续
# dict的key,和set,都必须是可hash的,不可变对象,str,frozenset,tuple,__hash__
# dict实现原理:hash表,key必须是可hash的, 解决hash冲突是进行多位运算,有偏移量
# 缺点:dict内存花销大,但查询速度快,自定义的对象或者python的内部对象都是用dict封装的
# 2. 存储顺序和元素添加顺序有关,不期待顺序性, orderdict可实现顺序
# 3. 添加数据有可能改变已有数据的结构, 重新申请空间
from random import randint
def load_list_data(total_nums, target_nums):
"""
从文件中读取数据,以list的方式返回
:param total_nums: 读取的数量
:param target_nums: 需要查询的数据的数量
"""
all_data = []
target_data = []
file_name = "G:/AdvancePython/performance.txt"
with open(file_name, encoding="utf8", mode="r") as f_open:
for count, line in enumerate(f_open):
if count < total_nums:
all_data.append(line)
else:
break
for x in range(target_nums):
random_index = randint(0, total_nums)
if all_data[random_index] not in target_data:
target_data.append(all_data[random_index])
if len(target_data) == target_nums:
break
return all_data, target_data
def load_dict_data(total_nums, target_nums):
"""
从文件中读取数据,以dict的方式返回
:param total_nums: 读取的数量
:param target_nums: 需要查询的数据的数量
"""
all_data = {}
target_data = []
file_name = "G:/AdvancePython/performance.txt"
with open(file_name, encoding="utf8", mode="r") as f_open:
for count, line in enumerate(f_open):
if count < total_nums:
all_data[line] = 0
else:
break
all_data_list = list(all_data)
for x in range(target_nums):
random_index = randint(0, total_nums - 1)
if all_data_list[random_index] not in target_data:
target_data.append(all_data_list[random_index])
if len(target_data) == target_nums:
break
return all_data, target_data
def find_test(all_data, target_data):
# 测试运行时间
test_times = 100
total_times = 0
import time
for i in range(test_times):
find = 0
start_time = time.time()
for data in target_data:
if data in all_data:
find += 1
last_time = time.time() - start_time
total_times += last_time
return total_times / test_times
if __name__ == "__main__":
all_data, target_data = load_list_data(10000, 1000)
# all_data, target_data = load_list_data(100000, 1000)
# all_data, target_data = load_list_data(1000000, 1000)
# all_data, target_data = load_dict_data(10000, 1000)
# all_data, target_data = load_dict_data(100000, 1000)
# all_data, target_data = load_dict_data(1000000, 1000)
last_time = find_test(all_data, target_data)
print(last_time)