-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathring.py
173 lines (118 loc) · 5.14 KB
/
ring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from heapq import nlargest
from heapq import nsmallest
import sys
# Validate the command line: exactly one argument (the ring extract file) is
# expected. sys.argv[0] is the script itself, hence the comparison with 2.
if len(sys.argv) == 2:
    filename = sys.argv[1]
else:
    print ("\n***",sys.argv[0], "***\n")
    print ('Incorrect number of arguments, please run script as follows:')
    print ('\n\n'+str(sys.argv[0])+' <Extract of one DC nodetool ring>')
    # A usage error is a failure: exit non-zero so shells and wrapper scripts
    # can detect it (status 0 would signal success).
    sys.exit(1)
# Map every token found in the input file to the node that owns it:
#     token_dictionary[token] == {"ipaddress": <address>}
# Only lines that split into exactly eight whitespace-separated fields are
# used; the address is the first field and the token is the eighth.
token_dictionary = {}
with open(filename) as ring_file:
    for row in ring_file:
        fields = row.split()
        if len(fields) != 8:
            continue
        token_dictionary[fields[7]] = {"ipaddress": fields[0]}
# Walk the ring in token order and record for every token:
#   "diff_to_next"     - size of the range between the preceding token and
#                        this one
#   "position_in_ring" - index of the token in the ordered ring
# Tokens are sorted numerically first so the result does not depend on the
# order of the lines in the input file. They are stored as strings, so a plain
# sort would compare them lexicographically; hence key=int.
token_list = sorted(token_dictionary, key=int)
minToken = -9223372036854775808
maxToken = 9223372036854775808
for i, token in enumerate(token_list):
    if i == 0:
        # The first token's range wraps around the ring: the part from the
        # last token up to maxToken plus the part from minToken up to the
        # first token.
        mydiff = abs(minToken - int(token_list[0])) + abs(maxToken - int(token_list[-1]))
    else:
        mydiff = int(token) - int(token_list[i - 1])
    token_dictionary[token]["diff_to_next"] = mydiff
    token_dictionary[token]["position_in_ring"] = i
# Aggregate the token ranges per node: ip_dict[address]["total_token"] is the
# sum of "diff_to_next" over every token owned by that address. Addresses are
# inserted in the order they are first seen in token_dictionary.
ip_dict = {}
for entry in token_dictionary.values():
    node_totals = ip_dict.setdefault(entry["ipaddress"], {"total_token": 0})
    node_totals["total_token"] = int(node_totals["total_token"]) + int(entry["diff_to_next"])
# Grand total of all token ranges across every node.
total_token = sum(node["total_token"] for node in ip_dict.values())

# For each node, store its share of the 18446744073709551616-token space
# ("ratio") and a running index in ip_dict order ("position"), then print a
# one-line summary with the ratio as a percentage rounded to two decimals.
position = 0
for ip, node in ip_dict.items():
    node["ratio"] = node["total_token"]/18446744073709551616
    node["position"] = position
    position += 1
    percentage = round(node["ratio"]*100,2)
    print("ip address", ip, "\thas\t", node["total_token"] , " tokens in total and a ratio of:\t", percentage)
# Relative deviation of each node's token total from a perfectly even split
# of the 18446744073709551616-token space across all nodes found in the input:
#     deviation = (total_token - average) / average
if not ip_dict:
    # No usable ring lines were parsed. Without this guard the division below
    # fails with an unexplained ZeroDivisionError.
    sys.exit("No ring entries found in " + str(filename))
average_token_number = 18446744073709551616/len(ip_dict)
for node in ip_dict.values():
    node["deviation"] = (node["total_token"] - average_token_number)/average_token_number
# Flatten the per-node results into plain lists and address-keyed lookups.
deviations = [ip_dict[ip]["deviation"] for ip in ip_dict]
deviation_dict = {ip: ip_dict[ip]["deviation"] for ip in ip_dict}
# Positions belong in their own list. Appending them to `deviations` would mix
# ring indices into the deviation values and leave `positions` empty.
positions = [ip_dict[ip]["position"] for ip in ip_dict]
position_dict = {ip: ip_dict[ip]["position"] for ip in ip_dict}
# Reverse lookup: position index -> node address.
ip_of_positons_dict = {}
for address, slot in position_dict.items():
    ip_of_positons_dict[slot] = address

# Report the nodes with the smallest and the largest deviation.
lowest_ip = min(deviation_dict, key=deviation_dict.get)
print("min deviation per node: ", lowest_ip, ":", deviation_dict[lowest_ip])
highest_ip = max(deviation_dict, key=deviation_dict.get)
print("max deviation per node: ", highest_ip, ":", deviation_dict[highest_ip])

# Report the three nodes at each end of the deviation ranking.
ThreeHighest = nlargest(3, deviation_dict, key = deviation_dict.get)
ThreeLowest = nsmallest(3, deviation_dict, key = deviation_dict.get)
for heading, ranked in (("3 highest nodes", ThreeHighest), ("3 lowest nodes", ThreeLowest)):
    print(heading)
    for val in ranked:
        print(val, " : ", deviation_dict.get(val))
# Share of the token space per node, keyed by address.
ratio_dict = {ip: ip_dict[ip]["ratio"] for ip in ip_dict}
rep_factor = 3
# Percentage of data held per node once replication is factored in: the
# node's own ratio plus the ratios of the nodes at the preceding positions,
# rep_factor ratios in total, wrapping around below position 0.
# The ratios are added one by one in a fixed order so the floating-point
# result stays the same.
node_count = len(ip_dict)
data_by_ip = {}
for ip in ip_dict:
    held = 0
    for offset in range(rep_factor):
        neighbour = ip_of_positons_dict[(position_dict[ip] - offset) % node_count]
        held += ratio_dict[neighbour]
    data_by_ip[ip] = round(held*100, 1)
'''
print("token ratio by ip: ", ratio_dict)
print("data held by ip with replication factor ", rep_factor, " : ", data_by_ip)
print("min ownership per node with RF factored in: ", min(data_by_ip, key=data_by_ip.get), ":", data_by_ip[min(data_by_ip, key=data_by_ip.get)])
print("max ownership per node with RF factored in: ", max(data_by_ip, key=data_by_ip.get), ":", data_by_ip[max(data_by_ip, key=data_by_ip.get)])
'''