-
Notifications
You must be signed in to change notification settings - Fork 14
/
stats.py
executable file
·107 lines (94 loc) · 5.53 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#! /usr/bin/python
import datetime
import re
from sqlalchemy import *
# Substring every stats entry contains; used as a cheap pre-filter so the
# regular expression only runs on candidate lines.
_STATS_MARKER = b'INFO collective.stats'

# Layout of one stats entry, expressed through its named groups.  Compiled
# once instead of once per line.  Raw literals keep the backslashes intact
# for the regex engine; bytes literals let the pattern be applied to lines
# read in binary mode (on Python 2 ``br"..."`` is an ordinary ``str``).
_STATS_LINE_RE = re.compile(
    br"^(?P<access_time>\d+-\d+-\w+:\d+:\d+) INFO collective\.stats \| "
    br"(?P<publisher_time>\d+\.\d+) (?P<traverse_time>\d+\.\d+) "
    br"(?P<commit_time>\d+\.\d+) (?P<transform_time>\d+\.\d+) "
    br"(?P<setstate_time>\d+\.\d+) (?P<total_object_loads>\d+) "
    br"(?P<object_loads_from_cache>\d+) (?P<objects_modified>\d+) \| "
    br"(?P<action>\w+:)(?P<url>.*) \| .* \| "
    br"RSS\: (?P<start_RSS>\d+) - (?P<end_RSS>\d+)"
)

# strptime format of the ``access_time`` group.
_ACCESS_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


def do_it(file='instance1.log'):
    """Scan a log file for ``INFO collective.stats`` entries.

    For every line that matches the stats layout, the name of each captured
    field is printed on its own line (debug output), and the timestamps of
    the first and last matching entries are tracked.  Lines that do not
    contain the stats marker, or contain it but do not match the full
    layout, are skipped silently.

    Works on both Python 2 and Python 3.

    Args:
        file: Path of the log file to read.  Defaults to ``instance1.log``
            in the current working directory.

    Returns:
        None.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If a matched timestamp does not fit
            ``%Y-%m-%dT%H:%M:%S``.
    """
    first_request_time = None
    last_request_time = None
    # These accumulators are only referenced by the commented-out
    # aggregation/reporting code that follows this function in the file;
    # they are kept so that code can be re-enabled as-is.
    total_rendering_time = 0
    total_rss_change = 0
    num_requests = 0
    drawer = {}
    with open(file, 'rb') as logfile:
        # Iterate the file lazily rather than loading every line up front.
        for line in logfile:
            if _STATS_MARKER not in line:
                continue
            match_result = _STATS_LINE_RE.match(line)
            if match_result is None:
                continue
            # Count parsed requests only; unrelated log lines must not
            # inflate the request total.
            num_requests += 1
            result = match_result.groupdict()
            # The group only matches ASCII digits/word characters, so an
            # ASCII decode is safe; strptime needs text, not bytes.
            access_time = datetime.datetime.strptime(
                result['access_time'].decode('ascii'), _ACCESS_TIME_FORMAT)
            if first_request_time is None:
                first_request_time = access_time
            last_request_time = access_time
            for row in result:
                # Single-argument call form prints identically on
                # Python 2 and Python 3.
                print(row)
# id = result['url']
# if id not in drawer:
# drawer[id] = {
# 'rss_diff': 0,
# 'details': [],
# 'actual_url': id,
# 'object_cache_misses': 0,
# 'url_hits': 0,
# 'avg_rss': 0.0,
# 'total_rss': 0.0,
# 'time_rendering': 0.0,
# 'avg_req_time': 0.0,
# }
# item = drawer[id]
# item['details'].append(result)
# item['url_hits'] += 1
# rss_diff = int(result['end_RSS']) - int(result['start_RSS'])
# item['rss_diff'] += rss_diff
# total_rss_change += rss_diff
# item['avg_rss'] = rss_diff and rss_diff/item['url_hits'] or 0.0
# item['total_rss'] += rss_diff
# item['time_rendering'] += float(result['publisher_time'])
# total_rendering_time += float(result['publisher_time'])
# item['avg_req_time'] = item['time_rendering']/item['url_hits']
# total_time = last_request_time - first_request_time
# reqs_sec = num_requests/total_time.total_seconds()
# print '=' * 80
# print 'SUMMARY'
# print '=' * 80
# print "This instance is receiving %s requests per second." % (reqs_sec)
# time_per_request = total_rendering_time/num_requests
# print "The average rendering time is %s seconds." % (time_per_request)
# optimal_capacity = total_time.total_seconds() * (1/time_per_request)
# optimal_requests = optimal_capacity/total_time.total_seconds()
# print "It should be able to handle %.2f requests/sec (per thread)" % (optimal_requests)
# print "It is at %.2f%% capacity" % ((reqs_sec/optimal_requests) * 100)
# num_offenders = 20
# fucked_threshold = 2 # pages longer than 2 seconds, should really be 1....
# print "The following URLs are likely screwing you on rendering time: \n"
# significatly_fucked = dict((k, v) for k, v in drawer.iteritems() if v['avg_req_time'] >= fucked_threshold)
# sorted_drawer = sorted(significatly_fucked.items(), key=lambda x: x[1]['time_rendering'], reverse=True)[:num_offenders]
# print_section('SLOW REQUESTS', sorted_drawer, total_rendering_time)
# sorted_drawer = sorted(drawer.items(), key=lambda x: x[1]['time_rendering'], reverse=True)[:num_offenders]
# print_section('INSTANCE CHOKERS', sorted_drawer, total_rendering_time)
# print "The following items are consuming too much memory: \n\n"
# sorted_drawer = sorted(drawer.items(), key=lambda x: x[1]['rss_diff'], reverse=True)[:num_offenders]
# print_section('MEMORY HOGS', sorted_drawer, total_rendering_time, total_rss_change)
# return drawer
# def print_section(section_name, sorted_drawer, total_rendering_time, total_rss_change=None):
# print '\n\n', '=' * 80
# print section_name
# print '=' * 80
# for num, (id, fucker) in enumerate(sorted_drawer):
# print "%s. %s" %(num, get_normalized_url(id))
# print '-' *80
# print "Total time server spent rendering this page: %.2f seconds" % fucker['time_rendering']
# print "Average time to render: %s" % fucker['avg_req_time']
# print "Number of renders for this instance: %s " % fucker['url_hits']
# if total_rss_change:
# print "%% of memory used: %.2f" % ((fucker['total_rss']/total_rss_change) * 100)
# time_saved_render_once = fucker['time_rendering'] - fucker['avg_req_time']
# print "If you cached this page as a middle man, you would save %.4f%% of server load" % ((time_saved_render_once/total_rendering_time) * 100)
# print
# print
# def get_normalized_url(id):
# return id.replace("/VirtualHostBase/http/", "http://").replace("VirtualHostRoot/", "").replace(":80/engin", "")
# Script entry point: when executed directly (not imported), analyse the
# default log file that do_it() falls back to.
if __name__ == "__main__":
    do_it()