-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmeasure_performance.py
91 lines (69 loc) · 2.41 KB
/
measure_performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gc
import time
import psutil
import csv
from pyspark.storagelevel import StorageLevel
from sparkmeasure import StageMetrics
import requests
import json
# Get the Spark application ID
def get_app_id(base_url="http://83.212.81.54:4040", timeout=10):
    """Return the ID of the first application listed by the Spark REST API.

    Args:
        base_url: Root URL of the Spark UI that serves ``/api/v1``. Defaults
            to the host this script was originally written against.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``"id"`` field of the first entry in the applications list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the server reports no applications.
    """
    # A timeout keeps a dead or unreachable UI from hanging the benchmark.
    response = requests.get(
        f"{base_url.rstrip('/')}/api/v1/applications", timeout=timeout
    )
    # Fail on 4xx/5xx here instead of on a confusing JSON decode error later.
    response.raise_for_status()
    applications = response.json()
    if not applications:
        raise IndexError(f"no Spark applications reported by {base_url}")
    return applications[0]["id"]
# Get the executor summary
def get_executor_summary(app_id, base_url="http://83.212.81.54:4040", timeout=10):
    """Fetch the executor list of a Spark application from the REST API.

    Args:
        app_id: Application ID to query, e.g. as returned by ``get_app_id``.
        base_url: Root URL of the Spark UI that serves ``/api/v1``. Defaults
            to the host this script was originally written against.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the ``executors`` endpoint (the other
        helpers in this file iterate it as a list of per-executor dicts).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = f"{base_url.rstrip('/')}/api/v1/applications/{app_id}/executors"
    # A timeout keeps a dead or unreachable UI from hanging the benchmark.
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of on a confusing JSON decode error later.
    response.raise_for_status()
    return response.json()
# Calculate the average number of cores per executor
def get_average_cpu_usage(executors):
    """Return the mean ``totalCores`` value across the given executors.

    Despite the name, this averages the ``totalCores`` field of each
    executor entry; it does not sample CPU utilisation.

    Args:
        executors: Sequence of dicts, each carrying a ``"totalCores"`` key.

    Returns:
        The average of ``totalCores`` over all entries, or ``0.0`` when
        ``executors`` is empty (instead of raising ``ZeroDivisionError``).
    """
    if not executors:
        return 0.0
    return sum(executor["totalCores"] for executor in executors) / len(executors)
# Calculate the total memory usage
def get_total_memory_usage(executors):
    """Return the sum of ``memoryUsed`` over all executors.

    Args:
        executors: Iterable of dicts, each carrying a ``"memoryUsed"`` key.
            NOTE(review): presumably bytes, as reported by the Spark REST
            API; confirm against the API documentation.

    Returns:
        The total of all ``memoryUsed`` values; ``0`` for an empty input.
    """
    return sum(executor["memoryUsed"] for executor in executors)
# Sum the totalDuration reported by every executor
def get_total_time(executors):
    """Return the sum of ``totalDuration`` over all executors.

    Args:
        executors: Iterable of dicts, each carrying a ``"totalDuration"``
            key. NOTE(review): presumably milliseconds of task time, as
            reported by the Spark REST API; confirm against the API
            documentation.

    Returns:
        The total of all ``totalDuration`` values; ``0`` for an empty input.
    """
    return sum(executor["totalDuration"] for executor in executors)
def _read_meta_values(meta_path):
    """Load the ``key,value`` rows of *meta_path* into a dict, in file order."""
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation asks for.
    with open(meta_path, "r", newline="") as meta_file:
        reader = csv.DictReader(
            meta_file, fieldnames=["key", "value"], delimiter=","
        )
        return {row["key"]: row["value"] for row in reader}


def measure_performance(model, dataset, file, meta_path="data.libsvm.meta"):
    """Time ``model.fit(dataset)`` and append one result row to a CSV file.

    After fitting, the executor summary is fetched through ``get_app_id`` and
    ``get_executor_summary`` and reduced with ``get_total_memory_usage``. The
    row written is the elapsed time, that memory figure, and every value from
    the dataset's meta file. A header row is written first when the output
    file is empty.

    Args:
        model: Object exposing ``fit(dataset)``.
        dataset: Passed unchanged to ``model.fit``.
        file: Path of the CSV file to append to (created if missing).
        meta_path: Path of the comma-separated ``key,value`` meta file whose
            entries are appended as extra columns.

    Returns:
        None. The result is the row appended to ``file``.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    model.fit(dataset)
    total_time = time.perf_counter() - start_time

    # Total memory reported by all executors of the running application.
    app_id = get_app_id()
    executors = get_executor_summary(app_id)
    memory_usage = get_total_memory_usage(executors)

    meta_values = _read_meta_values(meta_path)

    with open(file, "a", newline="") as f:
        writer = csv.writer(f)
        # In append mode the position starts at end-of-file, so 0 means the
        # file is empty and still needs its header.
        if f.tell() == 0:
            writer.writerow(["Total Time", "Memory Usage", *meta_values.keys()])
        writer.writerow([total_time, memory_usage, *meta_values.values()])
    # No explicit `del` of the arguments here: it would only unbind local
    # names; the caller's references keep the objects alive regardless.