-
Notifications
You must be signed in to change notification settings - Fork 0
/
allsteps.py
66 lines (44 loc) · 2.7 KB
/
allsteps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
# Load the generated dataset from the CSV file.
data_frame = pd.read_csv("generated_data.csv")

# Preprocess categorical features to numerical.
# 'Workload_Intensity' is encoded ordinally; any value other than
# 'Low' / 'Moderate' / 'High' maps to NaN.
data_frame['Workload_Intensity_Num'] = data_frame['Workload_Intensity'].map(
    {'Low': 0, 'Moderate': 1, 'High': 2}
)
# 'Network_Traffic' is a string column; keep the first number found in each
# value. The pattern is a raw string (a plain '\d' is an invalid escape
# sequence) and accepts an optional fractional part so that e.g. "12.5" is
# not truncated to 12. expand=False yields a Series rather than a one-column
# DataFrame. Values with no digits become NaN.
data_frame['Network_Traffic_Num'] = (
    data_frame['Network_Traffic']
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)

# Select the features for analysis (numeric columns only).
features = [
    'CPU_Utilization',
    'Memory_Usage',
    'Power_Consumption',
    'CPU_Temperature',
    'Workload_Intensity_Num',
    'Network_Traffic_Num',
]
# Step 1: Outlier Detection using Isolation Forest
# contamination=0.05 tells the model to flag roughly 5% of the rows.
# random_state is fixed so that the flagged rows (and therefore the plots
# produced from the 'outlier' column) are reproducible between runs.
outlier_detector = IsolationForest(contamination=0.05, random_state=42)
# fit_predict returns one label per row, stored alongside the data.
data_frame['outlier'] = outlier_detector.fit_predict(data_frame[features])
# Plot outlier graphs for each feature.
# The 'outlier' column holds 1 / -1; translate it to readable labels once and
# use those as the hue so the legend entries are tied to the right colours.
# (Overriding the legend with plt.legend([...]) assigns labels positionally to
# whatever artists are on the axes, which does not match the hue levels.)
point_labels = data_frame['outlier'].map({1: 'Inliers', -1: 'Outliers'})
for feature in features:
    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        data=data_frame,
        x='Timestamp',
        y=feature,
        hue=point_labels,
        hue_order=['Inliers', 'Outliers'],
        palette={'Inliers': 'blue', 'Outliers': 'red'},
    )
    plt.title(f'Outliers in {feature}')
    plt.xlabel('Timestamp')
    plt.ylabel(feature)
    # Timestamps are long tick labels; rotate them so they do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
# Rest of the code for finding max CPU utilization nodes and saving results
# ...
# Rest of the code for finding max CPU utilization nodes and saving results
# ...
# NOTE: the numeric 'Network_Traffic_Num' column (derived from 'Network_Traffic') is included both when fitting the Isolation Forest and when drawing the per-feature outlier graphs.
# Rest of the code for finding max CPU utilization nodes and saving results
# ...
# NOTE: the 'Network_Traffic' feature is preprocessed by extracting its numeric value with a regular expression and converting it to float. This numeric version ('Network_Traffic_Num') is the one used in the Isolation Forest outlier detection.
# Step 2: Find Nodes with Maximum CPU Utilization at Different Time Stamps
# transform('max') broadcasts each timestamp's maximum back onto its rows, so
# the comparison keeps every row that attains the maximum (ties included).
max_cpu_per_timestamp = data_frame.groupby('Timestamp')['CPU_Utilization'].transform('max')
max_cpu_nodes = data_frame[data_frame['CPU_Utilization'] == max_cpu_per_timestamp]

# Step 3: Save Results to a Separate CSV File
# Build the result table in one step by selecting and renaming columns.
# DataFrame.append (used row by row previously) was removed in pandas 2.0 and
# copied the whole frame on every call.
results = (
    max_cpu_nodes[['Timestamp', 'Node_ID', 'CPU_Utilization']]
    .rename(columns={
        'Node_ID': 'Max_CPU_Utilization_Node',
        'CPU_Utilization': 'Max_CPU_Utilization_Value',
    })
    .reset_index(drop=True)
)
results.to_csv("outliers_max_cpu_utilization.csv", index=False)
print("Results saved to 'outliers_max_cpu_utilization.csv'")