-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean_me.py
23 lines (17 loc) · 958 Bytes
/
clean_me.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import pandas as pd
from os import path
csv_file_name = 'last_test_alberto.csv'
data_path = path.join('data', csv_file_name)
cleaned_data_path = path.join('data', f'cleaned-{csv_file_name}')
df = pd.read_csv(data_path)
list_of_column_names = list(df.columns)
df_no_empty_rows = df.dropna(axis=0, how='all', subset=list_of_column_names[1:]) #Drop rows that have all cells empty
row_iterator = df_no_empty_rows.iterrows()
iNextRow, nextRow = next(row_iterator)
# Iterate through rows and merge any two rows that have the same time
for i, row in row_iterator:
if row['Time'] == nextRow['Time']:
df_no_empty_rows.at[i, 'ceph-gw3.gridpp.rl.ac.uk'] += nextRow['ceph-gw3.gridpp.rl.ac.uk']
df_no_empty_rows = df_no_empty_rows[df_no_empty_rows[i] == row['Time']]
# Mika: it doesn't look like the above 7 lines do anything, which is why they're not present in ./performanceChecker.py.
df_no_empty_rows.to_csv(cleaned_data_path, index=False)