-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPrepareDataset.py
122 lines (80 loc) · 5.1 KB
/
PrepareDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import numpy as np
class PrepareDataset():
    """Turn a raw weather/power DataFrame into scaled train/val/test arrays.

    The methods are meant to be called in this order; every preparation step
    returns ``self`` so the calls can be chained::

        prep = (PrepareDataset(df)
                .early_preparation()
                .set_rows_to_zeros()        # optional
                .split_power()
                .standard_scaled_dataset())
        x_train, y_train = prep.train()

    The rows are split positionally: the first 80 % are the training set,
    the next 10 % the validation set and the remainder the test set.
    """

    # Feature columns blanked by ``set_rows_to_zeros`` on zero-power rows.
    _ZEROED_COLUMNS = (
        'Temperature [K]',
        'Dew Point [K]',
        'Pressure [hPa]',
        'Humidity [%]',
        'Cloudiness [%]',
        'Wind in axis X',
        'Wind in axis Y',
        'Day sin',
        'Day cos',
        'Year sin',
        'Year cos',
    )

    def __init__(self, dataset: pd.DataFrame) -> None:
        """Store *dataset* and remember its row count.

        Args:
            dataset: Raw frame containing the columns consumed by
                ``early_preparation`` and ``split_power``.
        """
        self.dataset = dataset
        # NOTE: the attribute name is misspelled but public, so it is kept
        # as-is for backward compatibility.
        self.lenght = len(self.dataset)

    def early_preparation(self) -> "PrepareDataset":
        """Replace the raw time and wind columns with numeric features.

        * ``'Time - Unix Format'`` is saved in ``self.unix`` and removed.
        * ``'Wind Speed [m/s]'`` (saved in ``self.wind_speed``) and
          ``'Wind Degree [°]'`` become the vector components
          ``'Wind in axis X'`` / ``'Wind in axis Y'``.
        * ``'Time - y/m/d/h Format'`` is parsed with ``'%Y.%m.%d %H'``
          (saved in ``self.date_time``) and encoded as sine/cosine signals
          with a period of one day and of one year.

        Returns:
            ``self``, to allow chaining.
        """
        self.unix = self.dataset['Time - Unix Format']
        # ``drop`` returns a new frame, so the in-place ``pop`` calls below
        # do not modify the DataFrame the caller passed in.
        self.dataset = self.dataset.drop(['Time - Unix Format'], axis=1)

        self.wind_speed = self.dataset.pop('Wind Speed [m/s]')
        # Degrees -> radians for the trigonometric functions.
        wind_radians = self.dataset.pop('Wind Degree [°]') * np.pi / 180
        self.dataset['Wind in axis X'] = self.wind_speed * np.cos(wind_radians)
        self.dataset['Wind in axis Y'] = self.wind_speed * np.sin(wind_radians)

        date_time = pd.to_datetime(
            self.dataset.pop('Time - y/m/d/h Format'), format='%Y.%m.%d %H'
        )
        timestamp = date_time.map(pd.Timestamp.timestamp)

        # Period lengths in seconds, matching the unit of ``timestamp``.
        day = 24 * 60 * 60
        year = (365.2425) * day
        self.dataset['Day sin'] = np.sin(timestamp * (2 * np.pi / day))
        self.dataset['Day cos'] = np.cos(timestamp * (2 * np.pi / day))
        self.dataset['Year sin'] = np.sin(timestamp * (2 * np.pi / year))
        self.dataset['Year cos'] = np.cos(timestamp * (2 * np.pi / year))

        self.date_time = date_time
        return self

    def set_rows_to_zeros(self) -> "PrepareDataset":
        """Zero every feature on rows whose total average power is 0.

        Must be called after ``early_preparation`` (it writes to the wind and
        time-signal columns created there) and before ``split_power`` (it
        reads ``'Total Average Power [W]'``).

        The rows are selected with a boolean mask, so this works for any
        index, not only the default ``0..n-1`` one.

        Returns:
            ``self``, to allow chaining.
        """
        no_power = self.dataset['Total Average Power [W]'] == 0
        self.dataset.loc[no_power, list(self._ZEROED_COLUMNS)] = 0
        return self

    def split_power(self) -> "PrepareDataset":
        """Move the target column out of the feature frame.

        ``'Total Average Power [W]'`` is removed from ``self.dataset`` and
        stored in ``self.power`` as a NumPy array floor-divided by ``10**6``.

        Returns:
            ``self``, to allow chaining.
        """
        self.power = np.array(self.dataset.pop('Total Average Power [W]')) // (10**6)
        return self

    def standard_scaled_dataset(self) -> "PrepareDataset":
        """Compute the per-column statistics used for standardisation.

        The mean and standard deviation are taken over the whole of
        ``self.dataset`` (all splits) and stored in ``self.dataset_mean`` and
        ``self.dataset_std``; the frame itself is not modified here.

        Returns:
            ``self``, to allow chaining.
        """
        self.dataset_mean = self.dataset.mean()
        self.dataset_std = self.dataset.std()
        return self

    def _scaled_rows(self, start, stop) -> np.ndarray:
        """Return rows ``start:stop`` standardised, shaped (rows, columns, 1)."""
        rows = self.dataset.iloc[start:stop]
        rows = (rows - self.dataset_mean) / self.dataset_std
        return np.array(rows).reshape(rows.shape[0], rows.shape[1], 1)

    def train(self) -> tuple:
        """Return ``(features, power)`` for the first 80 % of the rows.

        The features are also stored in ``self.train_dataset``.
        """
        stop = int(self.lenght * 0.8)
        self.train_dataset = self._scaled_rows(0, stop)
        return self.train_dataset, self.power[0:stop]

    def val(self) -> tuple:
        """Return ``(features, power)`` for the rows between 80 % and 90 %.

        The features are also stored in ``self.val_dataset``.
        """
        start = int(self.lenght * 0.8)
        stop = int(self.lenght * 0.9)
        self.val_dataset = self._scaled_rows(start, stop)
        return self.val_dataset, self.power[start:stop]

    def test(self) -> tuple:
        """Return ``(features, power)`` for the last 10 % of the rows.

        The features are also stored in ``self.test_dataset``.
        """
        start = int(self.lenght * 0.9)
        self.test_dataset = self._scaled_rows(start, None)
        return self.test_dataset, self.power[start:]
class PrepareDatasetTimeDistributed(PrepareDataset):
    """Variant of ``PrepareDataset`` that groups rows into subsequences.

    Each split is reshaped from ``(rows, columns, 1)`` into
    ``(rows // subsequences, subsequences, columns, 1)`` and the power
    targets into ``(rows // subsequences, subsequences)``.

    If a split's row count is not a multiple of ``subsequences``, the
    trailing rows that do not fill a complete subsequence are dropped (the
    reshape would otherwise raise ``ValueError``).
    """

    def __init__(self, dataset: pd.DataFrame, subsequences: int) -> None:
        """Store *dataset* and the number of rows per subsequence.

        Args:
            dataset: Raw frame, as accepted by ``PrepareDataset``.
            subsequences: Number of consecutive rows grouped together.
        """
        super().__init__(dataset)
        self.subsequences = subsequences

    def _windowed_split(self, start, stop) -> tuple:
        """Return ``(features, targets)`` for rows ``start:stop``, grouped.

        The features are standardised with ``self.dataset_mean`` /
        ``self.dataset_std`` before being grouped.
        """
        rows = self.dataset.iloc[start:stop]
        rows = (rows - self.dataset_mean) / self.dataset_std
        power = self.power[start:stop]

        # Integer division avoids float rounding; only whole groups are kept.
        groups = rows.shape[0] // self.subsequences
        usable = groups * self.subsequences

        features = np.array(rows)[:usable].reshape(
            groups, self.subsequences, rows.shape[1], 1
        )
        targets = power[:usable].reshape(groups, self.subsequences)
        return features, targets

    def train(self) -> tuple:
        """Return grouped ``(features, power)`` for the first 80 % of rows.

        The features are also stored in ``self.train_dataset``.
        """
        self.train_dataset, targets = self._windowed_split(
            0, int(self.lenght * 0.8)
        )
        return self.train_dataset, targets

    def val(self) -> tuple:
        """Return grouped ``(features, power)`` for rows between 80 % and 90 %.

        The features are also stored in ``self.val_dataset``.
        """
        self.val_dataset, targets = self._windowed_split(
            int(self.lenght * 0.8), int(self.lenght * 0.9)
        )
        return self.val_dataset, targets

    def test(self) -> tuple:
        """Return grouped test data for the last 10 % of rows.

        Returns:
            A 3-tuple ``(features, power, first_power)`` where ``first_power``
            holds the power value of the first row of every subsequence.
            The features are also stored in ``self.test_dataset``.
        """
        self.test_dataset, targets = self._windowed_split(
            int(self.lenght * 0.9), None
        )
        # Same values as taking every ``subsequences``-th element of the
        # (trimmed) flat power slice.
        first_power = targets.reshape(-1)[::self.subsequences]
        return self.test_dataset, targets, first_power