-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyses.py
executable file
·144 lines (117 loc) · 4.06 KB
/
analyses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env uv run
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
# Copyright 2023 Sam Harrison
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from solar_analyses import analysis, modelling, plots, utilities
# -----------------------------------------------------------------------------
figure_dir = Path("figures")
figures = {}
# -----------------------------------------------------------------------------
# Load and preprocess data
reports = list(Path("reports").glob("Energy_balance_Monthly_report_*.csv"))
df = pd.concat([utilities.load_report(report) for report in reports])
df = df.sort_index()
# Convert to kWh
df = df / 1000.0
# Exclude data from before solar system was switched on
df = df.loc[df.index >= "2022-12-21", :]
# -----------------------------------------------------------------------------
# Basic analyses of the raw data
# Summarise total production by year
print(
df["Total production"]
.groupby(lambda x: x.year)
.aggregate(["sum", "count"])
.rename_axis("Year", axis="index")
.rename(columns={"sum": "Total production (kWh)", "count": "# days"})
.to_markdown()
)
print()
# Compare monthly breakdown to the predictions from Dunedin Solar
print(
analysis.compare_with_predictions(df, utilities.load_predictions()).to_markdown(
floatfmt=".0f"
)
)
print()
# Generate some initial plots
figures = {**figures, **plots.plot_raw_data(df)}
# -----------------------------------------------------------------------------
# Fit the model
stan_fit = modelling.fit_model(df)
# -----------------------------------------------------------------------------
# Generate summary plots of posterior
figures = {
**figures,
**plots.plot_annual_variation(df, stan_fit),
**plots.plot_optimal_production(df, stan_fit),
**plots.plot_weather_effect(df, stan_fit),
}
# -----------------------------------------------------------------------------
# Save and display figures
for name, fig in figures.items():
for extension in ["jpg", "pdf"]:
fig.savefig(figure_dir / (name + "." + extension))
plt.show()
# -----------------------------------------------------------------------------
# Prototype code
# for n in range(10):
# plt.figure()
# plt.hist(
# stan_fit.loc[n, stan_fit.columns.str.startswith("weather_effect")],
# bins=25,
# density=True,
# rwidth=0.9,
# )
# x = np.linspace(0.0001, 0.9999, 500)
# p = 0.25
# plt.plot(
# x,
# (1.0 - p) * scipy.stats.beta.pdf(x, 2.0, 2.0)
# + p * scipy.stats.beta.pdf(x, 10.0, 1.0),
# )
# plt.figure()
# plt.hist(stan_fit["max"], bins=99)
# plt.figure()
# plt.hist(stan_fit["min"], bins=99)
# plt.figure()
# plt.hist(stan_fit["saturation"], bins=99)
# fig, ax = plt.subplots()
# plt.plot(
# stan_fit.loc[:, stan_fit.columns.str.startswith("weather_effect")]
# .to_numpy()
# .flatten(),
# stan_fit.loc[:, stan_fit.columns.str.startswith("lambda")].to_numpy().flatten(),
# ".",
# markersize=2,
# )
phase = (
(365 * stan_fit.loc[:, "phase"] / (2 * math.pi))
.apply(lambda x: pd.to_datetime("2001-01-01") - pd.DateOffset(math.floor(x)))
.value_counts()
)
fig, ax = plt.subplots()
ax.bar(phase.index, phase / len(stan_fit))
ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%d-%b"))
ax.set_xlabel(r"Phase ($\delta$)")
ax.set_ylabel("Probability density")
fig.autofmt_xdate()
plt.show()
# -----------------------------------------------------------------------------