Skip to content

Commit

Permalink
Changes made to fix issue with Python 3.9 submissions and start of file progress tracking
Browse files Browse the repository at this point in the history
  • Loading branch information
MitchellAV committed Sep 2, 2024
1 parent 43e9a47 commit 929a360
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 214 deletions.
5 changes: 0 additions & 5 deletions compressions/2/pvanalytics-submission/requirements.txt

This file was deleted.

Binary file not shown.
6 changes: 6 additions & 0 deletions compressions/2/pvanalytics_submission/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
matplotlib==3.9.1
numpy==1.26.4
pandas==2.2.2
pvanalytics==0.2.0
pvlib==0.10.5
ruptures==1.1.8
Original file line number Diff line number Diff line change
@@ -1,151 +1,128 @@
"""
PVAnalytics-CPD based module. This module will be uploaded by the user
and tested using the data sets accordingly.
"""

import pandas as pd
from pvanalytics.quality.time import shifts_ruptures
from pvanalytics.features import daytime
from pvanalytics.quality.outliers import zscore
from pvanalytics.quality import gaps
import pvlib
import matplotlib.pyplot as plt
import pvanalytics
import ruptures


def detect_time_shifts(
    time_series, latitude, longitude, data_sampling_frequency
):
    """
    Master function for testing for time shifts in a series and returning
    time-shifted periods.

    Parameters
    ----------
    time_series : pandas.Series
        Power or irradiance data stream with a DatetimeIndex.
    latitude : float
        Site latitude in decimal degrees.
    longitude : float
        Site longitude in decimal degrees.
    data_sampling_frequency : int
        Sampling interval of the series, in minutes.

    Returns
    -------
    pandas.Series
        Estimated time shift per date, indexed by calendar date. If the
        series is empty after quality filtering, a zero-valued series
        indexed by the original dates is returned instead.
    """
    # Save the dates of the time series index for reindexing at the end
    date_index = pd.Series(time_series.index.date).drop_duplicates()
    # Build the pandas frequency string once. "min" replaces the "T"
    # offset alias, which is deprecated as of pandas 2.x (pandas 2.2.2 is
    # pinned in requirements.txt).
    sampling_freq = str(data_sampling_frequency) + "min"
    # Data pre-processing:
    # 1) Removal of frozen/stuck data
    # 2) Removal of data periods with low data 'completeness'
    # 3) Removal of negative data
    # 4) Removal of outliers via z-score filter
    # REMOVE STALE DATA (that isn't during nighttime periods)
    # Day/night mask
    daytime_mask = daytime.power_or_irradiance(time_series)
    # Stale data mask: only stale values during daytime count as issues
    stale_data_mask = gaps.stale_values_round(
        time_series, window=3, decimals=2
    )
    stale_data_mask = stale_data_mask & daytime_mask

    # REMOVE NEGATIVE DATA
    negative_mask = time_series < 0

    # FIND ABNORMAL PERIODS: flag whole days whose minimum never drops
    # below 10% of the overall series mean
    daily_min = time_series.resample("D").min()
    series_min = 0.1 * time_series.mean()
    erroneous_mask = daily_min >= series_min
    erroneous_mask = erroneous_mask.reindex(
        index=time_series.index, method="ffill", fill_value=False
    )
    # FIND OUTLIERS (Z-SCORE FILTER)
    zscore_outlier_mask = zscore(time_series, zmax=3.5, nan_policy="omit")

    # Filter the time series, taking out all of the issues
    issue_mask = (
        (~stale_data_mask)
        & (~negative_mask)
        & (~erroneous_mask)
        & (~zscore_outlier_mask)
    )

    time_series = time_series[issue_mask]
    time_series = time_series.asfreq(sampling_freq)
    # Data completeness
    # Trim the series based on daily completeness score
    trim_series_mask = pvanalytics.quality.gaps.trim_incomplete(
        time_series,
        minimum_completeness=0.25,
        freq=sampling_freq,
    )
    time_series = time_series[trim_series_mask]

    # Guard clause: nothing survived quality filtering, so report a zero
    # shift for every original date.
    if len(time_series) == 0:
        return pd.Series(0, index=date_index)

    # Calculate a nighttime offset
    # Mask daytime periods for the time series
    daytime_mask = daytime.power_or_irradiance(
        time_series,
        freq=sampling_freq,
        low_value_threshold=0.005,
    )
    # Get the modeled sunrise and sunset time series based on the system's
    # latitude-longitude coordinates
    modeled_sunrise_sunset_df = (
        pvlib.solarposition.sun_rise_set_transit_spa(
            time_series.index, latitude, longitude
        )
    )

    # Calculate the midday point between sunrise and sunset for each day
    # in the modeled irradiance series
    modeled_midday_series = (
        modeled_sunrise_sunset_df["sunrise"]
        + (
            modeled_sunrise_sunset_df["sunset"]
            - modeled_sunrise_sunset_df["sunrise"]
        )
        / 2
    )

    # Generate the sunrise, sunset, and halfway pts for the data stream
    sunrise_series = daytime.get_sunrise(daytime_mask)
    sunset_series = daytime.get_sunset(daytime_mask)
    midday_series = sunrise_series + ((sunset_series - sunrise_series) / 2)
    # Convert the midday and modeled midday series to daily values
    midday_series_daily = midday_series.resample("D").mean()
    modeled_midday_series_daily = modeled_midday_series.resample("D").mean()

    # Express both midday series as minutes since midnight so they can be
    # compared numerically
    midday_series_daily = (
        midday_series_daily.dt.hour * 60
        + midday_series_daily.dt.minute
        + midday_series_daily.dt.second / 60
    )
    modeled_midday_series_daily = (
        modeled_midday_series_daily.dt.hour * 60
        + modeled_midday_series_daily.dt.minute
        + modeled_midday_series_daily.dt.second / 60
    )

    # Estimate the time shifts by comparing the modelled midday point to
    # the measured midday point. The boolean "is shifted" flag from
    # shifts_ruptures is not needed here.
    _, time_shift_series = shifts_ruptures(
        midday_series_daily,
        modeled_midday_series_daily,
        period_min=15,
        shift_min=15,
        zscore_cutoff=0.75,
    )
    # Invert the sign of the estimated shifts (output convention)
    time_shift_series = -1 * time_shift_series

    # Midday difference series between modeled and measured midday, in
    # minutes, used only for the debug visualization below
    midday_diff_series = (
        modeled_midday_series.resample("D").mean()
        - midday_series.resample("D").mean()
    ).dt.total_seconds() / 60

    # NOTE(review): plt.show() blocks until the figure window is closed
    # when an interactive backend is active — consider removing these four
    # lines for automated/headless runs.
    midday_diff_series.plot()
    time_shift_series.plot()
    plt.show()
    plt.close()
    # Re-key the result by calendar date
    time_shift_series.index = time_shift_series.index.date
    return time_shift_series
"""
PVAnalytics-CPD based module. This module will be uploaded by the user
and tested using the data sets accordingly.
"""

import pandas as pd
from pvanalytics.quality.time import shifts_ruptures
from pvanalytics.features import daytime
from pvanalytics.quality.outliers import zscore
from pvanalytics.quality import gaps
import pvlib
import matplotlib.pyplot as plt
import pvanalytics
import ruptures

def detect_time_shifts(time_series,
                       latitude, longitude,
                       data_sampling_frequency):
    """
    Master function for testing for time shifts in a series and returning
    time-shifted periods.
    """
    # Remember the distinct calendar dates so an empty result can still be
    # reported per-date at the end.
    original_dates = pd.Series(time_series.index.date).drop_duplicates()

    # --- Quality filtering -------------------------------------------------
    # Masks built below: stale (daytime only), negative, abnormal days, and
    # z-score outliers. Rows matching any mask are dropped.
    day_mask = daytime.power_or_irradiance(time_series)
    frozen_mask = gaps.stale_values_round(time_series,
                                          window=3,
                                          decimals=2) & day_mask

    below_zero_mask = time_series < 0

    # Days whose minimum stays at or above 10% of the series mean are
    # treated as abnormal; broadcast the daily flag back to every sample.
    per_day_minimum = time_series.resample('D').min()
    abnormal_day_mask = (per_day_minimum >= 0.1 * time_series.mean())
    abnormal_day_mask = abnormal_day_mask.reindex(index=time_series.index,
                                                  method='ffill',
                                                  fill_value=False)

    outlier_mask = zscore(time_series, zmax=3.5, nan_policy='omit')

    keep_mask = ~(frozen_mask | below_zero_mask |
                  abnormal_day_mask | outlier_mask)

    time_series = time_series[keep_mask]
    time_series = time_series.asfreq(str(data_sampling_frequency) + 'T')

    # Drop days that fall below the daily completeness threshold.
    complete_mask = pvanalytics.quality.gaps.trim_incomplete(
        time_series,
        minimum_completeness=0.25,
        freq=str(data_sampling_frequency) + 'T')
    time_series = time_series[complete_mask]

    # Guard clause: everything was filtered out, so return zero shifts.
    if len(time_series) == 0:
        return pd.Series(0, index=original_dates)

    # --- Midday comparison -------------------------------------------------
    # Re-derive the day/night mask on the cleaned series.
    day_mask = daytime.power_or_irradiance(
        time_series,
        freq=str(data_sampling_frequency) + 'T',
        low_value_threshold=0.005)

    # Modeled sunrise/sunset from the site's coordinates.
    sun_events = pvlib.solarposition.sun_rise_set_transit_spa(
        time_series.index, latitude, longitude)

    # Modeled midday is halfway between modeled sunrise and sunset.
    modeled_midday = (sun_events['sunrise'] +
                      (sun_events['sunset'] - sun_events['sunrise']) / 2)

    # Measured midday is halfway between detected sunrise and sunset.
    detected_sunrise = daytime.get_sunrise(day_mask)
    detected_sunset = daytime.get_sunset(day_mask)
    measured_midday = detected_sunrise + \
        (detected_sunset - detected_sunrise) / 2

    # Collapse both midday series to one value per day.
    measured_midday_daily = measured_midday.resample('D').mean()
    modeled_midday_daily = modeled_midday.resample('D').mean()

    # Re-express each daily midday as minutes past midnight.
    measured_midday_daily = (measured_midday_daily.dt.hour * 60 +
                             measured_midday_daily.dt.minute +
                             measured_midday_daily.dt.second / 60)
    modeled_midday_daily = (modeled_midday_daily.dt.hour * 60 +
                            modeled_midday_daily.dt.minute +
                            modeled_midday_daily.dt.second / 60)

    # Changepoint-based shift estimation; the boolean flag is unused.
    _is_shifted, shift_series = shifts_ruptures(measured_midday_daily,
                                                modeled_midday_daily,
                                                period_min=15,
                                                shift_min=15,
                                                zscore_cutoff=0.75)
    shift_series = -1 * shift_series

    # Daily modeled-minus-measured midday difference, in minutes, for the
    # visualization below.
    midday_delta = (modeled_midday.resample('D').mean() -
                    measured_midday.resample('D').mean()
                    ).dt.total_seconds() / 60

    midday_delta.plot()
    shift_series.plot()
    plt.show()
    plt.close()
    # Index the result by calendar date.
    shift_series.index = shift_series.index.date
    return shift_series
17 changes: 4 additions & 13 deletions valhub/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.11-slim as base
FROM python:3.11-slim

# docker build --progress=plain -t "hmm:Dockerfile" -f valhub/Dockerfile .
WORKDIR /root
Expand All @@ -13,16 +13,7 @@ COPY . .


# Set Django admin username, password, and other environment variables
ARG admin_username
ARG admin_password
ARG admin_email
ARG djangosk

ENV DJANGO_SUPERUSER_USERNAME=$admin_username
ENV DJANGO_SUPERUSER_PASSWORD=$admin_password
ENV DJANGO_SUPERUSER_EMAIL=$admin_email
ENV DJANGO_SECRET_KEY=${djangosk}
ENV PORT 8005
ENV PORT=8005

RUN apt-get update -qq
RUN apt-get install -y libpq-dev python3-psycopg2
Expand All @@ -34,6 +25,6 @@ WORKDIR /root/valhub
RUN python3 -m pip install -r requirements.txt

# RUN cd ./valhub
RUN python3 manage.py makemigrations
RUN python3 manage.py collectstatic --noinput
# RUN python3 manage.py makemigrations
# RUN python3 manage.py collectstatic --noinput
CMD ["/usr/bin/supervisord", "-c", "/root/valhub/supervisord.conf"]
14 changes: 14 additions & 0 deletions valhub/supervisord.conf
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
[supervisord]
nodaemon = true

[program:valhub_makemigrations]
directory=/root/valhub
command=python3 /root/valhub/manage.py makemigrations
autostart=true
autorestart=false

[program:valhub_collectstatic]
directory=/root/valhub
command=python3 /root/valhub/manage.py collectstatic --noinput
autostart=true
autorestart=false
depends_on=valhub_makemigrations

[program:valhub_webserver]
directory=/root/valhub
command=python3 /root/valhub/manage.py runserver 0.0.0.0:8005
stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0
redirect_stderr=true
depends_on=valhub_collectstatic
autostart=true

[program:valhub_migrate]
Expand Down
9 changes: 6 additions & 3 deletions workers/src/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ RUN apt-get update && apt-get install -y \
libhdf5-dev \
python3-dev \
cmake \
pkg-config
pkg-config \
unzip

COPY unzip.py .
# COPY unzip.py .
COPY requirements.txt .

# Install the Python dependencies for the submission wrapper
Expand All @@ -27,12 +28,14 @@ COPY $zip_file .

# Unzip the submission package

RUN python -m unzip $zip_file submission
RUN unzip -j $zip_file -d submission

WORKDIR /app/submission

# Install the Python dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install typing-extensions


# Set the working directory in the container
Expand Down
2 changes: 0 additions & 2 deletions workers/src/docker/submission/requirements.txt

This file was deleted.

10 changes: 0 additions & 10 deletions workers/src/docker/submission/submission_wrapper.py

This file was deleted.

Loading

0 comments on commit 929a360

Please sign in to comment.