Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Final Cleanup #407

Merged
merged 4 commits into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ authors:
identifiers:
- type: doi
value: 'https://zenodo.org/doi/10.5281/zenodo.7062459'
version: 0.5.0
version: 0.6.0
license: MIT
date-released: '2024-08-01'
date-released: '2024-12-21'
2,526 changes: 1,354 additions & 1,172 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pip install .
The pipeline can be run as follows:
```bash
cd src/oge
python data_pipeline.py --year 2022
python data_pipeline.py --year 2023
```
independently of the installation method you chose.

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "hatchling.build"

[project]
name = "oge"
version = "0.5.0"
version = "0.6.0"
requires-python = ">=3.11,<3.12"
readme = "README.md"
authors = [
Expand Down
2 changes: 1 addition & 1 deletion src/oge/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"WDS",
]

TIME_RESOLUTIONS = {"hourly": "H", "monthly": "M", "annual": "A"}
TIME_RESOLUTIONS = {"hourly": "h", "monthly": "M", "annual": "A"}

# derived from table 2.4-4 of the EPA's AP-42 document
nox_lb_per_mmbtu_flared_landfill_gas = 0.078
Expand Down
33 changes: 17 additions & 16 deletions src/oge/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,21 @@ def main(args):
helpers.create_subplant_attributes_table(
monthly_subplant_data, plant_attributes, primary_fuel_table, year, path_prefix
)
# For years before 2019, export the plant attributes table now
if year < earliest_hourly_data_year:
# export plant static attributes to csv
output_data.output_intermediate_data(
plant_attributes,
"plant_static_attributes",
path_prefix,
year,
args.skip_outputs,
)
if not args.skip_outputs:
plant_attributes.to_csv(
results_folder(f"{path_prefix}plant_data/plant_static_attributes.csv"),
index=False,
)

validation.check_for_complete_monthly_timeseries(
df=monthly_subplant_data,
Expand Down Expand Up @@ -447,22 +462,8 @@ def main(args):
del monthly_subplant_data
del fleet_data

# For 2019 onward, calculate hourly data, otherwise skip these steps
if year < earliest_hourly_data_year:
# export plant static attributes to csv
output_data.output_intermediate_data(
plant_attributes,
"plant_static_attributes",
path_prefix,
year,
args.skip_outputs,
)
if not args.skip_outputs:
plant_attributes.to_csv(
results_folder(f"{path_prefix}plant_data/plant_static_attributes.csv"),
index=False,
)
elif year >= earliest_hourly_data_year:
# calculate hourly outputs for 2019 and later years (year >= earliest_hourly_data_year)
if year >= earliest_hourly_data_year:
# 13. Clean and Reconcile EIA-930 data
################################################################################
logger.info("13. Cleaning EIA-930 data")
Expand Down
1 change: 1 addition & 0 deletions src/oge/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ def download_raw_eia923(year: int):
year (int): a four-digit year.
"""
if year < 2008:
os.makedirs(downloads_folder("eia923"), exist_ok=True)
logger.warning(
"EIA-923 data is not available before 2008. "
"Downloading EIA-906/920 files instead"
Expand Down
8 changes: 4 additions & 4 deletions src/oge/eia930.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def manual_930_adjust(raw: pd.DataFrame):
new = raw[cols].copy()
new.loc[raw.index < "2021-11-01 00:00:00+00", cols] = new.loc[
raw.index < "2021-11-01 00:00:00+00", cols
].shift(1, freq="H")
].shift(1, freq="h")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")

Expand All @@ -466,7 +466,7 @@ def manual_930_adjust(raw: pd.DataFrame):
& (raw.index < "2022-06-16 07:00:00+00")
),
cols,
].shift(1, freq="H")
].shift(1, freq="h")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")

Expand Down Expand Up @@ -500,7 +500,7 @@ def manual_930_adjust(raw: pd.DataFrame):
new = raw[cols].copy()
new.loc[raw.index < "2021-10-25 00:00:00+00", cols] = new.loc[
raw.index < "2021-10-25 00:00:00+00", cols
].shift(-7, freq="H")
].shift(-7, freq="h")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")

Expand All @@ -509,7 +509,7 @@ def manual_930_adjust(raw: pd.DataFrame):
new = raw[col].copy()
new.loc["2021-01-01 08:00:00+00:00":"2022-01-01 07:00:00+00:00", col] = new.loc[
"2021-01-01 08:00:00+00:00":"2022-01-01 07:00:00+00:00", col
].shift(4, freq="H")
].shift(4, freq="h")
raw = raw.drop(columns=col)
raw = pd.concat([raw, new], axis="columns")

Expand Down
23 changes: 17 additions & 6 deletions src/oge/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,12 +1103,23 @@ def search_location_from_coordinates(latitude: float, longitude: float) -> tuple
Returns:
tuple[str]: state, county and city of the location.
"""
try:
address = geolocator.reverse(f"{latitude}, {longitude}").raw["address"]
if address["country_code"] != "us":
return pd.NA, pd.NA, pd.NA
except ReadTimeoutError:
return pd.NA, pd.NA, pd.NA

# try to look up the address. This often fails when contacting the server, so
# retry once; if the retry also fails, return NA values
for i in range(0, 2):
while True:
try:
address = geolocator.reverse(f"{latitude}, {longitude}").raw["address"]
if address["country_code"] != "us":
return pd.NA, pd.NA, pd.NA
except (ReadTimeoutError, GeocoderUnavailable) as error:
if i < 1:
logger.warning(f"{error} for reverse address lookup")
continue
else:
logger.warning(f"{error} for reverse address lookup, returning NA")
return pd.NA, pd.NA, pd.NA
break

# Check for State
state = (
Expand Down
16 changes: 15 additions & 1 deletion src/oge/output_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ def write_plant_data_to_results(
[
"plant_id_eia",
"plant_name_eia",
"ba_code",
"fuel_category",
"capacity_mw",
"ba_code",
Expand All @@ -344,6 +343,21 @@ def write_plant_data_to_results(
validate="m:1",
)

# rearrange columns
df = df[
[
"plant_id_eia",
"plant_name_eia",
"fuel_category",
"capacity_mw",
"ba_code",
"city",
"county",
"state",
]
+ DATA_COLUMNS
]

# calculate emission rates
df = add_generated_emission_rate_columns(df)

Expand Down
2 changes: 1 addition & 1 deletion src/oge/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def day_hour_heatmap(timeseries: pd.Series, year: int = 2022):
timeseries.index = timeseries.index.tz_convert("EST")
hours_index = pd.DataFrame(
index=pd.date_range(
f"{year}-01-01 T00:00", f"{year}-12-31 T23:00", freq="H"
f"{year}-01-01 T00:00", f"{year}-12-31 T23:00", freq="h"
).tz_localize("EST")
)
hours_index = hours_index.merge(
Expand Down
Loading