Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add subplant attributes table #405

Merged
merged 4 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/oge/data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -2183,6 +2183,16 @@ def aggregate_subplant_data_to_fleet(
fuel_category_col="fuel_category",
)

# drop subplants that have missing fuel category and no generation or fuel data
# this prevents them from creating blank entries in the power sector results data
ba_fuel_data = ba_fuel_data[
~(
ba_fuel_data["fuel_category"].isna()
& (ba_fuel_data["net_generation_mwh"] == 0)
& (ba_fuel_data["fuel_consumed_for_electricity_mmbtu"] == 0)
)
]

# if the input data is hourly, aggregate at the hourly level
if "datetime_utc" in ba_fuel_data.columns:
agg_cols = ["ba_code", "fuel_category", "datetime_utc", "report_date"]
Expand Down
6 changes: 6 additions & 0 deletions src/oge/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,12 @@ def main(args):
monthly_eia_data_to_shape,
resolution="monthly",
)

# export subplant attributes table
helpers.create_subplant_attributes_table(
monthly_subplant_data, plant_attributes, primary_fuel_table, year, path_prefix
)

validation.check_for_complete_monthly_timeseries(
df=monthly_subplant_data,
df_name="monthly_plant_data",
Expand Down
53 changes: 1 addition & 52 deletions src/oge/gross_to_net_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import oge.validation as validation

from oge.data_cleaning import assign_fuel_type_to_cems
from oge.helpers import create_plant_ba_table, add_subplant_ids_to_df
from oge.helpers import create_plant_ba_table, calculate_subplant_nameplate_capacity
from oge.logging_util import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -497,57 +497,6 @@ def calculate_gross_to_net_conversion_factors(
return gtn_conversions


def calculate_subplant_nameplate_capacity(year):
    """Calculate total nameplate capacity and primary prime mover per subplant.

    Generator records for ``year`` are loaded from the
    ``core_eia860__scd_generators`` table and mapped to subplants by
    ``generator_id``. ``capacity_mw`` is then summed for each
    (``plant_id_eia``, ``subplant_id``) pair, and the ``prime_mover_code`` of the
    generator with the largest ``capacity_mw`` in each pair is attached.

    Args:
        year: the data year passed to the table loader and the subplant mapping.

    Returns:
        A dataframe with columns ``plant_id_eia``, ``subplant_id``,
        ``capacity_mw`` and ``prime_mover_code``, one row per subplant.
    """
    subplant_keys = ["plant_id_eia", "subplant_id"]

    # load generator data
    generators = load_data.load_pudl_table(
        "core_eia860__scd_generators",
        year,
        columns=[
            "plant_id_eia",
            "generator_id",
            "prime_mover_code",
            "capacity_mw",
            "operational_status_code",
        ],
    )

    # map each generator to its subplant
    # NOTE(review): with how_merge="inner", generators without a subplant mapping
    # are presumably dropped here - confirm against add_subplant_ids_to_df
    logger.info("Adding subplant_id to gen_capacity")
    generators = add_subplant_ids_to_df(
        generators,
        year,
        plant_part_to_map="generator_id",
        how_merge="inner",
        validate_merge="1:1",
    )

    # total nameplate capacity of each subplant
    total_capacity = generators.groupby(subplant_keys)["capacity_mw"].sum()
    total_capacity = total_capacity.reset_index()

    # the primary prime mover belongs to the generator whose capacity equals the
    # largest capacity in its subplant; when several generators tie, the first
    # one encountered is kept
    largest_in_subplant = generators.groupby(subplant_keys, dropna=False)[
        "capacity_mw"
    ].transform("max")
    is_largest = largest_in_subplant == generators["capacity_mw"]
    primary_prime_mover = generators[is_largest][
        ["plant_id_eia", "subplant_id", "prime_mover_code"]
    ].drop_duplicates(subset=subplant_keys, keep="first")

    # attach the prime mover to the capacity totals
    return total_capacity.merge(
        primary_prime_mover,
        how="left",
        on=subplant_keys,
        validate="1:1",
    )


def filter_gtn_conversion_factors(gtn_conversions: pd.DataFrame) -> pd.DataFrame:
"""Filters the calculated GTN ratios to remove anomalous or incomplete factors.

Expand Down
206 changes: 145 additions & 61 deletions src/oge/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
latest_validated_year,
current_early_release_year,
)
from oge.filepaths import reference_table_folder, outputs_folder
from oge.filepaths import reference_table_folder, outputs_folder, results_folder

import oge.load_data as load_data
from oge.logging_util import get_logger
Expand Down Expand Up @@ -349,73 +349,56 @@ def assign_fleet_to_subplant_data(

# check that there is no missing ba or fuel codes for subplants with nonzero gen
# for CEMS data, check only units that report positive gross generation
if "gross_generation_mwh" in subplant_data.columns:
missing_fleet_keys = subplant_data[
(
if (
"gross_generation_mwh" in subplant_data.columns
or "net_generation_mwh" in subplant_data.columns
):
if "gross_generation_mwh" in subplant_data.columns:
missing_fleet_keys = subplant_data[
(
(subplant_data["ba_code"].isna())
| (subplant_data[fuel_category_col].isna())
(
(subplant_data["ba_code"].isna())
| (subplant_data[fuel_category_col].isna())
)
& (
(subplant_data["gross_generation_mwh"] > 0)
| (subplant_data["fuel_consumed_for_electricity_mmbtu"] > 0)
)
)
& (
(subplant_data["gross_generation_mwh"] > 0)
| (subplant_data["fuel_consumed_for_electricity_mmbtu"] > 0)
)
)
]
# otherwise, check units that report non-zero net generation
else:
missing_fleet_keys = subplant_data[
(
]
# otherwise, check units that report non-zero net generation
else:
missing_fleet_keys = subplant_data[
(
(subplant_data["ba_code"].isna())
| (subplant_data[fuel_category_col].isna())
)
& (
(subplant_data["net_generation_mwh"] != 0)
| (subplant_data["fuel_consumed_for_electricity_mmbtu"] != 0)
(
(subplant_data["ba_code"].isna())
| (subplant_data[fuel_category_col].isna())
)
& (
(subplant_data["net_generation_mwh"] != 0)
| (subplant_data["fuel_consumed_for_electricity_mmbtu"] != 0)
)
)
]
if len(missing_fleet_keys) > 0:
logger.error(
"The plant attributes table is missing ba_code or fuel_category data for some plants. This will result in incomplete power sector results."
)
]
if len(missing_fleet_keys) > 0:
logger.error(
"The plant attributes table is missing ba_code or fuel_category data for "
"some plants. This will result in incomplete power sector results."
)
logger.error(
missing_fleet_keys.groupby(
[
"plant_id_eia",
"subplant_id",
"ba_code",
fuel_category_col,
],
dropna=False,
)[["net_generation_mwh", "fuel_consumed_for_electricity_mmbtu"]]
.sum()
.to_string()
)
"""raise UserWarning(
"The plant attributes table is missing ba_code or fuel_category data for some plants. This will result in incomplete power sector results."
)"""

logger.info(
"Dropping subplants that have zero fuel consumption and zero generation from "
"fleet aggregation"
)
if "gross_generation_mwh" in subplant_data.columns:
subplant_data = subplant_data[
~(
(subplant_data["gross_generation_mwh"] == 0)
& (subplant_data["fuel_consumed_for_electricity_mmbtu"] == 0)
logger.error(
missing_fleet_keys.groupby(
[
"plant_id_eia",
"subplant_id",
"ba_code",
fuel_category_col,
],
dropna=False,
)[["net_generation_mwh", "fuel_consumed_for_electricity_mmbtu"]]
.sum()
.to_string()
)
]
else:
subplant_data = subplant_data[
~(
(subplant_data["net_generation_mwh"] == 0)
& (subplant_data["fuel_consumed_for_electricity_mmbtu"] == 0)
)
]
pass

return subplant_data

Expand Down Expand Up @@ -1230,3 +1213,104 @@ def add_subplant_ids_to_df(
validation.test_for_missing_subplant_id(df, plant_part_to_map)

return df


def calculate_subplant_nameplate_capacity(year):
    """Calculate total nameplate capacity and primary prime mover per subplant.

    Generator records for ``year`` are loaded from the
    ``core_eia860__scd_generators`` table and mapped to subplants by
    ``generator_id``. ``capacity_mw`` is then summed for each
    (``plant_id_eia``, ``subplant_id``) pair, and the ``prime_mover_code`` of the
    generator with the largest ``capacity_mw`` in each pair is attached.

    Args:
        year: the data year passed to the table loader and the subplant mapping.

    Returns:
        A dataframe with columns ``plant_id_eia``, ``subplant_id``,
        ``capacity_mw`` and ``prime_mover_code``, one row per subplant.
    """
    subplant_keys = ["plant_id_eia", "subplant_id"]

    # load generator data
    generators = load_data.load_pudl_table(
        "core_eia860__scd_generators",
        year,
        columns=[
            "plant_id_eia",
            "generator_id",
            "prime_mover_code",
            "capacity_mw",
            "operational_status_code",
        ],
    )

    # map each generator to its subplant
    # NOTE(review): with how_merge="inner", generators without a subplant mapping
    # are presumably dropped here - confirm against add_subplant_ids_to_df
    logger.info("Adding subplant_id to gen_capacity")
    generators = add_subplant_ids_to_df(
        generators,
        year,
        plant_part_to_map="generator_id",
        how_merge="inner",
        validate_merge="1:1",
    )

    # total nameplate capacity of each subplant
    total_capacity = generators.groupby(subplant_keys)["capacity_mw"].sum()
    total_capacity = total_capacity.reset_index()

    # the primary prime mover belongs to the generator whose capacity equals the
    # largest capacity in its subplant; when several generators tie, the first
    # one encountered is kept
    largest_in_subplant = generators.groupby(subplant_keys, dropna=False)[
        "capacity_mw"
    ].transform("max")
    is_largest = largest_in_subplant == generators["capacity_mw"]
    primary_prime_mover = generators[is_largest][
        ["plant_id_eia", "subplant_id", "prime_mover_code"]
    ].drop_duplicates(subset=subplant_keys, keep="first")

    # attach the prime mover to the capacity totals
    return total_capacity.merge(
        primary_prime_mover,
        how="left",
        on=subplant_keys,
        validate="1:1",
    )


def create_subplant_attributes_table(
    monthly_subplant_data: pd.DataFrame,
    plant_attributes: pd.DataFrame,
    primary_fuel_table: pd.DataFrame,
    year: int,
    path_prefix: str,
):
    """Export a "subplant_attributes" CSV describing each subplant.

    The table has one row per unique (``plant_id_eia``, ``subplant_id``) pair
    found in ``monthly_subplant_data``. Each row carries the fleet columns added
    by ``assign_fleet_to_subplant_data`` (with the primary fuel column retained
    and ``ba_code`` removed), plus the nameplate capacity and primary prime mover
    from ``calculate_subplant_nameplate_capacity``. The result is written to
    ``{path_prefix}plant_data/subplant_attributes.csv`` under the results folder;
    nothing is returned.

    Args:
        monthly_subplant_data (pd.DataFrame): Used to determine the full set of
            subplants in the data
        plant_attributes (pd.DataFrame): Used for assigning fleet
        primary_fuel_table (pd.DataFrame): Used for assigning fleet
        year (int): the data year
        path_prefix (str): prefix of the output path used when exporting the data
    """
    subplant_keys = ["plant_id_eia", "subplant_id"]

    # one row per subplant that appears in the monthly data
    unique_subplants = monthly_subplant_data[subplant_keys].drop_duplicates()

    # attach fleet information while keeping the primary fuel column; ba_code is
    # not part of the exported table
    fleet_assigned = assign_fleet_to_subplant_data(
        unique_subplants,
        plant_attributes,
        primary_fuel_table,
        year,
        drop_primary_fuel_col=False,
    ).drop(columns="ba_code")

    # add nameplate capacity and primary prime mover; the left merge keeps
    # subplants that have no capacity record
    capacity_and_prime_mover = calculate_subplant_nameplate_capacity(year)
    subplant_attributes = fleet_assigned.merge(
        capacity_and_prime_mover, how="left", on=subplant_keys
    )

    # export the table
    output_path = results_folder(f"{path_prefix}plant_data/subplant_attributes.csv")
    subplant_attributes.to_csv(output_path, index=False)
Loading