singularity-energy · grgmiller · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/src/oge/data_cleaning.py b/src/oge/data_cleaning.py
@@ -2183,6 +2183,16 @@ def aggregate_subplant_data_to_fleet(
         fuel_category_col="fuel_category",
     )
 
+    # drop subplants that have missing fuel category and no generation or fuel data
+    # this prevents them from creating blank entries in the power sector results data
+    ba_fuel_data = ba_fuel_data[
+        ~(
+            ba_fuel_data["fuel_category"].isna()
+            & (ba_fuel_data["net_generation_mwh"] == 0)
+            & (ba_fuel_data["fuel_consumed_for_electricity_mmbtu"] == 0)
+        )
+    ]
+
     # if the input data is hourly, aggregate at the hourly level
     if "datetime_utc" in ba_fuel_data.columns:
         agg_cols = ["ba_code", "fuel_category", "datetime_utc", "report_date"]

diff --git a/src/oge/data_pipeline.py b/src/oge/data_pipeline.py
@@ -402,6 +402,12 @@ def main(args):
         monthly_eia_data_to_shape,
         resolution="monthly",
     )
+
+    # export subplant attributes table
+    helpers.create_subplant_attributes_table(
+        monthly_subplant_data, plant_attributes, primary_fuel_table, year, path_prefix
+    )
+
     validation.check_for_complete_monthly_timeseries(
         df=monthly_subplant_data,
         df_name="monthly_plant_data",

diff --git a/src/oge/gross_to_net_generation.py b/src/oge/gross_to_net_generation.py
@@ -9,7 +9,7 @@
 import oge.validation as validation
 
 from oge.data_cleaning import assign_fuel_type_to_cems
-from oge.helpers import create_plant_ba_table, add_subplant_ids_to_df
+from oge.helpers import create_plant_ba_table, calculate_subplant_nameplate_capacity
 from oge.logging_util import get_logger
 
 logger = get_logger(__name__)
@@ -497,57 +497,6 @@ def calculate_gross_to_net_conversion_factors(
     return gtn_conversions
 
 
-def calculate_subplant_nameplate_capacity(year):
-    """Calculates the total nameplate capacity and primary prime mover for each CEMS subplant."""
-    # load generator data
-    gen_capacity = load_data.load_pudl_table(
-        "core_eia860__scd_generators",
-        year,
-        columns=[
-            "plant_id_eia",
-            "generator_id",
-            "prime_mover_code",
-            "capacity_mw",
-            "operational_status_code",
-        ],
-    )
-
-    # add subplant ids to the generator data
-    logger.info("Adding subplant_id to gen_capacity")
-    gen_capacity = add_subplant_ids_to_df(
-        gen_capacity,
-        year,
-        plant_part_to_map="generator_id",
-        how_merge="inner",
-        validate_merge="1:1",
-    )
-    subplant_capacity = (
-        gen_capacity.groupby(["plant_id_eia", "subplant_id"])["capacity_mw"]
-        .sum()
-        .reset_index()
-    )
-
-    # identify the primary prime mover for each subplant based on capacity
-    subplant_prime_mover = gen_capacity[
-        gen_capacity.groupby(["plant_id_eia", "subplant_id"], dropna=False)[
-            "capacity_mw"
-        ].transform("max")
-        == gen_capacity["capacity_mw"]
-    ][["plant_id_eia", "subplant_id", "prime_mover_code"]].drop_duplicates(
-        subset=["plant_id_eia", "subplant_id"], keep="first"
-    )
-
-    # add the prime mover information
-    subplant_capacity = subplant_capacity.merge(
-        subplant_prime_mover,
-        how="left",
-        on=["plant_id_eia", "subplant_id"],
-        validate="1:1",
-    )
-
-    return subplant_capacity
-
-
 def filter_gtn_conversion_factors(gtn_conversions: pd.DataFrame) -> pd.DataFrame:
     """Filters the calculated GTN ratios to remove anomalous or incomplete factors.
 

diff --git a/src/oge/helpers.py b/src/oge/helpers.py
@@ -12,7 +12,7 @@
     latest_validated_year,
     current_early_release_year,
 )
-from oge.filepaths import reference_table_folder, outputs_folder
+from oge.filepaths import reference_table_folder, outputs_folder, results_folder
 
 import oge.load_data as load_data
 from oge.logging_util import get_logger
@@ -349,73 +349,56 @@ def assign_fleet_to_subplant_data(
 
     # check that there is no missing ba or fuel codes for subplants with nonzero gen
     # for CEMS data, check only units that report positive gross generaiton
-    if "gross_generation_mwh" in subplant_data.columns:
-        missing_fleet_keys = subplant_data[
-            (
+    if (
+        "gross_generation_mwh" in subplant_data.columns
+        or "net_generation_mwh" in subplant_data.columns
+    ):
+        if "gross_generation_mwh" in subplant_data.columns:
+            missing_fleet_keys = subplant_data[
                 (
-                    (subplant_data["ba_code"].isna())
-                    | (subplant_data[fuel_category_col].isna())
+                    (
+                        (subplant_data["ba_code"].isna())
+                        | (subplant_data[fuel_category_col].isna())
+                    )
+                    & (
+                        (subplant_data["gross_generation_mwh"] > 0)
+                        | (subplant_data["fuel_consumed_for_electricity_mmbtu"] > 0)
+                    )
                 )
-                & (
-                    (subplant_data["gross_generation_mwh"] > 0)
-                    | (subplant_data["fuel_consumed_for_electricity_mmbtu"] > 0)
-                )
-            )
-        ]
-    # otherwise, check units that report non-zero net generation
-    else:
-        missing_fleet_keys = subplant_data[
-            (
+            ]
+        # otherwise, check units that report non-zero net generation
+        else:
+            missing_fleet_keys = subplant_data[
                 (
-                    (subplant_data["ba_code"].isna())
-                    | (subplant_data[fuel_category_col].isna())
-                )
-                & (
-                    (subplant_data["net_generation_mwh"] != 0)
-                    | (subplant_data["fuel_consumed_for_electricity_mmbtu"] != 0)
+                    (
+                        (subplant_data["ba_code"].isna())
+                        | (subplant_data[fuel_category_col].isna())
+                    )
+                    & (
+                        (subplant_data["net_generation_mwh"] != 0)
+                        | (subplant_data["fuel_consumed_for_electricity_mmbtu"] != 0)
+                    )
                 )
+            ]
+        if len(missing_fleet_keys) > 0:
+            logger.error(
+                "The plant attributes table is missing ba_code or fuel_category data for some plants. This will result in incomplete power sector results."
             )
-        ]
-    if len(missing_fleet_keys) > 0:
-        logger.error(
-            "The plant attributes table is missing ba_code or fuel_category data for "
-            "some plants. This will result in incomplete power sector results."
-        )
-        logger.error(
-            missing_fleet_keys.groupby(
-                [
-                    "plant_id_eia",
-                    "subplant_id",
-                    "ba_code",
-                    fuel_category_col,
-                ],
-                dropna=False,
-            )[["net_generation_mwh", "fuel_consumed_for_electricity_mmbtu"]]
-            .sum()
-            .to_string()
-        )
-        """raise UserWarning(
-            "The plant attributes table is missing ba_code or fuel_category data for some plants. This will result in incomplete power sector results."
-        )"""
-
-    logger.info(
-        "Dropping subplants that have zero fuel consumption and zero generation from "
-        "fleet aggregation"
-    )
-    if "gross_generation_mwh" in subplant_data.columns:
-        subplant_data = subplant_data[
-            ~(
-                (subplant_data["gross_generation_mwh"] == 0)
-                & (subplant_data["fuel_consumed_for_electricity_mmbtu"] == 0)
+            logger.error(
+                missing_fleet_keys.groupby(
+                    [
+                        "plant_id_eia",
+                        "subplant_id",
+                        "ba_code",
+                        fuel_category_col,
+                    ],
+                    dropna=False,
+                )[["net_generation_mwh", "fuel_consumed_for_electricity_mmbtu"]]
+                .sum()
+                .to_string()
             )
-        ]
     else:
-        subplant_data = subplant_data[
-            ~(
-                (subplant_data["net_generation_mwh"] == 0)
-                & (subplant_data["fuel_consumed_for_electricity_mmbtu"] == 0)
-            )
-        ]
+        pass
 
     return subplant_data
 
@@ -1230,3 +1213,104 @@ def add_subplant_ids_to_df(
     validation.test_for_missing_subplant_id(df, plant_part_to_map)
 
     return df
+
+
+def calculate_subplant_nameplate_capacity(year):
+    """Calculates total nameplate capacity and primary prime mover for each mapped subplant."""
+    # load generator-level capacity and prime mover data from the EIA-860 generators table
+    gen_capacity = load_data.load_pudl_table(
+        "core_eia860__scd_generators",
+        year,
+        columns=[
+            "plant_id_eia",
+            "generator_id",
+            "prime_mover_code",
+            "capacity_mw",
+            "operational_status_code",
+        ],
+    )
+
+    # add subplant ids by generator_id (inner merge; presumably drops unmapped generators)
+    logger.info("Adding subplant_id to gen_capacity")
+    gen_capacity = add_subplant_ids_to_df(
+        gen_capacity,
+        year,
+        plant_part_to_map="generator_id",
+        how_merge="inner",
+        validate_merge="1:1",
+    )
+    subplant_capacity = (
+        gen_capacity.groupby(["plant_id_eia", "subplant_id"])["capacity_mw"]
+        .sum()
+        .reset_index()
+    )
+
+    # primary prime mover = that of the largest generator (first row wins on ties)
+    subplant_prime_mover = gen_capacity[
+        gen_capacity.groupby(["plant_id_eia", "subplant_id"], dropna=False)[
+            "capacity_mw"
+        ].transform("max")
+        == gen_capacity["capacity_mw"]
+    ][["plant_id_eia", "subplant_id", "prime_mover_code"]].drop_duplicates(
+        subset=["plant_id_eia", "subplant_id"], keep="first"
+    )
+
+    # attach the primary prime mover to the summed capacity (one row per subplant)
+    subplant_capacity = subplant_capacity.merge(
+        subplant_prime_mover,
+        how="left",
+        on=["plant_id_eia", "subplant_id"],
+        validate="1:1",
+    )
+
+    return subplant_capacity
+
+
+def create_subplant_attributes_table(
+    monthly_subplant_data: pd.DataFrame,
+    plant_attributes: pd.DataFrame,
+    primary_fuel_table: pd.DataFrame,
+    year: int,
+    path_prefix: str,
+):
+    """Writes a "subplant_attributes" table to the results/plant_data folder that
+    contains subplant-specific attributes including the primary fuel, fuel category,
+    nameplate capacity, and primary prime mover for each subplant in
+    monthly_subplant_data.
+
+    Args:
+        monthly_subplant_data (pd.DataFrame): Source of the unique
+            (plant_id_eia, subplant_id) pairs that make up the rows of the table
+        plant_attributes (pd.DataFrame): Passed through to assign the fleet
+        primary_fuel_table (pd.DataFrame): Passed through to assign the fleet
+        year (int): The data year; also used to load generator capacity data
+        path_prefix (str): Prepended to the relative output path of the csv
+    """
+    # nothing is returned; the table is only written to disk as a csv
+
+    # get the unique plant_id_eia / subplant_id pairs
+    subplant_attributes = monthly_subplant_data[
+        ["plant_id_eia", "subplant_id"]
+    ].drop_duplicates()
+
+    # assign fleet to each subplant, keeping the primary fuel column
+    subplant_attributes = assign_fleet_to_subplant_data(
+        subplant_attributes,
+        plant_attributes,
+        primary_fuel_table,
+        year,
+        drop_primary_fuel_col=False,
+    )
+    subplant_attributes = subplant_attributes.drop(columns="ba_code")
+
+    # add nameplate capacity and primary prime mover (left merge keeps all subplants)
+    subplant_capacity = calculate_subplant_nameplate_capacity(year)
+
+    subplant_attributes = subplant_attributes.merge(
+        subplant_capacity, how="left", on=["plant_id_eia", "subplant_id"]
+    )
+
+    subplant_attributes.to_csv(
+        results_folder(f"{path_prefix}plant_data/subplant_attributes.csv"),
+        index=False,
+    )