diff --git a/src/pudl/analysis/classify_plants_ferc1.py b/src/pudl/analysis/classify_plants_ferc1.py index 1a0f2835f6..9a8e6a1096 100644 --- a/src/pudl/analysis/classify_plants_ferc1.py +++ b/src/pudl/analysis/classify_plants_ferc1.py @@ -654,9 +654,11 @@ def fuel_by_plant_ferc1( ] # Ensure that the dataframe we've gotten has all the information we need: - for col in keep_cols: - if col not in fuel_df.columns: - raise AssertionError(f"Required column {col} not found in input fuel_df.") + missing_cols = [col for col in keep_cols if col not in fuel_df.columns] + if missing_cols: + raise AssertionError( + f"Required columns not found in input fuel_df: {missing_cols}" + ) # Calculate per-fuel derived values and add them to the DataFrame df = ( @@ -679,7 +681,8 @@ def fuel_by_plant_ferc1( "plant_name_ferc1", "report_year", "fuel_type_code_pudl", - ] + ], + observed=True, ) .sum() .reset_index() @@ -732,6 +735,13 @@ def fuel_by_plant_ferc1( ).reset_index() # Label each plant-year record by primary fuel: + df.loc[:, ["primary_fuel_by_cost", "primary_fuel_by_mmbtu"]] = pd.NA + df = df.astype( + { + "primary_fuel_by_cost": pd.StringDtype(), + "primary_fuel_by_mmbtu": pd.StringDtype(), + } + ) for fuel_str in fuel_categories: try: mmbtu_mask = df[f"{fuel_str}_fraction_mmbtu"] > thresh diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py index fe11acb346..5ed3551f3e 100644 --- a/src/pudl/output/ferc1.py +++ b/src/pudl/output/ferc1.py @@ -810,10 +810,6 @@ def drop_other_fuel_types(df): return df[df.fuel_type_code_pudl != "other"].copy() thresh = context.op_config["thresh"] - # The existing function expects `fuel_type_code_pudl` to be an object, rather than - # a category. This is a legacy of pre-dagster code, and we convert here to prevent - # further retooling in the code-base. - fuel_ferc1["fuel_type_code_pudl"] = fuel_ferc1["fuel_type_code_pudl"].astype(str) fuel_categories = list( pudl.transform.ferc1.FuelFerc1TableTransformer()