From 095d31bdf34ca12ca86f5f5b69f3a4b0479968d3 Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Tue, 7 Nov 2023 16:55:58 -0500
Subject: [PATCH 1/3] unclear exactly why the "nan" strings started appearing,
 but this fixes it

---
 src/pudl/analysis/classify_plants_ferc1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pudl/analysis/classify_plants_ferc1.py b/src/pudl/analysis/classify_plants_ferc1.py
index 1a0f2835f6..68393c0699 100644
--- a/src/pudl/analysis/classify_plants_ferc1.py
+++ b/src/pudl/analysis/classify_plants_ferc1.py
@@ -546,7 +546,7 @@ def revert_filled_in_string_nulls(df: pd.DataFrame) -> pd.DataFrame:
         if col in df.columns:
             # the replace to_replace={column_name: {"", pd.NA}} mysteriously doesn't work.
             df[col] = df[col].replace(
-                to_replace=[""],
+                to_replace=["", "nan"],
                 value=pd.NA,
             )
     return df

From 14e6bc8e6909e5f52a0cbd27bba2bc54f875afec Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Tue, 7 Nov 2023 18:00:58 -0500
Subject: [PATCH 2/3] revert the replace of "nan" by no longer introducing them,
 plus some light cleanup

---
 src/pudl/analysis/classify_plants_ferc1.py | 14 +++++++++-----
 src/pudl/output/ferc1.py                   |  4 ----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/pudl/analysis/classify_plants_ferc1.py b/src/pudl/analysis/classify_plants_ferc1.py
index 68393c0699..ef6f800aa6 100644
--- a/src/pudl/analysis/classify_plants_ferc1.py
+++ b/src/pudl/analysis/classify_plants_ferc1.py
@@ -546,7 +546,7 @@ def revert_filled_in_string_nulls(df: pd.DataFrame) -> pd.DataFrame:
         if col in df.columns:
             # the replace to_replace={column_name: {"", pd.NA}} mysteriously doesn't work.
             df[col] = df[col].replace(
-                to_replace=["", "nan"],
+                to_replace=[""],
                 value=pd.NA,
             )
     return df
@@ -654,9 +654,11 @@ def fuel_by_plant_ferc1(
     ]
 
     # Ensure that the dataframe we've gotten has all the information we need:
-    for col in keep_cols:
-        if col not in fuel_df.columns:
-            raise AssertionError(f"Required column {col} not found in input fuel_df.")
+    missing_cols = [col for col in keep_cols if col not in fuel_df.columns]
+    if missing_cols:
+        raise AssertionError(
+            f"Required columns not found in input fuel_df: {missing_cols}"
+        )
 
     # Calculate per-fuel derived values and add them to the DataFrame
     df = (
@@ -679,7 +681,8 @@ def fuel_by_plant_ferc1(
                 "plant_name_ferc1",
                 "report_year",
                 "fuel_type_code_pudl",
-            ]
+            ],
+            observed=True,
         )
         .sum()
         .reset_index()
@@ -732,6 +735,7 @@ def fuel_by_plant_ferc1(
     ).reset_index()
 
     # Label each plant-year record by primary fuel:
+    df.loc[:, ["primary_fuel_by_cost", "primary_fuel_by_mmbtu"]] = pd.NA
     for fuel_str in fuel_categories:
         try:
             mmbtu_mask = df[f"{fuel_str}_fraction_mmbtu"] > thresh
diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py
index fe11acb346..5ed3551f3e 100644
--- a/src/pudl/output/ferc1.py
+++ b/src/pudl/output/ferc1.py
@@ -810,10 +810,6 @@ def drop_other_fuel_types(df):
         return df[df.fuel_type_code_pudl != "other"].copy()
 
     thresh = context.op_config["thresh"]
-    # The existing function expects `fuel_type_code_pudl` to be an object, rather than
-    # a category. This is a legacy of pre-dagster code, and we convert here to prevent
-    # further retooling in the code-base.
-    fuel_ferc1["fuel_type_code_pudl"] = fuel_ferc1["fuel_type_code_pudl"].astype(str)
 
     fuel_categories = list(
         pudl.transform.ferc1.FuelFerc1TableTransformer()

From b7533cfbff5ff8538cab5208e7fde1b90c52132e Mon Sep 17 00:00:00 2001
From: Christina Gosnell <cgosnell@catalyst.coop>
Date: Wed, 8 Nov 2023 10:23:14 -0500
Subject: [PATCH 3/3] REALLY REALLY it's a nullable string

---
 src/pudl/analysis/classify_plants_ferc1.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/pudl/analysis/classify_plants_ferc1.py b/src/pudl/analysis/classify_plants_ferc1.py
index ef6f800aa6..9a8e6a1096 100644
--- a/src/pudl/analysis/classify_plants_ferc1.py
+++ b/src/pudl/analysis/classify_plants_ferc1.py
@@ -736,6 +736,12 @@ def fuel_by_plant_ferc1(
 
     # Label each plant-year record by primary fuel:
     df.loc[:, ["primary_fuel_by_cost", "primary_fuel_by_mmbtu"]] = pd.NA
+    df = df.astype(
+        {
+            "primary_fuel_by_cost": pd.StringDtype(),
+            "primary_fuel_by_mmbtu": pd.StringDtype(),
+        }
+    )
     for fuel_str in fuel_categories:
         try:
             mmbtu_mask = df[f"{fuel_str}_fraction_mmbtu"] > thresh