diff --git a/src/pudl/extract/eia923.py b/src/pudl/extract/eia923.py index cd0546699e..48d824bcd2 100644 --- a/src/pudl/extract/eia923.py +++ b/src/pudl/extract/eia923.py @@ -60,7 +60,6 @@ def process_raw(self, df, page, **partition): f"column with {partition['year']}" ) df.loc[mask, "report_year"] = partition["year"] - df = self.add_data_maturity(df, page, **partition) return df @staticmethod diff --git a/src/pudl/extract/extractor.py b/src/pudl/extract/extractor.py index e9584e033e..57e473e8f3 100644 --- a/src/pudl/extract/extractor.py +++ b/src/pudl/extract/extractor.py @@ -183,16 +183,16 @@ def validate(self, df: pd.DataFrame, page: str, **partition: PartitionSelection) page_cols = self.get_page_cols(page, partition_selection) expected_cols = page_cols.union(self.cols_added) if set(df.columns) != set(expected_cols): - # TODO (bendnorman): Enforce canonical fields for all raw fields? + # Ensure that expected and actually extracted columns match extra_raw_cols = set(df.columns).difference(expected_cols) missing_raw_cols = set(expected_cols).difference(df.columns) if extra_raw_cols: - logger.warning( + raise ValueError( f"{page}/{partition_selection}: Extra columns found in extracted table:" f"\n{extra_raw_cols}" ) if missing_raw_cols: - logger.warning( + raise ValueError( f"{page}/{partition_selection}: Expected columns not found in extracted table:" f"\n{missing_raw_cols}" ) diff --git a/src/pudl/extract/phmsagas.py b/src/pudl/extract/phmsagas.py index 45271d1784..e824585104 100644 --- a/src/pudl/extract/phmsagas.py +++ b/src/pudl/extract/phmsagas.py @@ -27,7 +27,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def process_renamed(self, newdata: pd.DataFrame, page: str, **partition): - """Drop columns that get mapped to other assets. + """Drop columns that get mapped to other assets and columns with unstructured data. Older years of PHMSA data have one Excel tab in the raw data, while newer data has multiple tabs. 
To extract data into tables that follow the newer data format @@ -52,6 +52,37 @@ def process_renamed(self, newdata: pd.DataFrame, page: str, **partition): f"\n{to_drop}" ) newdata = newdata.drop(columns=to_drop, errors="ignore") + # there is an annoying middling number of columns in phmsa raw data that are unnamed + # and have a smattering of random values. we want to drop these guys. but we are going + # to enumerate what we expect to need to drop so if lots of new unmapped columns happen + # unexpectedly a warning will happen here (and an extraction validation error will happen) + # FYI: In 2009 there were a ton of extra columns seemingly from two records that had a + # multi-line comment that shifted the rest of the cells over. Presumably we could map + # them all and do a manual shift of the data. But it's only 2 records so right now we are just + # dropping them. + unnamed_page_years = { + "yearly_distribution": [ + 2000, + 2001, + 2002, + 2003, + 2004, + 2005, + 2006, + 2007, + 2009, + ] + } + unnamed_columns = newdata.filter(like="unnamed").columns + if (page in unnamed_page_years) and ( + int(partition["year"]) in unnamed_page_years[page] + ): + newdata = newdata.drop(columns=unnamed_columns) + elif not unnamed_columns.empty: + logger.warning( + "We found some unnamed columns that are probably not expected. " + f"Consider dropping them. 
Columns found: {unnamed_columns}" + ) return newdata diff --git a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv index 03fbe0b408..03fecb562d 100644 --- a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv @@ -66,4 +66,4 @@ ash_content_pct_december,ash_content_december,ash_content_december,ash_content_d total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity,total_fuel_consumption_quantity balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/byproduct_disposition.csv b/src/pudl/package_data/eia923/column_maps/byproduct_disposition.csv index feba8f9b6b..712414dd3f 100644 --- a/src/pudl/package_data/eia923/column_maps/byproduct_disposition.csv +++ b/src/pudl/package_data/eia923/column_maps/byproduct_disposition.csv @@ -12,3 +12,4 @@ stored_onsite_1000_tons,stored_onsite,stored_onsite,stored_onsite,stored_onsite, 
stored_offsite_1000_tons,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite_thousand_tons,stored_offsite_thousand_tons,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite,stored_offsite total_disposal_1000_tons,total,total,total,total,total,total_thousand_tons_or_for_steam_mmbtu,total_thousand_tons_or_for_steam_mmbtu,total,total,total,total,total,total,total,total,total byproducts_to_report,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts,no_byproducts +early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals diff --git a/src/pudl/package_data/eia923/column_maps/byproduct_expenses_and_revenues.csv b/src/pudl/package_data/eia923/column_maps/byproduct_expenses_and_revenues.csv index 1778082783..b6a82763b4 100644 --- a/src/pudl/package_data/eia923/column_maps/byproduct_expenses_and_revenues.csv +++ b/src/pudl/package_data/eia923/column_maps/byproduct_expenses_and_revenues.csv @@ -29,3 +29,4 @@ revenues_fly_bottom_ash_intermingled_1000_dollars,fly_bottom_ash_intermingled_re revenues_fgd_byproducts_1000_dollars,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues_thousand_dollars,fgd_byproducts_revenues_thousand_dollars,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues,fgd_byproducts_revenues 
revenues_other_byproducts_1000_dollars,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues_thousand_dollars,other_byproducts_revenues_thousand_dollars,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues,other_byproducts_revenues revenues_total_byproduct_1000_dollars,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues_thousand_dollars,total_revenues_thousand_dollars,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues,total_revenues +early_release,,unnamed_30,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals diff --git a/src/pudl/package_data/eia923/column_maps/cooling_system_information.csv b/src/pudl/package_data/eia923/column_maps/cooling_system_information.csv index 7960e7487f..6ae38360ef 100644 --- a/src/pudl/package_data/eia923/column_maps/cooling_system_information.csv +++ b/src/pudl/package_data/eia923/column_maps/cooling_system_information.csv @@ -29,3 +29,4 @@ annual_maximum_intake_winter_temperature_fahrenheit,intake_peak_winter_temperatu annual_maximum_intake_summer_temperature_fahrenheit,intake_peak_summer_temperature,intake_peak_summer_temperature,,,,,,,,,,,,,, annual_maximum_outlet_winter_temperature_fahrenheit,outlet_peak_winter_temperature,outlet_peak_winter_temperature,,,,,,,,,,,,,, annual_maximum_outlet_summer_temperature_fahrenheit,outlet_peak_summer_temperature,outlet_peak_summer_temperature,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals diff --git 
a/src/pudl/package_data/eia923/column_maps/emissions_control.csv b/src/pudl/package_data/eia923/column_maps/emissions_control.csv index 9600b23e1f..67997f7b00 100644 --- a/src/pudl/package_data/eia923/column_maps/emissions_control.csv +++ b/src/pudl/package_data/eia923/column_maps/emissions_control.csv @@ -22,3 +22,4 @@ fgd_electricity_consumption_mwh,fgd_electricity_consumption_megawatthours,fgd_el mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency,mercury_removal_efficiency mercury_emission_rate_lb_per_trillion_btu,,,,,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate,mercury_emission_rate acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency,acid_gas_removal_efficiency +early_release,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals diff --git a/src/pudl/package_data/eia923/column_maps/energy_storage.csv b/src/pudl/package_data/eia923/column_maps/energy_storage.csv index 567159a54d..afb74da7f6 100644 --- a/src/pudl/package_data/eia923/column_maps/energy_storage.csv +++ b/src/pudl/package_data/eia923/column_maps/energy_storage.csv @@ -69,3 +69,4 @@ total_fuel_consumed_for_electricity_units,electric_fuel_consumption_quantity,ele 
total_gross_generation_mwh,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours,gross_generation_megawatthours total_net_generation_mwh,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours report_year,year,year,year,year,year,year,year,year,year,year,year +early_release,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/fgd_operation_maintenance.csv b/src/pudl/package_data/eia923/column_maps/fgd_operation_maintenance.csv index be3e4bb4b3..ad387f5285 100644 --- a/src/pudl/package_data/eia923/column_maps/fgd_operation_maintenance.csv +++ b/src/pudl/package_data/eia923/column_maps/fgd_operation_maintenance.csv @@ -16,3 +16,4 @@ fgd_electricity_consumption_mwh,electric_energy_consumption,electric_energy_cons so2_removal_efficiency_annual,efficiency_at_annual_operating_factor,efficiency_at_annual_operating_factor,efficiency_at_annual_operating_factor,efficiency_at_annual_operating_factor,,,,,,,,,,,, so2_removal_efficiency_tested,tested_efficiency_at_100_load,tested_efficiency_at_100_load,tested_efficiency_at_100_load,tested_efficiency_at_100_load,,,,,,,,,,,, so2_test_date,date_of_latest_efficiency_test,date_of_latest_efficiency_test,date_of_latest_efficiency_test,date_of_latest_efficiency_test,,,,,,,,,,,, 
+early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals diff --git a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv index 75954cd60c..98c67c3213 100644 --- a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv +++ b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv @@ -31,4 +31,4 @@ natural_gas_delivery_contract_type_code,,,,,,,natural_gas_delivery_contract_type moisture_content_pct,,,,,,,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content,moisture_content chlorine_content_ppm,,,,,,,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content,chlorine_content balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code -early_release,,,,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv index b6f1ff5987..e5890fb66e 100644 --- a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv @@ -95,4 +95,4 @@ total_fuel_consumption_mmbtu,total_fuel_consumption_mmbtus,total_fuel_consumptio 
elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtus,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu,elec_fuel_consumption_mmbtu total_net_generation_mwh,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours,net_generation_megawatthours report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/generator.csv b/src/pudl/package_data/eia923/column_maps/generator.csv index 36106f6643..5428646590 100644 --- a/src/pudl/package_data/eia923/column_maps/generator.csv +++ 
b/src/pudl/package_data/eia923/column_maps/generator.csv @@ -28,4 +28,4 @@ net_generation_mwh_december,net_generation_december,net_generation_december,net_ net_generation_mwh_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date,net_generation_year_to_date balancing_authority_code_eia,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,ba_code report_year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year,year -early_release,,,,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/plant_frame.csv b/src/pudl/package_data/eia923/column_maps/plant_frame.csv index 7b8376df70..559e43c90b 100644 --- a/src/pudl/package_data/eia923/column_maps/plant_frame.csv +++ b/src/pudl/package_data/eia923/column_maps/plant_frame.csv @@ -9,4 +9,4 @@ plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name 
associated_combined_heat_power,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status_y_chp_n_non_chp,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status,combined_heat_and_power_status reporting_frequency_code,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency_annual_or_monthly,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency nameplate_capacity_mw,nameplate_capacity_mw,,,,,,,,,,,,, -early_release,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/eia923/column_maps/stocks.csv b/src/pudl/package_data/eia923/column_maps/stocks.csv index 389d5abe1b..22f1db68e1 100644 --- a/src/pudl/package_data/eia923/column_maps/stocks.csv +++ b/src/pudl/package_data/eia923/column_maps/stocks.csv @@ -36,4 +36,4 @@ petcoke_september,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,petcoke_sep,pe petcoke_october,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_oct,petcoke_october,petcoke_oct,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october,petcoke_october 
petcoke_november,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_nov,petcoke_november,petcoke_nov,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november,petcoke_november petcoke_december,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_dec,petcoke_december,petcoke_dec,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december,petcoke_december -early_release,,,,,,,,,,,,,,,,,,,,,,,, +early_release,,,,,,,,,,,,,,,,,,,,,,,early_release_data_july_2024_not_fully_edited_use_with_caution_do_not_aggregate_to_state_regional_or_national_totals, diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_gathering_pipe_miles_by_nps.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_gathering_pipe_miles_by_nps.csv index ea63809922..02138eab74 100644 --- a/src/pudl/package_data/phmsagas/column_maps/yearly_gathering_pipe_miles_by_nps.csv +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_gathering_pipe_miles_by_nps.csv @@ -41,7 +41,6 @@ onshore_type_a_gathering_pipe_nps_48_miles,,,,,,,,,,,,,,,,,,,,,partiona48,partio onshore_type_a_gathering_pipe_nps_52_miles,,,,,,,,,,,,,,,,,,,,,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52,partiona52 onshore_type_a_gathering_pipe_nps_56_miles,,,,,,,,,,,,,,,,,,,,,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56,partiona56 
onshore_type_a_gathering_pipe_nps_58_and_over_miles,,,,,,,,,,,,,,,,,,,,,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over,partiona58over -onshore_type_a_gathering_pipe_other_size_miles,,,,,,,,,,,,,,,,,,,,,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional,partionaadditional onshore_type_a_gathering_pipe_other_size_total_miles,,,,,,,,,,,,,,,,,,,,,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total,partiona_other_pipe_mile_total onshore_type_a_gathering_pipe_other_size_detail_miles,,,,,,,,,,,,,,,,,,,,,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail,partiona_other_pipe_detail onshore_type_a_gathering_pipe_total_miles,,,,,,,,,,,,,,,,,,,,,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal,partionatotal @@ -71,7 +70,6 @@ onshore_type_b_gathering_pipe_nps_48_miles,,,,,,,,,,,,,,,,,,,,,partionb48,partio 
onshore_type_b_gathering_pipe_nps_52_miles,,,,,,,,,,,,,,,,,,,,,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52,partionb52 onshore_type_b_gathering_pipe_nps_56_miles,,,,,,,,,,,,,,,,,,,,,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56,partionb56 onshore_type_b_gathering_pipe_nps_58_and_over_miles,,,,,,,,,,,,,,,,,,,,,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over,partionb58over -onshore_type_b_gathering_pipe_other_size_miles,,,,,,,,,,,,,,,,,,,,,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional,partionbadditional onshore_type_b_gathering_pipe_other_size_total_miles,,,,,,,,,,,,,,,,,,,,,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total,partionb_other_pipe_mile_total onshore_type_b_gathering_pipe_other_size_detail_miles,,,,,,,,,,,,,,,,,,,,,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail,partionb_other_pipe_detail 
onshore_type_b_gathering_pipe_total_miles,,,,,,,,,,,,,,,,,,,,,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal,partionbtotal @@ -129,7 +127,6 @@ offshore_gathering_pipe_nps_48_miles,,,,,,,,,,,,,,,,,,,,,partioff48,partioff48,p offshore_gathering_pipe_nps_52_miles,,,,,,,,,,,,,,,,,,,,,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52,partioff52 offshore_gathering_pipe_nps_56_miles,,,,,,,,,,,,,,,,,,,,,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56,partioff56 offshore_gathering_pipe_nps_58_and_over_miles,,,,,,,,,,,,,,,,,,,,,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over,partioff58over -offshore_gathering_pipe_other_size_miles,,,,,,,,,,,,,,,,,,,,,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional,partioffadditional offshore_gathering_pipe_other_size_total_miles,,,,,,,,,,,,,,,,,,,,,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total,partioff_other_pipe_mile_total 
offshore_gathering_pipe_other_size_detail_miles,,,,,,,,,,,,,,,,,,,,,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail,partioff_other_pipe_detail offshore_gathering_pipe_unknown_size_miles,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,g2m2_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,b2goff_1,,,,,,,,,,,,, diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_gathering_preparer_certification.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_gathering_preparer_certification.csv index 17e9aa00a6..7c26976146 100644 --- a/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_gathering_preparer_certification.csv +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_gathering_preparer_certification.csv @@ -13,10 +13,10 @@ headquarters_address_county,hcounty,hcounty,hcounty,hcounty,hcounty,hcnty,hcnty, headquarters_address_state,hstate,hstate,hstate,hstate,hstate,hst,hst,hst,hst,hstate,hstate,hqstate,hqstate,hqstate,hqstate,hqstate,hqstate,hqstate,hqstate,hqstate,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state,parta4state headquarters_address_zip,hzip,hzip,hzip,hzip,hzip,hzip,hzip,hzip,hzip,hzip,hzip,hqzip,hqzip,hqzip,hqzip,hqzip,hqzip,hqzip,hqzip,hqzip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip,parta4zip 
commodity_group,,,,,,,,,,,,,,,,,,,,,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity -preparer_name,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname +preparer_name,pname,pname,pname,pname,pname,pname,pname,pname,pname,pname,,pname,pname,pname,pname,pname,pname,pname,pname,pname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname,partnprepname preparer_title,,,,,,,,,,,,,,,,,,,,,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle,partnpreptitle preparer_email,,,,,,,,,,,,pemail,pemail,pemail,pemail,pemail,pemail,pemail,pemail,pemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail,partnprepemail -preparer_telephone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele 
+preparer_telephone,phone,phone,phone,phone,phone,phone,phone,phone,phone,phone,,phone,phone,phone,phone,phone,phone,phone,phone,phone,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele,partnpreptele preparer_fax,,,,,,,,,,,,pfax,pfax,pfax,pfax,pfax,pfax,pfax,pfax,pfax,,,,,,,,,,,,,, certifier_telephone,,,,,,,,,,,,,,,,,,,,,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename,partoprepsename certifier_name,,,,,,,,,,,,,,,,,,,,,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle,partoprepsetitle diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_pipe_miles_by_nps.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_pipe_miles_by_nps.csv index f0d0910c67..cac067e55e 100644 --- a/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_pipe_miles_by_nps.csv +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_transmission_pipe_miles_by_nps.csv @@ -39,7 +39,6 @@ onshore_transmission_pipe_nps_48_miles,,,,,,,,,,,,,,,,,,,,,parthon48,parthon48,p onshore_transmission_pipe_nps_52_miles,,,,,,,,,,,,,,,,,,,,,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52,parthon52 onshore_transmission_pipe_nps_56_miles,,,,,,,,,,,,,,,,,,,,,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56,parthon56 
onshore_transmission_pipe_nps_58_and_over_miles,,,,,,,,,,,,,,,,,,,,,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over,parthon58over -onshore_transmission_pipe_other_size_miles,,,,,,,,,,,,,,,,,,,,,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional,parthonadditional onshore_transmission_pipe_other_size_total_miles,,,,,,,,,,,,,,,,,,,,,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total,parthon_other_pipe_mile_total onshore_transmission_pipe_other_size_detail_miles,,,,,,,,,,,,,,,,,,,,,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail,parthon_other_pipe_detail onshore_transmission_pipe_total_miles,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,t2m1t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,b2ton_t,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal,parthontotal @@ -73,7 +72,6 @@ offshore_transmission_pipe_nps_48_miles,,,,,,,,,,,,,,,,,,,,,parthoff48,parthoff4 
offshore_transmission_pipe_nps_52_miles,,,,,,,,,,,,,,,,,,,,,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52,parthoff52 offshore_transmission_pipe_nps_56_miles,,,,,,,,,,,,,,,,,,,,,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56,parthoff56 offshore_transmission_pipe_nps_58_and_over_miles,,,,,,,,,,,,,,,,,,,,,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over,parthoff58over -offshore_transmission_pipe_other_size_miles,,,,,,,,,,,,,,,,,,,,,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional,parthoffadditional offshore_transmission_pipe_other_size_total_miles,,,,,,,,,,,,,,,,,,,,,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total,parthoff_other_pipe_mile_total offshore_transmission_pipe_other_size_detail_miles,,,,,,,,,,,,,,,,,,,,,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail,parthoff_other_pipe_detail 
offshore_transmission_pipe_unknown_size_miles,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,t2m2_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,b2toff_1,,,,,,,,,,,,, diff --git a/test/unit/extract/csv_test.py b/test/unit/extract/csv_test.py index 026877f30a..8f47bf653c 100644 --- a/test/unit/extract/csv_test.py +++ b/test/unit/extract/csv_test.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock, patch import pandas as pd -from pytest import raises +import pytest from pudl.extract.csv import CsvExtractor from pudl.extract.extractor import GenericMetadata @@ -22,30 +22,31 @@ def __init__(self): super().__init__(ds=MagicMock()) -def test_source_filename_valid_partition(): - extractor = FakeExtractor() +@pytest.fixture +def extractor(): + # Create an instance of the CsvExtractor class + return FakeExtractor() + + +def test_source_filename_valid_partition(extractor): assert extractor.source_filename(PAGE, **PARTITION) == CSV_FILENAME -def test_source_filename_multipart_partition(): - extractor = FakeExtractor() +def test_source_filename_multipart_partition(extractor): multipart_partition = PARTITION.copy() multipart_partition["month"] = 12 - with raises(AssertionError): + with pytest.raises(AssertionError): extractor.source_filename(PAGE, **multipart_partition) -def test_source_filename_multiple_selections(): - extractor = FakeExtractor() +def test_source_filename_multiple_selections(extractor): multiple_selections = {"year": [PARTITION_SELECTION, 2024]} - with raises(AssertionError): + with pytest.raises(AssertionError): extractor.source_filename(PAGE, **multiple_selections) @patch("pudl.extract.csv.pd") -def test_load_source(mock_pd): - extractor = FakeExtractor() - +def test_load_source(mock_pd, extractor): assert extractor.load_source(PAGE, **PARTITION) == mock_pd.read_csv.return_value extractor.ds.get_zipfile_resource.assert_called_once_with(DATASET, **PARTITION) zipfile = 
extractor.ds.get_zipfile_resource.return_value.__enter__.return_value @@ -54,8 +55,7 @@ def test_load_source(mock_pd): mock_pd.read_csv.assert_called_once_with(file) -def test_extract(): - extractor = FakeExtractor() +def test_extract(extractor): # Create a sample of data we could expect from an EIA CSV company_field = "company" company_data = "Total of All Companies" @@ -70,7 +70,7 @@ def test_extract(): # Testing the rename GenericMetadata, "get_column_map", - return_value={company_field: "company_rename"}, + return_value={"company_rename": company_field}, ), patch.object( # Transposing the df here to get the orientation we expect get_page_cols to return @@ -83,5 +83,71 @@ def test_extract(): assert len(res) == 1 # Assert only one page extracted assert list(res.keys()) == [PAGE] # Assert it is named correctly assert ( - res[PAGE]["company_rename"][0] == company_data + res[PAGE][company_field][0] == company_data ) # Assert that column correctly renamed and data is there. + + +@patch.object(FakeExtractor, "METADATA") +def test_validate_exact_columns(mock_metadata, extractor): + # Mock the partition selection and page columns + # mock_metadata._get_partition_selection.return_value = "partition1" + extractor.get_page_cols = MagicMock(return_value={"col1", "col2"}) + + # Create a DataFrame with the exact expected columns + df = pd.DataFrame(columns=["col1", "col2"]) + + # Call the validate method. No exceptions should be raised. 
+ extractor.validate(df, "page1", partition="partition1") + + +@patch.object(FakeExtractor, "METADATA") +def test_validate_extra_columns(mock_metadata, extractor): + # Mock the partition selection and page columns + mock_metadata._get_partition_selection.return_value = "partition1" + extractor.get_page_cols = MagicMock(return_value={"col1", "col2"}) + + # Create a DataFrame with extra columns + df = pd.DataFrame(columns=["col1", "col2", "col3"]) + + # Call the validate method and check for ValueError + with pytest.raises(ValueError, match="Extra columns found in extracted table"): + extractor.validate(df, "page1", partition="partition1") + + +@patch.object(FakeExtractor, "METADATA") +def test_validate_missing_columns(mock_metadata, extractor): + # Mock the partition selection and page columns + mock_metadata._get_partition_selection.return_value = "partition1" + extractor.get_page_cols = MagicMock(return_value={"col1", "col2"}) + + # Create a DataFrame with missing columns + df = pd.DataFrame(columns=["col1"]) + + # Call the validate method and check for ValueError + with pytest.raises( + ValueError, match="Expected columns not found in extracted table" + ): + extractor.validate(df, "page1", partition="partition1") + + +@patch.object(FakeExtractor, "METADATA") +def test_validate_extra_and_missing_columns(mock_metadata, extractor): + # Mock the partition selection and page columns + mock_metadata._get_partition_selection.return_value = "partition1" + extractor.get_page_cols = MagicMock(return_value={"col1", "col2"}) + + # Create a DataFrame with both extra and missing columns + df = pd.DataFrame(columns=["col1", "col3"]) + + # Call the validate method and check for ValueError + with pytest.raises(ValueError, match="Extra columns found in extracted table"): + extractor.validate(df, "page1", partition="partition1") + + # Adjust the DataFrame to only have missing columns + df = pd.DataFrame(columns=["col1"]) + + # Call the validate method and check for ValueError + 
with pytest.raises( + ValueError, match="Expected columns not found in extracted table" + ): + extractor.validate(df, "page1", partition="partition1") diff --git a/test/unit/extract/phmsagas_test.py b/test/unit/extract/phmsagas_test.py new file mode 100644 index 0000000000..ace2b9d966 --- /dev/null +++ b/test/unit/extract/phmsagas_test.py @@ -0,0 +1,92 @@ +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest + +from pudl.extract.excel import ExcelMetadata +from pudl.extract.phmsagas import Extractor + + +class FakeExtractor(Extractor): + def __init__(self): + self.METADATA = ExcelMetadata("phmsagas") + super().__init__(ds=MagicMock()) + self._metadata = MagicMock() + + +@pytest.fixture +def extractor(): + # Create an instance of the CsvExtractor class + return FakeExtractor() + + +@patch("pudl.extract.phmsagas.logger") +def test_process_renamed_drop_columns(mock_logger, extractor): + # Mock metadata methods + extractor._metadata.get_form.return_value = "gas_transmission_gathering" + extractor._metadata.get_all_columns.return_value = ["col1", "col2"] + + # Create a DataFrame with extra columns + data = {"col1": [1, 2], "col2": [3, 4], "extra_col": [5, 6]} + df = pd.DataFrame(data) + + # Call the method + result = extractor.process_renamed(df, "some_page", year=2009) + + # Check that the extra column was dropped + assert "extra_col" not in result.columns + mock_logger.info.assert_called_once() + + +@patch("pudl.extract.phmsagas.logger") +def test_process_renamed_keep_columns(mock_logger, extractor): + # Mock metadata methods + extractor._metadata.get_form.return_value = "gas_transmission_gathering" + extractor._metadata.get_all_columns.return_value = ["col1", "col2"] + + # Create a DataFrame without extra columns + data = {"col1": [1, 2], "col2": [3, 4]} + df = pd.DataFrame(data) + + # Call the method + result = extractor.process_renamed(df, "some_page", year=2009) + + # Check that no columns were dropped + assert list(result.columns) == 
["col1", "col2"] + mock_logger.info.assert_not_called() + + +@patch("pudl.extract.phmsagas.logger") +def test_process_renamed_drop_unnamed_columns(mock_logger, extractor): + # Mock metadata methods + extractor._metadata.get_form.return_value = "some_form" + extractor._metadata.get_all_columns.return_value = ["col1", "col2"] + + # Create a DataFrame with unnamed columns + data = {"col1": [1, 2], "col2": [3, 4], "unnamed_0": [5, 6]} + df = pd.DataFrame(data) + + # Call the method for a page/year where unnamed columns are expected + result = extractor.process_renamed(df, "yearly_distribution", year=2000) + + # Check that the unnamed column present in the input was dropped, with no warning + assert "unnamed_0" not in result.columns + mock_logger.warning.assert_not_called() + + +@patch("pudl.extract.phmsagas.logger") +def test_process_renamed_warn_unnamed_columns(mock_logger, extractor): + # Mock metadata methods + extractor._metadata.get_form.return_value = "some_form" + extractor._metadata.get_all_columns.return_value = ["col1", "col2"] + + # Create a DataFrame with unnamed columns + data = {"col1": [1, 2], "col2": [3, 4], "unnamed_0": [5, 6]} + df = pd.DataFrame(data) + + # Call the method for a page/year where unnamed columns are not expected + result = extractor.process_renamed(df, "some_page", year=2011) + + # Check that the unnamed column was not dropped but a warning was logged + assert "unnamed_0" in result.columns + mock_logger.warning.assert_called_once()