From c05c15d95c277420fbf1fc55b9411ff76f037119 Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Thu, 18 Jan 2024 16:09:36 -0500 Subject: [PATCH 1/5] beginning of column mapping for K --- src/pudl/package_data/phmsagas/file_map.csv | 1 + src/pudl/package_data/phmsagas/page_map.csv | 1 + src/pudl/package_data/phmsagas/page_part_map.csv | 1 + src/pudl/package_data/phmsagas/skipfooter.csv | 1 + src/pudl/package_data/phmsagas/skiprows.csv | 1 + 5 files changed, 5 insertions(+) diff --git a/src/pudl/package_data/phmsagas/file_map.csv b/src/pudl/package_data/phmsagas/file_map.csv index 0bcc4d672a..b0827d5d31 100644 --- a/src/pudl/package_data/phmsagas/file_map.csv +++ b/src/pudl/package_data/phmsagas/file_map.csv @@ -4,3 +4,4 @@ yearly_transmission_gathering_summary_by_commodity,annual_gas_transmission_gathe yearly_miles_of_transmission_pipe_by_nps,annual_gas_transmission_gathering_1990.xlsx,annual_gas_transmission_gathering_1991.xlsx,annual_gas_transmission_gathering_1992.xlsx,annual_gas_transmission_gathering_1993.xlsx,annual_gas_transmission_gathering_1994.xlsx,annual_gas_transmission_gathering_1995.xlsx,annual_gas_transmission_gathering_1996.xlsx,annual_gas_transmission_gathering_1997.xlsx,annual_gas_transmission_gathering_1998.xlsx,annual_gas_transmission_gathering_1999.xlsx,annual_gas_transmission_gathering_2000.xlsx,annual_gas_transmission_gathering_2001.xlsx,annual_gas_transmission_gathering_2002.xlsx,annual_gas_transmission_gathering_2003.xlsx,annual_gas_transmission_gathering_2004.xlsx,annual_gas_transmission_gathering_2005.xlsx,annual_gas_transmission_gathering_2006.xlsx,annual_gas_transmission_gathering_2007.xlsx,annual_gas_transmission_gathering_2008.xlsx,annual_gas_transmission_gathering_2009.xlsx,annual_gas_transmission_gathering_2010.xlsx,annual_gas_transmission_gathering_2011.xlsx,annual_gas_transmission_gathering_2012.xlsx,annual_gas_transmission_gathering_2013.xlsx,annual_gas_transmission_gathering_2014.xlsx,annual_gas_transmission_gathering_2015.xls
x,annual_gas_transmission_gathering_2016.xlsx,annual_gas_transmission_gathering_2017.xlsx,annual_gas_transmission_gathering_2018.xlsx,annual_gas_transmission_gathering_2019.xlsx,annual_gas_transmission_gathering_2020.xlsx,annual_gas_transmission_gathering_2021.xlsx,annual_gas_transmission_gathering_2022.xlsx yearly_miles_of_gathering_pipe_by_nps,annual_gas_transmission_gathering_1990.xlsx,annual_gas_transmission_gathering_1991.xlsx,annual_gas_transmission_gathering_1992.xlsx,annual_gas_transmission_gathering_1993.xlsx,annual_gas_transmission_gathering_1994.xlsx,annual_gas_transmission_gathering_1995.xlsx,annual_gas_transmission_gathering_1996.xlsx,annual_gas_transmission_gathering_1997.xlsx,annual_gas_transmission_gathering_1998.xlsx,annual_gas_transmission_gathering_1999.xlsx,annual_gas_transmission_gathering_2000.xlsx,annual_gas_transmission_gathering_2001.xlsx,annual_gas_transmission_gathering_2002.xlsx,annual_gas_transmission_gathering_2003.xlsx,annual_gas_transmission_gathering_2004.xlsx,annual_gas_transmission_gathering_2005.xlsx,annual_gas_transmission_gathering_2006.xlsx,annual_gas_transmission_gathering_2007.xlsx,annual_gas_transmission_gathering_2008.xlsx,annual_gas_transmission_gathering_2009.xlsx,annual_gas_transmission_gathering_2010.xlsx,annual_gas_transmission_gathering_2011.xlsx,annual_gas_transmission_gathering_2012.xlsx,annual_gas_transmission_gathering_2013.xlsx,annual_gas_transmission_gathering_2014.xlsx,annual_gas_transmission_gathering_2015.xlsx,annual_gas_transmission_gathering_2016.xlsx,annual_gas_transmission_gathering_2017.xlsx,annual_gas_transmission_gathering_2018.xlsx,annual_gas_transmission_gathering_2019.xlsx,annual_gas_transmission_gathering_2020.xlsx,annual_gas_transmission_gathering_2021.xlsx,annual_gas_transmission_gathering_2022.xlsx 
yearly_inspections_and_assessments,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,annual_gas_transmission_gathering_2010.xlsx,annual_gas_transmission_gathering_2011.xlsx,annual_gas_transmission_gathering_2012.xlsx,annual_gas_transmission_gathering_2013.xlsx,annual_gas_transmission_gathering_2014.xlsx,annual_gas_transmission_gathering_2015.xlsx,annual_gas_transmission_gathering_2016.xlsx,annual_gas_transmission_gathering_2017.xlsx,annual_gas_transmission_gathering_2018.xlsx,annual_gas_transmission_gathering_2019.xlsx,annual_gas_transmission_gathering_2020.xlsx,annual_gas_transmission_gathering_2021.xlsx,annual_gas_transmission_gathering_2022.xlsx +yearly_miles_of_transmission_pipe_by_smys,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,annual_gas_transmission_gathering_2010.xlsx,annual_gas_transmission_gathering_2011.xlsx,annual_gas_transmission_gathering_2012.xlsx,annual_gas_transmission_gathering_2013.xlsx,annual_gas_transmission_gathering_2014.xlsx,annual_gas_transmission_gathering_2015.xlsx,annual_gas_transmission_gathering_2016.xlsx,annual_gas_transmission_gathering_2017.xlsx,annual_gas_transmission_gathering_2018.xlsx,annual_gas_transmission_gathering_2019.xlsx,annual_gas_transmission_gathering_2020.xlsx,annual_gas_transmission_gathering_2021.xlsx,annual_gas_transmission_gathering_2022.xlsx diff --git a/src/pudl/package_data/phmsagas/page_map.csv b/src/pudl/package_data/phmsagas/page_map.csv index e5765f2fb8..0eb28564df 100644 --- a/src/pudl/package_data/phmsagas/page_map.csv +++ b/src/pudl/package_data/phmsagas/page_map.csv @@ -4,3 +4,4 @@ yearly_transmission_gathering_summary_by_commodity,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 yearly_miles_of_transmission_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2 yearly_miles_of_gathering_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3 
yearly_inspections_and_assessments,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1 +yearly_miles_of_transmission_pipe_by_smys,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,5,5,5,5,5,5,5,5,5,5,5,5,5 diff --git a/src/pudl/package_data/phmsagas/page_part_map.csv b/src/pudl/package_data/phmsagas/page_part_map.csv index d4b321a5b6..99b8c1c486 100644 --- a/src/pudl/package_data/phmsagas/page_part_map.csv +++ b/src/pudl/package_data/phmsagas/page_part_map.csv @@ -4,3 +4,4 @@ yearly_transmission_gathering_summary_by_commodity,gas_transmission_gathering yearly_miles_of_transmission_pipe_by_nps,gas_transmission_gathering yearly_miles_of_gathering_pipe_by_nps,gas_transmission_gathering yearly_inspections_and_assessments,gas_transmission_gathering +yearly_miles_of_transmission_pipe_by_smys,gas_transmission_gathering diff --git a/src/pudl/package_data/phmsagas/skipfooter.csv b/src/pudl/package_data/phmsagas/skipfooter.csv index 4086d12554..6583d55935 100644 --- a/src/pudl/package_data/phmsagas/skipfooter.csv +++ b/src/pudl/package_data/phmsagas/skipfooter.csv @@ -4,3 +4,4 @@ yearly_transmission_gathering_summary_by_commodity,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 yearly_miles_of_transmission_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 yearly_miles_of_gathering_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 yearly_inspections_and_assessments,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0 +yearly_miles_of_transmission_pipe_by_smys,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/src/pudl/package_data/phmsagas/skiprows.csv b/src/pudl/package_data/phmsagas/skiprows.csv index 080d7e0223..c1c724bf5f 100644 --- a/src/pudl/package_data/phmsagas/skiprows.csv +++ b/src/pudl/package_data/phmsagas/skiprows.csv @@ -4,3 +4,4 @@ 
yearly_transmission_gathering_summary_by_commodity,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 yearly_miles_of_transmission_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2 yearly_miles_of_gathering_pipe_by_nps,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2 yearly_inspections_and_assessments,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2,2,2,2,2,2,2,2,2,2,2,2 +yearly_miles_of_transmission_pipe_by_smys,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2,2,2,2,2,2,2,2,2,2,2,2 From 7e8c45270499b6f8e385d408efe18baa75370bb7 Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Fri, 19 Jan 2024 09:35:22 -0500 Subject: [PATCH 2/5] column map for part k --- src/pudl/extract/excel.py | 1 - ...rly_miles_of_transmission_pipe_by_smys.csv | 76 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv diff --git a/src/pudl/extract/excel.py b/src/pudl/extract/excel.py index 2ac14aba05..a0ff3ae983 100644 --- a/src/pudl/extract/excel.py +++ b/src/pudl/extract/excel.py @@ -200,7 +200,6 @@ def __init__(self, ds): def process_raw(self, df, page, **partition): """Transforms raw dataframe and rename columns.""" df = self.add_data_maturity(df, page, **partition) - self.cols_added.append("data_label") return df.rename(columns=self._metadata.get_column_map(page, **partition)) def add_data_maturity(self, df: pd.DataFrame, page, **partition) -> pd.DataFrame: diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv new file mode 100644 index 0000000000..9c98d14abc --- /dev/null +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv @@ -0,0 +1,76 @@ 
+year_index,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022 +data_date,,,,,,,,,,,,,,,,,,,,,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of,datafile_as_of +interstate_or_intrastate,,,,,,,,,,,,,,,,,,,,,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra,inter_intra +operator_id_phmsa,,,,,,,,,,,,,,,,,,,,,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +report_number,,,,,,,,,,,,,,,,,,,,,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number +report_year,,,,,,,,,,,,,,,,,,,,,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year +state,,,,,,,,,,,,,,,,,,,,,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name +supplemental_report_number,,,,,,,,,,,,,,,,,,,,,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number +operator_name_phmsa,,,,,,,,,,,,,,,,,,,,,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp 
+commodity_group,,,,,,,,,,,,,,,,,,,,,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity +pipe_onshore_steel_smys_20_to_29_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1,partk2029c1 +pipe_onshore_steel_smys_20_to_29_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2,partk2029c2 +pipe_onshore_steel_smys_20_to_29_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3,partk2029c3 +pipe_onshore_steel_smys_20_to_29_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4,partk2029c4 +pipe_onshore_steel_smys_20_to_29_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot,partk2029tot +pipe_onshore_steel_smys_0_to_19_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1,partk20lessc1 +pipe_onshore_steel_smys_0_to_19_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2,partk20lessc2 
+pipe_onshore_steel_smys_0_to_19_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3,partk20lessc3 +pipe_onshore_steel_smys_0_to_19_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4,partk20lessc4 +pipe_onshore_steel_smys_0_to_19_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot,partk20lesstot +pipe_onshore_steel_smys_30_to_40_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1,partk3040c1 +pipe_onshore_steel_smys_30_to_40_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2,partk3040c2 +pipe_onshore_steel_smys_30_to_40_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3,partk3040c3 +pipe_onshore_steel_smys_30_to_40_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4,partk3040c4 +pipe_onshore_steel_smys_30_to_40_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot,partk3040tot 
+pipe_onshore_steel_smys_41_to_50_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1,partk4150c1 +pipe_onshore_steel_smys_41_to_50_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2,partk4150c2 +pipe_onshore_steel_smys_41_to_50_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3,partk4150c3 +pipe_onshore_steel_smys_41_to_50_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4,partk4150c4 +pipe_onshore_steel_smys_41_to_50_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot,partk4150tot +pipe_onshore_steel_smys_51_to_60_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1,partk5160c1 +pipe_onshore_steel_smys_51_to_60_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2,partk5160c2 +pipe_onshore_steel_smys_51_to_60_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3,partk5160c3 
+pipe_onshore_steel_smys_51_to_60_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4,partk5160c4 +pipe_onshore_steel_smys_51_to_60_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot,partk5160tot +pipe_onshore_steel_smys_61_to_72_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1,partk6172c1 +pipe_onshore_steel_smys_61_to_72_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2,partk6172c2 +pipe_onshore_steel_smys_61_to_72_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3,partk6172c3 +pipe_onshore_steel_smys_61_to_72_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4,partk6172c4 +pipe_onshore_steel_smys_61_to_72_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot,partk6172tot +pipe_onshore_steel_smys_73_to_80_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1,partk7380c1 
+pipe_onshore_steel_smys_73_to_80_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2,partk7380c2 +pipe_onshore_steel_smys_73_to_80_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3,partk7380c3 +pipe_onshore_steel_smys_73_to_80_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4,partk7380c4 +pipe_onshore_steel_smys_73_to_80_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot,partk7380tot +pipe_onshore_steel_smys_81_to_100_class_1_miles,,,,,,,,,,,,,,,,,,,,,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1,partk80morec1 +pipe_onshore_steel_smys_81_to_100_class_2_miles,,,,,,,,,,,,,,,,,,,,,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2,partk80morec2 +pipe_onshore_steel_smys_81_to_100_class_3_miles,,,,,,,,,,,,,,,,,,,,,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3,partk80morec3 +pipe_onshore_steel_smys_81_to_100_class_4_miles,,,,,,,,,,,,,,,,,,,,,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4,partk80morec4 
+pipe_onshore_steel_smys_81_to_100_class_total_miles,,,,,,,,,,,,,,,,,,,,,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot,partk80moretot +pipe_total_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot,partkc1tot +pipe_total_total_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot +pipe_total_total_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot +pipe_total_total_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot +pipe_onshore_non_steel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1 +pipe_onshore_non_steel_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2 +pipe_onshore_non_steel_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3 
+pipe_onshore_non_steel_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4 +pipe_onshore_non_steel_smys_total_class_total_miles,,,,,,,,,,,,,,,,,,,,,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot +pipe_offshore_steel_smys_51_to_72_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172 +pipe_offshore_steel_smys_72_to_100_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more +pipe_offshore_steel_smys_0_to_49_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50 +pipe_offshore_non_steel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel +pipe_offshore_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal 
+pipe_offshore_steel_smys_unknown_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown +pipe_onshore_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot +pipe_onshore_total_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot,partkonc2tot +pipe_onshore_total_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot,partkonc3tot +pipe_onshore_total_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot,partkonc4tot +pipe_onshore_total_smys_total_class_total_miles,,,,,,,,,,,,,,,,,,,,,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal,partkontotal +pipe_total_total_smys_total_class_total_miles,,,,,,,,,,,,,,,,,,,,,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal,partktotal +pipe_onshore_steel_smys_unknown_class1_miles,,,,,,,,,,,,,,,,,,,,,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1,partkunknownc1 
+pipe_onshore_steel_smys_unknown_class2_miles,,,,,,,,,,,,,,,,,,,,,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2,partkunknownc2 +pipe_onshore_steel_smys_unknown_class3_miles,,,,,,,,,,,,,,,,,,,,,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3,partkunknownc3 +pipe_onshore_steel_smys_unknown_class4_miles,,,,,,,,,,,,,,,,,,,,,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4,partkunknownc4 +pipe_onshore_steel_smys_unknown_class_total_miles,,,,,,,,,,,,,,,,,,,,,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot,partkunknowntot From 8de2ee337de3abb38cca62e4cfe347270d69b067 Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Fri, 19 Jan 2024 09:46:36 -0500 Subject: [PATCH 3/5] add part K into the raw assets --- src/pudl/extract/phmsagas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pudl/extract/phmsagas.py b/src/pudl/extract/phmsagas.py index d6c6dd4f88..069c4297e1 100644 --- a/src/pudl/extract/phmsagas.py +++ b/src/pudl/extract/phmsagas.py @@ -56,6 +56,7 @@ def process_final_page(self, df, page): "raw_phmsagas__yearly_miles_of_gathering_pipe_by_nps", "raw_phmsagas__yearly_miles_of_transmission_pipe_by_nps", "raw_phmsagas__yearly_inspections_and_assessments", + "raw_phmsagas__yearly_miles_of_transmission_pipe_by_smys", ) phmsagas_raw_dfs = excel.raw_df_factory(Extractor, name="phmsagas") From ab3217c3909f4a50c144c0b7e6fbee340ff6771a Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Fri, 19 Jan 2024 12:25:16 -0500 Subject: [PATCH 
4/5] make extractor logs more informative & change state to report_state to be more consistent --- devtools/debug-column-mapping.ipynb | 101 +++++++++++++++--- src/pudl/extract/excel.py | 8 +- src/pudl/extract/phmsagas.py | 30 +++--- ...rly_miles_of_transmission_pipe_by_smys.csv | 2 +- 4 files changed, 108 insertions(+), 33 deletions(-) diff --git a/devtools/debug-column-mapping.ipynb b/devtools/debug-column-mapping.ipynb index 75e091f8d7..3d9155d7fa 100644 --- a/devtools/debug-column-mapping.ipynb +++ b/devtools/debug-column-mapping.ipynb @@ -18,28 +18,37 @@ "First, select the raw dataset you're going to be mapping and locate all relevant file directories." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import pudl\n", - "from pudl.workspace.datastore import ZenodoDoiSettings\n", "import os\n", "import importlib\n", "from pathlib import Path\n", "import pandas as pd\n", "from zipfile import ZipFile\n", - "import logging\n", "import sys\n", + "import types\n", + "\n", + "import pudl\n", + "from pudl.workspace.datastore import ZenodoDoiSettings\n", + "from pudl.extract.phmsagas import Extractor\n", "\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.INFO)\n", - "handler = logging.StreamHandler(stream=sys.stdout)\n", - "formatter = logging.Formatter('%(message)s')\n", - "handler.setFormatter(formatter)\n", - "logger.handlers = [handler]" + "logger = pudl.logging_helpers.get_logger(\"__name__\")" ] }, { @@ -50,8 +59,19 @@ "source": [ "dataset = \"phmsagas\"\n", "doi_path = getattr(ZenodoDoiSettings(), dataset).replace(\"/\", \"-\")\n", - "data_path = os.path.join(os.getenv(\"PUDL_INPUT\"),dataset,doi_path) # Get path to raw data\n", - "map_path = os.path.join(Path(pudl.package_data.__file__).parents[0], dataset) # Get path to 
mapping CSVs" + "pudl_paths = pudl.workspace.setup.PudlPaths()\n", + "data_path = os.path.join(pudl_paths.pudl_input,dataset,doi_path) # Get path to raw data\n", + "map_path = os.path.join(Path(pudl.package_data.__file__).parents[0], dataset) # Get path to mapping CSVs\n", + "ds = pudl.workspace.datastore.Datastore(pudl_paths.pudl_input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## File Check" ] }, { @@ -107,6 +127,13 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Column Map Check" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -139,7 +166,7 @@ " continue\n", " return match[0]\n", "\n", - "ds = pudl.workspace.datastore.Datastore()\n", + "\n", "for page in file_map.index:\n", " if not table_subset or page in table_subset:\n", " column_maps = pd.read_csv(\n", @@ -178,11 +205,55 @@ "source": [ "Go back and fix any incorrectly labelled columns. Then run the cell above again, until all columns are correctly labelled." 
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extractor Check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## SETTINGS FOR EXTRACTOR\n", + "extractor_phmsagas = Extractor(ds=ds)\n", + "\n", + "# recommend changing the loglevel here to warning to only get the baddies\n", + "pudl.logging_helpers.configure_root_logger(loglevel=\"WARNING\")\n", + "\n", + "# IF you wanna restrict the years\n", + "working_years = list(range(1990,2023))\n", + "# IF you want to restrict the pages to extract here is a lil way to do that\n", + "# you give pages_you_want_to_extract a lil of pages you want to extract\n", + "# if pages_you_want_to_extract if nothing, you'll get the standard pages\n", + "pages_you_want_to_extract = []\n", + "all_pages = extractor_phmsagas._metadata.get_all_pages()\n", + "def _new_page_getter(self):\n", + " if pages_you_want_to_extract:\n", + " return pages_you_want_to_extract\n", + " else:\n", + " return all_pages\n", + "extractor_phmsagas._metadata.get_all_pages = types.MethodType(_new_page_getter, extractor_phmsagas)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## RUN THE EXTRACTOR\n", + "extracted_dfs = extractor_phmsagas.extract(year=working_years)" + ] } ], "metadata": { "kernelspec": { - "display_name": "pudl-dev", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -196,9 +267,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.11.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/src/pudl/extract/excel.py b/src/pudl/extract/excel.py index cb59769533..dfa8c7ca4a 100644 --- a/src/pudl/extract/excel.py +++ b/src/pudl/extract/excel.py @@ -320,13 +320,13 @@ def extract(self, **partitions): missing_raw_cols = set(expected_cols).difference(newdata.columns) 
if extra_raw_cols: logger.warning( - f"Extra columns found in extracted table of " - f"{page}/{str_part}: {extra_raw_cols}" + f"{page}/{str_part}:Extra columns found in extracted table:" + f"\n{extra_raw_cols}" ) if missing_raw_cols: logger.warning( - "Expected columns not found in extracted table of " - f"{page}/{str_part}: {missing_raw_cols}" + f"{page}/{str_part}: Expected columns not found in extracted table:" + f"\n{missing_raw_cols}" ) df = pd.concat(dfs, sort=True, ignore_index=True) diff --git a/src/pudl/extract/phmsagas.py b/src/pudl/extract/phmsagas.py index 069c4297e1..374cb84e64 100644 --- a/src/pudl/extract/phmsagas.py +++ b/src/pudl/extract/phmsagas.py @@ -4,6 +4,7 @@ """ +import pandas as pd from dagster import AssetOut, Output, multi_asset import pudl.logging_helpers @@ -25,7 +26,7 @@ def __init__(self, *args, **kwargs): self.cols_added = [] super().__init__(*args, **kwargs) - def process_final_page(self, df, page): + def process_renamed(self, newdata: pd.DataFrame, page: str, **partition): """Drop columns that get mapped to other assets. Older years of PHMSA data have one Excel tab in the raw data, while newer data @@ -35,18 +36,21 @@ def process_final_page(self, df, page): older years, filter by the list of columns specified for the page, with a warning. """ - to_drop = [ - c - for c in df.columns - if c not in self._metadata.get_all_columns(page) - and c not in self.cols_added - ] - if to_drop: - logger.warning( - f"Dropping columns {to_drop} that are not mapped to this asset." 
- ) - df = df.drop(columns=to_drop, errors="ignore") - return df + if int(partition["year"]) < 2010: + to_drop = [ + c + for c in newdata.columns + if c not in self._metadata.get_all_columns(page) + and c not in self.cols_added + ] + str_part = str(list(partition.values())[0]) + if to_drop: + logger.info( + f"{page}/{str_part}: Dropping columns that are not mapped to this asset:" + f"\n{to_drop}" + ) + newdata = newdata.drop(columns=to_drop, errors="ignore") + return newdata # TODO (bendnorman): Add this information to the metadata diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv index 9c98d14abc..1d4ceef4b6 100644 --- a/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv @@ -4,7 +4,7 @@ interstate_or_intrastate,,,,,,,,,,,,,,,,,,,,,inter_intra,inter_intra,inter_intra operator_id_phmsa,,,,,,,,,,,,,,,,,,,,,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id report_number,,,,,,,,,,,,,,,,,,,,,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number,report_number report_year,,,,,,,,,,,,,,,,,,,,,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year,report_year -state,,,,,,,,,,,,,,,,,,,,,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name +report_state,,,,,,,,,,,,,,,,,,,,,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name,state_name 
supplemental_report_number,,,,,,,,,,,,,,,,,,,,,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number,supplemental_number operator_name_phmsa,,,,,,,,,,,,,,,,,,,,,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp,parta2nameofcomp commodity_group,,,,,,,,,,,,,,,,,,,,,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity,parta5commodity From 7ba0d0db543b7e3cf5e683c66fdd5a9aa28626e9 Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Fri, 19 Jan 2024 13:19:26 -0500 Subject: [PATCH 5/5] make drop cols tied only to transmission form --- src/pudl/extract/excel.py | 4 ++++ src/pudl/extract/phmsagas.py | 4 +++- .../yearly_miles_of_transmission_pipe_by_smys.csv | 12 ++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/pudl/extract/excel.py b/src/pudl/extract/excel.py index dfa8c7ca4a..a0674e3a70 100644 --- a/src/pudl/extract/excel.py +++ b/src/pudl/extract/excel.py @@ -125,6 +125,10 @@ def get_all_pages(self): """Returns list of all known pages.""" return sorted(self._column_map.keys()) + def get_form(self, page) -> str: + """Returns the form name for a given page.""" + return self._page_part_map.loc[page, "form"] + @staticmethod def _load_csv(package, filename): """Load metadata from a filename that is found in a package.""" diff --git a/src/pudl/extract/phmsagas.py b/src/pudl/extract/phmsagas.py index 374cb84e64..364bae7afe 100644 --- a/src/pudl/extract/phmsagas.py +++ b/src/pudl/extract/phmsagas.py @@ -36,7 +36,9 @@ def process_renamed(self, newdata: pd.DataFrame, page: str, 
**partition): older years, filter by the list of columns specified for the page, with a warning. """ - if int(partition["year"]) < 2010: + if (int(partition["year"]) < 2010) and ( + self._metadata.get_form(page) == "gas_transmission_gathering" + ): to_drop = [ c for c in newdata.columns diff --git a/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv index 1d4ceef4b6..31f0157d8f 100644 --- a/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv +++ b/src/pudl/package_data/phmsagas/column_maps/yearly_miles_of_transmission_pipe_by_smys.csv @@ -52,15 +52,15 @@ pipe_total_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkc1tot,partkc1 pipe_total_total_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot,partkc2tot pipe_total_total_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot,partkc3tot pipe_total_total_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot,partkc4tot -pipe_onshore_non_steel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1 -pipe_onshore_non_steel_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2 
-pipe_onshore_non_steel_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3 -pipe_onshore_non_steel_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4 -pipe_onshore_non_steel_smys_total_class_total_miles,,,,,,,,,,,,,,,,,,,,,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot +pipe_onshore_nonsteel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1,partknonsteelc1 +pipe_onshore_nonsteel_smys_total_class_2_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2,partknonsteelc2 +pipe_onshore_nonsteel_smys_total_class_3_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3,partknonsteelc3 +pipe_onshore_nonsteel_smys_total_class_4_miles,,,,,,,,,,,,,,,,,,,,,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4,partknonsteelc4 
+pipe_onshore_nonsteel_smys_total_class_total_miles,,,,,,,,,,,,,,,,,,,,,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot,partknonsteeltot pipe_offshore_steel_smys_51_to_72_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172,partkoff5172 pipe_offshore_steel_smys_72_to_100_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more,partkoff72more pipe_offshore_steel_smys_0_to_49_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50,partkoffless50 -pipe_offshore_non_steel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel +pipe_offshore_nonsteel_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel,partkoffnonsteel pipe_offshore_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal,partkofftotal 
pipe_offshore_steel_smys_unknown_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown,partkoffunknown pipe_onshore_total_smys_total_class_1_miles,,,,,,,,,,,,,,,,,,,,,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot,partkonc1tot