From 799282a6390c60ec6cb9374d9826a2b4fc72418d Mon Sep 17 00:00:00 2001 From: bendnorman Date: Tue, 3 Oct 2023 16:09:52 +0200 Subject: [PATCH 1/2] Rename a few ferc1 core assets to be plural entities and rename exploded assets --- docs/release_notes.rst | 5 +- ...c_rename_core_ferc1_assets_to_be_plural.py | 355 ++++++++++++++++++ src/pudl/analysis/classify_plants_ferc1.py | 2 +- src/pudl/extract/ferc1.py | 10 +- src/pudl/glue/ferc1_eia.py | 8 +- src/pudl/metadata/fields.py | 4 +- src/pudl/metadata/resources/ferc1.py | 10 +- src/pudl/output/ferc1.py | 43 ++- .../xbrl_calculation_component_fixes.csv | 68 ++-- src/pudl/transform/ferc1.py | 50 +-- src/pudl/transform/params/__init__.py | 2 +- src/pudl/transform/params/ferc1.py | 12 +- src/pudl/validate.py | 10 +- test/integration/etl_test.py | 2 +- test/validate/ferc1_test.py | 2 +- test/validate/fuel_ferc1_test.py | 2 +- test/validate/plants_steam_ferc1_test.py | 12 +- 17 files changed, 478 insertions(+), 119 deletions(-) create mode 100644 migrations/versions/ab0540a6484c_rename_core_ferc1_assets_to_be_plural.py diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 7de89275a5..df63badce8 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -97,7 +97,8 @@ Data Coverage :pr:`2134`. * :ref:`core_ferc1__yearly_depreciation_amortization_summary`, see issue :issue:`1816` & PR :pr:`2143`. - * :ref:`core_ferc1__yearly_income_statement`, see issue :issue:`1813` & PR :pr:`2147`. + * :ref:`core_ferc1__yearly_income_statements`, see issue :issue:`1813` & PR + :pr:`2147`. * :ref:`core_ferc1__yearly_electric_plant_depreciation_changes` see issue :issue:`1808` & :pr:`2119`. * :ref:`core_ferc1__yearly_electric_plant_depreciation_functional` see issue @@ -223,7 +224,7 @@ Data Cleaning * Fixed column naming issues in the :ref:`core_ferc1__yearly_electric_operating_revenues` table. * Made minor calculation fixes in the metadata for - :ref:`core_ferc1__yearly_income_statement`, + :ref:`core_ferc1__yearly_income_statements`, :ref:`core_ferc1__yearly_utility_plant_summary`, :ref:`core_ferc1__yearly_electric_operating_revenues`, :ref:`core_ferc1__yearly_balance_sheet_assets`, diff --git a/migrations/versions/ab0540a6484c_rename_core_ferc1_assets_to_be_plural.py b/migrations/versions/ab0540a6484c_rename_core_ferc1_assets_to_be_plural.py new file mode 100644 index 0000000000..60835c8194 --- /dev/null +++ b/migrations/versions/ab0540a6484c_rename_core_ferc1_assets_to_be_plural.py @@ -0,0 +1,355 @@ +"""Rename core ferc1 assets to be plural + +Revision ID: ab0540a6484c +Revises: 6c66da98c4e1 +Create Date: 2023-10-03 14:09:07.104546 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'ab0540a6484c' +down_revision = '6c66da98c4e1' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('core_ferc1__yearly_income_statements', + sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), + sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), + sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), + sa.Column('utility_type', sa.Text(), nullable=False, comment='Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.'), + sa.Column('income_type', sa.Text(), nullable=False, comment='Type of income reported in income_statement_ferc1 table.'), + sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), + sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), + sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), + sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['core_pudl__assn_utilities_ferc1.utility_id_ferc1'], name=op.f('fk_core_ferc1__yearly_income_statements_utility_id_ferc1_core_pudl__assn_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name=op.f('pk_core_ferc1__yearly_income_statements')) + ) + op.create_table('core_ferc1__yearly_hydro_plants', + sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), + sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), + sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), + sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), + sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), + sa.Column('plant_type', sa.Enum('run_of_river', 'hydro', 'run_of_river_with_storage', 'storage', 'na_category'), nullable=True), + sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), + sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), + sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), + sa.Column('plant_hours_connected_while_generating', sa.Float(), nullable=True, comment='Hours the plant was connected to load while generating in the report year.'), + sa.Column('net_capacity_favorable_conditions_mw', sa.Float(), nullable=True, comment='Net plant capability under the most favorable operating conditions, in megawatts.'), + sa.Column('net_capacity_adverse_conditions_mw', sa.Float(), nullable=True, comment='Net plant capability under the least favorable operating conditions, in megawatts.'), + sa.Column('avg_num_employees', sa.Float(), nullable=True), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capex_land', sa.Float(), nullable=True, comment='Cost of plant: land and land rights (USD).'), + sa.Column('capex_structures', sa.Float(), nullable=True, comment='Cost of plant: structures and improvements (USD).'), + sa.Column('capex_facilities', sa.Float(), nullable=True, comment='Cost of plant: reservoirs, dams, and waterways (USD).'), + sa.Column('capex_equipment', sa.Float(), nullable=True, comment='Cost of plant: equipment (USD).'), + sa.Column('capex_roads', sa.Float(), nullable=True, comment='Cost of plant: roads, railroads, and bridges (USD).'), + sa.Column('asset_retirement_cost', sa.Float(), nullable=True, comment='Asset retirement cost (USD).'), + sa.Column('capex_total', sa.Float(), nullable=True, comment='Total cost of plant (USD).'), + sa.Column('capex_per_mw', sa.Float(), nullable=True, comment='Cost of plant per megawatt of installed (nameplate) capacity. Nominal USD.'), + sa.Column('opex_operations', sa.Float(), nullable=True, comment='Production expenses: operations, supervision, and engineering (USD).'), + sa.Column('opex_water_for_power', sa.Float(), nullable=True, comment='Production expenses: water for power (USD).'), + sa.Column('opex_hydraulic', sa.Float(), nullable=True, comment='Production expenses: hydraulic expenses (USD).'), + sa.Column('opex_electric', sa.Float(), nullable=True, comment='Production expenses: electric expenses (USD).'), + sa.Column('opex_generation_misc', sa.Float(), nullable=True, comment='Production expenses: miscellaneous power generation expenses (USD).'), + sa.Column('opex_rents', sa.Float(), nullable=True, comment='Production expenses: rents (USD).'), + sa.Column('opex_engineering', sa.Float(), nullable=True, comment='Production expenses: maintenance, supervision, and engineering (USD).'), + sa.Column('opex_structures', sa.Float(), nullable=True, comment='Production expenses: maintenance of structures (USD).'), + sa.Column('opex_dams', sa.Float(), nullable=True, comment='Production expenses: maintenance of reservoirs, dams, and waterways (USD).'), + sa.Column('opex_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of electric plant (USD).'), + sa.Column('opex_misc_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of miscellaneous hydraulic plant (USD).'), + sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), + sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name=op.f('fk_core_ferc1__yearly_hydro_plants_utility_id_ferc1_core_pudl__assn_plants_ferc1')) + ) + op.create_table('core_ferc1__yearly_pumped_storage_plants', + sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), + sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), + sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), + sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), + sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), + sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), + sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), + sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), + sa.Column('plant_hours_connected_while_generating', sa.Float(), nullable=True, comment='Hours the plant was connected to load while generating in the report year.'), + sa.Column('plant_capability_mw', sa.Float(), nullable=True, comment='Net plant capability in megawatts.'), + sa.Column('avg_num_employees', sa.Float(), nullable=True), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('energy_used_for_pumping_mwh', sa.Float(), nullable=True, comment='Energy used for pumping, in megawatt-hours.'), + sa.Column('net_load_mwh', sa.Float(), nullable=True, comment='Net output for load (net generation - energy used for pumping) in megawatt-hours.'), + sa.Column('capex_land', sa.Float(), nullable=True, comment='Cost of plant: land and land rights (USD).'), + sa.Column('capex_structures', sa.Float(), nullable=True, comment='Cost of plant: structures and improvements (USD).'), + sa.Column('capex_facilities', sa.Float(), nullable=True, comment='Cost of plant: reservoirs, dams, and waterways (USD).'), + sa.Column('capex_wheels_turbines_generators', sa.Float(), nullable=True, comment='Cost of plant: water wheels, turbines, and generators (USD).'), + sa.Column('capex_equipment_electric', sa.Float(), nullable=True, comment='Cost of plant: accessory electric equipment (USD).'), + sa.Column('capex_equipment_misc', sa.Float(), nullable=True, comment='Cost of plant: miscellaneous power plant equipment (USD).'), + sa.Column('capex_roads', sa.Float(), nullable=True, comment='Cost of plant: roads, railroads, and bridges (USD).'), + sa.Column('asset_retirement_cost', sa.Float(), nullable=True, comment='Asset retirement cost (USD).'), + sa.Column('capex_total', sa.Float(), nullable=True, comment='Total cost of plant (USD).'), + sa.Column('capex_per_mw', sa.Float(), nullable=True, comment='Cost of plant per megawatt of installed (nameplate) capacity. Nominal USD.'), + sa.Column('opex_operations', sa.Float(), nullable=True, comment='Production expenses: operations, supervision, and engineering (USD).'), + sa.Column('opex_water_for_power', sa.Float(), nullable=True, comment='Production expenses: water for power (USD).'), + sa.Column('opex_pumped_storage', sa.Float(), nullable=True, comment='Production expenses: pumped storage (USD).'), + sa.Column('opex_electric', sa.Float(), nullable=True, comment='Production expenses: electric expenses (USD).'), + sa.Column('opex_generation_misc', sa.Float(), nullable=True, comment='Production expenses: miscellaneous power generation expenses (USD).'), + sa.Column('opex_rents', sa.Float(), nullable=True, comment='Production expenses: rents (USD).'), + sa.Column('opex_engineering', sa.Float(), nullable=True, comment='Production expenses: maintenance, supervision, and engineering (USD).'), + sa.Column('opex_structures', sa.Float(), nullable=True, comment='Production expenses: maintenance of structures (USD).'), + sa.Column('opex_dams', sa.Float(), nullable=True, comment='Production expenses: maintenance of reservoirs, dams, and waterways (USD).'), + sa.Column('opex_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of electric plant (USD).'), + sa.Column('opex_misc_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of miscellaneous hydraulic plant (USD).'), + sa.Column('opex_production_before_pumping', sa.Float(), nullable=True, comment='Total production expenses before pumping (USD).'), + sa.Column('opex_pumping', sa.Float(), nullable=True, comment='Production expenses: We are here to PUMP YOU UP! (USD).'), + sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), + sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name=op.f('fk_core_ferc1__yearly_pumped_storage_plants_utility_id_ferc1_core_pudl__assn_plants_ferc1')) + ) + op.create_table('core_ferc1__yearly_small_plants', + sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), + sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), + sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), + sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), + sa.Column('plant_type', sa.Text(), nullable=True), + sa.Column('license_id_ferc1', sa.Integer(), nullable=True, comment='FERC issued operating license ID for the facility, if available. This value is extracted from the original plant name where possible.'), + sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capex_total', sa.Float(), nullable=True, comment='Total cost of plant (USD).'), + sa.Column('capex_per_mw', sa.Float(), nullable=True, comment='Cost of plant per megawatt of installed (nameplate) capacity. Nominal USD.'), + sa.Column('opex_operations', sa.Float(), nullable=True, comment='Production expenses: operations, supervision, and engineering (USD).'), + sa.Column('opex_fuel', sa.Float(), nullable=True, comment='Production expenses: fuel (USD).'), + sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Production expenses: Maintenance (USD).'), + sa.Column('fuel_type', sa.Text(), nullable=True), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name=op.f('fk_core_ferc1__yearly_small_plants_utility_id_ferc1_core_pudl__assn_plants_ferc1')) + ) + op.create_table('core_ferc1__yearly_steam_plants', + sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), + sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), + sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), + sa.Column('plant_id_ferc1', sa.Integer(), nullable=True, comment='Algorithmically assigned PUDL FERC Plant ID. WARNING: NOT STABLE BETWEEN PUDL DB INITIALIZATIONS.'), + sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), + sa.Column('plant_type', sa.Enum('steam', 'internal_combustion', 'nuclear', 'combined_cycle', 'solar_thermal', 'na_category', 'combustion_turbine', 'wind', 'photovoltaic', 'geothermal'), nullable=True), + sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), + sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), + sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), + sa.Column('plant_hours_connected_while_generating', sa.Float(), nullable=True, comment='Hours the plant was connected to load while generating in the report year.'), + sa.Column('plant_capability_mw', sa.Float(), nullable=True, comment='Net plant capability in megawatts.'), + sa.Column('water_limited_capacity_mw', sa.Float(), nullable=True, comment='Plant capacity in MW when limited by condenser water.'), + sa.Column('not_water_limited_capacity_mw', sa.Float(), nullable=True, comment='Plant capacity in MW when not limited by condenser water.'), + sa.Column('avg_num_employees', sa.Float(), nullable=True), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capex_land', sa.Float(), nullable=True, comment='Cost of plant: land and land rights (USD).'), + sa.Column('capex_structures', sa.Float(), nullable=True, comment='Cost of plant: structures and improvements (USD).'), + sa.Column('capex_equipment', sa.Float(), nullable=True, comment='Cost of plant: equipment (USD).'), + sa.Column('capex_total', sa.Float(), nullable=True, comment='Total cost of plant (USD).'), + sa.Column('capex_per_mw', sa.Float(), nullable=True, comment='Cost of plant per megawatt of installed (nameplate) capacity. Nominal USD.'), + sa.Column('opex_operations', sa.Float(), nullable=True, comment='Production expenses: operations, supervision, and engineering (USD).'), + sa.Column('opex_fuel', sa.Float(), nullable=True, comment='Production expenses: fuel (USD).'), + sa.Column('opex_coolants', sa.Float(), nullable=True, comment='Cost of coolants and water (nuclear plants only)'), + sa.Column('opex_steam', sa.Float(), nullable=True, comment='Steam expenses.'), + sa.Column('opex_steam_other', sa.Float(), nullable=True, comment='Steam from other sources.'), + sa.Column('opex_transfer', sa.Float(), nullable=True, comment='Steam transferred (Credit).'), + sa.Column('opex_electric', sa.Float(), nullable=True, comment='Production expenses: electric expenses (USD).'), + sa.Column('opex_misc_power', sa.Float(), nullable=True, comment='Miscellaneous steam (or nuclear) expenses.'), + sa.Column('opex_rents', sa.Float(), nullable=True, comment='Production expenses: rents (USD).'), + sa.Column('opex_allowances', sa.Float(), nullable=True, comment='Allowances.'), + sa.Column('opex_engineering', sa.Float(), nullable=True, comment='Production expenses: maintenance, supervision, and engineering (USD).'), + sa.Column('opex_structures', sa.Float(), nullable=True, comment='Production expenses: maintenance of structures (USD).'), + sa.Column('opex_boiler', sa.Float(), nullable=True, comment='Maintenance of boiler (or reactor) plant.'), + sa.Column('opex_plants', sa.Float(), nullable=True, comment='Maintenance of electrical plant.'), + sa.Column('opex_misc_steam', sa.Float(), nullable=True, comment='Maintenance of miscellaneous steam (or nuclear) plant.'), + sa.Column('opex_production_total', sa.Float(), nullable=True, comment='Total operating expenses.'), + sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), + sa.Column('asset_retirement_cost', sa.Float(), nullable=True, comment='Asset retirement cost (USD).'), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name=op.f('fk_core_ferc1__yearly_steam_plants_utility_id_ferc1_core_pudl__assn_plants_ferc1')) + ) + op.drop_table('core_ferc1__yearly_plants_pumped_storage') + op.drop_table('core_ferc1__yearly_plants_small') + op.drop_table('core_ferc1__yearly_plants_hydro') + op.drop_table('core_ferc1__yearly_income_statement') + op.drop_table('core_ferc1__yearly_plants_steam') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('core_ferc1__yearly_plants_steam', + sa.Column('record_id', sa.TEXT(), nullable=True), + sa.Column('utility_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('report_year', sa.INTEGER(), nullable=True), + sa.Column('plant_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('plant_name_ferc1', sa.TEXT(), nullable=True), + sa.Column('plant_type', sa.VARCHAR(length=19), nullable=True), + sa.Column('construction_type', sa.VARCHAR(length=12), nullable=True), + sa.Column('construction_year', sa.INTEGER(), nullable=True), + sa.Column('installation_year', sa.INTEGER(), nullable=True), + sa.Column('capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('peak_demand_mw', sa.FLOAT(), nullable=True), + sa.Column('plant_hours_connected_while_generating', sa.FLOAT(), nullable=True), + sa.Column('plant_capability_mw', sa.FLOAT(), nullable=True), + sa.Column('water_limited_capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('not_water_limited_capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('avg_num_employees', sa.FLOAT(), nullable=True), + sa.Column('net_generation_mwh', sa.FLOAT(), nullable=True), + sa.Column('capex_land', sa.FLOAT(), nullable=True), + sa.Column('capex_structures', sa.FLOAT(), nullable=True), + sa.Column('capex_equipment', sa.FLOAT(), nullable=True), + sa.Column('capex_total', sa.FLOAT(), nullable=True), + sa.Column('capex_per_mw', sa.FLOAT(), nullable=True), + sa.Column('opex_operations', sa.FLOAT(), nullable=True), + sa.Column('opex_fuel', sa.FLOAT(), nullable=True), + sa.Column('opex_coolants', sa.FLOAT(), nullable=True), + sa.Column('opex_steam', sa.FLOAT(), nullable=True), + sa.Column('opex_steam_other', sa.FLOAT(), nullable=True), + sa.Column('opex_transfer', sa.FLOAT(), nullable=True), + sa.Column('opex_electric', sa.FLOAT(), nullable=True), + sa.Column('opex_misc_power', sa.FLOAT(), nullable=True), + sa.Column('opex_rents', sa.FLOAT(), nullable=True), + sa.Column('opex_allowances', sa.FLOAT(), nullable=True), + sa.Column('opex_engineering', sa.FLOAT(), nullable=True), + sa.Column('opex_structures', sa.FLOAT(), nullable=True), + sa.Column('opex_boiler', sa.FLOAT(), nullable=True), + sa.Column('opex_plants', sa.FLOAT(), nullable=True), + sa.Column('opex_misc_steam', sa.FLOAT(), nullable=True), + sa.Column('opex_production_total', sa.FLOAT(), nullable=True), + sa.Column('opex_per_mwh', sa.FLOAT(), nullable=True), + sa.Column('asset_retirement_cost', sa.FLOAT(), nullable=True), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name='fk_core_ferc1__yearly_plants_steam_utility_id_ferc1_core_pudl__assn_plants_ferc1') + ) + op.create_table('core_ferc1__yearly_income_statement', + sa.Column('record_id', sa.TEXT(), nullable=True), + sa.Column('utility_id_ferc1', sa.INTEGER(), nullable=False), + sa.Column('report_year', sa.INTEGER(), nullable=False), + sa.Column('utility_type', sa.TEXT(), nullable=False), + sa.Column('income_type', sa.TEXT(), nullable=False), + sa.Column('dollar_value', sa.FLOAT(), nullable=True), + sa.Column('balance', sa.TEXT(), nullable=True), + sa.Column('ferc_account', sa.TEXT(), nullable=True), + sa.Column('row_type_xbrl', sa.VARCHAR(length=16), nullable=True), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['core_pudl__assn_utilities_ferc1.utility_id_ferc1'], name='fk_core_ferc1__yearly_income_statement_utility_id_ferc1_core_pudl__assn_utilities_ferc1'), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name='pk_core_ferc1__yearly_income_statement') + ) + op.create_table('core_ferc1__yearly_plants_hydro', + sa.Column('record_id', sa.TEXT(), nullable=True), + sa.Column('utility_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('report_year', sa.INTEGER(), nullable=True), + sa.Column('plant_name_ferc1', sa.TEXT(), nullable=True), + sa.Column('project_num', sa.INTEGER(), nullable=True), + sa.Column('plant_type', sa.VARCHAR(length=25), nullable=True), + sa.Column('construction_type', sa.VARCHAR(length=12), nullable=True), + sa.Column('construction_year', sa.INTEGER(), nullable=True), + sa.Column('installation_year', sa.INTEGER(), nullable=True), + sa.Column('capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('peak_demand_mw', sa.FLOAT(), nullable=True), + sa.Column('plant_hours_connected_while_generating', sa.FLOAT(), nullable=True), + sa.Column('net_capacity_favorable_conditions_mw', sa.FLOAT(), nullable=True), + sa.Column('net_capacity_adverse_conditions_mw', sa.FLOAT(), nullable=True), + sa.Column('avg_num_employees', sa.FLOAT(), nullable=True), + sa.Column('net_generation_mwh', sa.FLOAT(), nullable=True), + sa.Column('capex_land', sa.FLOAT(), nullable=True), + sa.Column('capex_structures', sa.FLOAT(), nullable=True), + sa.Column('capex_facilities', sa.FLOAT(), nullable=True), + sa.Column('capex_equipment', sa.FLOAT(), nullable=True), + sa.Column('capex_roads', sa.FLOAT(), nullable=True), + sa.Column('asset_retirement_cost', sa.FLOAT(), nullable=True), + sa.Column('capex_total', sa.FLOAT(), nullable=True), + sa.Column('capex_per_mw', sa.FLOAT(), nullable=True), + sa.Column('opex_operations', sa.FLOAT(), nullable=True), + sa.Column('opex_water_for_power', sa.FLOAT(), nullable=True), + sa.Column('opex_hydraulic', sa.FLOAT(), nullable=True), + sa.Column('opex_electric', sa.FLOAT(), nullable=True), + sa.Column('opex_generation_misc', sa.FLOAT(), nullable=True), + sa.Column('opex_rents', sa.FLOAT(), nullable=True), + sa.Column('opex_engineering', sa.FLOAT(), nullable=True), + sa.Column('opex_structures', sa.FLOAT(), nullable=True), + sa.Column('opex_dams', sa.FLOAT(), nullable=True), + sa.Column('opex_plant', sa.FLOAT(), nullable=True), + sa.Column('opex_misc_plant', sa.FLOAT(), nullable=True), + sa.Column('opex_total', sa.FLOAT(), nullable=True), + sa.Column('opex_per_mwh', sa.FLOAT(), nullable=True), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name='fk_core_ferc1__yearly_plants_hydro_utility_id_ferc1_core_pudl__assn_plants_ferc1') + ) + op.create_table('core_ferc1__yearly_plants_small', + sa.Column('record_id', sa.TEXT(), nullable=True), + sa.Column('utility_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('report_year', sa.INTEGER(), nullable=True), + sa.Column('plant_name_ferc1', sa.TEXT(), nullable=True), + sa.Column('plant_type', sa.TEXT(), nullable=True), + sa.Column('license_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('construction_year', sa.INTEGER(), nullable=True), + sa.Column('capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('peak_demand_mw', sa.FLOAT(), nullable=True), + sa.Column('net_generation_mwh', sa.FLOAT(), nullable=True), + sa.Column('capex_total', sa.FLOAT(), nullable=True), + sa.Column('capex_per_mw', sa.FLOAT(), nullable=True), + sa.Column('opex_operations', sa.FLOAT(), nullable=True), + sa.Column('opex_fuel', sa.FLOAT(), nullable=True), + sa.Column('opex_maintenance', sa.FLOAT(), nullable=True), + sa.Column('fuel_type', sa.TEXT(), nullable=True), + sa.Column('fuel_cost_per_mmbtu', sa.FLOAT(), nullable=True), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name='fk_core_ferc1__yearly_plants_small_utility_id_ferc1_core_pudl__assn_plants_ferc1') + ) + op.create_table('core_ferc1__yearly_plants_pumped_storage', + sa.Column('record_id', sa.TEXT(), nullable=True), + sa.Column('utility_id_ferc1', sa.INTEGER(), nullable=True), + sa.Column('report_year', sa.INTEGER(), nullable=True), + sa.Column('plant_name_ferc1', sa.TEXT(), nullable=True), + sa.Column('project_num', sa.INTEGER(), nullable=True), + sa.Column('construction_type', sa.VARCHAR(length=12), nullable=True), + sa.Column('construction_year', sa.INTEGER(), nullable=True), + sa.Column('installation_year', sa.INTEGER(), nullable=True), + sa.Column('capacity_mw', sa.FLOAT(), nullable=True), + sa.Column('peak_demand_mw', sa.FLOAT(), nullable=True), + sa.Column('plant_hours_connected_while_generating', sa.FLOAT(), nullable=True), + sa.Column('plant_capability_mw', sa.FLOAT(), nullable=True), + sa.Column('avg_num_employees', sa.FLOAT(), nullable=True), + sa.Column('net_generation_mwh', sa.FLOAT(), nullable=True), + sa.Column('energy_used_for_pumping_mwh', sa.FLOAT(), nullable=True), + sa.Column('net_load_mwh', sa.FLOAT(), nullable=True), + sa.Column('capex_land', sa.FLOAT(), nullable=True), + sa.Column('capex_structures', sa.FLOAT(), nullable=True), + sa.Column('capex_facilities', sa.FLOAT(), nullable=True), + sa.Column('capex_wheels_turbines_generators', sa.FLOAT(), nullable=True), + sa.Column('capex_equipment_electric', sa.FLOAT(), nullable=True), + sa.Column('capex_equipment_misc', sa.FLOAT(), nullable=True), + sa.Column('capex_roads', sa.FLOAT(), nullable=True), + sa.Column('asset_retirement_cost', sa.FLOAT(), nullable=True), + sa.Column('capex_total', sa.FLOAT(), nullable=True), + sa.Column('capex_per_mw', sa.FLOAT(), nullable=True), + sa.Column('opex_operations', sa.FLOAT(), nullable=True), + sa.Column('opex_water_for_power', sa.FLOAT(), nullable=True), + sa.Column('opex_pumped_storage', sa.FLOAT(), nullable=True), + sa.Column('opex_electric', sa.FLOAT(), nullable=True), + sa.Column('opex_generation_misc', sa.FLOAT(), nullable=True), + sa.Column('opex_rents', sa.FLOAT(), nullable=True), + sa.Column('opex_engineering', sa.FLOAT(), nullable=True), + sa.Column('opex_structures', sa.FLOAT(), nullable=True), + sa.Column('opex_dams', sa.FLOAT(), nullable=True), + sa.Column('opex_plant', sa.FLOAT(), nullable=True), + sa.Column('opex_misc_plant', sa.FLOAT(), nullable=True), + sa.Column('opex_production_before_pumping', sa.FLOAT(), nullable=True), + sa.Column('opex_pumping', sa.FLOAT(), nullable=True), + sa.Column('opex_total', sa.FLOAT(), nullable=True), + sa.Column('opex_per_mwh', sa.FLOAT(), nullable=True), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['core_pudl__assn_plants_ferc1.utility_id_ferc1', 'core_pudl__assn_plants_ferc1.plant_name_ferc1'], name='fk_core_ferc1__yearly_plants_pumped_storage_utility_id_ferc1_core_pudl__assn_plants_ferc1') + ) + op.drop_table('core_ferc1__yearly_steam_plants') + op.drop_table('core_ferc1__yearly_small_plants') + op.drop_table('core_ferc1__yearly_pumped_storage_plants') + op.drop_table('core_ferc1__yearly_hydro_plants') + op.drop_table('core_ferc1__yearly_income_statements') + # ### end Alembic commands ### diff --git a/src/pudl/analysis/classify_plants_ferc1.py b/src/pudl/analysis/classify_plants_ferc1.py index 51091892ae..4c4dc2300e 100644 --- a/src/pudl/analysis/classify_plants_ferc1.py +++ b/src/pudl/analysis/classify_plants_ferc1.py @@ -633,7 +633,7 @@ def fuel_by_plant_ferc1( Returns: DataFrame with a single record for each plant-year, including the columns - required to merge it with the core_ferc1__yearly_plants_steam table/DataFrame (report_year, + required to merge it with the core_ferc1__yearly_steam_plants table/DataFrame (report_year, utility_id_ferc1, and plant_name) as well as totals for fuel mmbtu consumed in that plant-year, and the cost of fuel in that year, the proportions of heat content and fuel costs for each fuel in that year, and a column that labels the diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index 6c7f2d4c12..27562e5198 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -104,19 +104,19 @@ "dbf": "f1_fuel", "xbrl": "steam_electric_generating_plant_statistics_large_plants_fuel_statistics_402", }, - "core_ferc1__yearly_plants_steam": { + "core_ferc1__yearly_steam_plants": { "dbf": "f1_steam", "xbrl": "steam_electric_generating_plant_statistics_large_plants_402", }, - "core_ferc1__yearly_plants_small": { + "core_ferc1__yearly_small_plants": { "dbf": "f1_gnrt_plant", "xbrl": "generating_plant_statistics_410", }, - "core_ferc1__yearly_plants_hydro": { + "core_ferc1__yearly_hydro_plants": { "dbf": "f1_hydro", "xbrl": "hydroelectric_generating_plant_statistics_large_plants_406", }, - "core_ferc1__yearly_plants_pumped_storage": { + "core_ferc1__yearly_pumped_storage_plants": { "dbf": "f1_pumped_storage", "xbrl": "pumped_storage_generating_plant_statistics_large_plants_408", }, @@ -156,7 +156,7 @@ "dbf": "f1_comp_balance_db", "xbrl": "comparative_balance_sheet_assets_and_other_debits_110", }, - "core_ferc1__yearly_income_statement": { + "core_ferc1__yearly_income_statements": { "dbf": ["f1_income_stmnt", "f1_incm_stmnt_2"], "xbrl": "statement_of_income_114", }, diff --git a/src/pudl/glue/ferc1_eia.py b/src/pudl/glue/ferc1_eia.py index c370bf1297..6594003188 100644 --- a/src/pudl/glue/ferc1_eia.py +++ b/src/pudl/glue/ferc1_eia.py @@ -255,10 +255,10 @@ def get_plants_ferc1_raw_job() -> JobDefinition: This job expects ferc1.sqlite and ferc_xbrl.sqlite databases to be populated. """ plant_tables = [ - "core_ferc1__yearly_plants_hydro", - "core_ferc1__yearly_plants_small", - "core_ferc1__yearly_plants_pumped_storage", - "core_ferc1__yearly_plants_steam", + "core_ferc1__yearly_hydro_plants", + "core_ferc1__yearly_small_plants", + "core_ferc1__yearly_pumped_storage_plants", + "core_ferc1__yearly_steam_plants", "core_ferc1__yearly_fuel", # bc it has plants/is associated w/ the steam table ] diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index bc5ccfa11c..34d9c912d2 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -2817,7 +2817,7 @@ FIELD_METADATA_BY_RESOURCE: dict[str, dict[str, Any]] = { "sector_consolidated_eia": {"code": {"type": "integer"}}, - "core_ferc1__yearly_plants_hydro": { + "core_ferc1__yearly_hydro_plants": { "plant_type": { "type": "string", "constraints": { @@ -2835,7 +2835,7 @@ }, } }, - "core_ferc1__yearly_plants_steam": { + "core_ferc1__yearly_steam_plants": { "plant_type": { "type": "string", "constraints": { diff --git a/src/pudl/metadata/resources/ferc1.py b/src/pudl/metadata/resources/ferc1.py index 8593fd01cb..b585da698b 100644 --- a/src/pudl/metadata/resources/ferc1.py +++ b/src/pudl/metadata/resources/ferc1.py @@ -338,7 +338,7 @@ "etl_group": "ferc1", "field_namespace": "ferc1", }, - "core_ferc1__yearly_income_statement": { + "core_ferc1__yearly_income_statements": { "description": "Statement of Income. Schedule 114.", "schema": { "fields": [ @@ -432,7 +432,7 @@ "etl_group": "glue", "field_namespace": "ferc1", }, - "core_ferc1__yearly_plants_hydro": { + "core_ferc1__yearly_hydro_plants": { "description": ( "Hydroelectric generating plant statistics for large plants. Large plants " "have an installed nameplate capacity of more than 10 MW. As reported on " @@ -484,7 +484,7 @@ "etl_group": "ferc1", "field_namespace": "ferc1", }, - "core_ferc1__yearly_plants_pumped_storage": { + "core_ferc1__yearly_pumped_storage_plants": { "description": ( "Generating plant statistics for hydroelectric pumped storage plants with " "an installed nameplate capacity of 10+ MW. As reported in Scheudle 408 of " @@ -540,7 +540,7 @@ "etl_group": "ferc1", "field_namespace": "ferc1", }, - "core_ferc1__yearly_plants_small": { + "core_ferc1__yearly_small_plants": { "description": """The generating plant statistics for internal combustion plants, gas turbine-plants, conventional hydro plants, and pumped storage plants with less than 10 MW installed nameplate capacity and steam plants with less than 25 MW @@ -581,7 +581,7 @@ "etl_group": "ferc1", "field_namespace": "ferc1", }, - "core_ferc1__yearly_plants_steam": { + "core_ferc1__yearly_steam_plants": { "description": ( "Generating plant statistics for steam plants with a capacity of 25+ MW, " "internal combustion and gas-turbine plants of 10+ MW, and all nuclear " diff --git a/src/pudl/output/ferc1.py b/src/pudl/output/ferc1.py index 7664df964b..45889eeb61 100644 --- a/src/pudl/output/ferc1.py +++ b/src/pudl/output/ferc1.py @@ -40,7 +40,7 @@ class CalculationToleranceFerc1(BaseModel): EXPLOSION_CALCULATION_TOLERANCES: dict[str, CalculationToleranceFerc1] = { - "core_ferc1__yearly_income_statement": CalculationToleranceFerc1( + "core_ferc1__yearly_income_statements": CalculationToleranceFerc1( intertable_calculation_errors=0.20, ), "core_ferc1__yearly_balance_sheet_assets": CalculationToleranceFerc1( @@ -68,7 +68,7 @@ def _out_ferc1__yearly_plants_utilities( @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def _out_ferc1__yearly_steam_plants( _out_ferc1__yearly_plants_utilities: pd.DataFrame, - core_ferc1__yearly_plants_steam: pd.DataFrame, + core_ferc1__yearly_steam_plants: pd.DataFrame, ) -> pd.DataFrame: """Select and joins some useful fields from the FERC Form 1 steam table. @@ -81,13 +81,13 @@ def _out_ferc1__yearly_steam_plants( Args: _out_ferc1__yearly_plants_utilities: Denormalized dataframe of FERC Form 1 plants and utilities data. - core_ferc1__yearly_plants_steam: The normalized FERC Form 1 steam table. + core_ferc1__yearly_steam_plants: The normalized FERC Form 1 steam table. Returns: A DataFrame containing useful fields from the FERC Form 1 steam table. """ steam_df = ( - core_ferc1__yearly_plants_steam.merge( + core_ferc1__yearly_steam_plants.merge( _out_ferc1__yearly_plants_utilities, on=["utility_id_ferc1", "plant_name_ferc1"], how="left", @@ -122,12 +122,12 @@ def _out_ferc1__yearly_steam_plants( @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def _out_ferc1__yearly_small_plants( - core_ferc1__yearly_plants_small: pd.DataFrame, + core_ferc1__yearly_small_plants: pd.DataFrame, _out_ferc1__yearly_plants_utilities: pd.DataFrame, ) -> pd.DataFrame: """Pull a useful dataframe related to the FERC Form 1 small plants.""" plants_small_df = ( - core_ferc1__yearly_plants_small.merge( + core_ferc1__yearly_small_plants.merge( _out_ferc1__yearly_plants_utilities, on=["utility_id_ferc1", "plant_name_ferc1"], how="left", @@ -159,12 +159,12 @@ def _out_ferc1__yearly_small_plants( @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def _out_ferc1__yearly_hydro_plants( - core_ferc1__yearly_plants_hydro: pd.DataFrame, + core_ferc1__yearly_hydro_plants: pd.DataFrame, _out_ferc1__yearly_plants_utilities: pd.DataFrame, ) -> pd.DataFrame: """Pull a useful dataframe related to the FERC Form 1 hydro plants.""" plants_hydro_df = ( - core_ferc1__yearly_plants_hydro.merge( + core_ferc1__yearly_hydro_plants.merge( _out_ferc1__yearly_plants_utilities, on=["utility_id_ferc1", "plant_name_ferc1"], how="left", @@ -190,12 +190,12 @@ def _out_ferc1__yearly_hydro_plants( @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def _out_ferc1__yearly_pumped_storage_plants( - core_ferc1__yearly_plants_pumped_storage: pd.DataFrame, + core_ferc1__yearly_pumped_storage_plants: pd.DataFrame, _out_ferc1__yearly_plants_utilities: pd.DataFrame, ) -> pd.DataFrame: """Pull a dataframe of FERC Form 1 Pumped Storage plant data.""" pumped_storage_df = ( - core_ferc1__yearly_plants_pumped_storage.merge( + core_ferc1__yearly_pumped_storage_plants.merge( _out_ferc1__yearly_plants_utilities, on=["utility_id_ferc1", "plant_name_ferc1"], how="left", @@ -578,11 +578,11 @@ def out_ferc1__yearly_electricity_sales_by_rate_schedule( @asset(io_manager_key="pudl_sqlite_io_manager", compute_kind="Python") def out_ferc1__yearly_income_statement( - core_ferc1__yearly_income_statement: pd.DataFrame, + core_ferc1__yearly_income_statements: pd.DataFrame, core_pudl__assn_utilities_ferc1: pd.DataFrame, ) -> pd.DataFrame: """Pull a useful dataframe of FERC Form 1 Purchased Power data.""" - out_ferc1__yearly_income_statement = core_ferc1__yearly_income_statement.merge( + out_ferc1__yearly_income_statement = core_ferc1__yearly_income_statements.merge( core_pudl__assn_utilities_ferc1, on="utility_id_ferc1" ).pipe( pudl.helpers.organize_cols, @@ -1037,7 +1037,10 @@ def exploded_table_asset_factory( } ins |= {table_name: AssetIn(table_name) for table_name in table_names_to_explode} - @asset(name=f"exploded_{root_table}", ins=ins, io_manager_key=io_manager_key) + asset_group, asset_name = root_table.split("__") + asset_name = f"{asset_group}__exploded_{asset_name}" + + @asset(name=asset_name, ins=ins, io_manager_key=io_manager_key) def exploded_tables_asset( **kwargs: dict[str, pd.DataFrame], ) -> pd.DataFrame: @@ -1076,19 +1079,19 @@ def create_exploded_table_assets() -> list[AssetsDefinition]: """ explosion_args = [ { - "root_table": "income_statement_ferc1", + "root_table": "core_ferc1__yearly_income_statements", "table_names_to_explode": [ - "core_ferc1__yearly_income_statement", + "core_ferc1__yearly_income_statements", "core_ferc1__yearly_depreciation_amortization_summary", "core_ferc1__yearly_electric_operating_expenses", "core_ferc1__yearly_electric_operating_revenues", ], "calculation_tolerance": EXPLOSION_CALCULATION_TOLERANCES[ - "core_ferc1__yearly_income_statement" + "core_ferc1__yearly_income_statements" ], "seed_nodes": [ NodeId( - table_name="core_ferc1__yearly_income_statement", + table_name="core_ferc1__yearly_income_statements", xbrl_factoid="net_income_loss", utility_type="total", plant_status=pd.NA, @@ -1097,7 +1100,7 @@ def create_exploded_table_assets() -> list[AssetsDefinition]: ], }, { - "root_table": "balance_sheet_assets_ferc1", + "root_table": "core_ferc1__yearly_balance_sheet_assets", "table_names_to_explode": [ "core_ferc1__yearly_balance_sheet_assets", "core_ferc1__yearly_balance_sheet_assets", @@ -1119,7 +1122,7 @@ def create_exploded_table_assets() -> list[AssetsDefinition]: ], }, { - "root_table": "balance_sheet_liabilities_ferc1", + "root_table": "core_ferc1__yearly_balance_sheet_liabilities", "table_names_to_explode": [ "core_ferc1__yearly_balance_sheet_liabilities", "core_ferc1__yearly_balance_sheet_liabilities", @@ -1545,7 +1548,7 @@ class XbrlCalculationForestFerc1(BaseModel): """A class for manipulating groups of hierarchically nested XBRL calculations. We expect that the facts reported in high-level FERC tables like - :ref:`core_ferc1__yearly_income_statement` and + :ref:`core_ferc1__yearly_income_statements` and :ref:`core_ferc1__yearly_balance_sheet_assets` should be calculable from many individually reported granular values, based on the calculations encoded in the XBRL Metadata, and that these relationships should have diff --git a/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv b/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv index 3c95b6f6d8..494f292e2f 100644 --- a/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv +++ b/src/pudl/package_data/ferc1/xbrl_calculation_component_fixes.csv @@ -47,9 +47,9 @@ core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraul core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_electric_operating_expenses,miscellaneous_hydraulic_power_generation_expenses,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_electric_operating_expenses,rents_hydraulic_power_generation,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_electric_operating_expenses,water_for_power,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_plants_hydro,opex_operations,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_plants_pumped_storage,opex_operations,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_plants_steam,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_hydro_plants,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_pumped_storage_plants,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_hydraulic_power,core_ferc1__yearly_steam_plants,opex_operations,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,allowances,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,coolants_and_water,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,electric_expenses_steam_power_generation,,,, @@ -64,9 +64,9 @@ core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_p core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,steam_expenses_steam_power_generation,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,steam_from_other_sources,,,, core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_electric_operating_expenses,steam_transferred_credit,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_plants_hydro,opex_operations,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_plants_pumped_storage,opex_operations,,,, -core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_plants_steam,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_hydro_plants,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_pumped_storage_plants,opex_operations,,,, +core_ferc1__yearly_electric_operating_expenses,power_production_expenses_steam_power,core_ferc1__yearly_steam_plants,opex_operations,,,, core_ferc1__yearly_electric_operating_expenses,transmission_operation_expense,core_ferc1__yearly_electric_operating_expenses,load_dispatching_transmission_expense,1.0,,, core_ferc1__yearly_electric_operating_revenues,other_operating_revenues,core_ferc1__yearly_electric_operating_revenues,forfeited_discounts,1.0,,, core_ferc1__yearly_electric_operating_revenues,other_operating_revenues,core_ferc1__yearly_electric_operating_revenues,interdepartmental_rents,1.0,,, @@ -86,34 +86,34 @@ core_ferc1__yearly_electric_plant_depreciation_changes,ending_balance,core_ferc1 core_ferc1__yearly_electric_plant_depreciation_changes,ending_balance,core_ferc1__yearly_electric_plant_depreciation_changes,net_charges_for_retired_plant,1.0,,, core_ferc1__yearly_electric_plant_depreciation_changes,ending_balance,core_ferc1__yearly_electric_plant_depreciation_changes,other_adjustments_to_accumulated_depreciation,1.0,,, core_ferc1__yearly_electric_plant_depreciation_changes,ending_balance,core_ferc1__yearly_electric_plant_depreciation_changes,starting_balance,1.0,,, -core_ferc1__yearly_income_statement,amortization_and_depletion_of_utility_plant,core_ferc1__yearly_depreciation_amortization_summary,amortization_limited_term_electric_plant,1.0,electric,total, -core_ferc1__yearly_income_statement,amortization_and_depletion_of_utility_plant,core_ferc1__yearly_depreciation_amortization_summary,amortization_other_electric_plant,1.0,electric,total, -core_ferc1__yearly_income_statement,depreciation_expense,core_ferc1__yearly_depreciation_amortization_summary,depreciation_expense,1.0,electric,total, -core_ferc1__yearly_income_statement,depreciation_expense_for_asset_retirement_costs,core_ferc1__yearly_depreciation_amortization_summary,depreciation_expense_asset_retirement,1.0,electric,total, -core_ferc1__yearly_income_statement,income_before_extraordinary_items,core_ferc1__yearly_income_statement,net_utility_operating_income,1.0,,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,distribution_maintenance_expense_electric,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,hydraulic_power_generation_maintenance_expense,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,maintenance_of_general_plant,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,nuclear_power_generation_maintenance_expense,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,other_power_generation_maintenance_expense,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,regional_market_maintenance_expense,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,steam_power_generation_maintenance_expense,1.0,electric,, -core_ferc1__yearly_income_statement,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,transmission_maintenance_expense_electric,1.0,electric,, -core_ferc1__yearly_income_statement,operating_revenues,core_ferc1__yearly_electric_operating_revenues,electric_operating_revenues,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,administrative_and_general_operation_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,customer_account_expenses,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,customer_service_and_information_expenses,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,distribution_operation_expenses_electric,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,hydraulic_power_generation_operations_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,nuclear_power_generation_operations_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,other_power_generation_operations_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,regional_market_operation_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,sales_expenses,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,steam_power_generation_operations_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,transmission_operation_expense,1.0,electric,, -core_ferc1__yearly_income_statement,operation_expense,core_ferc1__yearly_electric_operating_expenses,power_production_expenses,1.0,electric,, -core_ferc1__yearly_income_statement,other_income_deductions,core_ferc1__yearly_income_statement,miscellaneous_deductions,1.0,,, -core_ferc1__yearly_income_statement,taxes_on_other_income_and_deductions,core_ferc1__yearly_income_statement,investment_tax_credits,-1.0,,, +core_ferc1__yearly_income_statements,amortization_and_depletion_of_utility_plant,core_ferc1__yearly_depreciation_amortization_summary,amortization_limited_term_electric_plant,1.0,electric,total, +core_ferc1__yearly_income_statements,amortization_and_depletion_of_utility_plant,core_ferc1__yearly_depreciation_amortization_summary,amortization_other_electric_plant,1.0,electric,total, +core_ferc1__yearly_income_statements,depreciation_expense,core_ferc1__yearly_depreciation_amortization_summary,depreciation_expense,1.0,electric,total, +core_ferc1__yearly_income_statements,depreciation_expense_for_asset_retirement_costs,core_ferc1__yearly_depreciation_amortization_summary,depreciation_expense_asset_retirement,1.0,electric,total, +core_ferc1__yearly_income_statements,income_before_extraordinary_items,core_ferc1__yearly_income_statements,net_utility_operating_income,1.0,,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,distribution_maintenance_expense_electric,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,hydraulic_power_generation_maintenance_expense,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,maintenance_of_general_plant,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,nuclear_power_generation_maintenance_expense,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,other_power_generation_maintenance_expense,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,regional_market_maintenance_expense,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,steam_power_generation_maintenance_expense,1.0,electric,, +core_ferc1__yearly_income_statements,maintenance_expense,core_ferc1__yearly_electric_operating_expenses,transmission_maintenance_expense_electric,1.0,electric,, +core_ferc1__yearly_income_statements,operating_revenues,core_ferc1__yearly_electric_operating_revenues,electric_operating_revenues,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,administrative_and_general_operation_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,customer_account_expenses,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,customer_service_and_information_expenses,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,distribution_operation_expenses_electric,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,hydraulic_power_generation_operations_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,nuclear_power_generation_operations_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,other_power_generation_operations_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,regional_market_operation_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,sales_expenses,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,steam_power_generation_operations_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,transmission_operation_expense,1.0,electric,, +core_ferc1__yearly_income_statements,operation_expense,core_ferc1__yearly_electric_operating_expenses,power_production_expenses,1.0,electric,, +core_ferc1__yearly_income_statements,other_income_deductions,core_ferc1__yearly_income_statements,miscellaneous_deductions,1.0,,, +core_ferc1__yearly_income_statements,taxes_on_other_income_and_deductions,core_ferc1__yearly_income_statements,investment_tax_credits,-1.0,,, core_ferc1__yearly_plant_in_service,electric_plant_in_service_and_completed_construction_not_classified_electric,core_ferc1__yearly_plant_in_service,distribution_plant,1.0,,, core_ferc1__yearly_plant_in_service,electric_plant_in_service_and_completed_construction_not_classified_electric,core_ferc1__yearly_plant_in_service,general_plant,1.0,,, core_ferc1__yearly_plant_in_service,electric_plant_in_service_and_completed_construction_not_classified_electric,core_ferc1__yearly_plant_in_service,intangible_plant,1.0,,, diff --git a/src/pudl/transform/ferc1.py b/src/pudl/transform/ferc1.py index 5d29aa6675..8d544c68a5 100644 --- a/src/pudl/transform/ferc1.py +++ b/src/pudl/transform/ferc1.py @@ -132,10 +132,10 @@ class TableIdFerc1(enum.Enum): """ FUEL_FERC1 = "core_ferc1__yearly_fuel" - PLANTS_STEAM_FERC1 = "core_ferc1__yearly_plants_steam" - PLANTS_HYDRO_FERC1 = "core_ferc1__yearly_plants_hydro" - PLANTS_SMALL_FERC1 = "core_ferc1__yearly_plants_small" - PLANTS_PUMPED_STORAGE_FERC1 = "core_ferc1__yearly_plants_pumped_storage" + PLANTS_STEAM_FERC1 = "core_ferc1__yearly_steam_plants" + PLANTS_HYDRO_FERC1 = "core_ferc1__yearly_hydro_plants" + PLANTS_SMALL_FERC1 = "core_ferc1__yearly_small_plants" + PLANTS_PUMPED_STORAGE_FERC1 = "core_ferc1__yearly_pumped_storage_plants" PLANT_IN_SERVICE_FERC1 = "core_ferc1__yearly_plant_in_service" PURCHASED_POWER_FERC1 = "core_ferc1__yearly_purchased_power" TRANSMISSION_STATISTICS_FERC1 = "core_ferc1__yearly_transmission_statistics" @@ -151,7 +151,7 @@ class TableIdFerc1(enum.Enum): ) BALANCE_SHEET_ASSETS_FERC1 = "core_ferc1__yearly_balance_sheet_assets" RETAINED_EARNINGS_FERC1 = "core_ferc1__yearly_retained_earnings" - INCOME_STATEMENT_FERC1 = "core_ferc1__yearly_income_statement" + INCOME_STATEMENT_FERC1 = "core_ferc1__yearly_income_statements" ELECTRIC_PLANT_DEPRECIATION_CHANGES_FERC1 = ( "core_ferc1__yearly_electric_plant_depreciation_changes" ) @@ -2409,7 +2409,7 @@ class FuelFerc1TableTransformer(Ferc1AbstractTableTransformer): """A table transformer specific to the :ref:`core_ferc1__yearly_fuel` table. The :ref:`core_ferc1__yearly_fuel` table reports data about fuel consumed by large thermal power - plants in the :ref:`core_ferc1__yearly_plants_steam` table. Each record in the steam table is + plants in the :ref:`core_ferc1__yearly_steam_plants` table. Each record in the steam table is typically associated with several records in the fuel table, with each fuel record reporting data for a particular type of fuel consumed by that plant over the course of a year. The fuel table presents several challenges. @@ -2737,7 +2737,7 @@ def drop_invalid_rows( class PlantsSteamFerc1TableTransformer(Ferc1AbstractTableTransformer): - """Transformer class for the :ref:`core_ferc1__yearly_plants_steam` table.""" + """Transformer class for the :ref:`core_ferc1__yearly_steam_plants` table.""" table_id: TableIdFerc1 = TableIdFerc1.PLANTS_STEAM_FERC1 @@ -2745,10 +2745,10 @@ class PlantsSteamFerc1TableTransformer(Ferc1AbstractTableTransformer): def transform_main( self, df: pd.DataFrame, transformed_fuel: pd.DataFrame ) -> pd.DataFrame: - """Perform table transformations for the :ref:`core_ferc1__yearly_plants_steam` table. + """Perform table transformations for the :ref:`core_ferc1__yearly_steam_plants` table. Note that this method has a non-standard call signature, since the - :ref:`core_ferc1__yearly_plants_steam` table depends on the :ref:`core_ferc1__yearly_fuel` table. + :ref:`core_ferc1__yearly_steam_plants` table depends on the :ref:`core_ferc1__yearly_fuel` table. Args: df: The pre-processed steam plants table. @@ -2807,7 +2807,7 @@ def transform( class PlantsHydroFerc1TableTransformer(Ferc1AbstractTableTransformer): - """A table transformer specific to the :ref:`core_ferc1__yearly_plants_hydro` table.""" + """A table transformer specific to the :ref:`core_ferc1__yearly_hydro_plants` table.""" table_id: TableIdFerc1 = TableIdFerc1.PLANTS_HYDRO_FERC1 @@ -2854,7 +2854,7 @@ def targeted_drop_duplicates(self, df): class PlantsPumpedStorageFerc1TableTransformer(Ferc1AbstractTableTransformer): - """Transformer class for :ref:`core_ferc1__yearly_plants_pumped_storage` table.""" + """Transformer class for :ref:`core_ferc1__yearly_pumped_storage_plants` table.""" table_id: TableIdFerc1 = TableIdFerc1.PLANTS_PUMPED_STORAGE_FERC1 @@ -3053,13 +3053,13 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: class PlantsSmallFerc1TableTransformer(Ferc1AbstractTableTransformer): - """A table transformer specific to the :ref:`core_ferc1__yearly_plants_small` table.""" + """A table transformer specific to the :ref:`core_ferc1__yearly_small_plants` table.""" table_id: TableIdFerc1 = TableIdFerc1.PLANTS_SMALL_FERC1 @cache_df(key="main") def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: - """Table specific transforms for core_ferc1__yearly_plants_small. + """Table specific transforms for core_ferc1__yearly_small_plants. Params: df: Pre-processed, concatenated XBRL and DBF data. @@ -4453,7 +4453,7 @@ def convert_xbrl_metadata_json_to_df( class IncomeStatementFerc1TableTransformer(Ferc1AbstractTableTransformer): - """Transformer class for the :ref:`core_ferc1__yearly_income_statement` table.""" + """Transformer class for the :ref:`core_ferc1__yearly_income_statements` table.""" table_id: TableIdFerc1 = TableIdFerc1.INCOME_STATEMENT_FERC1 has_unique_record_ids: bool = False @@ -5377,11 +5377,11 @@ class OtherRegulatoryLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransfor FERC1_TFR_CLASSES: Mapping[str, type[Ferc1AbstractTableTransformer]] = { "core_ferc1__yearly_fuel": FuelFerc1TableTransformer, - "core_ferc1__yearly_plants_steam": PlantsSteamFerc1TableTransformer, - "core_ferc1__yearly_plants_small": PlantsSmallFerc1TableTransformer, - "core_ferc1__yearly_plants_hydro": PlantsHydroFerc1TableTransformer, + "core_ferc1__yearly_steam_plants": PlantsSteamFerc1TableTransformer, + "core_ferc1__yearly_small_plants": PlantsSmallFerc1TableTransformer, + "core_ferc1__yearly_hydro_plants": PlantsHydroFerc1TableTransformer, "core_ferc1__yearly_plant_in_service": PlantInServiceFerc1TableTransformer, - "core_ferc1__yearly_plants_pumped_storage": PlantsPumpedStorageFerc1TableTransformer, + "core_ferc1__yearly_pumped_storage_plants": PlantsPumpedStorageFerc1TableTransformer, "core_ferc1__yearly_transmission_statistics": TransmissionStatisticsFerc1TableTransformer, "core_ferc1__yearly_purchased_power": PurchasedPowerFerc1TableTransformer, "core_ferc1__yearly_electric_energy_sources": ElectricEnergySourcesFerc1TableTransformer, @@ -5391,7 +5391,7 @@ class OtherRegulatoryLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransfor "core_ferc1__yearly_balance_sheet_liabilities": BalanceSheetLiabilitiesFerc1TableTransformer, "core_ferc1__yearly_depreciation_amortization_summary": DepreciationAmortizationSummaryFerc1TableTransformer, "core_ferc1__yearly_balance_sheet_assets": BalanceSheetAssetsFerc1TableTransformer, - "core_ferc1__yearly_income_statement": IncomeStatementFerc1TableTransformer, + "core_ferc1__yearly_income_statements": IncomeStatementFerc1TableTransformer, "core_ferc1__yearly_electric_plant_depreciation_changes": ElectricPlantDepreciationChangesFerc1TableTransformer, "core_ferc1__yearly_electric_plant_depreciation_functional": ElectricPlantDepreciationFunctionalFerc1TableTransformer, "core_ferc1__yearly_retained_earnings": RetainedEarningsFerc1TableTransformer, @@ -5414,7 +5414,7 @@ def ferc1_transform_asset_factory( This is a convenient way to create assets for tables that only depend on raw dbf, raw xbrl instant and duration tables and xbrl metadata. For tables with additional upstream dependencies, create a stand alone asset using an asset decorator. See - the core_ferc1__yearly_plants_steam asset. + the core_ferc1__yearly_steam_plants asset. Args: table_name: The name of the table to create an asset for. @@ -5502,7 +5502,7 @@ def create_ferc1_transform_assets() -> list[AssetsDefinition]: for table_name, tfr_class in FERC1_TFR_CLASSES.items(): # Bespoke exception. fuel must come before steam b/c fuel proportions are used to # aid in FERC plant ID assignment. - if table_name != "core_ferc1__yearly_plants_steam": + if table_name != "core_ferc1__yearly_steam_plants": assets.append(ferc1_transform_asset_factory(table_name, tfr_class)) return assets @@ -5511,14 +5511,14 @@ def create_ferc1_transform_assets() -> list[AssetsDefinition]: @asset(io_manager_key="pudl_sqlite_io_manager") -def core_ferc1__yearly_plants_steam( +def core_ferc1__yearly_steam_plants( clean_xbrl_metadata_json: dict[str, dict[str, list[dict[str, Any]]]], raw_ferc1_dbf__f1_steam: pd.DataFrame, raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration: pd.DataFrame, raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant: pd.DataFrame, core_ferc1__yearly_fuel: pd.DataFrame, ) -> pd.DataFrame: - """Create the clean core_ferc1__yearly_plants_steam table. + """Create the clean core_ferc1__yearly_steam_plants table. Args: clean_xbrl_metadata_json: XBRL metadata json for all tables. @@ -5528,10 +5528,10 @@ def core_ferc1__yearly_plants_steam( core_ferc1__yearly_fuel: Transformed core_ferc1__yearly_fuel table. Returns: - Clean core_ferc1__yearly_plants_steam table. + Clean core_ferc1__yearly_steam_plants table. """ df = PlantsSteamFerc1TableTransformer( - xbrl_metadata_json=clean_xbrl_metadata_json["core_ferc1__yearly_plants_steam"] + xbrl_metadata_json=clean_xbrl_metadata_json["core_ferc1__yearly_steam_plants"] ).transform( raw_dbf=raw_ferc1_dbf__f1_steam, raw_xbrl_instant=raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant, diff --git a/src/pudl/transform/params/__init__.py b/src/pudl/transform/params/__init__.py index faa48f8218..f5216fc9c2 100644 --- a/src/pudl/transform/params/__init__.py +++ b/src/pudl/transform/params/__init__.py @@ -4,7 +4,7 @@ ``ferc1`` or ``eia923``) and must define a dictionary named ``TRANSFORM_PARAMS``. This dictionary is a nested data structure with 2 or 3 levels of keys: -* The first level has keys that table names (e.g. ``core_ferc1__yearly_plants_steam``). +* The first level has keys that table names (e.g. ``core_ferc1__yearly_steam_plants``). * The second level has keys that are the names of transform functions (e.g. ``convert_units``). * In the case of transform functions that operate on a single column and implement the diff --git a/src/pudl/transform/params/ferc1.py b/src/pudl/transform/params/ferc1.py index 7cc65cb6ba..c711028d3e 100644 --- a/src/pudl/transform/params/ferc1.py +++ b/src/pudl/transform/params/ferc1.py @@ -2242,7 +2242,7 @@ }, ], }, - "core_ferc1__yearly_plants_steam": { + "core_ferc1__yearly_steam_plants": { "rename_columns_ferc1": { "dbf": { "columns": { @@ -2400,7 +2400,7 @@ }, ], }, - "core_ferc1__yearly_plants_hydro": { + "core_ferc1__yearly_hydro_plants": { "rename_columns_ferc1": { "dbf": { "columns": { @@ -2538,7 +2538,7 @@ }, ], }, - "core_ferc1__yearly_plants_small": { + "core_ferc1__yearly_small_plants": { "rename_columns_ferc1": { "dbf": { "columns": { @@ -2730,7 +2730,7 @@ "calculation_tolerance": 0.08, }, }, - "core_ferc1__yearly_plants_pumped_storage": { + "core_ferc1__yearly_pumped_storage_plants": { "rename_columns_ferc1": { "dbf": { "columns": { @@ -3718,7 +3718,7 @@ "calculation_tolerance": 0.08, }, }, - "core_ferc1__yearly_income_statement": { + "core_ferc1__yearly_income_statements": { "rename_columns_ferc1": { "dbf": { "columns": { @@ -3850,7 +3850,7 @@ }, "drop_duplicate_rows_dbf": { "data_columns": ["dollar_value"], - "table_name": "core_ferc1__yearly_income_statement", + "table_name": "core_ferc1__yearly_income_statements", }, "align_row_numbers_dbf": { "dbf_table_names": ["f1_income_stmnt", "f1_incm_stmnt_2"] diff --git a/src/pudl/validate.py b/src/pudl/validate.py index a7ca67793c..114c6dba7c 100644 --- a/src/pudl/validate.py +++ b/src/pudl/validate.py @@ -655,7 +655,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): ############################################################################### -core_ferc1__yearly_plants_steam_capacity = [ +core_ferc1__yearly_steam_plants_capacity = [ { "title": "All Plant Capacity", "query": "", @@ -678,7 +678,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): }, ] -core_ferc1__yearly_plants_steam_expenses = [ +core_ferc1__yearly_steam_plants_expenses = [ { "title": "Capital Expenses (median)", "query": "", @@ -741,7 +741,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): }, ] -core_ferc1__yearly_plants_steam_capacity_ratios = [ +core_ferc1__yearly_steam_plants_capacity_ratios = [ { "title": "Capacity Factor (Tails)", "query": "capacity_factor>0.05", @@ -844,7 +844,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): }, ] -core_ferc1__yearly_plants_steam_connected_hours = [ +core_ferc1__yearly_steam_plants_connected_hours = [ { # Currently failing b/c ~10% of plants have way more than 8760 hours... "title": "Plant Hours Connected (min/max)", "query": "", @@ -857,7 +857,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): }, ] -core_ferc1__yearly_plants_steam_self = [ +core_ferc1__yearly_steam_plants_self = [ { "title": "All Plant Capacity", "query": "", diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index 8e32f0f55b..57cc4116b1 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -46,7 +46,7 @@ def test_ferc1_xbrl2sqlite(ferc1_engine_xbrl, ferc1_xbrl_taxonomy_metadata): # Has the metadata we've read in from JSON contain a long list of entities? assert isinstance(ferc1_xbrl_taxonomy_metadata, dict) # nosec: B101 assert ( - "core_ferc1__yearly_plants_steam" in ferc1_xbrl_taxonomy_metadata + "core_ferc1__yearly_steam_plants" in ferc1_xbrl_taxonomy_metadata ) # nosec: B101 assert len(ferc1_xbrl_taxonomy_metadata) > 10 # nosec: B101 assert len(ferc1_xbrl_taxonomy_metadata) < 100 # nosec: B101 diff --git a/test/validate/ferc1_test.py b/test/validate/ferc1_test.py index 4277c4dd26..828eb9d793 100644 --- a/test/validate/ferc1_test.py +++ b/test/validate/ferc1_test.py @@ -26,7 +26,7 @@ "core_ferc1__yearly_transmission_statistics", "core_ferc1__yearly_balance_sheet_liabilities", "core_ferc1__yearly_balance_sheet_assets", - "core_ferc1__yearly_income_statement", + "core_ferc1__yearly_income_statements", "core_ferc1__yearly_depreciation_amortization_summary", "core_ferc1__yearly_electric_plant_depreciation_changes", "core_ferc1__yearly_electric_plant_depreciation_functional", diff --git a/test/validate/fuel_ferc1_test.py b/test/validate/fuel_ferc1_test.py index 4135eff948..eef3628b3e 100644 --- a/test/validate/fuel_ferc1_test.py +++ b/test/validate/fuel_ferc1_test.py @@ -50,7 +50,7 @@ def test_fuel_ferc1_trivial(pudl_out_ferc1): ], ) def test_vs_bounds(pudl_out_ferc1, live_dbs, cases): - """Test distributions of reported core_ferc1__yearly_plants_steam columns.""" + """Test distributions of reported core_ferc1__yearly_steam_plants columns.""" if not live_dbs: pytest.skip("Data validation only works with a live PUDL DB.") for case in cases: diff --git a/test/validate/plants_steam_ferc1_test.py b/test/validate/plants_steam_ferc1_test.py index 6ee3130dc1..8a024362d6 100644 --- a/test/validate/plants_steam_ferc1_test.py +++ b/test/validate/plants_steam_ferc1_test.py @@ -17,20 +17,20 @@ @pytest.mark.parametrize( "cases", [ - pytest.param(pv.core_ferc1__yearly_plants_steam_capacity, id="capacity"), - pytest.param(pv.core_ferc1__yearly_plants_steam_expenses, id="expenses"), + pytest.param(pv.core_ferc1__yearly_steam_plants_capacity, id="capacity"), + pytest.param(pv.core_ferc1__yearly_steam_plants_expenses, id="expenses"), pytest.param( - pv.core_ferc1__yearly_plants_steam_capacity_ratios, id="capacity_ratios" + pv.core_ferc1__yearly_steam_plants_capacity_ratios, id="capacity_ratios" ), pytest.param( - pv.core_ferc1__yearly_plants_steam_connected_hours, + pv.core_ferc1__yearly_steam_plants_connected_hours, id="connected_hours", marks=pytest.mark.xfail(reason="FERC 1 data reporting errors."), ), ], ) def test_vs_bounds(pudl_out_ferc1, live_dbs, cases): - """Test distributions of reported core_ferc1__yearly_plants_steam columns.""" + """Test distributions of reported core_ferc1__yearly_steam_plants columns.""" if not live_dbs: pytest.skip("Data validation only works with a live PUDL DB.") validate_df = pd.read_sql( @@ -59,7 +59,7 @@ def test_self_vs_historical(pudl_out_ferc1, live_dbs): peak_demand_ratio=lambda x: x.peak_demand_mw / x.capacity_mw, capability_ratio=lambda x: x.plant_capability_mw / x.capacity_mw, ) - for args in pv.core_ferc1__yearly_plants_steam_self: + for args in pv.core_ferc1__yearly_steam_plants_self: pudl.validate.vs_self(validate_df, **args) From 4260c2e31fc12e6fc03ad0dacaabb17d2b1057d0 Mon Sep 17 00:00:00 2001 From: Bennett Norman Date: Thu, 5 Oct 2023 10:13:03 -0800 Subject: [PATCH 2/2] Rename `heat_rate_mmbtu_mwh -> heat_rate_mmbtu_mwh_by_unit` (#2917) * Rename heat_rate_mmbtu_mwh -> heat_rate_mmbtu_mwh_by_unit * Rename heat rate mmbtu mwh to follow existing naming convention --- ...41cf_rename_heat_rate_column_to_follow_.py | 121 ++++++++++++++++++ ...ename_heat_rate_mmbtu_mwh_to_heat_rate_.py | 121 ++++++++++++++++++ src/pudl/analysis/ferc1_eia.py | 12 +- src/pudl/analysis/ferc1_eia_train.py | 2 +- src/pudl/analysis/mcoe.py | 25 ++-- src/pudl/analysis/plant_parts_eia.py | 2 +- src/pudl/metadata/fields.py | 2 +- src/pudl/metadata/resources/eia.py | 4 +- src/pudl/metadata/resources/mcoe.py | 10 +- src/pudl/validate.py | 12 +- test/unit/analysis/plant_parts_eia_test.py | 4 +- 11 files changed, 280 insertions(+), 35 deletions(-) create mode 100644 migrations/versions/1e4356aa41cf_rename_heat_rate_column_to_follow_.py create mode 100644 migrations/versions/c415b55c4dcf_rename_heat_rate_mmbtu_mwh_to_heat_rate_.py diff --git a/migrations/versions/1e4356aa41cf_rename_heat_rate_column_to_follow_.py b/migrations/versions/1e4356aa41cf_rename_heat_rate_column_to_follow_.py new file mode 100644 index 0000000000..7428946be9 --- /dev/null +++ b/migrations/versions/1e4356aa41cf_rename_heat_rate_column_to_follow_.py @@ -0,0 +1,121 @@ +"""Rename heat rate column to follow column naming convention + +Revision ID: 1e4356aa41cf +Revises: c415b55c4dcf +Create Date: 2023-10-04 20:00:29.217602 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = '1e4356aa41cf' +down_revision = 'c415b55c4dcf' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('_out_eia__monthly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__monthly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__plant_parts', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__yearly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__yearly_generators_by_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('unit_heat_rate_mmbtu_per_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('out_eia__yearly_generators_by_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('out_eia__yearly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('out_eia__plant_parts', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('out_eia__monthly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__yearly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__yearly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__monthly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + with op.batch_alter_table('_out_eia__monthly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.FLOAT(), nullable=True)) + batch_op.drop_column('unit_heat_rate_mmbtu_per_mwh') + + # ### end Alembic commands ### diff --git a/migrations/versions/c415b55c4dcf_rename_heat_rate_mmbtu_mwh_to_heat_rate_.py b/migrations/versions/c415b55c4dcf_rename_heat_rate_mmbtu_mwh_to_heat_rate_.py new file mode 100644 index 0000000000..e5e2f9d299 --- /dev/null +++ b/migrations/versions/c415b55c4dcf_rename_heat_rate_mmbtu_mwh_to_heat_rate_.py @@ -0,0 +1,121 @@ +"""Rename heat_rate_mmbtu_mwh to heat_rate_mmbtu_mwh_by_unit + +Revision ID: c415b55c4dcf +Revises: ab0540a6484c +Create Date: 2023-10-04 08:08:23.744433 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'c415b55c4dcf' +down_revision = 'ab0540a6484c' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('_out_eia__monthly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__monthly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__yearly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__yearly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('out_eia__monthly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('out_eia__plant_parts', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('out_eia__yearly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + with op.batch_alter_table('out_eia__yearly_generators_by_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh_by_unit', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.')) + batch_op.drop_column('heat_rate_mmbtu_mwh') + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('out_eia__yearly_generators_by_ownership', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__yearly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__plant_parts', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('out_eia__monthly_generators', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__yearly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_unit', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_heat_rate_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_fuel_cost_by_generator', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + with op.batch_alter_table('_out_eia__monthly_derived_generator_attributes', schema=None) as batch_op: + batch_op.add_column(sa.Column('heat_rate_mmbtu_mwh', sa.FLOAT(), nullable=True)) + batch_op.drop_column('heat_rate_mmbtu_mwh_by_unit') + + # ### end Alembic commands ### diff --git a/src/pudl/analysis/ferc1_eia.py b/src/pudl/analysis/ferc1_eia.py index 875209644e..c8a95e91bd 100644 --- a/src/pudl/analysis/ferc1_eia.py +++ b/src/pudl/analysis/ferc1_eia.py @@ -193,7 +193,9 @@ def get_plants_ferc1(self, clobber: bool = False) -> pd.DataFrame: x.plant_id_report_year + "_" + x.utility_id_pudl.map(str) ), fuel_cost_per_mmbtu=lambda x: (x.fuel_cost / x.fuel_mmbtu), - heat_rate_mmbtu_mwh=lambda x: (x.fuel_mmbtu / x.net_generation_mwh), + unit_heat_rate_mmbtu_per_mwh=lambda x: ( + x.fuel_mmbtu / x.net_generation_mwh + ), ) .rename( columns={ @@ -403,9 +405,9 @@ def make_features( label="fuel_cost_per_mmbtu", ), Numeric( - "heat_rate_mmbtu_mwh", - "heat_rate_mmbtu_mwh", - label="heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", + "unit_heat_rate_mmbtu_per_mwh", + label="unit_heat_rate_mmbtu_per_mwh", ), Exact( "fuel_type_code_pudl", @@ -808,7 +810,7 @@ def prettyify_best_matches( "capacity_mw", "capacity_factor", "total_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_type_code_pudl", "installation_year", "plant_part_id_eia", diff --git a/src/pudl/analysis/ferc1_eia_train.py b/src/pudl/analysis/ferc1_eia_train.py index 2ff798bb8e..ed01a43701 100644 --- a/src/pudl/analysis/ferc1_eia_train.py +++ b/src/pudl/analysis/ferc1_eia_train.py @@ -103,7 +103,7 @@ "total_fuel_cost", "total_mmbtu", "fuel_cost_per_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", ] # -------------------------------------------------------------------------------------- diff --git a/src/pudl/analysis/mcoe.py b/src/pudl/analysis/mcoe.py index bfa96da89b..1771f3b840 100644 --- a/src/pudl/analysis/mcoe.py +++ b/src/pudl/analysis/mcoe.py @@ -285,7 +285,7 @@ def heat_rate_by_unit(gen_fuel_by_energy_source: pd.DataFrame, bga: pd.DataFrame - unit_id_pudl - net_generation_mwh - fuel_consumed_for_electricity_mmbtu - - heat_rate_mmbtu_mwh + - unit_heat_rate_mmbtu_per_mwh """ gen_fuel_by_unit = pudl.helpers.date_merge( left=gen_fuel_by_energy_source, @@ -301,7 +301,7 @@ def heat_rate_by_unit(gen_fuel_by_energy_source: pd.DataFrame, bga: pd.DataFrame .sum() .convert_dtypes() .assign( - heat_rate_mmbtu_mwh=lambda x: x.fuel_consumed_for_electricity_mmbtu + unit_heat_rate_mmbtu_per_mwh=lambda x: x.fuel_consumed_for_electricity_mmbtu / x.net_generation_mwh ) ) @@ -326,7 +326,7 @@ def heat_rate_by_gen( Returns: DataFrame with columns report_date, plant_id_eia, unit_id_pudl, generator_id, - heat_rate_mmbtu_mwh, fuel_type_code_pudl, fuel_type_count, prime_mover_code. + unit_heat_rate_mmbtu_per_mwh, fuel_type_code_pudl, fuel_type_count, prime_mover_code. The output will have a time frequency corresponding to that of the input pudl_out. Output data types are set to their canonical values before returning. """ @@ -339,7 +339,7 @@ def heat_rate_by_gen( "report_date", "plant_id_eia", "unit_id_pudl", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", ], ] @@ -404,7 +404,7 @@ def fuel_cost( "generator_id", "unit_id_pudl", "report_date", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", ], ] gens = gens.loc[ @@ -515,7 +515,7 @@ def fuel_cost( "plant_id_eia", "report_date", "generator_id", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_cost_from_eiaapi", ] ], @@ -532,7 +532,7 @@ def fuel_cost( "report_date", "generator_id", "fuel_cost_per_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_cost_from_eiaapi", ] ] @@ -540,13 +540,14 @@ def fuel_cost( fc = ( pd.concat([one_fuel, multi_fuel], sort=True) .assign( - fuel_cost_per_mwh=lambda x: x.fuel_cost_per_mmbtu * x.heat_rate_mmbtu_mwh + fuel_cost_per_mwh=lambda x: x.fuel_cost_per_mmbtu + * x.unit_heat_rate_mmbtu_per_mwh ) .sort_values(["report_date", "plant_id_eia", "generator_id"]) ) out_df = ( - gen_w_ft.drop("heat_rate_mmbtu_mwh", axis=1) + gen_w_ft.drop("unit_heat_rate_mmbtu_per_mwh", axis=1) .drop_duplicates() .merge(fc, on=["report_date", "plant_id_eia", "generator_id"]) ) @@ -633,7 +634,7 @@ def mcoe( "unit_id_pudl", "fuel_cost_from_eiaapi", "fuel_cost_per_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_cost_per_mwh", ], ], @@ -645,12 +646,12 @@ def mcoe( ) # Calculate a couple more derived values: .assign( - total_mmbtu=lambda x: x.net_generation_mwh * x.heat_rate_mmbtu_mwh, + total_mmbtu=lambda x: x.net_generation_mwh * x.unit_heat_rate_mmbtu_per_mwh, total_fuel_cost=lambda x: x.total_mmbtu * x.fuel_cost_per_mmbtu, ) .pipe( pudl.helpers.oob_to_nan_with_dependent_cols, - cols=["heat_rate_mmbtu_mwh"], + cols=["unit_heat_rate_mmbtu_per_mwh"], dependent_cols=["total_mmbtu", "fuel_cost_per_mwh"], lb=min_heat_rate, ub=None, diff --git a/src/pudl/analysis/plant_parts_eia.py b/src/pudl/analysis/plant_parts_eia.py index 819157eb76..9a5051c159 100644 --- a/src/pudl/analysis/plant_parts_eia.py +++ b/src/pudl/analysis/plant_parts_eia.py @@ -281,7 +281,7 @@ WTAVG_DICT = { "fuel_cost_per_mwh": "capacity_mw", - "heat_rate_mmbtu_mwh": "capacity_mw", + "unit_heat_rate_mmbtu_per_mwh": "capacity_mw", "fuel_cost_per_mmbtu": "capacity_mw", } """Dict: a dictionary of columns (keys) to perform weighted averages on and the weight diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index 34d9c912d2..7d63c6b2f5 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -1159,7 +1159,7 @@ "description": "The energy contained in fuel burned, measured in million BTU.", "unit": "MMBtu", }, - "heat_rate_mmbtu_mwh": { + "unit_heat_rate_mmbtu_per_mwh": { "type": "number", "description": "Fuel content per unit of electricity generated. Coming from MCOE calculation.", "unit": "MMBtu_MWh", diff --git a/src/pudl/metadata/resources/eia.py b/src/pudl/metadata/resources/eia.py index d4bd30e9b1..be3a086d10 100644 --- a/src/pudl/metadata/resources/eia.py +++ b/src/pudl/metadata/resources/eia.py @@ -551,7 +551,7 @@ "fuel_cost_from_eiaapi", "fuel_cost_per_mmbtu", "fuel_cost_per_mwh", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "net_generation_mwh", "total_fuel_cost", "total_mmbtu", @@ -651,7 +651,7 @@ "fuel_cost_per_mwh", "fuel_type_code_pudl", "generator_retirement_date", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "installation_year", "net_generation_mwh", "generator_operating_year", diff --git a/src/pudl/metadata/resources/mcoe.py b/src/pudl/metadata/resources/mcoe.py index 2b738a3e3d..740506d7fd 100644 --- a/src/pudl/metadata/resources/mcoe.py +++ b/src/pudl/metadata/resources/mcoe.py @@ -24,7 +24,7 @@ "unit_id_pudl", "net_generation_mwh", "fuel_consumed_for_electricity_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", ], "primary_key": [ "report_date", @@ -53,7 +53,7 @@ "plant_id_eia", "unit_id_pudl", "generator_id", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_type_code_pudl", "fuel_type_count", "prime_mover_code", @@ -126,7 +126,7 @@ "fuel_type_code_pudl", "fuel_cost_from_eiaapi", "fuel_cost_per_mmbtu", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "fuel_cost_per_mwh", ], "primary_key": [ @@ -165,7 +165,7 @@ "fuel_cost_from_eiaapi", "fuel_cost_per_mmbtu", "fuel_cost_per_mwh", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "net_generation_mwh", "total_fuel_cost", "total_mmbtu", @@ -212,7 +212,7 @@ "fuel_cost_from_eiaapi", "fuel_cost_per_mmbtu", "fuel_cost_per_mwh", - "heat_rate_mmbtu_mwh", + "unit_heat_rate_mmbtu_per_mwh", "net_generation_mwh", "total_fuel_cost", "total_mmbtu", diff --git a/src/pudl/validate.py b/src/pudl/validate.py index 114c6dba7c..15955052c7 100644 --- a/src/pudl/validate.py +++ b/src/pudl/validate.py @@ -2657,7 +2657,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_bound": 7.0, "hi_q": 0.50, "hi_bound": 7.5, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, { # EIA natural gas reporting really only becomes usable in 2015. @@ -2667,7 +2667,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_bound": 6.4, "hi_q": 0.95, "hi_bound": 13.0, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, ] @@ -2681,7 +2681,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_bound": 10.0, "hi_q": 0.50, "hi_bound": 11.0, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, { @@ -2691,7 +2691,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_bound": 9.0, "hi_q": 0.95, "hi_bound": 12.5, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, ] @@ -2857,7 +2857,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "mid_q": 0.50, "hi_q": 0.95, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, { @@ -2866,7 +2866,7 @@ def plot_vs_agg(orig_df, agg_df, validation_cases): "low_q": 0.05, "mid_q": 0.50, "hi_q": 0.95, - "data_col": "heat_rate_mmbtu_mwh", + "data_col": "unit_heat_rate_mmbtu_per_mwh", "weight_col": "net_generation_mwh", }, ] diff --git a/test/unit/analysis/plant_parts_eia_test.py b/test/unit/analysis/plant_parts_eia_test.py index 7eed73c2b2..b45fcf31a4 100644 --- a/test/unit/analysis/plant_parts_eia_test.py +++ b/test/unit/analysis/plant_parts_eia_test.py @@ -535,7 +535,7 @@ def test_one_to_many(): "net_generation_mwh": [100] * 8, "total_fuel_cost": [100] * 8, "fuel_cost_per_mwh": [1] * 8, - "heat_rate_mmbtu_mwh": [1] * 8, + "unit_heat_rate_mmbtu_per_mwh": [1] * 8, "fuel_cost_per_mmbtu": [1] * 8, "fuel_type_code_pudl": ["test"] * 8, "planned_generator_retirement_date": [2076] * 8, @@ -622,7 +622,7 @@ def test_one_to_many(): "net_generation_mwh": [100, 100, 100, 100, 100, 100, 100, 100, 200], "total_fuel_cost": [100, 100, 100, 100, 100, 100, 100, 100, 200], "fuel_cost_per_mwh": [1] * 9, - "heat_rate_mmbtu_mwh": [1] * 9, + "unit_heat_rate_mmbtu_per_mwh": [1] * 9, "fuel_cost_per_mmbtu": [1] * 9, "fuel_type_code_pudl": ["test"] * 9, "planned_generator_retirement_date": [2076] * 9,