Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EIA AEO fuel cost projections #3656

Merged
merged 4 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ PUDL Release Notes
v2024.X.X (2024-XX-XX)
---------------------------------------------------------------------------------------

New Data Coverage
^^^^^^^^^^^^^^^^^

EIA AEO
~~~~~~~

* Added new tables from EIA AEO table 54:

* :ref:`core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type`
contains fuel costs for the electric power sector. These are broken out by
fuel type, and include both nominal USD per MMBtu as well as real 2022 USD
per MMBtu. See issue :issue:`3649` and PR :pr:`3656`.

.. _release-v2024.5.0:

---------------------------------------------------------------------------------------
Expand Down
38 changes: 38 additions & 0 deletions migrations/versions/da38a41d7f99_add_fuel_cost_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""add fuel cost table

Revision ID: da38a41d7f99
Revises: 7992e5e19487
Create Date: 2024-05-27 16:52:21.327543

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'da38a41d7f99'
down_revision = '7992e5e19487'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the fuel cost projection table.

    Adds ``core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type``
    with a composite primary key of report year, region, model case,
    projection year, and fuel type. The two fuel cost columns and the real
    cost basis year are nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type',
    sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'),
    sa.Column('electricity_market_module_region_eiaaeo', sa.Enum('florida_reliability_coordinating_council', 'midcontinent_central', 'midcontinent_east', 'midcontinent_south', 'midcontinent_west', 'northeast_power_coordinating_council_new_england', 'northeast_power_coordinating_council_new_york_city_and_long_island', 'northeast_power_coordinating_council_upstate_new_york', 'pjm_commonwealth_edison', 'pjm_dominion', 'pjm_east', 'pjm_west', 'serc_reliability_corporation_central', 'serc_reliability_corporation_east', 'serc_reliability_corporation_southeastern', 'southwest_power_pool_central', 'southwest_power_pool_north', 'southwest_power_pool_south', 'texas_reliability_entity', 'united_states', 'western_electricity_coordinating_council_basin', 'western_electricity_coordinating_council_california_north', 'western_electricity_coordinating_council_california_south', 'western_electricity_coordinating_council_northwest_power_pool_area', 'western_electricity_coordinating_council_rockies', 'western_electricity_coordinating_council_southwest'), nullable=False, comment='AEO projection region.'),
    sa.Column('model_case_eiaaeo', sa.Enum('aeo2022', 'high_economic_growth', 'high_macro_and_high_zero_carbon_technology_cost', 'high_macro_and_low_zero_carbon_technology_cost', 'high_oil_and_gas_supply', 'high_oil_price', 'high_uptake_of_inflation_reduction_act', 'high_zero_carbon_technology_cost', 'low_economic_growth', 'low_macro_and_high_zero_carbon_technology_cost', 'low_macro_and_low_zero_carbon_technology_cost', 'low_oil_and_gas_supply', 'low_oil_price', 'low_uptake_of_inflation_reduction_act', 'low_zero_carbon_technology_cost', 'no_inflation_reduction_act', 'reference'), nullable=False, comment='Factors such as economic growth, future oil prices, the ultimate size of domestic energy resources, and technological change are often uncertain. To illustrate some of these uncertainties, EIA runs side cases to show how the model responds to changes in key input variables compared with the Reference case. See https://www.eia.gov/outlooks/aeo/assumptions/case_descriptions.php for more details.'),
    sa.Column('projection_year', sa.Integer(), nullable=False, comment='The year of the projected value.'),
    sa.Column('fuel_type_eiaaeo', sa.Enum('coal', 'petroleum', 'natural_gas', 'other_gaseous_fuels', 'renewable_sources', 'other'), nullable=False, comment='Fuel type reported for AEO end-use sector generation data.'),
    sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'),
    sa.Column('fuel_cost_real_per_mmbtu_eiaaeo', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in real USD, standardized to the value of a USD in the year defined by ``real_cost_basis_year``.'),
    sa.Column('real_cost_basis_year', sa.Integer(), nullable=True, comment="Four-digit year which is the basis for any 'real cost' monetary values (as opposed to nominal values)."),
    sa.PrimaryKeyConstraint('report_year', 'electricity_market_module_region_eiaaeo', 'model_case_eiaaeo', 'projection_year', 'fuel_type_eiaaeo', name=op.f('pk_core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type'))
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the fuel cost projection table created by :func:`upgrade`."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type')
    # ### end Alembic commands ###
2 changes: 2 additions & 0 deletions src/pudl/metadata/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@

FUEL_TYPES_EIAAEO: list[str] = [
"coal",
"distillate_fuel_oil",
"residual_fuel_oil",
"petroleum",
"natural_gas",
"other_gaseous_fuels",
Expand Down
14 changes: 14 additions & 0 deletions src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,15 @@
"description": "Average cost of fuel delivered in the report year per reported fuel unit (USD).",
"unit": "USD",
},
"fuel_cost_real_per_mmbtu_eiaaeo": {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need the suffix eiaaeo?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also this is a nit and not blocking but here we say: cost_real but in the other column we say real_cost. Maybe we should pick one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We wanted to add the suffix eiaaeo because it's possible that other datasets mean something different by "real fuel cost" - maybe it is always based in the same year, for example. It's easier to remove a suffix and merge two fields into one than it is to have to tease apart the different use cases later.

I think this column would be named "Fuel Cost (Real 2022 USD/MMbtu)" and the other would be named "Real Cost Basis Year" (not "Cost (real) Basis Year") in English - which is why I put them that way. I think it's slightly less consistent but more readable the way it is, but happy to switch out if you feel strongly about it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mmm yeah there is no great way to deal with this -- could say real_cost_fuel? but that is inconsistent with how we report fuel_cost in other columns too 🤷. I'll leave this up to you.

"type": "number",
"description": (
"Average fuel cost per mmBTU of heat content in real USD, "
"standardized to the value of a USD in the year defined by "
"``real_cost_basis_year``."
),
"unit": "USD_per_MMBtu",
},
"fuel_derived_from": {
"type": "string",
"description": "Original fuel from which this refined fuel was derived.",
Expand Down Expand Up @@ -3291,6 +3300,11 @@
"description": "Reactive Power Output (MVAr)",
"unit": "MVAr",
},
"real_cost_basis_year": {
"type": "integer",
"description": "Four-digit year which is the basis for any 'real cost' "
"monetary values (as opposed to nominal values).",
},
Comment on lines +3303 to +3307
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is "real" vs. "nominal" cost something we should expect users to understand? TBH I don't know the difference so I don't want to assume other people do. Is this an easily googleable term, or should we go into more depth about what "real" means here. Also open to housing this explanation elsewhere (i.e.: the metadata table description, another/several column description(s)).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a pretty easily searchable term - just going by this docstring and searching "real cost vs nominal value" gets you the right thing as the first hit on DuckDuckGo and the fourth hit on Google after the rando AI generated BS.

image

"real_time_pricing": {
"type": "boolean",
"description": (
Expand Down
15 changes: 4 additions & 11 deletions src/pudl/metadata/resources/eiaaeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@
},
"core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type": {
"description": (
"Projected fuel prices to the electric power sector, including "
"Projected fuel prices for the electric power sector, including "
"electricity-only and combined-heat-and-power plants that have a "
"regulatory status."
Comment on lines +256 to 258
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment above about describing real vs. nominal somewhere.

),
Expand All @@ -266,6 +266,7 @@
"fuel_type_eiaaeo",
"fuel_cost_per_mmbtu",
"fuel_cost_real_per_mmbtu_eiaaeo",
"real_cost_basis_year",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this so we know what year the real cost refers to.

],
"primary_key": [
"report_year",
Expand Down Expand Up @@ -342,15 +343,6 @@
"description": "Total carbon emissions in short tons.",
"unit": "short_tons",
},
"fuel_cost_real_per_mmbtu_eiaaeo": {
jdangerx marked this conversation as resolved.
Show resolved Hide resolved
"type": "number",
"description": (
"Average fuel cost per mmBTU of heat content in real USD, "
"standardized to the value of a USD in the year before the report "
"year."
),
"unit": "USD_per_MMBtu",
},
"generation_for_own_use_mwh": {
"type": "number",
"description": "Amount of generation that is used for generation instead of sold.",
Expand Down Expand Up @@ -408,8 +400,9 @@
for key, value in _STAGING_RESOURCE_METADATA.items()
if key
in {
"core_eiaaeo__yearly_projected_electric_sales",
jdangerx marked this conversation as resolved.
Show resolved Hide resolved
"core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type",
"core_eiaaeo__yearly_projected_generation_in_electric_sector_by_technology",
"core_eiaaeo__yearly_projected_generation_in_end_use_sectors_by_fuel_type",
"core_eiaaeo__yearly_projected_electric_sales",
}
}
155 changes: 109 additions & 46 deletions src/pudl/transform/eiaaeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,52 +431,6 @@ def core_eiaaeo__yearly_projected_electric_sales(
return renamed_for_pudl


@dataclass
jdangerx marked this conversation as resolved.
Show resolved Hide resolved
class AeoCheckSpec:
"""Define some simple checks that can run on any AEO asset."""

name: str
asset: str
num_rows_by_report_year: dict[int, int]
category_counts: dict[str, int]


BASE_AEO_CATEGORIES = {
"model_case_eiaaeo": 17,
"projection_year": 30,
"electricity_market_module_region_eiaaeo": 26,
}
check_specs = [
AeoCheckSpec(
name="gen_in_electric_sector_by_tech",
asset="core_eiaaeo__yearly_projected_generation_in_electric_sector_by_technology",
num_rows_by_report_year={2023: 166972},
category_counts=BASE_AEO_CATEGORIES
| {
"technology_description_eiaaeo": 13,
},
),
AeoCheckSpec(
name="gen_in_electric_sector_by_tech",
asset="core_eiaaeo__yearly_projected_generation_in_end_use_sectors_by_fuel_type",
num_rows_by_report_year={2023: 77064},
category_counts=BASE_AEO_CATEGORIES
| {
"fuel_type_eiaaeo": 6,
},
),
AeoCheckSpec(
name="electricity_sales",
asset="core_eiaaeo__yearly_projected_electric_sales",
num_rows_by_report_year={2023: 51376},
category_counts=BASE_AEO_CATEGORIES
| {
"customer_class": 4,
},
),
]


@asset(io_manager_key="pudl_io_manager")
def core_eiaaeo__yearly_projected_generation_in_end_use_sectors_by_fuel_type(
raw_eiaaeo__electric_power_projections_regional,
Expand Down Expand Up @@ -560,6 +514,115 @@ def core_eiaaeo__yearly_projected_generation_in_end_use_sectors_by_fuel_type(
return renamed_for_pudl


@asset(io_manager_key="pudl_io_manager")
def core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type(
    raw_eiaaeo__electric_power_projections_regional,
):
    """Projected fuel cost for the electric power sector.

    Includes 2022 US dollars per million BTU and nominal US dollars per million
    BTU.

    In future report years, the base year for the real cost will change, so we
    store that base year as well.

    Args:
        raw_eiaaeo__electric_power_projections_regional: The raw regional
            electric power projections, passed to ``filter_enrich_sanitize``
            to select the "Electricity : Fuel Prices" series.

    Returns:
        A dataframe with the region and fuel type columns renamed to their
        PUDL names, the nominal and real fuel cost columns, and a constant
        ``real_cost_basis_year`` column.

    Raises:
        AssertionError: If the selected series contain a topic, variable name,
            or unit other than the ones this transform expects.
    """
    sanitized = filter_enrich_sanitize(
        raw_df=raw_eiaaeo__electric_power_projections_regional,
        relevant_series_names=("Electricity : Fuel Prices",),
    ).rename(columns={"subtopic": "variable_name", "variable_name": "dimension"})

    assert set(sanitized.topic.unique()) == {"electricity"}
    assert set(sanitized.variable_name.unique()) == {"fuel_prices"}
    assert set(sanitized.units.unique()) == {"2022_mmbtu", "nom_mmbtu"}
    # Prefix variable_name with its unit, yielding `2022_mmbtu_fuel_prices`
    # (real) and `nom_mmbtu_fuel_prices` (nominal). The final rename below
    # expects exactly these two names.
    sanitized.variable_name = sanitized.units + "_" + sanitized.variable_name

    trimmed = sanitized.drop(
        columns=[
            "topic",
            "units",
        ]
    )

    # The basis year is tied to the "2022_mmbtu" unit asserted above: when a
    # new report year changes the real-dollar basis, that assertion will fail
    # and both it and this constant need to be updated together.
    unstacked = unstack(
        df=trimmed,
        eventual_pk=[
            "report_year",
            "model_case_eiaaeo",
            "region",
            "dimension",
            "projection_year",
        ],
    ).assign(real_cost_basis_year=2022)

    # Only fuel price variables exist in this table (asserted above), so the
    # rename map carries just the columns that can actually be present.
    renamed_for_pudl = unstacked.reset_index().rename(
        columns={
            "region": "electricity_market_module_region_eiaaeo",
            "dimension": "fuel_type_eiaaeo",
            "2022_mmbtu_fuel_prices": "fuel_cost_real_per_mmbtu_eiaaeo",
            "nom_mmbtu_fuel_prices": "fuel_cost_per_mmbtu",
        }
    )
    return renamed_for_pudl


@dataclass
class AeoCheckSpec:
    """Define some simple checks that can run on any AEO asset."""

    # Short label identifying this check spec.
    name: str
    # Name of the asset the checks are attached to.
    asset: str
    # Report year -> row count; presumably the expected number of rows for
    # that report year -- confirm against make_check.
    num_rows_by_report_year: dict[int, int]
    # Column name -> count; presumably the expected number of distinct values
    # in that column -- confirm against make_check.
    category_counts: dict[str, int]


# Category counts common to every AEO check spec; each spec merges these with
# its own table-specific categories. The values are presumably the expected
# number of distinct values per column -- confirm against make_check.
BASE_AEO_CATEGORIES = {
    "model_case_eiaaeo": 17,
    "projection_year": 30,
    "electricity_market_module_region_eiaaeo": 26,
}
check_specs = [
AeoCheckSpec(
name="gen_in_electric_sector_by_tech",
asset="core_eiaaeo__yearly_projected_generation_in_electric_sector_by_technology",
num_rows_by_report_year={2023: 166972},
category_counts=BASE_AEO_CATEGORIES
| {
"technology_description_eiaaeo": 13,
Comment on lines +586 to +593
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason these aren't in test modules?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are for defining asset checks, which get run during the ETL after an asset has been materialized.

},
),
AeoCheckSpec(
    # Named after the asset it checks; previously this reused the name of the
    # electric-sector-by-technology spec.
    name="gen_in_end_use_sectors_by_fuel_type",
    asset="core_eiaaeo__yearly_projected_generation_in_end_use_sectors_by_fuel_type",
    num_rows_by_report_year={2023: 77064},
    category_counts=BASE_AEO_CATEGORIES
    | {
        "fuel_type_eiaaeo": 6,
    },
),
AeoCheckSpec(
name="electricity_sales",
asset="core_eiaaeo__yearly_projected_electric_sales",
num_rows_by_report_year={2023: 51376},
category_counts=BASE_AEO_CATEGORIES
| {
"customer_class": 4,
},
),
AeoCheckSpec(
    # Named after the asset it checks; previously this reused the name of the
    # electricity sales spec.
    name="fuel_cost_in_electric_sector_by_type",
    asset="core_eiaaeo__yearly_projected_fuel_cost_in_electric_sector_by_type",
    num_rows_by_report_year={2023: 50882},
    category_counts=BASE_AEO_CATEGORIES
    | {
        "fuel_type_eiaaeo": 4,
    },
),
]


def make_check(spec: AeoCheckSpec) -> AssetChecksDefinition:
"""Turn the AeoCheckSpec into an actual Dagster asset check."""

Expand Down