-
-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* eia860 solar: extract step wahoo * tweak column names * first pass of extracting 860 wind * first pass at extracting nrelatb * fix lil unit test bbs * actually test rename in unit test * fix suffix number in column map * add definitions pages into data source docs --------- Co-authored-by: E. Belfer <[email protected]>
- Loading branch information
Showing
17 changed files
with
2,036 additions
and
15 deletions.
There are no files selected for viewing
518 changes: 518 additions & 0 deletions
518
docs/data_sources/nrelatb/nrelatb_definitions_2020.html
Large diffs are not rendered by default.
Oops, something went wrong.
442 changes: 442 additions & 0 deletions
442
docs/data_sources/nrelatb/nrelatb_definitions_2021.html
Large diffs are not rendered by default.
Oops, something went wrong.
442 changes: 442 additions & 0 deletions
442
docs/data_sources/nrelatb/nrelatb_definitions_2022.html
Large diffs are not rendered by default.
Oops, something went wrong.
482 changes: 482 additions & 0 deletions
482
docs/data_sources/nrelatb/nrelatb_definitions_2023.html
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
excel, | ||
ferc1, | ||
ferc714, | ||
nrelatb, | ||
phmsagas, | ||
xbrl, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
"""Routines used for extracting the raw NREL ATB data.""" | ||
|
||
from dagster import Output, asset | ||
|
||
from pudl.extract.extractor import GenericMetadata, raw_df_factory | ||
from pudl.extract.parquet import ParquetExtractor | ||
|
||
|
||
class Extractor(ParquetExtractor):
    """Parquet-based extractor for the NREL ATB dataset."""

    def __init__(self, *args, **kwargs):
        """Attach the NREL ATB metadata, then run the parent initializer.

        Args:
            *args: Positional arguments forwarded unchanged to the parent
                initializer.
            **kwargs: Keyword arguments forwarded unchanged to the parent
                initializer.

        NOTE(review): callers presumably pass an initialized datastore
        (``ds``, a ``datastore.Datastore``) -- verify against the parent
        class signature.
        """
        # METADATA is assigned before delegating to the parent initializer;
        # presumably the parent reads it during setup -- confirm before
        # reordering these statements.
        nrelatb_metadata = GenericMetadata("nrelatb")
        self.METADATA = nrelatb_metadata
        super().__init__(*args, **kwargs)
|
||
|
||
# Asset built by the shared raw-dataframe factory from the NREL ATB Extractor.
# It is indexed by page name below (only the "data" page is used here).
raw_nrelatb__all_dfs = raw_df_factory(Extractor, name="nrelatb")


@asset(
    required_resource_keys={"datastore", "dataset_settings"},
)
def raw_nrelatb__data(raw_nrelatb__all_dfs):
    """Pull the raw NREL ATB ``data`` page out of the extracted pages.

    The annual parquet files are extracted upstream; this asset selects the
    single ``data`` entry and hands it to dagster.

    Args:
        raw_nrelatb__all_dfs: Mapping-like collection of extracted NREL ATB
            pages, indexed by page name.

    Returns:
        A dagster ``Output`` wrapping the extracted NREL ATB dataframe.
    """
    data_page = raw_nrelatb__all_dfs["data"]
    return Output(value=data_page)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""Extractor for Parquet data.""" | ||
|
||
import io | ||
|
||
import pandas as pd | ||
|
||
import pudl.logging_helpers | ||
from pudl.extract.extractor import GenericExtractor, PartitionSelection | ||
|
||
logger = pudl.logging_helpers.get_logger(__name__) | ||
|
||
|
||
class ParquetExtractor(GenericExtractor):
    """Class for extracting dataframes from parquet files.

    The extraction logic is invoked by calling extract() method of this class.
    """

    def source_filename(self, page: str, **partition: PartitionSelection) -> str:
        """Produce the source Parquet file name as it will appear in the archive.

        The name is built as ``<dataset name>-<partition selection>.parquet``.

        Args:
            page: pudl name for the dataset contents, eg "boiler_generator_assn" or
                "data". Not used when building the file name.
            partition: partition to load. Examples:
                {'year': 2009}

        Returns:
            string name of the parquet file
        """
        partition_selection = self._metadata._get_partition_selection(partition)
        return f"{self._dataset_name}-{partition_selection}.parquet"

    def load_source(self, page: str, **partition: PartitionSelection) -> pd.DataFrame:
        """Produce the dataframe object for the given partition.

        This method assumes that the archive includes one unzipped file per partition.

        Args:
            page: pudl name for the dataset contents, eg "boiler_generator_assn" or
                "data". Not used here: the resource is looked up by partition only.
            partition: partition to load. Examples:
                {'year': 2009}
                {'year_month': '2020-08'}

        Returns:
            pd.DataFrame instance containing the Parquet data
        """
        res = self.ds.get_unique_resource(self._dataset_name, **partition)
        # The resource is passed to BytesIO, so it is expected to be raw bytes;
        # wrapping it gives read_parquet the file-like object it needs.
        return pd.read_parquet(io.BytesIO(res))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
year_index,2019,2020,2021,2022,2023 | ||
report_year,atb_year,atb_year,atb_year,atb_year,atb_year | ||
core_metric_key,core_metric_key,core_metric_key,core_metric_key,core_metric_key,core_metric_key | ||
core_metric_parameter,core_metric_parameter,core_metric_parameter,core_metric_parameter,core_metric_parameter,core_metric_parameter | ||
core_metric_case,core_metric_case,core_metric_case,core_metric_case,core_metric_case,core_metric_case | ||
cost_recovery_period_years,crpyears,crpyears,crpyears,crpyears,crpyears | ||
technology_description,technology,technology,technology,technology,technology | ||
technology_alias,,,technology_alias,technology_alias,technology_alias | ||
display_name,,,display_name,display_name,display_name | ||
is_default,,,default,default,default | ||
technology_description_detail_1,techdetail,techdetail,techdetail,techdetail,techdetail | ||
technology_description_detail_2,,,,,techdetail2 | ||
resource_description,,,,,resourcedetail | ||
is_technology_mature,,,,,maturity | ||
technology_scale,,,,,scale | ||
scenario_atb,scenario,scenario,scenario,scenario,scenario | ||
core_metric_variable_year,core_metric_variable,core_metric_variable,core_metric_variable,core_metric_variable,core_metric_variable | ||
units,units,units,units,units,units | ||
value,value,value,value,value,value | ||
update_date,update_date,,,, | ||
revision_num,revision,revision,,, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,3 +87,5 @@ datasets: | |
year_quarters: ["2022q1"] | ||
phmsagas: | ||
years: [2022] | ||
nrelatb: | ||
years: [2023] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -344,3 +344,5 @@ datasets: | |
2021, | ||
2022, | ||
] | ||
nrelatb: | ||
years: [2019, 2020, 2021, 2022, 2023] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters