
Commit aab1d6d

chore format modules
rouille committed Dec 5, 2023
1 parent a0a269f commit aab1d6d
Showing 12 changed files with 49 additions and 77 deletions.
src/consumed.py (10 changes: 5 additions & 5 deletions)
@@ -439,12 +439,12 @@ def _load_rates(self):
)

# Cut off emissions at 9 hours after UTC year
- emissions = emissions[:f"{self.year+1}-01-01 09:00:00+00:00"]
- rates[((adj, pol))] = emissions
+ emissions = emissions[: f"{self.year+1}-01-01 09:00:00+00:00"]
+ rates[(adj, pol)] = emissions

# Make generation data frame
generation = pd.DataFrame(data=gens)
- generation = generation[:f"{self.year+1}-01-01 09:00:00+00:00"]
+ generation = generation[: f"{self.year+1}-01-01 09:00:00+00:00"]

return rates, generation

@@ -462,7 +462,7 @@ def build_matrices(self, pol: str, adj: str, date):

# Build generation array, using 930 for import-only regions
G = np.zeros(len(self.regions))
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
if r in self.import_regions:
G[i] = self.eia930.df.loc[date, KEYS["E"]["NG"] % r]
else:
@@ -513,7 +513,7 @@ def run(self):
consumed_emissions = np.full(len(self.regions), np.nan)

# Export
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
self.results[r].loc[date, col] = consumed_emissions[i]
if total_failed > 0:
logger.warning(
src/data_cleaning.py (9 changes: 1 addition & 8 deletions)
@@ -453,9 +453,7 @@ def clean_eia923(
"fuel_consumed_mmbtu",
"fuel_consumed_for_electricity_mmbtu",
],
- ].round(
- 1
- )
+ ].round(1)

validation.test_for_missing_energy_source_code(gen_fuel_allocated)
validation.test_for_negative_values(gen_fuel_allocated)
@@ -708,7 +706,6 @@ def calculate_aggregated_primary_fuel(
# we will calculate primary fuel based on the fuel with the most consumption,
# generation, and capacity
for source in ["fuel_consumed_for_electricity_mmbtu", "net_generation_mwh"]:
-
# only keep values greater than zero so that these can be filled by other
# methods if non-zero
primary_fuel_calc = agg_totals_by_fuel[agg_totals_by_fuel[source] > 0]
@@ -1088,7 +1085,6 @@ def manually_remove_steam_units(df):


def remove_incomplete_unit_months(cems):
-
# get a count of how many hours are reported in each month for each unit
unit_hours_in_month = (
cems[["plant_id_eia", "report_date", "emissions_unit_id_epa", "datetime_utc"]]
@@ -1748,7 +1744,6 @@ def filter_unique_cems_data(cems, partial_cems):


def aggregate_plant_data_to_ba_fuel(combined_plant_data, plant_attributes_table):
-
# create a table that has data for the sythetic plant attributes
shaped_plant_attributes = (
plant_attributes_table[["shaped_plant_id", "ba_code", "fuel_category"]]
@@ -1892,7 +1887,6 @@ def combine_plant_data(


def create_plant_attributes_table(cems, eia923_allocated, year, primary_fuel_table):
-
# create a table with the unique plantids from both dataframes
eia_plants = eia923_allocated[
["plant_id_eia", "plant_primary_fuel"]
@@ -2184,7 +2178,6 @@ def add_plant_local_timezone(df, year):


def aggregate_cems_to_subplant(cems):
-
GROUPBY_COLUMNS = ["plant_id_eia", "subplant_id", "datetime_utc", "report_date"]

cems_columns_to_aggregate = [
src/download_data.py (4 changes: 1 addition & 3 deletions)
@@ -41,9 +41,7 @@ def download_helper(
# If the file already exists, do not re-download it.
final_destination = output_path if output_path is not None else download_path
if os.path.exists(final_destination):
- logger.info(
- f"{final_destination.split('/')[-1]} already downloaded, skipping."
- )
+ logger.info(f"{final_destination.split('/')[-1]} already downloaded, skipping.")
return False

# Otherwise, download to the file in chunks.
src/eia930.py (5 changes: 1 addition & 4 deletions)
@@ -285,7 +285,6 @@ def load_chalendar_for_pipeline(cleaned_data_filepath, year):


def remove_imputed_ones(eia930_data):
-
filter = eia930_data["net_generation_mwh_930"].abs() < 1.5

# replace all 1.0 values with zero
@@ -460,9 +459,7 @@ def manual_930_adjust(raw: pd.DataFrame):
& (raw.index < "2022-06-16 07:00:00+00")
),
cols,
- ].shift(
- 1, freq="H"
- )
+ ].shift(1, freq="H")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")

src/emissions.py (32 changes: 20 additions & 12 deletions)
@@ -481,16 +481,19 @@ def calculate_nox_from_fuel_consumption(
if len(missing_ef) > 0:
logger.warning("NOx emission factors are missing for the following records")
logger.warning("Missing factors for FC prime movers are currently expected")
- logger.warning("\n" +
- missing_ef[
+ logger.warning(
+ "\n"
+ + missing_ef[
[
"report_date",
"plant_id_eia",
"energy_source_code",
"prime_mover_code",
"generator_id",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
gen_fuel_allocated["nox_mass_lb"] = (
gen_fuel_allocated["fuel_consumed_mmbtu"]
@@ -687,7 +690,8 @@ def calculate_generator_nox_ef_per_unit_from_boiler_type(
)
)
if len(missing_nox_efs) > 0:
- logger.warning("""
+ logger.warning(
+ """
After filling with PM-fuel factors, NOx emission factors are still missing for the following boiler types.
An emission factor of zero will be used for these boilers.
Missing factors for FC prime movers are currently expected."""
@@ -717,7 +721,6 @@ def calculate_generator_nox_ef_per_unit_from_boiler_type(


def load_boiler_firing_type(year):
-
boiler_design_parameters_eia860 = load_data.load_boiler_design_parameters_eia860(
year
)
@@ -799,7 +802,6 @@ def fill_missing_factors_based_on_pm_fuel(emission_factors, gen_factors):


def convert_ef_to_lb_per_mmbtu(gen_emission_factors, pudl_out, pollutant):
-
# get the reported fuel heat content values from EIA-923
(
plant_specific_fuel_heat_content,
@@ -1214,16 +1216,19 @@ def calculate_so2_from_fuel_consumption(gen_fuel_allocated, pudl_out, year):
if len(missing_ef) > 0:
logger.warning("SO2 emission factors are missing for the above records")
logger.warning("Missing factors for FC prime movers are currently expected")
- logger.warning("\n" +
- missing_ef[
+ logger.warning(
+ "\n"
+ + missing_ef[
[
"report_date",
"plant_id_eia",
"energy_source_code",
"prime_mover_code",
"generator_id",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
gen_fuel_allocated["so2_mass_lb"] = (
gen_fuel_allocated["fuel_consumed_mmbtu"]
@@ -1561,15 +1566,18 @@ def adjust_so2_efs_for_fuel_sulfur_content(uncontrolled_so2_factors, pudl_out):
]
if len(missing_sulfur_content) > 0:
logger.warning("Sulfur content data is missing in EIA-923 for the above units.")
- logger.warning("\n" +
- missing_sulfur_content[
+ logger.warning(
+ "\n"
+ + missing_sulfur_content[
[
"plant_id_eia",
"generator_id",
"prime_mover_code",
"energy_source_code",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
uncontrolled_so2_factors.loc[
uncontrolled_so2_factors["sulfur_content_pct"].isna()
src/gross_to_net_generation.py (1 change: 0 additions & 1 deletion)
@@ -779,7 +779,6 @@ def load_monthly_gross_and_net_generation(start_year, end_year):


def gross_to_net_ratio(gross_gen_data, net_gen_data, agg_level, year):
-
if agg_level == "plant":
plant_aggregation_columns = ["plant_id_eia"]
elif agg_level == "subplant":
src/impute_hourly_profiles.py (8 changes: 1 addition & 7 deletions)
@@ -334,7 +334,6 @@ def aggregate_for_residual(


def aggregate_non_930_fuel_categories(cems, plant_attributes):
-
# get a list of the fuel categories not in EIA-930
fuel_categories_not_in_eia930 = list(
set(plant_attributes.fuel_category.unique())
@@ -426,9 +425,7 @@ def calculate_residual(
~combined_data["eia930_profile"].isna(), "cems_profile"
] = combined_data.loc[
~combined_data["eia930_profile"].isna(), "cems_profile"
- ].fillna(
- 0
- )
+ ].fillna(0)

combined_data = calculate_scaled_residual(combined_data)
combined_data = calculate_shifted_residual(combined_data)
@@ -712,7 +709,6 @@ def identify_missing_profiles(
def average_diba_wind_solar_profiles(
residual_profiles, ba, fuel, report_date, ba_dibas, validation_run=False
):
-
# calculate the average generation profile for the fuel in all neighboring DIBAs
df_temporary = residual_profiles.copy()[
(residual_profiles["ba_code"].isin(ba_dibas))
@@ -1022,7 +1018,6 @@ def combine_and_export_hourly_plant_data(

# for each region, shape the EIA-only data, combine with CEMS data, and export
for region in list(plant_attributes[region_to_group].unique()):
-
# filter each of the data sources to the region
eia_region = monthly_eia_data_to_shape_agg[
monthly_eia_data_to_shape_agg[region_to_group] == region
@@ -1257,7 +1252,6 @@ def shape_partial_cems_plants(cems, eia923_allocated):

# if there is no data in the partial cems dataframe, skip.
if len(eia_data_to_shape) > 0:
-
# group the eia data by subplant
eia_data_to_shape = (
eia_data_to_shape.groupby(SUBPLANT_KEYS, dropna=False)[DATA_COLUMNS]
src/load_data.py (2 changes: 0 additions & 2 deletions)
@@ -367,7 +367,6 @@ def initialize_pudl_out(year=None):
if year is None:
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)
else:
-
pudl_out = pudl.output.pudltabl.PudlTabl(
pudl_engine,
freq="MS",
@@ -600,7 +599,6 @@ def load_ipcc_gwp():


def load_raw_eia930_data(year, description):
-
eia_930 = pd.concat(
[
pd.read_csv(
src/output_data.py (1 change: 0 additions & 1 deletion)
@@ -141,7 +141,6 @@ def output_to_results(
validation.test_for_missing_values(df, small)

if not skip_outputs:
-
df.to_csv(
results_folder(f"{path_prefix}{subfolder}us_units/{file_name}.csv"),
index=False,
src/validation.py (29 changes: 7 additions & 22 deletions)
@@ -110,7 +110,6 @@ def flag_possible_primary_fuel_mismatches(plant_primary_fuel):
]

for esc_column in ["plant_primary_fuel_from_capacity_mw", "plant_primary_fuel"]:
-
# load the fuel category table
energy_source_groups = pd.read_csv(
manual_folder("energy_source_groups.csv"), dtype=get_dtypes()
@@ -341,7 +340,7 @@ def check_missing_or_zero_generation_matches(combined_gen_data):
# identify when there is zero or NA gross generation associated with positive net generation
missing_gross_gen = combined_gen_data[
(combined_gen_data["net_generation_mwh"] > 0)
- & ((combined_gen_data["gross_generation_mwh"] == 0))
+ & (combined_gen_data["gross_generation_mwh"] == 0)
]

# identify when there is zero or NA net generation associated with nonzero gross generation
@@ -621,9 +620,7 @@ def ensure_non_overlapping_data_from_all_sources(
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
] = data_overlap[
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
- ].fillna(
- 0
- )
+ ].fillna(0)
data_overlap["number_of_locations"] = (
data_overlap["in_eia"]
+ data_overlap["in_cems"]
@@ -747,7 +744,7 @@ def check_for_complete_timeseries(df, df_name, keys, period):
If the `period` is a 'year', checks that the length of the timeseries is 8760 (for a
non-leap year) or 8784 (for a leap year). If the `period` is a 'month', checks that
the length of the timeseries is equal to the length of the complete date_range
- between the earliest and latest timestamp in a month.
+ between the earliest and latest timestamp in a month.
Args:
df: dataframe containing datetime columns
@@ -775,7 +772,6 @@ def check_for_complete_timeseries(df, df_name, keys, period):
)
logger.warning("\n" + test.to_string())
elif period == "month":
-
# count the number of timestamps in each group-month
test = (
df.groupby(keys + ["report_date"])[["datetime_utc"]]
@@ -1258,9 +1254,7 @@ def summarize_cems_measurement_quality(cems):
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].astype(
- str
- )
+ ].astype(str)
# replace the CEMS mass measurement codes with two categories
measurement_code_map = {
"Measured": "Measured",
@@ -1283,9 +1277,7 @@
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].replace(
- measurement_code_map
- )
+ ].replace(measurement_code_map)

cems_quality_summary = []
# calculate the percent of mass for each pollutant that is measured or imputed
@@ -1435,7 +1427,6 @@ def validate_diba_imputation_method(hourly_profiles, year):


def validate_national_imputation_method(hourly_profiles):
-
# only keep wind and solar data
data_to_validate = hourly_profiles[
(hourly_profiles["fuel_category"].isin(["wind", "solar"]))
@@ -1606,7 +1597,6 @@ def test_for_missing_data(df, columns_to_test):


def test_for_missing_incorrect_prime_movers(df, year):
-
# cehck for incorrect PM by comparing to EIA-860 data
pudl_out = load_data.initialize_pudl_out(year)
pms_in_eia860 = pudl_out.gens_eia860()[
@@ -1799,16 +1789,12 @@ def load_egrid_plant_file(year):
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS),
"co2_mass_lb_for_electricity_adjusted",
- ].fillna(
- 0
- )
+ ].fillna(0)
egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
- ].fillna(
- 0
- )
+ ].fillna(0)

# reorder the columns
egrid_plant = egrid_plant[
@@ -2122,7 +2108,6 @@ def compare_plant_level_results_to_egrid(
def identify_plants_missing_from_our_calculations(
egrid_plant, annual_plant_results, year
):
-
# remove any plants that have no reported data in egrid
# NOTE: it seems that egrid includes a lot of proposed projects that are not yet operating, but just has missing data for them
plants_with_no_data_in_egrid = list(