
Commit aab1d6d

chore format modules
rouille committed Dec 5, 2023
1 parent a0a269f commit aab1d6d
Showing 12 changed files with 49 additions and 77 deletions.
src/consumed.py (10 changes: 5 additions & 5 deletions)
@@ -439,12 +439,12 @@ def _load_rates(self):
)

# Cut off emissions at 9 hours after UTC year
- emissions = emissions[:f"{self.year+1}-01-01 09:00:00+00:00"]
- rates[((adj, pol))] = emissions
+ emissions = emissions[: f"{self.year+1}-01-01 09:00:00+00:00"]
+ rates[(adj, pol)] = emissions

# Make generation data frame
generation = pd.DataFrame(data=gens)
- generation = generation[:f"{self.year+1}-01-01 09:00:00+00:00"]
+ generation = generation[: f"{self.year+1}-01-01 09:00:00+00:00"]

return rates, generation

@@ -462,7 +462,7 @@ def build_matrices(self, pol: str, adj: str, date):

# Build generation array, using 930 for import-only regions
G = np.zeros(len(self.regions))
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
if r in self.import_regions:
G[i] = self.eia930.df.loc[date, KEYS["E"]["NG"] % r]
else:
@@ -513,7 +513,7 @@ def run(self):
consumed_emissions = np.full(len(self.regions), np.nan)

# Export
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
self.results[r].loc[date, col] = consumed_emissions[i]
if total_failed > 0:
logger.warning(
src/data_cleaning.py (9 changes: 1 addition & 8 deletions)
@@ -453,9 +453,7 @@ def clean_eia923(
"fuel_consumed_mmbtu",
"fuel_consumed_for_electricity_mmbtu",
],
- ].round(
- 1
- )
+ ].round(1)

validation.test_for_missing_energy_source_code(gen_fuel_allocated)
validation.test_for_negative_values(gen_fuel_allocated)
@@ -708,7 +706,6 @@ def calculate_aggregated_primary_fuel(
# we will calculate primary fuel based on the fuel with the most consumption,
# generation, and capacity
for source in ["fuel_consumed_for_electricity_mmbtu", "net_generation_mwh"]:
-
# only keep values greater than zero so that these can be filled by other
# methods if non-zero
primary_fuel_calc = agg_totals_by_fuel[agg_totals_by_fuel[source] > 0]
@@ -1088,7 +1085,6 @@ def manually_remove_steam_units(df):


def remove_incomplete_unit_months(cems):
-
# get a count of how many hours are reported in each month for each unit
unit_hours_in_month = (
cems[["plant_id_eia", "report_date", "emissions_unit_id_epa", "datetime_utc"]]
@@ -1748,7 +1744,6 @@ def filter_unique_cems_data(cems, partial_cems):


def aggregate_plant_data_to_ba_fuel(combined_plant_data, plant_attributes_table):
-
# create a table that has data for the sythetic plant attributes
shaped_plant_attributes = (
plant_attributes_table[["shaped_plant_id", "ba_code", "fuel_category"]]
@@ -1892,7 +1887,6 @@ def combine_plant_data(


def create_plant_attributes_table(cems, eia923_allocated, year, primary_fuel_table):
-
# create a table with the unique plantids from both dataframes
eia_plants = eia923_allocated[
["plant_id_eia", "plant_primary_fuel"]
@@ -2184,7 +2178,6 @@ def add_plant_local_timezone(df, year):


def aggregate_cems_to_subplant(cems):
-
GROUPBY_COLUMNS = ["plant_id_eia", "subplant_id", "datetime_utc", "report_date"]

cems_columns_to_aggregate = [
src/download_data.py (4 changes: 1 addition & 3 deletions)
@@ -41,9 +41,7 @@ def download_helper(
# If the file already exists, do not re-download it.
final_destination = output_path if output_path is not None else download_path
if os.path.exists(final_destination):
- logger.info(
- f"{final_destination.split('/')[-1]} already downloaded, skipping."
- )
+ logger.info(f"{final_destination.split('/')[-1]} already downloaded, skipping.")
return False

# Otherwise, download to the file in chunks.
src/eia930.py (5 changes: 1 addition & 4 deletions)
@@ -285,7 +285,6 @@ def load_chalendar_for_pipeline(cleaned_data_filepath, year):


def remove_imputed_ones(eia930_data):
-
filter = eia930_data["net_generation_mwh_930"].abs() < 1.5

# replace all 1.0 values with zero
@@ -460,9 +459,7 @@ def manual_930_adjust(raw: pd.DataFrame):
& (raw.index < "2022-06-16 07:00:00+00")
),
cols,
- ].shift(
- 1, freq="H"
- )
+ ].shift(1, freq="H")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")

src/emissions.py (32 changes: 20 additions & 12 deletions)
@@ -481,16 +481,19 @@ def calculate_nox_from_fuel_consumption(
if len(missing_ef) > 0:
logger.warning("NOx emission factors are missing for the following records")
logger.warning("Missing factors for FC prime movers are currently expected")
- logger.warning("\n" +
- missing_ef[
+ logger.warning(
+ "\n"
+ + missing_ef[
[
"report_date",
"plant_id_eia",
"energy_source_code",
"prime_mover_code",
"generator_id",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
gen_fuel_allocated["nox_mass_lb"] = (
gen_fuel_allocated["fuel_consumed_mmbtu"]
@@ -687,7 +690,8 @@ def calculate_generator_nox_ef_per_unit_from_boiler_type(
)
)
if len(missing_nox_efs) > 0:
- logger.warning("""
+ logger.warning(
+ """
After filling with PM-fuel factors, NOx emission factors are still missing for the following boiler types.
An emission factor of zero will be used for these boilers.
Missing factors for FC prime movers are currently expected."""
@@ -717,7 +721,6 @@ def calculate_generator_nox_ef_per_unit_from_boiler_type(


def load_boiler_firing_type(year):
-
boiler_design_parameters_eia860 = load_data.load_boiler_design_parameters_eia860(
year
)
@@ -799,7 +802,6 @@ def fill_missing_factors_based_on_pm_fuel(emission_factors, gen_factors):


def convert_ef_to_lb_per_mmbtu(gen_emission_factors, pudl_out, pollutant):
-
# get the reported fuel heat content values from EIA-923
(
plant_specific_fuel_heat_content,
@@ -1214,16 +1216,19 @@ def calculate_so2_from_fuel_consumption(gen_fuel_allocated, pudl_out, year):
if len(missing_ef) > 0:
logger.warning("SO2 emission factors are missing for the above records")
logger.warning("Missing factors for FC prime movers are currently expected")
- logger.warning("\n" +
- missing_ef[
+ logger.warning(
+ "\n"
+ + missing_ef[
[
"report_date",
"plant_id_eia",
"energy_source_code",
"prime_mover_code",
"generator_id",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
gen_fuel_allocated["so2_mass_lb"] = (
gen_fuel_allocated["fuel_consumed_mmbtu"]
@@ -1561,15 +1566,18 @@ def adjust_so2_efs_for_fuel_sulfur_content(uncontrolled_so2_factors, pudl_out):
]
if len(missing_sulfur_content) > 0:
logger.warning("Sulfur content data is missing in EIA-923 for the above units.")
- logger.warning("\n" +
- missing_sulfur_content[
+ logger.warning(
+ "\n"
+ + missing_sulfur_content[
[
"plant_id_eia",
"generator_id",
"prime_mover_code",
"energy_source_code",
]
- ].drop_duplicates().to_string()
+ ]
+ .drop_duplicates()
+ .to_string()
)
uncontrolled_so2_factors.loc[
uncontrolled_so2_factors["sulfur_content_pct"].isna()
src/gross_to_net_generation.py (1 change: 0 additions & 1 deletion)
@@ -779,7 +779,6 @@ def load_monthly_gross_and_net_generation(start_year, end_year):


def gross_to_net_ratio(gross_gen_data, net_gen_data, agg_level, year):
-
if agg_level == "plant":
plant_aggregation_columns = ["plant_id_eia"]
elif agg_level == "subplant":
src/impute_hourly_profiles.py (8 changes: 1 addition & 7 deletions)
@@ -334,7 +334,6 @@ def aggregate_for_residual(


def aggregate_non_930_fuel_categories(cems, plant_attributes):
-
# get a list of the fuel categories not in EIA-930
fuel_categories_not_in_eia930 = list(
set(plant_attributes.fuel_category.unique())
@@ -426,9 +425,7 @@ def calculate_residual(
~combined_data["eia930_profile"].isna(), "cems_profile"
] = combined_data.loc[
~combined_data["eia930_profile"].isna(), "cems_profile"
- ].fillna(
- 0
- )
+ ].fillna(0)

combined_data = calculate_scaled_residual(combined_data)
combined_data = calculate_shifted_residual(combined_data)
@@ -712,7 +709,6 @@ def identify_missing_profiles(
def average_diba_wind_solar_profiles(
residual_profiles, ba, fuel, report_date, ba_dibas, validation_run=False
):
-
# calculate the average generation profile for the fuel in all neighboring DIBAs
df_temporary = residual_profiles.copy()[
(residual_profiles["ba_code"].isin(ba_dibas))
@@ -1022,7 +1018,6 @@ def combine_and_export_hourly_plant_data(

# for each region, shape the EIA-only data, combine with CEMS data, and export
for region in list(plant_attributes[region_to_group].unique()):
-
# filter each of the data sources to the region
eia_region = monthly_eia_data_to_shape_agg[
monthly_eia_data_to_shape_agg[region_to_group] == region
@@ -1257,7 +1252,6 @@ def shape_partial_cems_plants(cems, eia923_allocated):

# if there is no data in the partial cems dataframe, skip.
if len(eia_data_to_shape) > 0:
-
# group the eia data by subplant
eia_data_to_shape = (
eia_data_to_shape.groupby(SUBPLANT_KEYS, dropna=False)[DATA_COLUMNS]
src/load_data.py (2 changes: 0 additions & 2 deletions)
@@ -367,7 +367,6 @@ def initialize_pudl_out(year=None):
if year is None:
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)
else:
-
pudl_out = pudl.output.pudltabl.PudlTabl(
pudl_engine,
freq="MS",
@@ -600,7 +599,6 @@ def load_ipcc_gwp():


def load_raw_eia930_data(year, description):
-
eia_930 = pd.concat(
[
pd.read_csv(
src/output_data.py (1 change: 0 additions & 1 deletion)
@@ -141,7 +141,6 @@ def output_to_results(
validation.test_for_missing_values(df, small)

if not skip_outputs:
-
df.to_csv(
results_folder(f"{path_prefix}{subfolder}us_units/{file_name}.csv"),
index=False,
src/validation.py (29 changes: 7 additions & 22 deletions)
@@ -110,7 +110,6 @@ def flag_possible_primary_fuel_mismatches(plant_primary_fuel):
]

for esc_column in ["plant_primary_fuel_from_capacity_mw", "plant_primary_fuel"]:
-
# load the fuel category table
energy_source_groups = pd.read_csv(
manual_folder("energy_source_groups.csv"), dtype=get_dtypes()
@@ -341,7 +340,7 @@ def check_missing_or_zero_generation_matches(combined_gen_data):
# identify when there is zero or NA gross generation associated with positive net generation
missing_gross_gen = combined_gen_data[
(combined_gen_data["net_generation_mwh"] > 0)
- & ((combined_gen_data["gross_generation_mwh"] == 0))
+ & (combined_gen_data["gross_generation_mwh"] == 0)
]

# identify when there is zero or NA net generation associated with nonzero gross generation
@@ -621,9 +620,7 @@ def ensure_non_overlapping_data_from_all_sources(
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
] = data_overlap[
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
- ].fillna(
- 0
- )
+ ].fillna(0)
data_overlap["number_of_locations"] = (
data_overlap["in_eia"]
+ data_overlap["in_cems"]
@@ -747,7 +744,7 @@ def check_for_complete_timeseries(df, df_name, keys, period):
If the `period` is a 'year', checks that the length of the timeseries is 8760 (for a
non-leap year) or 8784 (for a leap year). If the `period` is a 'month', checks that
the length of the timeseries is equal to the length of the complete date_range
- between the earliest and latest timestamp in a month.
+ between the earliest and latest timestamp in a month.
Args:
df: dataframe containing datetime columns
@@ -775,7 +772,6 @@ def check_for_complete_timeseries(df, df_name, keys, period):
)
logger.warning("\n" + test.to_string())
elif period == "month":
-
# count the number of timestamps in each group-month
test = (
df.groupby(keys + ["report_date"])[["datetime_utc"]]
@@ -1258,9 +1254,7 @@ def summarize_cems_measurement_quality(cems):
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].astype(
- str
- )
+ ].astype(str)
# replace the CEMS mass measurement codes with two categories
measurement_code_map = {
"Measured": "Measured",
@@ -1283,9 +1277,7 @@
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].replace(
- measurement_code_map
- )
+ ].replace(measurement_code_map)

cems_quality_summary = []
# calculate the percent of mass for each pollutant that is measured or imputed
@@ -1435,7 +1427,6 @@ def validate_diba_imputation_method(hourly_profiles, year):


def validate_national_imputation_method(hourly_profiles):
-
# only keep wind and solar data
data_to_validate = hourly_profiles[
(hourly_profiles["fuel_category"].isin(["wind", "solar"]))
@@ -1606,7 +1597,6 @@ def test_for_missing_data(df, columns_to_test):


def test_for_missing_incorrect_prime_movers(df, year):
-
# cehck for incorrect PM by comparing to EIA-860 data
pudl_out = load_data.initialize_pudl_out(year)
pms_in_eia860 = pudl_out.gens_eia860()[
@@ -1799,16 +1789,12 @@ def load_egrid_plant_file(year):
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS),
"co2_mass_lb_for_electricity_adjusted",
- ].fillna(
- 0
- )
+ ].fillna(0)
egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
- ].fillna(
- 0
- )
+ ].fillna(0)

# reorder the columns
egrid_plant = egrid_plant[
@@ -2122,7 +2108,6 @@ def compare_plant_level_results_to_egrid(
def identify_plants_missing_from_our_calculations(
egrid_plant, annual_plant_results, year
):
-
# remove any plants that have no reported data in egrid
# NOTE: it seems that egrid includes a lot of proposed projects that are not yet operating, but just has missing data for them
plants_with_no_data_in_egrid = list(