From c5ac89d52ec6429a6020009b06797c718a4d2546 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Tue, 17 Dec 2024 22:09:57 -0800 Subject: [PATCH] RAHHH --- api/webscraper/database.py | 12 +++---- api/webscraper/nyiso_scraper.py | 30 +++++++++-------- api/webscraper/nyserda_scraper.py | 47 +++++++++++++-------------- api/webscraper/utils/scraper_utils.py | 3 +- types/schema.d.ts | 4 +++ 5 files changed, 52 insertions(+), 44 deletions(-) diff --git a/api/webscraper/database.py b/api/webscraper/database.py index c4a8a01..dcb0c48 100644 --- a/api/webscraper/database.py +++ b/api/webscraper/database.py @@ -7,7 +7,7 @@ from .nyserda_scraper import query_nyserda_large, query_nyserda_solar_repeat from .nyiso_scraper import ( - filter_nyiso_iq_sheet, + # filter_nyiso_iq_sheet, ** NO LONGER NEEDED ** filter_nyiso_cluster_sheet, filter_nyiso_in_service_sheet, filter_nyiso_withdrawn_sheets, @@ -32,7 +32,7 @@ key: str = os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY") supabase: Client = create_client(url, key) supabase_table: str = ( - "Projects_test_deena" # TODO: modify based on which table in supabase we want to edit + "Projects_test_julee" # TODO: modify based on which table in supabase we want to edit ) geocode_api: str = os.environ.get("NEXT_PUBLIC_GEOCODIO_API_KEY") @@ -436,7 +436,7 @@ def nyiso_to_database() -> None: The helper function first checks if an existing project with a matching name exists in Supabase. If so, the existing project is updated if it has newer data (or if any of the last_updated date information is missing). Otherwise, the new project is pushed to Supabase. 
- This helper function is called for all three sheets in the NYISO xlsx spreadsheet: Interconnection Queue, Cluster Projects, and In Service + This helper function is called for two sheets in the NYISO xlsx spreadsheet: Cluster Projects and In Service """ updated_ids = set() inserted_ids = set() @@ -616,9 +616,9 @@ def nyiso_to_database_helper(projects, sheet_name): print(exception) # call helper function for each sheet with the corresponding sheet name - nyiso_to_database_helper(filter_nyiso_iq_sheet()[:10], "Interconnection Queue") - nyiso_to_database_helper(filter_nyiso_cluster_sheet()[:10], "Cluster Projects") - nyiso_to_database_helper(filter_nyiso_in_service_sheet()[:10], "In Service") + # nyiso_to_database_helper(filter_nyiso_iq_sheet()[:10], "Interconnection Queue") ** NO LONGER NEEDED ** + nyiso_to_database_helper(filter_nyiso_cluster_sheet(), "Cluster Projects") + nyiso_to_database_helper(filter_nyiso_in_service_sheet(), "In Service") return {"updated_ids": updated_ids, "inserted_ids": inserted_ids} diff --git a/api/webscraper/nyiso_scraper.py b/api/webscraper/nyiso_scraper.py index 966fda1..7d2af2a 100644 --- a/api/webscraper/nyiso_scraper.py +++ b/api/webscraper/nyiso_scraper.py @@ -74,9 +74,10 @@ def query_nyiso(): "project_image": None, "interconnection_queue_number": item.get("Queue Pos.", None), "approved": False, - # the following fields are used for updating kdms when updating the database + # the following 2 fields are used for updating kdms when updating the database "date_of_ir": item.get("Date of IR", None), # already a datetime object "ia_tender_date": item.get("IA Tender Date", None), + "utility_service_provider": item.get("Utility", None), } filtered_list.append(project_dict) @@ -119,7 +120,8 @@ def filter_nyiso_list(project_list, sheet_name): "proposed_cod": item.get( "Proposed COD", None ), # NOTE: non-serializable into JSON --> can't directly write to file - "county": [item.get("County")] or None, + # County labelled as "County" for 
Cluster Projects and "Location County" for In Service + "county": [item.get("County") if sheet_name == "Cluster Projects" else item.get("Location County", None)] or None, "region": None, # missing "zipcode": None, # missing "latitude": None, @@ -140,8 +142,9 @@ def filter_nyiso_list(project_list, sheet_name): "approved": False, # the following fields are used for updating kdms when updating the database "date_of_ir": item.get("Date of IR", None), # datetime object - "ia_tender_date": item.get("IA Tender Date", None), # timestamp object - "utility": item.get("Utility", None), + "ia_tender_date": item.get("IA Tender Date", None), # timestamp object + # Utility labelled as "Utility" (no space) for the Cluster Projects sheet and "Utility " (trailing space) for the In Service sheet + "utility_service_provider": item.get("Utility", None) if sheet_name == "Cluster Projects" else item.get("Utility ", None), } if sheet_name == "In Service": project_dict["developer"] = item.get("Owner/Developer", None) @@ -150,17 +153,18 @@ def filter_nyiso_list(project_list, sheet_name): return filtered_list -def filter_nyiso_iq_sheet(): - all_sheets = query_nyiso_excel() - sheet_names = list(all_sheets.keys()) - iq_key = sheet_names[0] +# ** NO LONGER NEEDED SINCE THE Interconnection Queue SHEET IS INACCURATE ** +# def filter_nyiso_iq_sheet(): +# all_sheets = query_nyiso_excel() +# sheet_names = list(all_sheets.keys()) +# iq_key = sheet_names[0] - iq_df = all_sheets[iq_key] # Interconnection Queue - iq_df = clean_df_data(iq_df) - iq_list = iq_df.to_dict(orient="records") +# iq_df = all_sheets[iq_key] # Interconnection Queue +# iq_df = clean_df_data(iq_df) +# iq_list = iq_df.to_dict(orient="records") - filtered_list = filter_nyiso_list(iq_list, "Interconnection Queue") - return filtered_list +# filtered_list = filter_nyiso_list(iq_list, "Interconnection Queue") +# return filtered_list def filter_nyiso_cluster_sheet(): diff --git a/api/webscraper/nyserda_scraper.py b/api/webscraper/nyserda_scraper.py index eaceb52..6bdec1f 
100644 --- a/api/webscraper/nyserda_scraper.py +++ b/api/webscraper/nyserda_scraper.py @@ -120,30 +120,29 @@ def query_nyserda_solar(offset=0, limit=1000): ): # some projects have no project_id, so we skip them continue - if check_status(item.get("project_status", None)) != "Cancelled": - project_dict = { - "project_name": item.get( - "project_id", None - ), # small data set only has project_id - "project_status": check_status( - item.get("project_status", None) - ), # NYSERDA small-scale solar projects do not have a project status - "renewable_energy_technology": "Solar", - "size": size_in_mw, - "developer": item.get("developer", None), - "proposed_cod": item.get("interconnection_date", None), - "town": [item.get("city_town")] or None, - "county": [item.get("county")] or None, - "region": item.get("redc", None), # missing - "zipcode": item.get("zip", None), - "latitude": None, - "longitude": None, - "data_through_date": item.get("data_through_date").split("T")[0], - "key_development_milestones": initial_kdm, - "project_image": None, - "approved": False, - } - filtered_list.append(project_dict) + project_dict = { + "project_name": item.get( + "project_id", None + ), # small data set only has project_id + "project_status": check_status( + item.get("project_status", None) + ), # NYSERDA small-scale solar projects do not have a project status + "renewable_energy_technology": "Solar", + "size": size_in_mw, + "developer": item.get("developer", None), + "proposed_cod": item.get("interconnection_date", None), + "town": [item.get("city_town")] or None, + "county": [item.get("county")] or None, + "region": item.get("redc", None), # missing + "zipcode": item.get("zip", None), + "latitude": None, + "longitude": None, + "data_through_date": item.get("data_through_date").split("T")[0], + "key_development_milestones": initial_kdm, + "project_image": None, + "approved": False, + } + filtered_list.append(project_dict) return filtered_list diff --git 
a/api/webscraper/utils/scraper_utils.py b/api/webscraper/utils/scraper_utils.py index 4497899..4680d83 100644 --- a/api/webscraper/utils/scraper_utils.py +++ b/api/webscraper/utils/scraper_utils.py @@ -18,8 +18,9 @@ def check_status(status: str): """ if status is None: return None + # NYSERDA: a cancelled project is still reported as "Proposed" rather than "Cancelled" if status.lower() == "cancelled": - return "Cancelled" + return "Proposed" elif status.lower() == "operational" or status.lower() == "completed": return "Operational" elif status.lower() == "under development": diff --git a/types/schema.d.ts b/types/schema.d.ts index 5fff716..c77e498 100644 --- a/types/schema.d.ts +++ b/types/schema.d.ts @@ -24,6 +24,10 @@ export type Project = { permit_process: string | null; permit_application_number: string | null; last_updated: Date; + has_energy_storage: boolean; + has_pumped_storage: boolean; + storage_size: number; + utility_service_provider: string; }; export interface Option {