Skip to content

Commit

Permalink
RAHHH
Browse files Browse the repository at this point in the history
  • Loading branch information
jjstnlee committed Dec 18, 2024
1 parent 9c4ac21 commit c5ac89d
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 44 deletions.
12 changes: 6 additions & 6 deletions api/webscraper/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .nyserda_scraper import query_nyserda_large, query_nyserda_solar_repeat
from .nyiso_scraper import (
filter_nyiso_iq_sheet,
# filter_nyiso_iq_sheet, ** NO LONGER NEEDED **
filter_nyiso_cluster_sheet,
filter_nyiso_in_service_sheet,
filter_nyiso_withdrawn_sheets,
Expand All @@ -32,7 +32,7 @@
key: str = os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY")
supabase: Client = create_client(url, key)
supabase_table: str = (
"Projects_test_deena" # TODO: modify based on which table in supabase we want to edit
"Projects_test_julee" # TODO: modify based on which table in supabase we want to edit
)

geocode_api: str = os.environ.get("NEXT_PUBLIC_GEOCODIO_API_KEY")
Expand Down Expand Up @@ -436,7 +436,7 @@ def nyiso_to_database() -> None:
The helper function first checks if an existing project with a matching name exists in Supabase.
If so, the existing project is updated if it has newer data (or if any of the last_updated date information is missing).
Otherwise, the new project is pushed to Supabase.
This helper function is called for all three sheets in the NYISO xlsx spreadsheet: Interconnection Queue, Cluster Projects, and In Service
This helper function is called for two sheets in the NYISO xlsx spreadsheet: Cluster Projects and In Service
"""
updated_ids = set()
inserted_ids = set()
Expand Down Expand Up @@ -616,9 +616,9 @@ def nyiso_to_database_helper(projects, sheet_name):
print(exception)

# call helper function for each sheet with the corresponding sheet name
nyiso_to_database_helper(filter_nyiso_iq_sheet()[:10], "Interconnection Queue")
nyiso_to_database_helper(filter_nyiso_cluster_sheet()[:10], "Cluster Projects")
nyiso_to_database_helper(filter_nyiso_in_service_sheet()[:10], "In Service")
# nyiso_to_database_helper(filter_nyiso_iq_sheet()[:10], "Interconnection Queue") ** NO LONGER NEEDED **
nyiso_to_database_helper(filter_nyiso_cluster_sheet(), "Cluster Projects")
nyiso_to_database_helper(filter_nyiso_in_service_sheet(), "In Service")

return {"updated_ids": updated_ids, "inserted_ids": inserted_ids}

Expand Down
30 changes: 17 additions & 13 deletions api/webscraper/nyiso_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@ def query_nyiso():
"project_image": None,
"interconnection_queue_number": item.get("Queue Pos.", None),
"approved": False,
# the following fields are used for updating kdms when updating the database
# the following 2 fields (date_of_ir and ia_tender_date) are used for updating kdms when updating the database
"date_of_ir": item.get("Date of IR", None), # already a datetime object
"ia_tender_date": item.get("IA Tender Date", None),
"utility_service_provider": item.get("Utility", None),
}
filtered_list.append(project_dict)

Expand Down Expand Up @@ -119,7 +120,8 @@ def filter_nyiso_list(project_list, sheet_name):
"proposed_cod": item.get(
"Proposed COD", None
), # NOTE: non-serializable into JSON --> can't directly write to file
"county": [item.get("County")] or None,
# County labelled as "County" for Cluster Projects and "Location County" for In Service
"county": [item.get("County") if sheet_name == "Cluster Projects" else item.get("Location County", None)] or None,
"region": None, # missing
"zipcode": None, # missing
"latitude": None,
Expand All @@ -140,8 +142,9 @@ def filter_nyiso_list(project_list, sheet_name):
"approved": False,
# the following fields are used for updating kdms when updating the database
"date_of_ir": item.get("Date of IR", None), # datetime object
"ia_tender_date": item.get("IA Tender Date", None), # timestamp object
"utility": item.get("Utility", None),
"ia_tender_date": item.get("IA Tender Date", None), # timestamp objects
# Utility labelled as "Utility" (no trailing space) for the Cluster Projects sheet and "Utility " (trailing space) for the In Service sheet
"utility_service_provider": item.get("Utility", None) if sheet_name == "Cluster Projects" else item.get("Utility ", None),
}
if sheet_name == "In Service":
project_dict["developer"] = item.get("Owner/Developer", None)
Expand All @@ -150,17 +153,18 @@ def filter_nyiso_list(project_list, sheet_name):
return filtered_list


def filter_nyiso_iq_sheet():
all_sheets = query_nyiso_excel()
sheet_names = list(all_sheets.keys())
iq_key = sheet_names[0]
# ** NO LONGER NEEDED SINCE THE Interconnection Queue SHEET IS INACCURATE **
# def filter_nyiso_iq_sheet():
# all_sheets = query_nyiso_excel()
# sheet_names = list(all_sheets.keys())
# iq_key = sheet_names[0]

iq_df = all_sheets[iq_key] # Interconnection Queue
iq_df = clean_df_data(iq_df)
iq_list = iq_df.to_dict(orient="records")
# iq_df = all_sheets[iq_key] # Interconnection Queue
# iq_df = clean_df_data(iq_df)
# iq_list = iq_df.to_dict(orient="records")

filtered_list = filter_nyiso_list(iq_list, "Interconnection Queue")
return filtered_list
# filtered_list = filter_nyiso_list(iq_list, "Interconnection Queue")
# return filtered_list


def filter_nyiso_cluster_sheet():
Expand Down
47 changes: 23 additions & 24 deletions api/webscraper/nyserda_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,30 +120,29 @@ def query_nyserda_solar(offset=0, limit=1000):
): # some projects have no project_id, so we skip them
continue

if check_status(item.get("project_status", None)) != "Cancelled":
project_dict = {
"project_name": item.get(
"project_id", None
), # small data set only has project_id
"project_status": check_status(
item.get("project_status", None)
), # NYSERDA small-scale solar projects do not have a project status
"renewable_energy_technology": "Solar",
"size": size_in_mw,
"developer": item.get("developer", None),
"proposed_cod": item.get("interconnection_date", None),
"town": [item.get("city_town")] or None,
"county": [item.get("county")] or None,
"region": item.get("redc", None), # missing
"zipcode": item.get("zip", None),
"latitude": None,
"longitude": None,
"data_through_date": item.get("data_through_date").split("T")[0],
"key_development_milestones": initial_kdm,
"project_image": None,
"approved": False,
}
filtered_list.append(project_dict)
project_dict = {
"project_name": item.get(
"project_id", None
), # small data set only has project_id
"project_status": check_status(
item.get("project_status", None)
), # NYSERDA small-scale solar projects do not have a project status
"renewable_energy_technology": "Solar",
"size": size_in_mw,
"developer": item.get("developer", None),
"proposed_cod": item.get("interconnection_date", None),
"town": [item.get("city_town")] or None,
"county": [item.get("county")] or None,
"region": item.get("redc", None), # missing
"zipcode": item.get("zip", None),
"latitude": None,
"longitude": None,
"data_through_date": item.get("data_through_date").split("T")[0],
"key_development_milestones": initial_kdm,
"project_image": None,
"approved": False,
}
filtered_list.append(project_dict)
return filtered_list


Expand Down
3 changes: 2 additions & 1 deletion api/webscraper/utils/scraper_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ def check_status(status: str):
"""
if status is None:
return None
# For NYSERDA, return "Proposed" even if the project is cancelled
if status.lower() == "cancelled":
return "Cancelled"
return "Proposed"
elif status.lower() == "operational" or status.lower() == "completed":
return "Operational"
elif status.lower() == "under development":
Expand Down
4 changes: 4 additions & 0 deletions types/schema.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ export type Project = {
permit_process: string | null;
permit_application_number: string | null;
last_updated: Date;
has_energy_storage: boolean;
has_pumped_storage: boolean;
storage_size: number;
utility_service_provider: string;
};

export interface Option {
Expand Down

0 comments on commit c5ac89d

Please sign in to comment.