From 897a0e0a23dcf8696955244ddae9a124ea114676 Mon Sep 17 00:00:00 2001
From: Alec Delaney
Date: Tue, 29 Oct 2024 20:05:45 -0400
Subject: [PATCH] Add additional comments, minimal rearranging of code

---
 flask_app/__init__.py     | 41 +++++++++++++++++++++++++++++++-
 flask_app/helpers.py      | 49 +++++++++++++++++++++++++++++----------
 scripts/graphql.py        | 34 +++++++++++++++++++++++----
 scripts/post_graphql.py   | 16 ++++++++++++-
 scripts/renew_cert.sh     |  3 +++
 scripts/schedule_cache.sh |  3 +++
 scripts/update_cache.sh   |  4 ++++
 7 files changed, 132 insertions(+), 18 deletions(-)

diff --git a/flask_app/__init__.py b/flask_app/__init__.py
index 0281148..e1ed89c 100644
--- a/flask_app/__init__.py
+++ b/flask_app/__init__.py
@@ -15,7 +15,7 @@
 import dateutil.parser
 import dateutil.tz
 import jinja2
-from flask import Flask, Response, redirect, render_template, send_file
+from flask import Flask, Response, redirect, render_template, send_file, url_for
 from flask_bootstrap import Bootstrap5
 from flask_limiter import Limiter
 from flask_limiter.util import get_remote_address
@@ -27,15 +27,20 @@
     sort_jobs_start_date,
 )
 
+# Initialize the Flask app
 app = Flask(__name__)
 
+# Open the configuration settings file
 with open("/etc/config.json", encoding="utf-8") as jsonfile:
     config = json.load(jsonfile)
 
+# Disable CSRF for WTForms
 app.config["WTF_CSRF_ENABLED"] = False
 
+# Initialize Bootstrap
 bootstrap = Bootstrap5(app)
 
+# Initialize the rate limiter
 limiter = Limiter(
     get_remote_address,
     app=app,
@@ -69,38 +74,59 @@ def menorah_settings() -> Response:
 @limiter.limit("10/second", key_func=lambda: "menorah-settings")
 def project_menorah_settings() -> str:
     """Route for creating menorah settings file."""
+    # Get the Menorah setup form
     input_form = MenorahSetupForm()
+
+    # Handle form submission validation
     if input_form.validate_on_submit():
+        # Get the zip code from the form
         zipcode = input_form.data["zipcode"]
+
+        # Add the zip code to the template and render it
         with open("assets/settings.json", encoding="utf-8") as template_file:
             template_text = template_file.read()
         template = jinja2.Template(template_text)
         rendered_temp = template.render(zipcode=zipcode)
+
+        # Send the rendered settings file to the user for download
         file_bytesio = io.BytesIO()
         file_bytesio.write(rendered_temp.encode("utf-8"))
         file_bytesio.seek(0)
         return send_file(
             file_bytesio, as_attachment=True, download_name="settings.json"
         )
+
+    # Render the HTML template
     return render_template("projects/menorah/settings.html", input_form=input_form)
 
 
 @app.route("/recent", methods=["GET"])
 def recent() -> str:
     """Route for recent GitHub activity."""
+    # Get the current datetime and format it as a string, down to the hour
     datetime_fmt = "%Y%m%d%H"
     current_datetime = datetime.datetime.now(dateutil.tz.gettz())
     current_datetime_str = current_datetime.strftime(datetime_fmt)
+
+    # Read the contents of the relevant (current) recent activity JSON file
     with open(
         f"assets/contrib/recent_{current_datetime_str}.json", encoding="utf-8"
     ) as respfile:
         contents = json.load(respfile)
+
+    # Get the contributions (calendar) collection and specific repository contributions
     contributions, repos = contents["contributionsCollection"], contents["repositories"]
+
+    # Get the start, end, and delta between times within the contributions collection
     end_datetime = dateutil.parser.parse(contributions["endedAt"])
     start_datetime = dateutil.parser.parse(contributions["startedAt"])
     diff_datetime: datetime.timedelta = end_datetime - start_datetime
+
+    # Get the oldest push datetime from the specific repository contributions
     oldest_push = dateutil.parser.parse(repos["nodes"][-1]["pushedAt"])
     diff_oldest = current_datetime - oldest_push
+
+    # Render the HTML template
     return render_template(
         "recent.html",
         repos=repos["nodes"],
@@ -114,28 +140,41 @@ def recent() -> str:
 @app.route("/about", methods=["GET"])
 def about() -> str:
     """Route for about me page."""
+    # Load the jobs files and initialize them in a list
     jobs_path = pathlib.Path("assets/about/jobs")
     jobs = []
     for job_path in jobs_path.glob("*.json"):
         with open(job_path, encoding="utf-8") as jobfile:
             job_obj = json.load(jobfile)
+            # No end date means it's the current (active) job
             if job_obj["endDate"] is None:
                 job_obj["endDate"] = "current"
             jobs.append(job_obj)
+
+    # Sort the jobs list using the custom sorting key
     jobs.sort(key=sort_jobs_start_date, reverse=True)
+
+    # Consolidate jobs that are grouped (like promotions)
     jobs_lists = consolidate_sorted_jobs(jobs)
+
+    # Sort the grouped jobs using the custom sorting key
     jobs_lists.sort(key=sort_grouped_jobs, reverse=True)
 
+    # Load the education files and initialize them in a list
     education_paths = pathlib.Path("assets/about/education")
     educations = []
     for education_path in education_paths.glob("*.json"):
         with open(education_path, encoding="utf-8") as edufile:
             edu_obj = json.load(edufile)
+            # No end date means it's the current (active) education
             if edu_obj["endYear"] is None:
                 edu_obj["endYear"] = "current"
             educations.append(edu_obj)
+
+    # Sort the educations by start year
     educations.sort(key=lambda x: x["startYear"], reverse=True)
 
+    # Render the HTML template
     return render_template("about.html", jobs_lists=jobs_lists, educations=educations)
 
 
diff --git a/flask_app/helpers.py b/flask_app/helpers.py
index e1e9832..8ca63a2 100644
--- a/flask_app/helpers.py
+++ b/flask_app/helpers.py
@@ -34,11 +34,14 @@ def generate_settings_json(zipcode: str) -> str:
 
 def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
     """Get repository info from the GraphQL query."""
+    # Store the GraphQL API URL
     url = "https://api.github.com/graphql"
 
+    # Get the query from the saved text file
     with open("assets/graphql_query.txt", encoding="utf-8") as queryfile:
         query_param = {"query": queryfile.read()}
 
+    # Query the API via a POST request
     resp = requests.post(
         url,
         json=query_param,
@@ -48,56 +51,78 @@ def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
         timeout=5,
     )
 
+    # Return only part of the response data (the values within the "user" key)
     json_resp = json.loads(resp.content)["data"]["user"]
-
     return json_resp["contributionsCollection"], json_resp["repositories"]
 
 
 def sort_jobs_start_date(job: JobDict) -> int:
     """Sort the jobs by start date."""
+    # Split the month and year
     month_str, year_str = job["startDate"].split("/")
+    # Use a basic formula to create a sortable date value
     return int(year_str) * 100 + int(month_str)
 
 
 def consolidate_sorted_jobs(jobs: list[JobDict]) -> list[list[JobDict]]:
     """Consolidate jobs in instances like promotions."""
+    # Initialize empty dictionary for storing job groupings while iterating
     grouped_jobs_dict: dict[str, list[JobDict]] = {}
+
+    # Initialize empty list for storing sorted job groupings
     grouped_jobs_list: list[list[JobDict]] = []
 
+    # Iterate through provided jobs
     for job in jobs:
+        # Keep track of whether employers are newly added to the list
        newly_added = False
+
+        # Get the employer for the current job being analyzed
         employer = job["employer"]
 
-        # If not already in dict, add
+        # If not already in dict, add it and note it is a new job
         if employer not in grouped_jobs_dict:
             grouped_jobs_dict[employer] = [job]
             newly_added = True
 
-        # Get different of start and end of roles
-        start_role = datetime.datetime.strptime(
+        # Get start date of newer role and end date of older role
+        # (contained list is sorted in order of newest to oldest)
+        start_new_role = datetime.datetime.strptime(
             grouped_jobs_dict[employer][-1]["startDate"], "%m/%Y"
         )
         if job["endDate"] == "current":
-            end_role = datetime.datetime.now(dateutil.tz.gettz())
+            end_old_role = datetime.datetime.now(dateutil.tz.gettz())
         else:
-            end_role = datetime.datetime.strptime(job["endDate"], "%m/%Y")
-
-        # If job was not just newly added and gap is no more than 31 days apart
-        # then add to existing list
+            end_old_role = datetime.datetime.strptime(job["endDate"], "%m/%Y")
+
+        # If the employer was not newly added and the gap is no more than 31 days,
+        # then add the job to the existing list. This prevents grouping roles
+        # that have large breaks in between them (e.g., returning co-ops with no
+        # job in between).
+        #
+        # If the employer has already been added to the dictionary and the time
+        # between the jobs is short, append the job to the existing grouping and
+        # keep using it, as more jobs may be found to add to this grouping.
         duration_days = 31
-        if not newly_added and (start_role - end_role).days <= duration_days:
+        if not newly_added and (start_new_role - end_old_role).days <= duration_days:
             grouped_jobs_dict[employer].append(job)
+        # Otherwise, if the employer is already in the dict but the time between jobs
+        # is longer than 31 days, add the existing grouping to the return job list
+        # and start a new grouping for the employer, as the existing one is complete
         elif not newly_added:
             grouped_jobs_list.append(grouped_jobs_dict[employer])
             grouped_jobs_dict[employer] = [job]
 
+    # Groupings still remaining in the dict after iteration are complete, and
+    # should be added to the return job list
     for remaining_job in grouped_jobs_dict.values():
         grouped_jobs_list.append(remaining_job)
 
+    # Return the grouped jobs list
     return grouped_jobs_list
 
 
 def sort_grouped_jobs(jobs_list: list[JobDict]) -> int:
-    """Sort the grouped lists of jobs."""
-    return sort_jobs_start_date(jobs_list[0])
+    """Sort the grouped lists of jobs (based on the earliest job in the group)."""
+    return sort_jobs_start_date(jobs_list[-1])
diff --git a/scripts/graphql.py b/scripts/graphql.py
index 6b18527..b1ae7ef 100644
--- a/scripts/graphql.py
+++ b/scripts/graphql.py
@@ -12,35 +12,53 @@
 import dateutil.tz
 import requests
 
+# Store the GraphQL API URL and date format for files
 URL = "https://api.github.com/graphql"
 DATETIME_FMT = "%Y%m%d%H"
 
+# Get the current datetime
 current_datetime = datetime.datetime.now(dateutil.tz.gettz())
+# If "--now" is given as a command line argument, use the current time.
+# Otherwise, this is being used by a cron job that runs right before the
+# turn of the hour, so look ahead 10 minutes. Note that this is a timestamp
+# adjustment, not an execution delay.
if "--now" in sys.argv: delay = 0 else: delay = 10 +# Get the datetime string based on the delay next_datetime = current_datetime + datetime.timedelta(minutes=delay) next_datetime_str = next_datetime.strftime(DATETIME_FMT) +# Get the base directory form the command line arguments base_dir = pathlib.Path(sys.argv[1]) +# Create the directory to store the responses resp_dir = base_dir / "assets/contrib/" -new_resp_file = resp_dir / ("recent_" + next_datetime_str + ".json") -parent_card_dir = base_dir / "flask_app/static/img/gh_cards/" -new_card_dir = parent_card_dir / next_datetime_str resp_dir.mkdir(exist_ok=True) + +# Store the name of the new JSON response file +new_resp_file = resp_dir / f"recent_{next_datetime_str}.json" + +# Ensure the parent repository card image directory exists +parent_card_dir = base_dir / "flask_app/static/img/gh_cards/" parent_card_dir.mkdir(exist_ok=True) + +# Create a directory for the specific repository image cards for the given datetime +new_card_dir = parent_card_dir / next_datetime_str new_card_dir.mkdir(exist_ok=True) +# Read the configuration settings file with open("/etc/config.json", encoding="utf-8") as jsonfile: config = json.load(jsonfile) +# Get the GraphQL query from the text file with open(base_dir / "assets/graphql_query.txt", encoding="utf-8") as queryfile: query_param = {"query": queryfile.read()} +# Query the GraphQL API via a POST request resp = requests.post( URL, json=query_param, @@ -50,27 +68,35 @@ timeout=5, ) +# Parse the request for a subset of the returned data (the "user" key) json_resp = json.loads(resp.content)["data"]["user"] +# Store the subset of data in a JSON file for later use with open( resp_dir / f"recent_{next_datetime_str}.json", mode="w", encoding="utf-8" ) as contribfile: json.dump(json_resp, contribfile) - +# Iterate through the returned repository nodes len_nodes = len(json_resp["repositories"]["nodes"]) for index, node in enumerate(json_resp["repositories"]["nodes"]): + # Attempt three times to store the image card for _ in range(3): try: + # Get the repository image card (OpenGraph image) img_resp = requests.get(node["openGraphImageUrl"], timeout=10) status_okay = 200 + + # If request is successful, save the image for caching purposes if img_resp.status_code == status_okay: with open(str(new_card_dir / f"card{index}.png"), mode="wb") as imgfile: for data_chunk in img_resp: imgfile.write(data_chunk) break + # If a timeout occurs, attempt the request again except (TimeoutError, requests.exceptions.ReadTimeout): pass # Try again + # Add mandatory execution delay to prevent constant timeouts and rate limiting finally: if index != len_nodes - 1: time.sleep(1) diff --git a/scripts/post_graphql.py b/scripts/post_graphql.py index 820c335..b997964 100644 --- a/scripts/post_graphql.py +++ b/scripts/post_graphql.py @@ -1,7 +1,11 @@ # SPDX-FileCopyrightText: 2024 Alec Delaney # SPDX-License-Identifier: MIT -"""Delete the images from previous GraphQL query from the static folder.""" +"""Delete the images from previous GraphQL query from the static folder. + +This is used by a cron job that runs right ater the turn of the hour, so +it looks behind 10 minutes. 
+""" import datetime import pathlib @@ -9,21 +13,31 @@ import dateutil.tz +# STore the date format DATETIME_FMT = "%Y%m%d%H" +# Get the current datetime current_datetime = datetime.datetime.now(dateutil.tz.gettz()) +# Get the datetime string for 10 minutes ago last_datetime = current_datetime - datetime.timedelta(minutes=10) last_datetime_str = last_datetime.strftime(DATETIME_FMT) +# Get the base directory form the command line arguments base_dir = pathlib.Path(sys.argv[1]) +# Get all the necessary paths needed to delete relevant files +# (stored JSON response file and repository image cards and +# parent directory) resp_dir = base_dir / "assets/contrib/" old_resp_file = resp_dir / ("recent_" + last_datetime_str + ".json") parent_card_dir = base_dir / "flask_app/static/img/gh_cards/" old_card_dir = parent_card_dir / last_datetime_str +# Delete the store JSON response file old_resp_file.unlink(missing_ok=True) + +# Delete the repository image cards and parent directory for card in old_card_dir.glob("*"): card.unlink(missing_ok=True) old_card_dir.rmdir() diff --git a/scripts/renew_cert.sh b/scripts/renew_cert.sh index 2bc97a4..82d43f5 100644 --- a/scripts/renew_cert.sh +++ b/scripts/renew_cert.sh @@ -2,5 +2,8 @@ # SPDX-FileCopyrightText: 2024 Alec Delaney # SPDX-License-Identifier: MIT +# Renews the HTTPS certification via certbot +# This script is run via cron + certbot renew --nginx systemctl reload nginx diff --git a/scripts/schedule_cache.sh b/scripts/schedule_cache.sh index b6e59e5..5805788 100644 --- a/scripts/schedule_cache.sh +++ b/scripts/schedule_cache.sh @@ -2,6 +2,9 @@ # SPDX-FileCopyrightText: 2024 Alec Delaney # SPDX-License-Identifier: MIT +# Script for managing the cron job instructions for downloading +# GitHub repository summary card images via cronberry + REPOPATH=$(realpath .) PYBINPATH="$REPOPATH/.venv/bin" diff --git a/scripts/update_cache.sh b/scripts/update_cache.sh index 9733a83..b6aeeb4 100644 --- a/scripts/update_cache.sh +++ b/scripts/update_cache.sh @@ -2,6 +2,10 @@ # SPDX-FileCopyrightText: 2024 Alec Delaney # SPDX-License-Identifier: MIT +# Script for immediately downloading GitHub repository summary +# card images, useful when errors occur and need to be fixed +# on the server + REPOPATH=$(realpath .) PATH="$REPOPATH/.venv/bin:$PATH" SCRIPTPATH="$REPOPATH/scripts/graphql.py"