
Add additional comments, minimal rearranging of code
tekktrik committed Oct 30, 2024
1 parent 8053b9c commit 897a0e0
Showing 7 changed files with 132 additions and 18 deletions.
41 changes: 40 additions & 1 deletion flask_app/__init__.py
@@ -15,7 +15,7 @@
import dateutil.parser
import dateutil.tz
import jinja2
from flask import Flask, Response, redirect, render_template, send_file, url_for
from flask_bootstrap import Bootstrap5
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
@@ -27,15 +27,20 @@
sort_jobs_start_date,
)

# Initialize the Flask app
app = Flask(__name__)

# Open the configuration settings file
with open("/etc/config.json", encoding="utf-8") as jsonfile:
config = json.load(jsonfile)

# Disable CSRF for WTForms
app.config["WTF_CSRF_ENABLED"] = False

# Initialize Bootstrap
bootstrap = Bootstrap5(app)

# Initialize the rate limiter
limiter = Limiter(
get_remote_address,
app=app,
@@ -69,38 +74,59 @@ def menorah_settings() -> Response:
@limiter.limit("10/second", key_func=lambda: "menorah-settings")
def project_menorah_settings() -> str:
"""Route for creating menorah settings file."""
# Get the Menorah setup form
input_form = MenorahSetupForm()

# Handle form submission validation
if input_form.validate_on_submit():
# Get the zip code from the form
zipcode = input_form.data["zipcode"]

# Add the zip code to the template and render it
with open("assets/settings.json", encoding="utf-8") as template_file:
template_text = template_file.read()
template = jinja2.Template(template_text)
rendered_temp = template.render(zipcode=zipcode)

# Send the rendered settings file to the user for download
file_bytesio = io.BytesIO()
file_bytesio.write(rendered_temp.encode("utf-8"))
file_bytesio.seek(0)
return send_file(
file_bytesio, as_attachment=True, download_name="settings.json"
)

# Render the HTML template
return render_template("projects/menorah/settings.html", input_form=input_form)


@app.route("/recent", methods=["GET"])
def recent() -> str:
"""Route for recent GitHub activity."""
# Get the current datetime and hour as a string
datetime_fmt = "%Y%m%d%H"
current_datetime = datetime.datetime.now(dateutil.tz.gettz())
current_datetime_str = current_datetime.strftime(datetime_fmt)

# Read the contents of the relevant (current) recent activity JSON file
with open(
f"assets/contrib/recent_{current_datetime_str}.json", encoding="utf-8"
) as respfile:
contents = json.load(respfile)

# Get the contributions (calendar) collection and specific repository contributions
contributions, repos = contents["contributionsCollection"], contents["repositories"]

# Get the start, end, and delta between times within the contributions collection
end_datetime = dateutil.parser.parse(contributions["endedAt"])
start_datetime = dateutil.parser.parse(contributions["startedAt"])
diff_datetime: datetime.timedelta = end_datetime - start_datetime

# Get the oldest push datetime from the specific repository contributions
oldest_push = dateutil.parser.parse(repos["nodes"][-1]["pushedAt"])
diff_oldest = current_datetime - oldest_push

# Render the HTML template
return render_template(
"recent.html",
repos=repos["nodes"],
@@ -114,28 +140,41 @@ def recent() -> str:
@app.route("/about", methods=["GET"])
def about() -> str:
"""Route for about me page."""
# Load the job files and collect them in a list
jobs_path = pathlib.Path("assets/about/jobs")
jobs = []
for job_path in jobs_path.glob("*.json"):
with open(job_path, encoding="utf-8") as jobfile:
job_obj = json.load(jobfile)
# No end date means it's the current (active) job
if job_obj["endDate"] is None:
job_obj["endDate"] = "current"
jobs.append(job_obj)

# Sort the jobs list based on the custom sorting filter
jobs.sort(key=sort_jobs_start_date, reverse=True)

# Consolidate jobs that are grouped (like promotions)
jobs_lists = consolidate_sorted_jobs(jobs)

# Sort the grouped jobs based on the custom sorting filter
jobs_lists.sort(key=sort_grouped_jobs, reverse=True)

# Load the education files and collect them in a list
education_paths = pathlib.Path("assets/about/education")
educations = []
for education_path in education_paths.glob("*.json"):
with open(education_path, encoding="utf-8") as edufile:
edu_obj = json.load(edufile)
# No end date means it's the current (active) education
if edu_obj["endYear"] is None:
edu_obj["endYear"] = "current"
educations.append(edu_obj)

# Sort the educations by start year
educations.sort(key=lambda x: x["startYear"], reverse=True)

# Render the HTML template
return render_template("about.html", jobs_lists=jobs_lists, educations=educations)


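Taken in isolation, the render-and-download flow in project_menorah_settings looks like the following minimal sketch. It is an illustrative stand-in, with a hypothetical route name and an inline template in place of the real assets/settings.json file and form handling:

import io

import jinja2
from flask import Flask, send_file

app = Flask(__name__)


@app.route("/example-download")  # Hypothetical route name for illustration
def example_download():
    """Render a Jinja template and send the result as a file download."""
    # Inline stand-in for the assets/settings.json template file
    template = jinja2.Template('{"zipcode": "{{ zipcode }}"}')
    rendered = template.render(zipcode="02139")

    # Load the rendered text into an in-memory buffer (position starts at 0)
    buffer = io.BytesIO(rendered.encode("utf-8"))
    return send_file(buffer, as_attachment=True, download_name="settings.json")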
49 changes: 37 additions & 12 deletions flask_app/helpers.py
@@ -34,11 +34,14 @@ def generate_settings_json(zipcode: str) -> str:

def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
"""Get repository info from the GraphQL query."""
# Store the GraphQL API URL
url = "https://api.github.com/graphql"

# Get the query from the saved text file
with open("assets/graphql_query.txt", encoding="utf-8") as queryfile:
query_param = {"query": queryfile.read()}

# Query the API via a POST request
resp = requests.post(
url,
json=query_param,
@@ -48,56 +51,78 @@ def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
timeout=5,
)

# Return only part of the returned data (the values within the "user" key)
json_resp = json.loads(resp.content)["data"]["user"]

return json_resp["contributionsCollection"], json_resp["repositories"]


def sort_jobs_start_date(job: JobDict) -> int:
"""Sort the jobs by start date."""
# Split the month and year
month_str, year_str = job["startDate"].split("/")
# Use a basic formula for date-based sorting
return int(year_str) * 100 + int(month_str)


def consolidate_sorted_jobs(jobs: list[JobDict]) -> list[list[JobDict]]:
"""Consolidate jobs in instances like promotions."""
# Initialize empty dictionary for storing job groupings while iterating
grouped_jobs_dict: dict[str, list[JobDict]] = {}

# Initialize empty list for storing sorted job groupings
grouped_jobs_list: list[list[JobDict]] = []

# Iterate through provided jobs
for job in jobs:
# Keep track of whether employers are newly added to the list
newly_added = False

# Get the employer for the current job being analyzed
employer = job["employer"]

# If not already in dict, add it and note it is a new job
if employer not in grouped_jobs_dict:
grouped_jobs_dict[employer] = [job]
newly_added = True

# Get start date of newer role and end date of older role
# (contained list is sorted in order of newest to oldest)
start_new_role = datetime.datetime.strptime(
grouped_jobs_dict[employer][-1]["startDate"], "%m/%Y"
)
if job["endDate"] == "current":
end_old_role = datetime.datetime.now(dateutil.tz.gettz())
else:
end_old_role = datetime.datetime.strptime(job["endDate"], "%m/%Y")

# If the employer was not newly added and the gap is no more than 31 days,
# then add the job to the existing list. This prevents grouping roles
# that have large breaks in between them (e.g., returning co-ops with no
# job in between).
#
# If the employer has already been added to the dictionary and the time
# between the jobs is short, append it to the existing grouping and keep
# using it, as more jobs may be found to add to this grouping.
duration_days = 31
if not newly_added and (start_new_role - end_old_role).days <= duration_days:
grouped_jobs_dict[employer].append(job)

# Otherwise, if the employer is already in the dict but the time between jobs
# is longer than 31 days, add the existing grouping to the return job list
# and start a new grouping for this employer, as the current one is complete.
elif not newly_added:
grouped_jobs_list.append(grouped_jobs_dict[employer])
grouped_jobs_dict[employer] = [job]

# Job groupings still remaining in the dict after iteration are complete and
# should be added to the return job list
for remaining_job in grouped_jobs_dict.values():
grouped_jobs_list.append(remaining_job)

# Return the grouped jobs list
return grouped_jobs_list


def sort_grouped_jobs(jobs_list: list[JobDict]) -> int:
"""Sort the grouped lists of jobs."""
return sort_jobs_start_date(jobs_list[0])
"""Sort the grouped lists of jobs (based on first job within group)."""
return sort_jobs_start_date(jobs_list[-1])
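As a worked example of the date key and the 31-day grouping threshold above, here is a hedged sketch using invented JobDict entries (the employer names and dates are made up):

from flask_app.helpers import consolidate_sorted_jobs, sort_jobs_start_date

# Hypothetical jobs, already normalized the way the about() route does it:
# an active job has an endDate of "current", and startDate is "%m/%Y"
jobs = [
    {"employer": "Acme", "startDate": "06/2023", "endDate": "current"},
    {"employer": "Acme", "startDate": "01/2022", "endDate": "05/2023"},
    {"employer": "Initech", "startDate": "03/2019", "endDate": "12/2021"},
]

# sort_jobs_start_date maps a startDate of "06/2023" to 2023 * 100 + 6,
# i.e. 202306, so sorting with reverse=True orders jobs newest first
jobs.sort(key=sort_jobs_start_date, reverse=True)

# The newer Acme role starts 31 days after the older one ends (06/01 vs.
# 05/01), which is within the 31-day threshold, so the two are grouped
# together (e.g., a promotion); Initech stays in its own group
grouped = consolidate_sorted_jobs(jobs)
assert [len(group) for group in grouped] == [2, 1]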
34 changes: 30 additions & 4 deletions scripts/graphql.py
@@ -12,35 +12,53 @@
import dateutil.tz
import requests

# Store the GraphQL API URL and date format for files
URL = "https://api.github.com/graphql"
DATETIME_FMT = "%Y%m%d%H"

# Get the current datetime
current_datetime = datetime.datetime.now(dateutil.tz.gettz())

# If "--now" is given as a a command line argument, use the current time
# Otherwise, this is being used by a cron job that runs right before the
# turn of the hour, so look ahead 10 minutes. Note that this isn't an
# execution delay.
if "--now" in sys.argv:
delay = 0
else:
delay = 10

# Get the datetime string based on the delay
next_datetime = current_datetime + datetime.timedelta(minutes=delay)
next_datetime_str = next_datetime.strftime(DATETIME_FMT)

# Get the base directory from the command line arguments
base_dir = pathlib.Path(sys.argv[1])

# Create the directory to store the responses
resp_dir = base_dir / "assets/contrib/"
new_resp_file = resp_dir / ("recent_" + next_datetime_str + ".json")
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
new_card_dir = parent_card_dir / next_datetime_str
resp_dir.mkdir(exist_ok=True)

# Store the name of the new JSON response file
new_resp_file = resp_dir / f"recent_{next_datetime_str}.json"

# Ensure the parent repository card image directory exists
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
parent_card_dir.mkdir(exist_ok=True)

# Create a directory for the specific repository image cards for the given datetime
new_card_dir = parent_card_dir / next_datetime_str
new_card_dir.mkdir(exist_ok=True)

# Read the configuration settings file
with open("/etc/config.json", encoding="utf-8") as jsonfile:
config = json.load(jsonfile)

# Get the GraphQL query from the text file
with open(base_dir / "assets/graphql_query.txt", encoding="utf-8") as queryfile:
query_param = {"query": queryfile.read()}

# Query the GraphQL API via a POST request
resp = requests.post(
URL,
json=query_param,
@@ -50,27 +68,35 @@
timeout=5,
)

# Parse the response for a subset of the returned data (the "user" key)
json_resp = json.loads(resp.content)["data"]["user"]

# Store the subset of data in a JSON file for later use
with open(
resp_dir / f"recent_{next_datetime_str}.json", mode="w", encoding="utf-8"
) as contribfile:
json.dump(json_resp, contribfile)


# Iterate through the returned repository nodes
len_nodes = len(json_resp["repositories"]["nodes"])
for index, node in enumerate(json_resp["repositories"]["nodes"]):
# Attempt three times to store the image card
for _ in range(3):
try:
# Get the repository image card (OpenGraph image)
img_resp = requests.get(node["openGraphImageUrl"], timeout=10)
status_okay = 200

# If request is successful, save the image for caching purposes
if img_resp.status_code == status_okay:
with open(str(new_card_dir / f"card{index}.png"), mode="wb") as imgfile:
for data_chunk in img_resp:
imgfile.write(data_chunk)
break
# If a timeout occurs, attempt the request again
except (TimeoutError, requests.exceptions.ReadTimeout):
pass # Try again
# Add mandatory execution delay to prevent constant timeouts and rate limiting
finally:
if index != len_nodes - 1:
time.sleep(1)
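The retry loop above, reduced to a standalone hedged sketch; the function name, URL, and destination filename are placeholders:

import time

import requests


def fetch_image(url: str, dest: str, attempts: int = 3) -> bool:
    """Try a few times to download an image, pausing between attempts."""
    for _ in range(attempts):
        try:
            resp = requests.get(url, timeout=10)

            # If the request is successful, stream the image bytes to disk
            if resp.status_code == 200:
                with open(dest, mode="wb") as imgfile:
                    for data_chunk in resp:
                        imgfile.write(data_chunk)
                return True
        # If a timeout occurs, attempt the request again
        except (TimeoutError, requests.exceptions.ReadTimeout):
            pass
        # Pause after every attempt to avoid hammering the server
        finally:
            time.sleep(1)
    return False


# Hypothetical usage:
# fetch_image("https://example.com/card.png", "card0.png")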
16 changes: 15 additions & 1 deletion scripts/post_graphql.py
@@ -1,29 +1,43 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

"""Delete the images from previous GraphQL query from the static folder."""
"""Delete the images from previous GraphQL query from the static folder.
This is used by a cron job that runs right ater the turn of the hour, so
it looks behind 10 minutes.
"""

import datetime
import pathlib
import sys

import dateutil.tz

# Store the date format
DATETIME_FMT = "%Y%m%d%H"

# Get the current datetime
current_datetime = datetime.datetime.now(dateutil.tz.gettz())

# Get the datetime string for 10 minutes ago
last_datetime = current_datetime - datetime.timedelta(minutes=10)
last_datetime_str = last_datetime.strftime(DATETIME_FMT)

# Get the base directory from the command line arguments
base_dir = pathlib.Path(sys.argv[1])

# Build all the paths needed to delete the relevant files
# (the stored JSON response file, the repository image cards,
# and their parent directory)
resp_dir = base_dir / "assets/contrib/"
old_resp_file = resp_dir / ("recent_" + last_datetime_str + ".json")
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
old_card_dir = parent_card_dir / last_datetime_str

# Delete the stored JSON response file
old_resp_file.unlink(missing_ok=True)

# Delete the repository image cards and parent directory
for card in old_card_dir.glob("*"):
card.unlink(missing_ok=True)
old_card_dir.rmdir()
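The two scripts coordinate only through these hour-stamped filenames. Here is a small hedged sketch of the handoff, with illustrative times:

import datetime

import dateutil.tz

DATETIME_FMT = "%Y%m%d%H"

# Suppose graphql.py runs at 13:50 without "--now": it looks ahead
# 10 minutes, so its files are stamped for the upcoming hour, 14:00
write_time = datetime.datetime(2024, 10, 30, 13, 50, tzinfo=dateutil.tz.gettz())
write_stamp = (write_time + datetime.timedelta(minutes=10)).strftime(DATETIME_FMT)

# post_graphql.py then runs at 14:05: it looks behind 10 minutes, so it
# deletes the files stamped for the previous hour, 13:00
delete_time = datetime.datetime(2024, 10, 30, 14, 5, tzinfo=dateutil.tz.gettz())
delete_stamp = (delete_time - datetime.timedelta(minutes=10)).strftime(DATETIME_FMT)

assert write_stamp == "2024103014"
assert delete_stamp == "2024103013"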
3 changes: 3 additions & 0 deletions scripts/renew_cert.sh
@@ -2,5 +2,8 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

# Renews the HTTPS certificate via certbot
# This script is run via cron

certbot renew --nginx
systemctl reload nginx
3 changes: 3 additions & 0 deletions scripts/schedule_cache.sh
Expand Up @@ -2,6 +2,9 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

# Script for managing the cron job instructions for downloading
# GitHub repository summary card images via cronberry

REPOPATH=$(realpath .)

PYBINPATH="$REPOPATH/.venv/bin"