
Add additional comments, minimal rearranging of code
tekktrik committed Oct 30, 2024
1 parent 8053b9c commit 897a0e0
Showing 7 changed files with 132 additions and 18 deletions.
41 changes: 40 additions & 1 deletion flask_app/__init__.py
@@ -15,7 +15,7 @@
import dateutil.parser
import dateutil.tz
import jinja2
from flask import Flask, Response, redirect, render_template, send_file, url_for
from flask_bootstrap import Bootstrap5
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
@@ -27,15 +27,20 @@
sort_jobs_start_date,
)

# Initialize the Flask app
app = Flask(__name__)

# Open the configuration settings file
with open("/etc/config.json", encoding="utf-8") as jsonfile:
config = json.load(jsonfile)

# Disable CSRF for WTForms
app.config["WTF_CSRF_ENABLED"] = False

# Initialize Bootstrap
bootstrap = Bootstrap5(app)

# Initialize the rate limiter
limiter = Limiter(
get_remote_address,
app=app,
@@ -69,38 +74,59 @@ def menorah_settings() -> Response:
@limiter.limit("10/second", key_func=lambda: "menorah-settings")
def project_menorah_settings() -> str:
"""Route for creating menorah settings file."""
# Get the Menorah setup form
input_form = MenorahSetupForm()

# Handle form submission validation
if input_form.validate_on_submit():
# Get the zip code from the form
zipcode = input_form.data["zipcode"]

# Add the zip code to the template and render it
with open("assets/settings.json", encoding="utf-8") as template_file:
template_text = template_file.read()
template = jinja2.Template(template_text)
rendered_temp = template.render(zipcode=zipcode)

# Send the rendered settings file to the user for download
file_bytesio = io.BytesIO()
file_bytesio.write(rendered_temp.encode("utf-8"))
file_bytesio.seek(0)
return send_file(
file_bytesio, as_attachment=True, download_name="settings.json"
)

# Render the HTML template
return render_template("projects/menorah/settings.html", input_form=input_form)


@app.route("/recent", methods=["GET"])
def recent() -> str:
"""Route for recent GitHub activity."""
# Get the current datetime and hour as a string
datetime_fmt = "%Y%m%d%H"
current_datetime = datetime.datetime.now(dateutil.tz.gettz())
current_datetime_str = current_datetime.strftime(datetime_fmt)

# Read the contents of the relevant (current) recent activity JSON file
with open(
f"assets/contrib/recent_{current_datetime_str}.json", encoding="utf-8"
) as respfile:
contents = json.load(respfile)

# Get the contributions (calendar) collection and specific repository contributions
contributions, repos = contents["contributionsCollection"], contents["repositories"]

# Get the start, end, and delta between times within the contributions collection
end_datetime = dateutil.parser.parse(contributions["endedAt"])
start_datetime = dateutil.parser.parse(contributions["startedAt"])
diff_datetime: datetime.timedelta = end_datetime - start_datetime

# Get the oldest push datetime from the specific repository contributions
oldest_push = dateutil.parser.parse(repos["nodes"][-1]["pushedAt"])
diff_oldest = current_datetime - oldest_push

# Render the HTML template
return render_template(
"recent.html",
repos=repos["nodes"],
@@ -114,28 +140,41 @@ def recent() -> str:
@app.route("/about", methods=["GET"])
def about() -> str:
"""Route for about me page."""
# Load the job files and collect them in a list
jobs_path = pathlib.Path("assets/about/jobs")
jobs = []
for job_path in jobs_path.glob("*.json"):
with open(job_path, encoding="utf-8") as jobfile:
job_obj = json.load(jobfile)
# No end date means it's the current (active) job
if job_obj["endDate"] is None:
job_obj["endDate"] = "current"
jobs.append(job_obj)

# Sort the jobs list based on the custom sorting filter
jobs.sort(key=sort_jobs_start_date, reverse=True)

# Consolidate jobs that are grouped (like promotions)
jobs_lists = consolidate_sorted_jobs(jobs)

# Sort the grouped jobs based on the custom sorting filter
jobs_lists.sort(key=sort_grouped_jobs, reverse=True)

# Load the education files and collect them in a list
education_paths = pathlib.Path("assets/about/education")
educations = []
for education_path in education_paths.glob("*.json"):
with open(education_path, encoding="utf-8") as edufile:
edu_obj = json.load(edufile)
# No end date means it's the current (active) education
if edu_obj["endYear"] is None:
edu_obj["endYear"] = "current"
educations.append(edu_obj)

# Sort the educations by start year
educations.sort(key=lambda x: x["startYear"], reverse=True)

# Render the HTML template
return render_template("about.html", jobs_lists=jobs_lists, educations=educations)


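Taken in isolation, the render-and-download flow in project_menorah_settings looks like the following minimal sketch. It is an illustrative stand-in, with a hypothetical route name and an inline template in place of the real assets/settings.json file and form handling:

import io

import jinja2
from flask import Flask, send_file

app = Flask(__name__)


@app.route("/example-download")  # Hypothetical route name for illustration
def example_download():
    """Render a Jinja template and send the result as a file download."""
    # Inline stand-in for the assets/settings.json template file
    template = jinja2.Template('{"zipcode": "{{ zipcode }}"}')
    rendered = template.render(zipcode="02139")

    # Load the rendered text into an in-memory buffer (position starts at 0)
    buffer = io.BytesIO(rendered.encode("utf-8"))
    return send_file(buffer, as_attachment=True, download_name="settings.json")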
49 changes: 37 additions & 12 deletions flask_app/helpers.py
@@ -34,11 +34,14 @@ def generate_settings_json(zipcode: str) -> str:

def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
"""Get repository info from the GraphQL query."""
# Store the GraphQL API URL
url = "https://api.github.com/graphql"

# Get the query from the saved text file
with open("assets/graphql_query.txt", encoding="utf-8") as queryfile:
query_param = {"query": queryfile.read()}

# Query the API via a POST request
resp = requests.post(
url,
json=query_param,
@@ -48,56 +51,78 @@ def get_repo_info(token: str) -> tuple[dict[str, Any], dict[str, Any]]:
timeout=5,
)

# Return only part of the returned data (the values within the "user" key)
json_resp = json.loads(resp.content)["data"]["user"]

return json_resp["contributionsCollection"], json_resp["repositories"]


def sort_jobs_start_date(job: JobDict) -> int:
"""Sort the jobs by start date."""
# Split the month and year
month_str, year_str = job["startDate"].split("/")
# Use a basic formula for date-based sorting
return int(year_str) * 100 + int(month_str)


def consolidate_sorted_jobs(jobs: list[JobDict]) -> list[list[JobDict]]:
"""Consolidate jobs in instances like promotions."""
# Initialize empty dictionary for storing job groupings while iterating
grouped_jobs_dict: dict[str, list[JobDict]] = {}

# Initialize empty list for storing sorted job groupings
grouped_jobs_list: list[list[JobDict]] = []

# Iterate through provided jobs
for job in jobs:
# Keep track of whether employers are newly added to the list
newly_added = False

# Get the employer for the current job being analyzed
employer = job["employer"]

# If not already in dict, add it and note it is a new job
if employer not in grouped_jobs_dict:
grouped_jobs_dict[employer] = [job]
newly_added = True

# Get start date of newer role and end date of older role
# (contained list is sorted in order of newest to oldest)
start_new_role = datetime.datetime.strptime(
grouped_jobs_dict[employer][-1]["startDate"], "%m/%Y"
)
if job["endDate"] == "current":
end_old_role = datetime.datetime.now(dateutil.tz.gettz())
else:
end_old_role = datetime.datetime.strptime(job["endDate"], "%m/%Y")

# If the employer was not newly added and the gap is no more than 31 days,
# then add the job to the existing list. This prevents grouping roles
# that have large breaks in between them (e.g., returning co-ops with no
# job in between).
#
# If the employer has already been added to the dictionary and the time
# between the jobs is short, append it to the existing grouping and keep
# using it, as more jobs may be found to add to this grouping.
duration_days = 31
if not newly_added and (start_new_role - end_old_role).days <= duration_days:
grouped_jobs_dict[employer].append(job)

# Otherwise, if the employer is already in the dict but the time between jobs
# is longer than 31 days, add the existing grouping to the return job list
# and start a new grouping for this employer, as the current one is complete.
elif not newly_added:
grouped_jobs_list.append(grouped_jobs_dict[employer])
grouped_jobs_dict[employer] = [job]

# Job groupings still remaining in the dict after iteration are complete and
# should be added to the return job list
for remaining_job in grouped_jobs_dict.values():
grouped_jobs_list.append(remaining_job)

# Return the grouped jobs list
return grouped_jobs_list


def sort_grouped_jobs(jobs_list: list[JobDict]) -> int:
"""Sort the grouped lists of jobs."""
return sort_jobs_start_date(jobs_list[0])
"""Sort the grouped lists of jobs (based on first job within group)."""
return sort_jobs_start_date(jobs_list[-1])
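As a worked example of the date key and the 31-day grouping threshold above, here is a hedged sketch using invented JobDict entries (the employer names and dates are made up):

from flask_app.helpers import consolidate_sorted_jobs, sort_jobs_start_date

# Hypothetical jobs, already normalized the way the about() route does it:
# an active job has an endDate of "current", and startDate is "%m/%Y"
jobs = [
    {"employer": "Acme", "startDate": "06/2023", "endDate": "current"},
    {"employer": "Acme", "startDate": "01/2022", "endDate": "05/2023"},
    {"employer": "Initech", "startDate": "03/2019", "endDate": "12/2021"},
]

# sort_jobs_start_date maps a startDate of "06/2023" to 2023 * 100 + 6,
# i.e. 202306, so sorting with reverse=True orders jobs newest first
jobs.sort(key=sort_jobs_start_date, reverse=True)

# The newer Acme role starts 31 days after the older one ends (06/01 vs.
# 05/01), which is within the 31-day threshold, so the two are grouped
# together (e.g., a promotion); Initech stays in its own group
grouped = consolidate_sorted_jobs(jobs)
assert [len(group) for group in grouped] == [2, 1]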
34 changes: 30 additions & 4 deletions scripts/graphql.py
@@ -12,35 +12,53 @@
import dateutil.tz
import requests

# Store the GraphQL API URL and date format for files
URL = "https://api.github.com/graphql"
DATETIME_FMT = "%Y%m%d%H"

# Get the current datetime
current_datetime = datetime.datetime.now(dateutil.tz.gettz())

# If "--now" is given as a a command line argument, use the current time
# Otherwise, this is being used by a cron job that runs right before the
# turn of the hour, so look ahead 10 minutes. Note that this isn't an
# execution delay.
if "--now" in sys.argv:
delay = 0
else:
delay = 10

# Get the datetime string based on the delay
next_datetime = current_datetime + datetime.timedelta(minutes=delay)
next_datetime_str = next_datetime.strftime(DATETIME_FMT)

# Get the base directory from the command line arguments
base_dir = pathlib.Path(sys.argv[1])

# Create the directory to store the responses
resp_dir = base_dir / "assets/contrib/"
new_resp_file = resp_dir / ("recent_" + next_datetime_str + ".json")
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
new_card_dir = parent_card_dir / next_datetime_str
resp_dir.mkdir(exist_ok=True)

# Store the name of the new JSON response file
new_resp_file = resp_dir / f"recent_{next_datetime_str}.json"

# Ensure the parent repository card image directory exists
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
parent_card_dir.mkdir(exist_ok=True)

# Create a directory for the specific repository image cards for the given datetime
new_card_dir = parent_card_dir / next_datetime_str
new_card_dir.mkdir(exist_ok=True)

# Read the configuration settings file
with open("/etc/config.json", encoding="utf-8") as jsonfile:
config = json.load(jsonfile)

# Get the GraphQL query from the text file
with open(base_dir / "assets/graphql_query.txt", encoding="utf-8") as queryfile:
query_param = {"query": queryfile.read()}

# Query the GraphQL API via a POST request
resp = requests.post(
URL,
json=query_param,
@@ -50,27 +68,35 @@
timeout=5,
)

# Parse the response for a subset of the returned data (the "user" key)
json_resp = json.loads(resp.content)["data"]["user"]

# Store the subset of data in a JSON file for later use
with open(
resp_dir / f"recent_{next_datetime_str}.json", mode="w", encoding="utf-8"
) as contribfile:
json.dump(json_resp, contribfile)


# Iterate through the returned repository nodes
len_nodes = len(json_resp["repositories"]["nodes"])
for index, node in enumerate(json_resp["repositories"]["nodes"]):
# Attempt three times to store the image card
for _ in range(3):
try:
# Get the repository image card (OpenGraph image)
img_resp = requests.get(node["openGraphImageUrl"], timeout=10)
status_okay = 200

# If request is successful, save the image for caching purposes
if img_resp.status_code == status_okay:
with open(str(new_card_dir / f"card{index}.png"), mode="wb") as imgfile:
for data_chunk in img_resp:
imgfile.write(data_chunk)
break
# If a timeout occurs, attempt the request again
except (TimeoutError, requests.exceptions.ReadTimeout):
pass # Try again
# Add mandatory execution delay to prevent constant timeouts and rate limiting
finally:
if index != len_nodes - 1:
time.sleep(1)
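The retry loop above, reduced to a standalone hedged sketch; the function name, URL, and destination filename are placeholders:

import time

import requests


def fetch_image(url: str, dest: str, attempts: int = 3) -> bool:
    """Try a few times to download an image, pausing between attempts."""
    for _ in range(attempts):
        try:
            resp = requests.get(url, timeout=10)

            # If the request is successful, stream the image bytes to disk
            if resp.status_code == 200:
                with open(dest, mode="wb") as imgfile:
                    for data_chunk in resp:
                        imgfile.write(data_chunk)
                return True
        # If a timeout occurs, attempt the request again
        except (TimeoutError, requests.exceptions.ReadTimeout):
            pass
        # Pause after every attempt to avoid hammering the server
        finally:
            time.sleep(1)
    return False


# Hypothetical usage:
# fetch_image("https://example.com/card.png", "card0.png")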
16 changes: 15 additions & 1 deletion scripts/post_graphql.py
@@ -1,29 +1,43 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

"""Delete the images from previous GraphQL query from the static folder."""
"""Delete the images from previous GraphQL query from the static folder.
This is used by a cron job that runs right ater the turn of the hour, so
it looks behind 10 minutes.
"""

import datetime
import pathlib
import sys

import dateutil.tz

# Store the date format
DATETIME_FMT = "%Y%m%d%H"

# Get the current datetime
current_datetime = datetime.datetime.now(dateutil.tz.gettz())

# Get the datetime string for 10 minutes ago
last_datetime = current_datetime - datetime.timedelta(minutes=10)
last_datetime_str = last_datetime.strftime(DATETIME_FMT)

# Get the base directory from the command line arguments
base_dir = pathlib.Path(sys.argv[1])

# Build all the paths needed to delete the relevant files
# (the stored JSON response file, the repository image cards,
# and their parent directory)
resp_dir = base_dir / "assets/contrib/"
old_resp_file = resp_dir / ("recent_" + last_datetime_str + ".json")
parent_card_dir = base_dir / "flask_app/static/img/gh_cards/"
old_card_dir = parent_card_dir / last_datetime_str

# Delete the stored JSON response file
old_resp_file.unlink(missing_ok=True)

# Delete the repository image cards and parent directory
for card in old_card_dir.glob("*"):
card.unlink(missing_ok=True)
old_card_dir.rmdir()
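The two scripts coordinate only through these hour-stamped filenames. Here is a small hedged sketch of the handoff, with illustrative times:

import datetime

import dateutil.tz

DATETIME_FMT = "%Y%m%d%H"

# Suppose graphql.py runs at 13:50 without "--now": it looks ahead
# 10 minutes, so its files are stamped for the upcoming hour, 14:00
write_time = datetime.datetime(2024, 10, 30, 13, 50, tzinfo=dateutil.tz.gettz())
write_stamp = (write_time + datetime.timedelta(minutes=10)).strftime(DATETIME_FMT)

# post_graphql.py then runs at 14:05: it looks behind 10 minutes, so it
# deletes the files stamped for the previous hour, 13:00
delete_time = datetime.datetime(2024, 10, 30, 14, 5, tzinfo=dateutil.tz.gettz())
delete_stamp = (delete_time - datetime.timedelta(minutes=10)).strftime(DATETIME_FMT)

assert write_stamp == "2024103014"
assert delete_stamp == "2024103013"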
3 changes: 3 additions & 0 deletions scripts/renew_cert.sh
@@ -2,5 +2,8 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

# Renews the HTTPS certificate via certbot
# This script is run via cron

certbot renew --nginx
systemctl reload nginx
3 changes: 3 additions & 0 deletions scripts/schedule_cache.sh
Expand Up @@ -2,6 +2,9 @@
# SPDX-FileCopyrightText: 2024 Alec Delaney
# SPDX-License-Identifier: MIT

# Script for managing the cron job instructions for downloading
# GitHub repository summary card images via cronberry

REPOPATH=$(realpath .)

PYBINPATH="$REPOPATH/.venv/bin"