
Commit

added gunicorn wsgi server
Whiskas101 committed Jul 6, 2024
1 parent 1d27654 commit 3c6a3fe
Showing 10 changed files with 56 additions and 20 deletions.
7 changes: 4 additions & 3 deletions Dockerfile
@@ -4,7 +4,8 @@ RUN apk update
RUN apk add py-pip
RUN pip install --upgrade pip

WORKDIR /app

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt
@@ -13,8 +14,8 @@ COPY src/ .



EXPOSE 5000
EXPOSE 8000

CMD ["python3", "api.py"]
CMD ["gunicorn","-w","2","-b", "0.0.0.0:8000", "api:app"]


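The new CMD swaps Flask's built-in development server for Gunicorn: -w 2 starts two worker processes, -b 0.0.0.0:8000 binds to all interfaces on port 8000 (matching the updated EXPOSE line), and api:app is the module:object path to the Flask app copied in from src/. A minimal sketch of building and running the image, assuming an image tag of campus-api (the tag is illustrative, not part of the commit):

docker build -t campus-api .
docker run -p 8000:8000 campus-api
# equivalent check outside Docker, run from the directory that holds api.py:
# gunicorn -w 2 -b 0.0.0.0:8000 api:app
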
2 changes: 2 additions & 0 deletions requirements.txt
@@ -6,6 +6,7 @@ click==8.1.7
Deprecated==1.2.14
Flask==3.0.3
Flask-Limiter==3.7.0
gunicorn==22.0.0
idna==3.7
importlib_resources==6.4.0
itsdangerous==2.2.0
@@ -21,6 +22,7 @@ Pygments==2.18.0
requests==2.31.0
rich==13.7.1
soupsieve==2.5
tornado==6.4.1
typing_extensions==4.12.2
urllib3==2.2.1
Werkzeug==3.0.3
Binary file added src/__pycache__/api.cpython-310.pyc
26 changes: 15 additions & 11 deletions src/api.py
@@ -1,3 +1,6 @@
#for logging
import logging


# main entry point for the app
from flask import Flask, request, session, Response
@@ -18,9 +21,12 @@
storage_uri="memory://" # I know that using the default in-memory storage is bad practice, but it's acceptable for the small-scale API that this is
) # This is going to run on a 1 GB RAM machine anyway


limiter.init_app(app)

logging.basicConfig(
level=logging.WARNING,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

@app.route('/')
@limiter.limit("100 per minute")
@@ -44,7 +50,6 @@ def login():
#set the session cookie obtained previously
if response != None:
session['session_cookie'] = response
print("Session:", session)
return response


@@ -54,17 +59,11 @@ def login():
@app.route('/subjects', methods=['GET'])
@limiter.limit("3 per minute")
def subjects():
print(request.headers)
print("Session:", session)
print(f"HERE ARE THE COOKIES: {session}")

cookies = session['session_cookie']
print(cookies)
# Maybe a decorator or an if statement could be added here to handle an empty cookie (user session ended / user logged out)
# potential readability boost here: use a decorator to validate the existence of a session cookie instead

response = core_utils.get_subjects(cookies=cookies)

# print(response)
return response

@app.route('/materials', methods=['POST'])
@@ -91,11 +90,16 @@ def download_resource():

response = core_utils.get_download_link(target_link, link_type, cookies)

return response

if(response != None):
return response

return Response(status=401)


if __name__ == "__main__":

logging.debug(f"Started {__file__} at 0.0.0.0:5000")

# 0.0.0.0 binds the server to all network interfaces on the host
app.run(
debug=True,
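The comment in subjects() mentions moving the session-cookie check into a decorator. A minimal sketch of what that could look like, assuming the same Flask session key used above (the decorator name and the 401 response are illustrative, not part of this commit):

from functools import wraps
from flask import session, Response

def require_session_cookie(view):
    # Reject requests that arrive without a stored session cookie
    # (user session ended or the user never logged in).
    @wraps(view)
    def wrapper(*args, **kwargs):
        if not session.get('session_cookie'):
            return Response(status=401)
        return view(*args, **kwargs)
    return wrapper

# usage on the existing route:
# @app.route('/subjects', methods=['GET'])
# @limiter.limit("3 per minute")
# @require_session_cookie
# def subjects(): ...
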
File renamed without changes.
21 changes: 21 additions & 0 deletions src/profile_api.py
@@ -0,0 +1,21 @@
import cProfile
import pstats
from api import app

def run_profiler():
profiler = cProfile.Profile()
profiler.enable()


try:
app.run(debug=True, host="0.0.0.0", port=5000)

finally:
profiler.disable()

with open('profile_stats.prof', "w") as f:
ps = pstats.Stats(profiler, stream=f)
ps.strip_dirs().sort_stats('cumulative').print_stats()

run_profiler()

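profile_api.py writes a plain-text pstats report while the dev server runs. If the goal is a binary stats file that pstats can reload later (closer to what the committed profile_stats.prof name suggests), a minimal alternative sketch is:

profiler.dump_stats("profile_stats.prof")   # binary pstats dump instead of a text report

# then, in a separate session:
import pstats
stats = pstats.Stats("profile_stats.prof")
stats.strip_dirs().sort_stats("cumulative").print_stats(20)   # 20 slowest entries by cumulative time
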
Binary file added src/profile_stats.prof
Binary file modified src/utils/__pycache__/site_parsing.cpython-310.pyc
2 changes: 1 addition & 1 deletion src/utils/core_utils.py
@@ -82,7 +82,7 @@ def get_subjects(cookies) -> list[dict]:
html_content = response.content
#pass the html_content to a custom parsing block, that converts it into a neat json object
subjects = PARSE.parse_subjects(html=html_content)
print(subjects)
# print(subjects)

return subjects

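get_subjects now just comments the print out rather than logging the result. Since api.py configures logging at import time, a small sketch of routing this through a module-level logger (the logger setup shown here is a common convention, not part of the commit):

import logging

logger = logging.getLogger(__name__)   # inherits the basicConfig set up in api.py

# inside get_subjects, instead of the commented-out print:
logger.debug("parsed %d subjects", len(subjects))
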
18 changes: 13 additions & 5 deletions src/utils/site_parsing.py
@@ -1,5 +1,6 @@
from bs4 import BeautifulSoup
import re
import logging

def parse_subjects(html: str) -> list[dict]:
"""
@@ -85,18 +86,25 @@ def parse_materials(html : str) -> list[dict]:
soup = BeautifulSoup(str(activity), 'lxml')
link = soup.select_one("a")
link = link['href']
print(link)
# print(link)

name = soup.find("span")
name = name.contents[0]
link_type = link[34:-19] # 34 and -19 are just constants to remove the unnecessary part of the link

obj = soup.select(".accesshide")
doc_type = "unknown"
if(len(obj)):
doc_type = obj[0].contents[0]
# else:
# print(obj)
# The handling of the different ways the download links are made available is
# to be separated from this request.
activity_object = {
'name':name,
'link':link,
'type':link_type
'type':link_type,
'doctype':doc_type
}

course_materials.append(activity_object)
@@ -136,12 +144,12 @@ def flexpaper_parse(soup):
if(pdf_url_match):
#successfully found the PDFFILE url
pdf_url = pdf_url_match.group(1)
print("PDF file URL:", pdf_url)
# print("PDF file URL:", pdf_url)
return pdf_url
# print(pdf_url_match)

else:
print("Did not find the link within flexpaper config")
logging.debug("Did not find the link within flexpaper config")
return None

#This method is slightly slower as it extracts after loading a new page.
@@ -152,7 +160,7 @@ def default_parse(soup):
link_to_content = soup.find("a")
link_to_content = link_to_content['href']

print("FIRST: ",link_to_content)
# print("FIRST: ",link_to_content)
return link_to_content


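parse_materials still derives type from the URL with the hard-coded slice link[34:-19], while the new doctype field comes from the first .accesshide span. If the links follow Moodle's usual /mod/<type>/view.php?id=... layout (an assumption about the target site, not something the commit states), a sketch of a slice-free extraction:

from urllib.parse import urlparse

def link_type_from_url(link: str) -> str:
    # e.g. https://host/mod/resource/view.php?id=123 -> "resource"
    parts = urlparse(link).path.strip("/").split("/")
    if "mod" in parts and parts.index("mod") + 1 < len(parts):
        return parts[parts.index("mod") + 1]
    return "unknown"  # same fallback the diff uses for doc_type
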
