
Commit

added gunicorn wsgi server
Whiskas101 committed Jul 6, 2024
1 parent 1d27654 commit 3c6a3fe
Showing 10 changed files with 56 additions and 20 deletions.
7 changes: 4 additions & 3 deletions Dockerfile
@@ -4,7 +4,8 @@ RUN apk update
RUN apk add py-pip
RUN pip install --upgrade pip

WORKDIR /app

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt
@@ -13,8 +14,8 @@ COPY src/ .



EXPOSE 5000
EXPOSE 8000

CMD ["python3", "api.py"]
CMD ["gunicorn","-w","2","-b", "0.0.0.0:8000", "api:app"]


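The new CMD swaps Flask's built-in development server for Gunicorn: -w 2 starts two worker processes, -b 0.0.0.0:8000 binds to all interfaces on port 8000 (matching the updated EXPOSE line), and api:app is the module:object path to the Flask app copied in from src/. A minimal sketch of building and running the image, assuming an image tag of campus-api (the tag is illustrative, not part of the commit):

docker build -t campus-api .
docker run -p 8000:8000 campus-api
# equivalent check outside Docker, run from the directory that holds api.py:
# gunicorn -w 2 -b 0.0.0.0:8000 api:app
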
2 changes: 2 additions & 0 deletions requirements.txt
@@ -6,6 +6,7 @@ click==8.1.7
Deprecated==1.2.14
Flask==3.0.3
Flask-Limiter==3.7.0
gunicorn==22.0.0
idna==3.7
importlib_resources==6.4.0
itsdangerous==2.2.0
@@ -21,6 +22,7 @@ Pygments==2.18.0
requests==2.31.0
rich==13.7.1
soupsieve==2.5
tornado==6.4.1
typing_extensions==4.12.2
urllib3==2.2.1
Werkzeug==3.0.3
Binary file added src/__pycache__/api.cpython-310.pyc
26 changes: 15 additions & 11 deletions src/api.py
@@ -1,3 +1,6 @@
#for logging
import logging


# main entry point for the app
from flask import Flask, request, session, Response
@@ -18,9 +21,12 @@
storage_uri="memory://" # I know that using the default in-memory storage is bad practice, but it's acceptable for the small-scale API that this is
) # This is going to run on a 1 GB RAM machine anyway


limiter.init_app(app)

logging.basicConfig(
level=logging.WARNING,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

@app.route('/')
@limiter.limit("100 per minute")
@@ -44,7 +50,6 @@ def login():
#set the session cookie obtained previously
if response != None:
session['session_cookie'] = response
print("Session:", session)
return response


@@ -54,17 +59,11 @@ def login():
@app.route('/subjects', methods=['GET'])
@limiter.limit("3 per minute")
def subjects():
print(request.headers)
print("Session:", session)
print(f"HERE ARE THE COOKIES: {session}")

cookies = session['session_cookie']
print(cookies)
# Maybe a decorator or an if statement could be added here to handle an empty cookie (user session ended / user logged out)
# potential readability boost here: use a decorator to validate the existence of a session cookie instead

response = core_utils.get_subjects(cookies=cookies)

# print(response)
return response

@app.route('/materials', methods=['POST'])
@@ -91,11 +90,16 @@ def download_resource():

response = core_utils.get_download_link(target_link, link_type, cookies)

return response

if(response != None):
return response

return Response(status=401)


if __name__ == "__main__":

logging.debug(f"Started {__file__} at 0.0.0.0:5000")

# 0.0.0.0 binds the server to all network interfaces on the host
app.run(
debug=True,
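The comment in subjects() mentions moving the session-cookie check into a decorator. A minimal sketch of what that could look like, assuming the same Flask session key used above (the decorator name and the 401 response are illustrative, not part of this commit):

from functools import wraps
from flask import session, Response

def require_session_cookie(view):
    # Reject requests that arrive without a stored session cookie
    # (user session ended or the user never logged in).
    @wraps(view)
    def wrapper(*args, **kwargs):
        if not session.get('session_cookie'):
            return Response(status=401)
        return view(*args, **kwargs)
    return wrapper

# usage on the existing route:
# @app.route('/subjects', methods=['GET'])
# @limiter.limit("3 per minute")
# @require_session_cookie
# def subjects(): ...
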
File renamed without changes.
21 changes: 21 additions & 0 deletions src/profile_api.py
@@ -0,0 +1,21 @@
import cProfile
import pstats
from api import app

def run_profiler():
profiler = cProfile.Profile()
profiler.enable()


try:
app.run(debug=True, host="0.0.0.0", port=5000)

finally:
profiler.disable()

with open('profile_stats.prof', "w") as f:
ps = pstats.Stats(profiler, stream=f)
ps.strip_dirs().sort_stats('cumulative').print_stats()

run_profiler()

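profile_api.py writes a plain-text pstats report while the dev server runs. If the goal is a binary stats file that pstats can reload later (closer to what the committed profile_stats.prof name suggests), a minimal alternative sketch is:

profiler.dump_stats("profile_stats.prof")   # binary pstats dump instead of a text report

# then, in a separate session:
import pstats
stats = pstats.Stats("profile_stats.prof")
stats.strip_dirs().sort_stats("cumulative").print_stats(20)   # 20 slowest entries by cumulative time
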
Binary file added src/profile_stats.prof
Binary file modified src/utils/__pycache__/site_parsing.cpython-310.pyc
2 changes: 1 addition & 1 deletion src/utils/core_utils.py
@@ -82,7 +82,7 @@ def get_subjects(cookies) -> list[dict]:
html_content = response.content
#pass the html_content to a custom parsing block, that converts it into a neat json object
subjects = PARSE.parse_subjects(html=html_content)
print(subjects)
# print(subjects)

return subjects

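get_subjects now just comments the print out rather than logging the result. Since api.py configures logging at import time, a small sketch of routing this through a module-level logger (the logger setup shown here is a common convention, not part of the commit):

import logging

logger = logging.getLogger(__name__)   # inherits the basicConfig set up in api.py

# inside get_subjects, instead of the commented-out print:
logger.debug("parsed %d subjects", len(subjects))
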
18 changes: 13 additions & 5 deletions src/utils/site_parsing.py
@@ -1,5 +1,6 @@
from bs4 import BeautifulSoup
import re
import logging

def parse_subjects(html: str) -> list[dict]:
"""
@@ -85,18 +86,25 @@ def parse_materials(html : str) -> list[dict]:
soup = BeautifulSoup(str(activity), 'lxml')
link = soup.select_one("a")
link = link['href']
print(link)
# print(link)

name = soup.find("span")
name = name.contents[0]
link_type = link[34:-19] # 34 and -19 are just constants to remove the unnecessary part of the link

obj = soup.select(".accesshide")
doc_type = "unknown"
if(len(obj)):
doc_type = obj[0].contents[0]
# else:
# print(obj)
# The handling of the different ways the download links are made available is
# to be separated from this request.
activity_object = {
'name':name,
'link':link,
'type':link_type
'type':link_type,
'doctype':doc_type
}

course_materials.append(activity_object)
@@ -136,12 +144,12 @@ def flexpaper_parse(soup):
if(pdf_url_match):
#successfully found the PDFFILE url
pdf_url = pdf_url_match.group(1)
print("PDF file URL:", pdf_url)
# print("PDF file URL:", pdf_url)
return pdf_url
# print(pdf_url_match)

else:
print("Did not find the link within flexpaper config")
logging.debug("Did not find the link within flexpaper config")
return None

#This method is slightly slower as it extracts after loading a new page.
@@ -152,7 +160,7 @@ def default_parse(soup):
link_to_content = soup.find("a")
link_to_content = link_to_content['href']

print("FIRST: ",link_to_content)
# print("FIRST: ",link_to_content)
return link_to_content


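parse_materials still derives type from the URL with the hard-coded slice link[34:-19], while the new doctype field comes from the first .accesshide span. If the links follow Moodle's usual /mod/<type>/view.php?id=... layout (an assumption about the target site, not something the commit states), a sketch of a slice-free extraction:

from urllib.parse import urlparse

def link_type_from_url(link: str) -> str:
    # e.g. https://host/mod/resource/view.php?id=123 -> "resource"
    parts = urlparse(link).path.strip("/").split("/")
    if "mod" in parts and parts.index("mod") + 1 < len(parts):
        return parts[parts.index("mod") + 1]
    return "unknown"  # same fallback the diff uses for doc_type
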
