Skip to content

Commit

Permalink
Added a JSON database to record when each post was first processed.
Browse files Browse the repository at this point in the history
  • Loading branch information
DennisJensen95 committed Oct 16, 2022
1 parent a57feca commit 8f8a551
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 8 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ coloredlogs
selenium==4.4.*
boto3==1.24.*
schedule==1.1.*
twilio==7.13.*
twilio==7.13.*
pysondb==1.6.*
57 changes: 57 additions & 0 deletions src/curly_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Interface for saving curly girl posts and artifacts
"""

# Standard library
from hashlib import sha256
from datetime import datetime
import os

# Third party libraries
import pysondb


DATEFORMAT = "%d/%m/%Y-%H:%M:%S"


def initialize_database(database_path: str = f"{os.getcwd()}/curlygirl.json") -> pysondb.db.JsonDatabase:
    """Open the pysondb JSON database located at ``database_path``.

    Args:
        database_path (str, optional): Path to the JSON database file.
            Defaults to "<working directory>/curlygirl.json"; the working
            directory is read once, when this function is defined.

    Returns:
        pysondb.db.JsonDatabase: The object returned by ``pysondb.db.getDb``.
    """
    database = pysondb.db.getDb(database_path)
    return database


def save_post(post_text: str, db: pysondb.db.JsonDatabase):
    """Store a post in the database unless an identical one is already there.

    Args:
        post_text (str): Text of the post to store.
        db (pysondb.db): Database the post is written to.

    Returns:
        str: Id of the post, as produced by ``compute_post_id``.
    """
    post_id = compute_post_id(post_text)

    already_stored = db.getByQuery({"id_post": post_id})
    if not already_stored:
        # The timestamp is taken at insertion time, so it records when the
        # post was first saved here.
        record = {
            "post_text": post_text,
            "time": datetime.now().strftime(DATEFORMAT),
            "id_post": post_id,
        }
        db.add(record)

    return post_id


def compute_post_id(post_text: str):
    """Derive a post id from the post's text.

    Args:
        post_text (str): Text of the post.

    Returns:
        str: Hexadecimal SHA-256 digest of the encoded text.
    """
    hasher = sha256()
    hasher.update(post_text.encode())
    return hasher.hexdigest()


def get_post_timestamp(post_id: str, db: pysondb.db.JsonDatabase):
    """Return the time at which the post with ``post_id`` was stored.

    Args:
        post_id (str): Value of the "id_post" field to look up.
        db (pysondb.db): Database being queried.

    Returns:
        datetime: The entry's "time" field parsed with ``DATEFORMAT``.

    Raises:
        IndexError: If no entry with the given id exists.
        RuntimeError: If more than one entry shares the given id.
    """
    matches = db.getByQuery({"id_post": post_id})

    if not matches:
        # IndexError is what indexing the empty result used to raise; it is
        # kept as the exception type but now carries a useful message.
        raise IndexError(f"No post with id {post_id!r} found in the database")

    if len(matches) > 1:
        # Asserting a non-empty string literal is always true, so that form
        # of check can never fire; raise explicitly instead.
        raise RuntimeError(
            "Multiple entries with the same id should not be possible")

    return datetime.strptime(matches[0]["time"], DATEFORMAT)
42 changes: 36 additions & 6 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# Standard library
from time import sleep
import schedule
from datetime import datetime

# Application libraries
from logger.init_logger import getLogger
from aws_secrets import get_secret
import curly_db

# Third party libraries
from twilio.rest import Client
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
import pysondb

log = getLogger(__file__)

Expand All @@ -23,10 +26,17 @@ def identify_cancellation(post_text: str):
return False


def check_if_any_post_is_cancellation(post_texts: list, db: pysondb.db.JsonDatabase):
    """Report whether any post is a cancellation that is still recent.

    Every cancellation post is saved to the database, and the timestamp
    stored for it decides whether it is still worth a notification.

    Args:
        post_texts (list): Texts of the posts to inspect.
        db (pysondb.db): Database used to save posts and read their timestamps.

    Returns:
        bool: True if at least one cancellation post is not too old,
        otherwise False.
    """
    for post_text in post_texts:
        if not identify_cancellation(post_text):
            continue

        post_id = curly_db.save_post(post_text=post_text, db=db)
        time_stamp = curly_db.get_post_timestamp(post_id=post_id, db=db)
        log.debug(
            f"First message of the post was sent at: {time_stamp.strftime(curly_db.DATEFORMAT)}")
        if not is_post_too_old(5, post_time=time_stamp):
            return True

        # Keep scanning: one stale cancellation must not hide a fresh
        # cancellation further down the list.
        log.debug("Not notifying the client as the post was too old")

    return False


Expand Down Expand Up @@ -63,6 +73,23 @@ def extract_posts_from_page(driver: webdriver):
return post_texts


def is_post_too_old(min_old_threshold: int, post_time: datetime):
    """Tell whether a post's age exceeds the allowed number of minutes.

    Args:
        min_old_threshold (int): Maximum age in minutes before the post
            counts as too old.
        post_time (datetime): Time the post is dated at; it is compared
            against ``datetime.now()``.

    Returns:
        bool: True if strictly more than ``min_old_threshold`` minutes have
        passed since ``post_time``.
    """
    age_in_minutes = (datetime.now() - post_time).total_seconds() / 60
    return age_in_minutes > min_old_threshold


def send_message_to_user(secrets: dict):
account_sid = secrets["CURLY_TWILIO_ACCOUNT_SID"]
auth_token = secrets["CURLY_TWILIO_AUTH_TOKEN"]
Expand All @@ -81,7 +108,7 @@ def send_message_to_user(secrets: dict):
log.debug(f"Twillio message unique identifer: {message.sid}")


def check_if_any_cancellation(secrets: dict, facebook_page: str):
def check_if_any_cancellation(secrets: dict, facebook_page: str, db: pysondb.db.JsonDatabase):
options = Options()
options.headless = True
driver = webdriver.Firefox(options=options)
Expand All @@ -93,7 +120,7 @@ def check_if_any_cancellation(secrets: dict, facebook_page: str):
click_only_essential_cookies(driver)
sleep(2)
posts = extract_posts_from_page(driver)
if check_if_any_post_is_cancellation(posts):
if check_if_any_post_is_cancellation(posts, db):
log.info("Found a cancellation post - Notify the client")
send_message_to_user(secrets)
else:
Expand All @@ -112,10 +139,13 @@ def main():
log.error("Unable to get TWILIO secrets")
return

# Init database object
db = curly_db.initialize_database()

schedule.every(1).minutes.do(
check_if_any_cancellation, secrets, facebook_page)
check_if_any_cancellation, secrets, facebook_page, db)

check_if_any_cancellation(secrets, facebook_page)
check_if_any_cancellation(secrets, facebook_page, db)

log.debug("Start the scheduler")
while True:
Expand Down
45 changes: 45 additions & 0 deletions src/test_curly_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Standard library
import unittest
import os

# Module to test
import curly_db
from main import is_post_too_old


class TestCurlyDb(unittest.TestCase):
    """Tests for saving posts to the JSON database and reading timestamps back."""

    # Scratch database file; opened in setUp and removed again in tearDown.
    database_path = "/tmp/curly_girl_test_db.json"

    def setUp(self) -> None:
        """Open a database at ``database_path`` for the test."""
        print("Setup of test")
        self.db = curly_db.initialize_database(self.database_path)

    def tearDown(self) -> None:
        """Empty the database and delete the files created for it."""
        # Local import: glob is only needed for this cleanup step.
        import glob

        print("Teardown of test")
        self.db.deleteAll()
        # Delete the database file and any sibling files sharing its name as
        # a prefix directly, rather than passing an interpolated path to a
        # shell `rm -rf`.
        for leftover in glob.glob(f"{glob.escape(self.db.filename)}*"):
            os.remove(leftover)

    def test_inserting_post(self):
        """A saved post can be read back, and saving it twice stores it once."""
        # 1
        # Insert random post text and check if it is there afterwards
        post_text = "Afbud! Der er kommet en tid på mandag bla bla bla."
        post_id = curly_db.save_post(post_text=post_text, db=self.db)
        stored = self.db.getByQuery({"id_post": post_id})
        self.assertEqual(post_text, stored[0]["post_text"])

        # 2
        # Inserting identical post and only one entry should be there.
        post_id = curly_db.save_post(post_text=post_text, db=self.db)
        self.assertEqual(len(self.db.getByQuery({"id_post": post_id})), 1)

    def test_getting_timestamp(self):
        """The timestamp stored for a post reflects the time it was saved."""
        # 1
        # Timestamp here and now
        post_text = "Afbud! Der er kommet en tid på mandag bla bla bla."
        post_id = curly_db.save_post(post_text=post_text, db=self.db)
        time_stamp = curly_db.get_post_timestamp(post_id, self.db)
        self.assertFalse(is_post_too_old(10, time_stamp))

        # 2
        # Make sure it is the current time which is timestamped: with a
        # threshold of 0 minutes, any elapsed time makes the post too old.
        self.assertTrue(is_post_too_old(0, time_stamp))
12 changes: 11 additions & 1 deletion src/test_main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Standard library
import unittest
from main import identify_cancellation
from datetime import datetime, timedelta

# Module being tested
from main import identify_cancellation, is_post_too_old


class TestMain(unittest.TestCase):
Expand All @@ -20,6 +24,12 @@ def test_identifying_negative_cancellation(self):
is_canellation_post = identify_cancellation(post_text)
self.assertFalse(is_canellation_post)

def test_is_post_too_old(self):
    """A five-minute-old post passes a 10 minute limit but fails a 3 minute one."""
    five_minutes_ago = datetime.now() - timedelta(minutes=5)

    within_ten_minutes = is_post_too_old(10, five_minutes_ago)
    self.assertFalse(within_ten_minutes)

    beyond_three_minutes = is_post_too_old(3, five_minutes_ago)
    self.assertTrue(beyond_three_minutes)


if __name__ == '__main__':
unittest.main()

0 comments on commit 8f8a551

Please sign in to comment.