Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust email dates #40

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,6 @@ cython_debug/

# macOS
.DS_Store

# VS Code
.vscode
10 changes: 7 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import csv
import json
import re
from datetime import datetime
from collections import defaultdict
from tqdm import tqdm
from src.JobTracker.utils import EmailMessage
Expand All @@ -12,9 +11,14 @@
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def format_cell(item):
def format_cell(key, item):
    """
    Render one CSV cell value as a string.

    Lists are flattened to one element per line, so the cell shows no
    square brackets. For the 'company' and 'recipient_mail' columns, a list
    whose elements are all equal collapses to that single value.

    Args:
        key: Column name the value belongs to.
        item: Cell value; either a list or any object convertible by str().

    Returns:
        str: The text to write into the cell.
    """
    if not isinstance(item, list):
        return str(item)

    if key in ('company', 'recipient_mail') and item:
        # merge repeating values to a single value; compare against the
        # first element rather than building a set, so unhashable elements
        # (nested lists, dicts) do not raise TypeError
        first = item[0]
        if all(value == first for value in item):
            return str(first)

    return '\n'.join(map(str, item))

Expand Down Expand Up @@ -80,7 +84,7 @@ def export_to_csv(data, filename):
for row in data:
# remove square and curly brackets
# separate each item to a new line in a cell
formatted_row = {key: format_cell(value) for key, value in row.items()}
formatted_row = {key: format_cell(key, value) for key, value in row.items()}
formatted_row['state'] = remove_curly_braces(formatted_row['state'])
writer.writerow(formatted_row)

Expand Down
7 changes: 6 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@ aiosignal==1.3.1
async-timeout==4.0.3
attrs==23.1.0
beautifulsoup4==4.12.2
bs4==0.0.1
certifi==2023.7.22
charset-normalizer==3.3.1
frozenlist==1.4.0
idna==3.4
multidict==6.0.4
openai==0.28.1
python-dateutil==2.8.2
pytz==2023.3.post1
regex==2023.12.25
requests==2.31.0
six==1.16.0
soupsieve==2.5
tiktoken==0.5.1
tqdm==4.66.1
urllib3==2.0.7
yarl==1.9.2
tiktoken==0.5.1
15 changes: 1 addition & 14 deletions src/JobTracker/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,7 @@ def get_content(self, info):
except KeyError:
return ('Failed', 'JSON not formatted correctly')
else:
# add initial value
month_day_year_time = 'Sun, 99 Dec 9999 99:99:99 '
date_object = datetime.strptime('Mon, 15 Mar 2021 14:45:30 +0000', "%a, %d %b %Y %H:%M:%S %z")
try:
date_object = datetime.strptime(info['date'], "%a, %d %b %Y %H:%M:%S %z")
month_day_year_time = date_object.strftime("%b %d %Y %H:%M:%S")
except ValueError:
try:
date_object = datetime.strptime(info['date'], "%a, %d %b %Y %H:%M:%S %z (%Z)")
month_day_year_time = date_object.strftime("%b %d %Y %H:%M:%S")
except ValueError:
logging.warn('Unable to parse date, Use default date instead')
info['state'] = json.dumps({info['state']:month_day_year_time})
info['rank'] = date_object
info['state'] = json.dumps({info['state']:info['date_utc_str']})
return ('Succeed', info)
else:
return ('Failed', 'Not related to a job application or interview process')
Expand Down
19 changes: 17 additions & 2 deletions src/JobTracker/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import re
import email.utils
import mailbox
import pytz
import requests
from .config import KEYWORD
from dateutil.parser import parse as parse_date
from email.header import decode_header
from email.utils import parseaddr
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -48,6 +50,16 @@ def get_text(self, mail):
text = self.cleanup_body(soup.get_text())
return text

def format_utc_date(self, date_str):
    """
    Convert a date string to the UTC time zone.

    The string is parsed with dateutil's ``parse`` (imported here as
    ``parse_date``). Any error it raises for input it cannot interpret
    is not caught here and propagates to the caller.

    Args:
        date_str: Date text, e.g. the value of a mail's 'date' header.

    Returns:
        tuple: ``(date_str_utc, date_obj_utc)`` where ``date_str_utc`` is
        formatted as "%b %d %Y %H:%M:%S" and ``date_obj_utc`` is the
        timezone-aware datetime in UTC.
    """
    date_obj = parse_date(date_str)
    if date_obj.tzinfo is None:
        # No offset in the input: astimezone() would otherwise assume the
        # machine's local zone and make the result host-dependent.
        # Interpret such values as UTC so the output is deterministic.
        date_obj = date_obj.replace(tzinfo=pytz.utc)
    date_obj_utc = date_obj.astimezone(pytz.utc)
    date_str_utc = date_obj_utc.strftime("%b %d %Y %H:%M:%S")
    return date_str_utc, date_obj_utc

def get_mail_info(self):
res = []
# Loop over every mail and get info
Expand All @@ -58,15 +70,18 @@ def get_mail_info(self):
for part, charset in subject)
sender_name, sender_mail = email.utils.parseaddr(mail['from'])
recipient_name, recipient_mail = email.utils.parseaddr(mail['to'])
date = mail['date']
date_str = mail['date']
date_str_utc, date_obj_utc = self.format_utc_date(mail['date'])
body = self.get_mail_body(mail)
if self.related_to_application(body + subject):
info['subject'] = subject
info['sender_name'] = sender_name
info['sender_mail'] = sender_mail
info['recipient_name'] = recipient_name
info['recipient_mail'] = recipient_mail
info['date'] = date
info['date'] = date_str
info['date_utc_str'] = date_str_utc
info['date_utc_obj'] = date_obj_utc
info['body'] = body
info['length'] = len(body)
res.append(info)
Expand Down
6 changes: 2 additions & 4 deletions tests/test_chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def setUp(self, mock_model_list):
@patch('openai.ChatCompletion.create')
@patch('src.JobTracker.config')
def test_get_content_succeed(self, mock_config, mock_openai_chatcompletion_create):
info = {'body': 'test email body', 'date': "Thu, 09 Nov 2023 23:27:06 +0000"}
info = {'body': 'test email body', 'date_utc_str': "Nov 09 2023 23:27:06"}
mock_config.API_KEY = 'test_api_key'
mock_config.FUNCTION = 'test_function'
# Mock the API response
Expand All @@ -67,11 +67,9 @@ def test_get_content_succeed(self, mock_config, mock_openai_chatcompletion_creat
mock_response.choices = [mock_choice]
mock_openai_chatcompletion_create.return_value = mock_response
state, data = self.chat_gpt.get_content(info)
date_object = datetime.strptime(info['date'], "%a, %d %b %Y %H:%M:%S %z")
month_day_year_time = date_object.strftime("%b %d %Y %H:%M:%S")
self.assertEqual(state, 'Succeed')
self.assertEqual(data['company'], 'TestCompany')
self.assertEqual(data['state'], json.dumps({"TestState": month_day_year_time}))
self.assertEqual(data['state'], json.dumps({"TestState": "Nov 09 2023 23:27:06"}))
self.assertEqual(data['next_step'], 'TestNextStep')

@patch('openai.ChatCompletion.create')
Expand Down
27 changes: 27 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,33 @@ def setUp(self, mock_mbox):
self.mbox_path = 'path/to/mbox'
self.email_message = EmailMessage(self.mbox_path)

def create_mock_mail(self):
    """Return a one-element list holding a mock mail with fixed headers.

    The mock supports ``mail[key]`` lookups; unknown keys yield ''.
    """
    headers = {
        'subject': '=?utf-8?B?U3ViamVjdA==?=',  # Base64 encoded "Subject"
        'from': 'John Doe <[email protected]>',
        'to': 'Jane Doe <[email protected]>',
        'date': 'Fri, 01 Jan 2021 10:00:00 +0000',
    }

    def lookup(key):
        # mimic a header lookup that falls back to an empty string
        return headers.get(key, '')

    mail_item = MagicMock()
    mail_item.__getitem__.side_effect = lookup
    return [mail_item]

@patch('src.JobTracker.utils.EmailMessage.get_mail_body', return_value="Mail body")
@patch('src.JobTracker.utils.EmailMessage.related_to_application', return_value=True)
def test_get_mail_info(self, mock_related_to_application, mock_get_mail_body):
    """get_mail_info() extracts headers, body and UTC date fields from a mail.

    The body extraction and the relevance filter are patched out so only
    the header handling and date normalisation are exercised.
    """
    self.email_message.mail_lst = self.create_mock_mail()
    info = self.email_message.get_mail_info()[0]
    self.assertEqual(info['subject'], 'Subject')
    self.assertEqual(info['sender_name'], 'John Doe')
    self.assertEqual(info['sender_mail'], '[email protected]')
    self.assertEqual(info['recipient_name'], 'Jane Doe')
    self.assertEqual(info['recipient_mail'], '[email protected]')
    # the raw header is kept as-is ...
    self.assertEqual(info['date'], 'Fri, 01 Jan 2021 10:00:00 +0000')
    # ... and the UTC-normalised string/datetime are stored next to it
    self.assertEqual(info['date_utc_str'], 'Jan 01 2021 10:00:00')
    self.assertEqual(info['date_utc_obj'].isoformat(), '2021-01-01T10:00:00+00:00')
    self.assertEqual(info['body'], 'Mail body')
    self.assertEqual(info['length'], len('Mail body'))
    self.assertTrue(mock_related_to_application.called)
    self.assertTrue(mock_get_mail_body.called)


def test_clenup_body(self):
text = "This is a test message. Visit http://example.com for details.\r\nNew line here."
cleaned_text = self.email_message.cleanup_body(text)
Expand Down