diff --git a/archive/streamlit_app.py b/archive/streamlit_app.py
index c8b2dbfe..2b24634f 100644
--- a/archive/streamlit_app.py
+++ b/archive/streamlit_app.py
@@ -1,13 +1,10 @@
-import string
-import spacy
-import pywaffle
 import streamlit as st
 import pandas as pd
 import json
 import plotly.express as px
 import plotly.graph_objects as go
 import matplotlib.pyplot as plt
-import squarify
+import pywaffle
 
 st.title('Resume :blue[Matcher]')
 st.image('Assets/img/header_image.jpg')
@@ -20,82 +17,80 @@ def read_json(filename):
     return data
 
 
-# read the json file
-resume = read_json(
-    'Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json')
-job_desc = read_json(
-    'Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
-
-st.write("### Reading Resume's POS")
-df = pd.DataFrame(resume['pos_frequencies'], index=[0])
-fig = go.Figure(data=go.Bar(y=list(resume['pos_frequencies'].values()), x=list(resume['pos_frequencies'].keys())),
-                layout_title_text="Resume's POS")
-st.write(fig)
-
-df2 = pd.DataFrame(resume['keyterms'], columns=["keyword", "value"])
-st.dataframe(df2)
-
-# Create the dictionary
-keyword_dict = {}
-for keyword, value in resume['keyterms']:
-    keyword_dict[keyword] = value
-
-fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
-                                           font=dict(size=12),
-                                           fill_color='#070A52'),
-                               cells=dict(values=[list(keyword_dict.keys()),
-                                                  list(keyword_dict.values())],
-                                          line_color='darkslategray',
-                                          fill_color='#6DA9E4'))
-                      ])
-st.plotly_chart(fig)
-
-st.divider()
-
-for keyword, value in resume['keyterms']:
-    pass
-
-
-# display the waffle chart
-figure = plt.figure(
-    FigureClass=pywaffle.Waffle,
-    rows=20,
-    columns=20,
-    values=keyword_dict,
-    legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)})
-
-
-# Display the dictionary
-
-st.pyplot(fig=figure)
-# st.write(dict)
-
-fig = px.treemap(df2, path=['keyword'], values='value',
-                 color_continuous_scale='RdBu',
-                 title='Resume POS')
-st.write(fig)
-
-
-st.plotly_chart(figure_or_data=fig)
-
-fig = go.Figure(data=[go.Table(
-    header=dict(values=["Tri Grams"],
-                fill_color='#1D267D',
-                align='center', font=dict(color='white', size=16)),
-    cells=dict(values=[resume['tri_grams']],
-               fill_color='#19A7CE',
-               align='left'))])
-
-st.plotly_chart(figure_or_data=fig)
-
-fig = go.Figure(data=[go.Table(
-    header=dict(values=["Bi Grams"],
-                fill_color='#1D267D',
-                align='center', font=dict(color='white', size=16)),
-    cells=dict(values=[resume['bi_grams']],
-               fill_color='#19A7CE',
-               align='left'))])
-
-st.plotly_chart(figure_or_data=fig)
-
-
+def main():
+    """Load the processed resume and job description, then render the resume charts."""
+    resume = read_json('Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json')
+    job_desc = read_json('Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
+
+    show_pos(resume)
+    show_keyterms(resume)
+    show_waffle_chart(resume['keyterms'])
+    show_treemap(resume['keyterms'])
+    show_n_grams(resume['tri_grams'], 'Tri Grams')
+    show_n_grams(resume['bi_grams'], 'Bi Grams')
+
+
+def show_pos(resume_data):
+    """Plot the values of resume_data['pos_frequencies'] as a bar chart."""
+    st.write("### Reading Resume's POS")
+    pos_frequencies = resume_data['pos_frequencies']
+    fig = go.Figure(data=go.Bar(y=list(pos_frequencies.values()), x=list(pos_frequencies.keys())),
+                    layout_title_text="Resume's POS")
+    st.plotly_chart(fig)
+
+
+def show_keyterms(resume_data):
+    """Show resume_data['keyterms'] as a dataframe and as a styled Plotly table."""
+    df2 = pd.DataFrame(resume_data['keyterms'], columns=["keyword", "value"])
+    st.dataframe(df2)
+    # Build the keyword-to-value mapping here; no module-level keyword_dict exists.
+    keyword_dict = dict(resume_data['keyterms'])
+    fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                               font=dict(size=12),
+                                               fill_color='#070A52'),
+                                   cells=dict(values=[list(keyword_dict.keys()),
+                                                      list(keyword_dict.values())],
+                                              line_color='darkslategray',
+                                              fill_color='#6DA9E4'))
+                          ])
+    st.plotly_chart(fig)
+
+
+def show_waffle_chart(keyword_dict):
+    """Draw a 20x20 waffle chart of the keyword values.
+
+    ``keyword_dict`` may be a keyword-to-value mapping or an iterable of
+    (keyword, value) pairs; main() passes the latter.
+    """
+    figure = plt.figure(
+        FigureClass=pywaffle.Waffle,
+        rows=20,
+        columns=20,
+        values=dict(keyword_dict),
+        legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)})
+    st.pyplot(fig=figure)
+
+
+def show_treemap(keyword_data):
+    """Render a treemap of (keyword, value) pairs, sized by value."""
+    df2 = pd.DataFrame(keyword_data, columns=["keyword", "value"])
+    fig = px.treemap(df2, path=['keyword'], values='value',
+                     color_continuous_scale='RdBu',
+                     title='Resume POS')
+    st.plotly_chart(fig)
+
+
+def show_n_grams(n_gram_data, title):
+    """Render n_gram_data as a single-column Plotly table headed by title."""
+    fig = go.Figure(data=[go.Table(
+        header=dict(values=[title],
+                    fill_color='#1D267D',
+                    align='center', font=dict(color='white', size=16)),
+        cells=dict(values=[n_gram_data],
+                   fill_color='#19A7CE',
+                   align='left'))])
+    st.plotly_chart(figure_or_data=fig)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/run_first.py b/run_first.py
index f9e71f04..00190480 100644
--- a/run_first.py
+++ b/run_first.py
@@ -1,60 +1,40 @@
 import json
+import logging
 from scripts.utils.ReadFiles import get_filenames_from_dir
 from scripts.ResumeProcessor import ResumeProcessor
 from scripts.JobDescriptionProcessor import JobDescriptionProcessor
-import logging
 
-logging.basicConfig(filename='app.log', filemode='w',
-                    level=logging.DEBUG,
+logging.basicConfig(filename='app.log', filemode='w', level=logging.DEBUG,
                     format='%(name)s - %(levelname)s - %(message)s')
 
-
 def read_json(filename):
     with open(filename) as f:
         data = json.load(f)
     return data
 
-
-logging.info('Started to read from Data/Resumes')
-try:
-    # Check if there are resumes present or not.
-    # If present then parse it.
-    file_names = get_filenames_from_dir("Data/Resumes")
-    logging.info('Reading from Data/Resumes is now complete.')
-except:
-    # Exit the program if there are no resumes.
-    logging.error('There are no resumes present in the specified folder.')
-    logging.error('Exiting from the program.')
-    logging.error(
-        'Please add resumes in the Data/Resumes folder and try again.')
-    exit(1)
-
-# Now after getting the file_names parse the resumes into a JSON Format.
-logging.info('Started parsing the resumes.')
-for file in file_names:
-    processor = ResumeProcessor(file)
-    success = processor.process()
-logging.info('Parsing of the resumes is now complete.')
-
-logging.info('Started to read from Data/JobDescription')
-try:
-    # Check if there are resumes present or not.
-    # If present then parse it.
-    file_names = get_filenames_from_dir("Data/JobDescription")
-    logging.info('Reading from Data/JobDescription is now complete.')
-except:
-    # Exit the program if there are no resumes.
-    logging.error(
-        'There are no job-description present in the specified folder.')
-    logging.error('Exiting from the program.')
-    logging.error(
-        'Please add resumes in the Data/JobDescription folder and try again.')
-    exit(1)
-
-# Now after getting the file_names parse the resumes into a JSON Format.
-logging.info('Started parsing the Job Descriptions.')
-for file in file_names:
-    processor = JobDescriptionProcessor(file)
-    success = processor.process()
-logging.info('Parsing of the Job Descriptions is now complete.')
-logging.info('Success now run `streamlit run streamlit_second.py`')
+def process_files(directory_path, processor_class):
+    """Run processor_class(file).process() for every file listed in directory_path.
+
+    Logs three error lines and exits with status 1 if get_filenames_from_dir raises.
+    """
+    try:
+        file_names = get_filenames_from_dir(directory_path)
+        logging.info(f'Reading from {directory_path} is now complete.')
+    except Exception:
+        logging.error(f'There are no files present in {directory_path}.')
+        logging.error('Exiting from the program.')
+        logging.error(f'Please add files in the {directory_path} folder and try again.')
+        raise SystemExit(1)
+
+    logging.info(f'Started processing files in {directory_path}.')
+    for file in file_names:
+        processor_class(file).process()
+    logging.info(f'Processing of files in {directory_path} is now complete.')
+
+if __name__ == "__main__":
+    try:
+        process_files("Data/Resumes", ResumeProcessor)
+        process_files("Data/JobDescription", JobDescriptionProcessor)
+        logging.info('Success! Run `streamlit run streamlit_second.py`')
+    except Exception as e:
+        logging.exception(f'An error occurred: {e}')