From 1c1b5fd229c431a3167ef4df5623fcae79c0a3a6 Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Tue, 14 May 2024 22:48:02 +0530 Subject: [PATCH 1/7] Create indeed.py --- src/scrape_up/Indeed/indeed.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/scrape_up/Indeed/indeed.py diff --git a/src/scrape_up/Indeed/indeed.py b/src/scrape_up/Indeed/indeed.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/scrape_up/Indeed/indeed.py @@ -0,0 +1 @@ + From 3c6335fafe8b07f3ac5f539cca12ca0917d54061 Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Tue, 14 May 2024 22:48:45 +0530 Subject: [PATCH 2/7] Create __init__.py --- src/scrape_up/Indeed/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/scrape_up/Indeed/__init__.py diff --git a/src/scrape_up/Indeed/__init__.py b/src/scrape_up/Indeed/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/scrape_up/Indeed/__init__.py @@ -0,0 +1 @@ + From 9541526dd6c1b6ecfa330e52b5c22af4326f6d2b Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Tue, 14 May 2024 23:53:52 +0530 Subject: [PATCH 3/7] Update indeed.py Created a scrapper for Indeed to fetch company and job details including company name, its location, the job position, the date of job posting, salary offered and the summary. --- src/scrape_up/Indeed/indeed.py | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/scrape_up/Indeed/indeed.py b/src/scrape_up/Indeed/indeed.py index 8b137891..480964dd 100644 --- a/src/scrape_up/Indeed/indeed.py +++ b/src/scrape_up/Indeed/indeed.py @@ -1 +1,70 @@ +import requests +from bs4 import BeautifulSoup +import csv +from datetime import datetime +class Indeed: + """ + Create an instance of `Indeed` class. 
+ ```python + indeed = Indeed() + ``` + | Methods | Details | + | ---------------------- | ------------------------------------------------------------------------------------------------ | + | `.get_url()` | Returns the URL of the job having a specific position and location. | + | `.get_record()` | Returns the company details like job title, company name, location, job post date, and salary. | + """ + + def __init__(self): + self.position=position + self.location=location + + def get_url (self, position,location): + template = 'https://www.indeed.com/jobs?q={}&l={}' + url = template.format(position,location) + return url + + #getting the record + def get_record(self, card): + atag1= card.h2.a.span + job_title= atag1.get('title') + atag2= card.h2.a + job_url= 'https://indeed.com'+atag2.get('href') + + company= card.find('span','companyName').text.strip() + location= card.find('div','companyLocation').text.strip() + summary= card.find('div','job-snippet').text.strip() + posted_date= card.find('span','date').text.strip() + today= datetime.today().strftime('%Y-%m-%d') + + try: + salary = card.find('div','metadata estimated-salary-container').text.strip() + except AttributeError: + salary = '' + + record = (job_title, job_url, location, company, posted_date, today, summary, salary) + return record + + #writing the main function + def main(self, position, location): + records = [] + url = get_url(position, location) + + while True: + response=requests.get(url) + soup = BeautifulSoup(response.text,'html.parser') + cards=soup.find_all('div','job_seen_beacon') + for card in cards: + record=get_record(card) + records.append(record) + try: + url='https://indeed.com'+soup.find('a',{'aria-label':'Next'}).get('href') + except AttributeError: + break + + with open(f'{position}-{location}.csv','w',newline='',encoding= 'utf-8') as f: + writer= csv.writer(f) + writer.writerow(['Job_Title', 'Job_Url', 'Location', 'Company', 'Post_Date', 'Extraction_Date', 'Summary', 'Salary']) + 
writer.writerows(records) + +main('business manager', 'Geneva') #creating a demo csv file to access the records From 34ca5081477b443c822644e95b4ff135eb104c8f Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Tue, 14 May 2024 23:55:30 +0530 Subject: [PATCH 4/7] Update __init__.py --- src/scrape_up/Indeed/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scrape_up/Indeed/__init__.py b/src/scrape_up/Indeed/__init__.py index 8b137891..d8c6ca60 100644 --- a/src/scrape_up/Indeed/__init__.py +++ b/src/scrape_up/Indeed/__init__.py @@ -1 +1,2 @@ - +from .indeed import Indeed +__all__= ["Indeed"] From e8441520aa4b2deacd6b5854658b2b130830832a Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Tue, 14 May 2024 23:57:47 +0530 Subject: [PATCH 5/7] Update dev-documentation.md --- dev-documentation.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev-documentation.md b/dev-documentation.md index 2673a397..f025ca7a 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1616,3 +1616,14 @@ First create an object of class `Dictionary`. | `.word_of_the_day_definition()` | Returns the definition of the word of the day. | --- + +## Indeed + +First create an object of class `Indeed`. + +| Methods | Details | +| ------------------------------- | ---------------------------------------------- | +| `.get_url()` | Returns the word of the day. | +| `.word_of_the_day_definition()` | Returns the definition of the word of the day. | + +--- From 57ed86b93a384a4f802cbbe2b19fd1029d1987b4 Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Wed, 15 May 2024 00:03:29 +0530 Subject: [PATCH 6/7] Update dev-documentation.md Updated the Indeed Scrapper documentation with its methods and details. 
--- dev-documentation.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dev-documentation.md b/dev-documentation.md index f025ca7a..171534b1 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1620,10 +1620,16 @@ First create an object of class `Dictionary`. ## Indeed First create an object of class `Indeed`. +```python +from scrape_up import indeed -| Methods | Details | -| ------------------------------- | ---------------------------------------------- | -| `.get_url()` | Returns the word of the day. | -| `.word_of_the_day_definition()` | Returns the definition of the word of the day. | +indeed_job = indeed.get_url(position="business manager",location="Geneva") +indeed_job.get_record() +``` + | Methods | Details | + | ---------------------- | ------------------------------------------------------------------------------------------------ | + | `.get_url()` | Returns the URL of the job having a specific position and location. | + | `.get_record()` | Returns the company details like job title, company name, location, job post date, and salary. | + --- From a57dcea5c96c9f0e0dbd516dfeb78d587aaf886f Mon Sep 17 00:00:00 2001 From: Subhoshri Pal <138369802+Subhoshri@users.noreply.github.com> Date: Wed, 15 May 2024 00:11:32 +0530 Subject: [PATCH 7/7] Update documentation.md --- documentation.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/documentation.md b/documentation.md index 9bb5230c..45508751 100644 --- a/documentation.md +++ b/documentation.md @@ -733,3 +733,21 @@ boxoffice = imdb.BoxOffice() | Methods | Details | | --------------- | ------------------------------------------------------------------------------ | | `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released.| + + +#### Indeed + +Create an object of class `Indeed`. 
+```python +from scrape_up import indeed + +indeed_job = indeed.get_url(position="business manager",location="Geneva") +indeed_job.get_record() +``` + + | Methods | Details | + | ---------------------- | ------------------------------------------------------------------------------------------------ | + | `.get_url()` | Returns the URL of the job having a specific position and location. | + | `.get_record()` | Returns the company details like job title, company name, location, job post date, and salary. | + +---