-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathreddit-udemy-checkout.py
98 lines (82 loc) · 4.4 KB
/
reddit-udemy-checkout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import requests, urllib # , beautifulsoup4
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from tabledef import *
import re
import datetime
# Timestamp of this run, plus the same day truncated to midnight; the
# truncated value is what gets stored on every row updated below.
now = datetime.datetime.now()
date_last_checked = datetime.datetime.combine(now.date(), datetime.time.min)

# Open an ORM session on the SQLite file courses.db.
engine = create_engine('sqlite:///courses.db')
Session = sessionmaker(bind=engine)
session = Session()
def _text_after(text, marker):
    """Return the stripped part of *text* following the first *marker*.

    Returns an empty string when *marker* does not occur in *text*.
    """
    return text.partition(marker)[2].strip()


def _first_int(text):
    """Return the first run of digits in *text* as an int.

    Raises IndexError when *text* contains no digits.
    """
    return int(re.findall(r'\d+', text)[0])


# For every row still waiting at status "udemy url found", fetch its Udemy
# page, scrape course name, checkout link and prices, and write them back.
# Any failure on a row is reported and the loop moves on to the next row.
for udemy_tr in session.query(Course).filter(Course.status == "udemy url found").filter(Course.post_date == "reddit"):
    # Read the id up front so it can still be reported after a rollback.
    row_id = udemy_tr.id
    try:
        result = requests.get(udemy_tr.udemy_url)
        # status_code is an int, so it must be compared against 200, not "200".
        if result.status_code != 200:
            print("unexpected status code", result.status_code,
                  "- skipping row", str(row_id), "to next...\n")
            continue

        soup = BeautifulSoup(result.content, "html.parser")

        # course_name
        sample = soup.find("h1", "clp-lead__title")
        course_name = sample.text
        print("course_name:", course_name)

        # checkout_url
        sample = soup.find("a", "course-cta--buy")
        checkout_url = "https://www.udemy.com" + sample.attrs['href']
        print("checkout_url:", checkout_url)

        # prices
        sample = soup.find("span", "price-text__current")
        print("sample.text:", sample.text)
        if "Current price:" in sample.text:
            # i.e. Course was once paid, but now on discount (not necessarily free though)
            discounted_price = _text_after(sample.text, "Current price:")
            print("old discounted_price:", discounted_price)
            if discounted_price == "Free":
                discounted_price = 0
            else:
                discounted_price = _first_int(discounted_price)
            print("new discounted_price:", discounted_price)

            # original_price
            sample = soup.find("span", "price-text__old--price")
            original_price = _first_int(_text_after(sample.text, "Original price:"))
            print("original_price:", original_price)
        elif "Price:" in sample.text:
            # i.e. Course is NOT on discount, price might be free or paid.
            # The price is the text AFTER the "Price:" label.
            original_price = _text_after(sample.text, "Price:")
            print("old original_price:", original_price)
            if "Free" in original_price:
                original_price = 0
            else:
                original_price = _first_int(original_price)
            print("new original_price:", original_price)
            discounted_price = None
            print("discounted_price:", discounted_price)
        else:
            print("error has occured when retrieving price. skipping to next...\n")
            continue

        # update db (shared by both price branches)
        udemy_tr.course_name = course_name
        udemy_tr.checkout_url = checkout_url
        udemy_tr.original_price = original_price
        udemy_tr.discounted_price = discounted_price
        udemy_tr.status = "checkout url found"
        udemy_tr.date_last_checked = date_last_checked
        session.commit()
        print("row", str(row_id), "updated\n")
    except Exception as exc:
        # Per-row boundary: a missing page element (soup.find returned None),
        # an unparsable price, a network error or a failed commit must not
        # stop the remaining rows. Exception (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit still terminate the script.
        # Roll back so a failed commit does not leave the session unusable.
        session.rollback()
        print("error:", repr(exc))
        print("error, skipping row", str(row_id), "to next...\n")