-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscrape.py
97 lines (83 loc) · 3.24 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import csv
import datetime

import requests
from bs4 import BeautifulSoup
# Course codes to scrape. When this list is left empty, the script looks up
# the courses under every curriculum category instead.
# Example: courses = ["CSE3001", "CSE2006"]
courses = []

# Timestamp sent as the "x" form field; captured once when the module loads.
_loaded_at = datetime.datetime.now(datetime.timezone.utc)

# Session values to be copied in by hand from the browser's dev tools.
config = {
    "Cookie": "",  # dev tools > request headers
    "_csrf": "",  # dev tools > payload
    "authorizedID": "",  # registration number
    "x": _loaded_at.strftime("%a, %d %b %Y %H:%M:%S GMT"),
}

# Form fields posted with every request. "courseCode" and "cccategory" start
# empty and are overwritten later, just before the requests that need them.
payload = dict(
    _csrf=config["_csrf"],
    courseCode="",
    authorizedID=config["authorizedID"],
    x=config["x"],
    cccategory="",
)

# Request headers imitating an XMLHttpRequest issued by Chrome 103 on Windows.
headers = {
    # Content negotiation and connection handling.
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
    # NOTE(review): fixed value captured from the browser; requests normally
    # derives Content-Length from the encoded body - confirm it is not needed.
    "Content-Length": "116",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    # Session cookie supplied through config.
    "Cookie": config["Cookie"],
    "DNT": "1",
    # Origin information for the target site.
    "Host": "vtop.vit.ac.in",
    "Origin": "https://vtop.vit.ac.in",
    "Referer": "https://vtop.vit.ac.in/vtop/content",
    # Browser fingerprint headers.
    "sec-ch-ua": '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}
# Seconds to wait on each HTTP request, so that a stalled connection cannot
# hang the script indefinitely.
_REQUEST_TIMEOUT = 30


def _post_soup(url):
    """POST the shared form payload to ``url`` and return the parsed HTML.

    The module-level ``payload`` and ``headers`` are read at call time, so
    fields changed between calls are picked up.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status,
            rather than parsing an error page as if it were course data.
    """
    response = requests.post(
        url, data=payload, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


# Display names parallel to `courses`. Stays empty when `courses` was filled
# in by hand; rows are then written without a name column.
subject_name = []

if not courses:
    # No explicit course list: fetch the curriculum categories first, then
    # collect every course offered under each category.
    url = "https://vtop.vit.ac.in/vtop/academics/common/StudentRegistrationScheduleAllocation"
    category_select = _post_soup(url).find("select", id="curriculumCategory")
    if category_select is None:
        # The page did not contain the dropdown (for example, the session
        # values are missing or stale); fail with an actionable message.
        raise SystemExit(
            "Could not find the curriculumCategory dropdown in the response; "
            "check the Cookie, _csrf and authorizedID values in config."
        )
    # The first <option> is skipped (presumably a placeholder entry).
    categories = [
        option.get("value") for option in category_select.find_all("option")
    ][1:]
    print(categories)

    url = "https://vtop.vit.ac.in/vtop/academics/common/getCoursesListForCurriculmCategory"
    courses = []
    for category in categories:
        payload["cccategory"] = category
        for option in _post_soup(url).find_all("option"):
            code = option.get("value")
            # Ignore options that carry no course code (empty or missing).
            if code:
                courses.append(code)
                subject_name.append(option.text)

url = "https://vtop.vit.ac.in/vtop/academics/common/getCoursesDetailForRegistration"
for index, course_code in enumerate(courses):
    payload["courseCode"] = course_code
    spans = _post_soup(url).select(
        "#courseDetailFragement > div > table > tr > td > span"
    )

    # Leading columns: the course code, plus its name when one was discovered.
    leading = [course_code]
    if index < len(subject_name):
        leading.append(subject_name[index])

    # Every four consecutive spans form one output row; an incomplete
    # trailing group is dropped instead of raising IndexError.
    rows = [
        leading + [span.text for span in spans[start:start + 4]]
        for start in range(0, len(spans) - 3, 4)
    ]

    if rows:
        # Append once per course so finished courses survive a later failure.
        # csv.writer quotes fields containing commas, quotes or newlines.
        with open("courses.csv", "a", newline="", encoding="utf-8") as f:
            csv.writer(f, lineterminator="\n").writerows(rows)