-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
111 lines (84 loc) · 3.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# -*- coding: utf-8 -*-
import json
import os
import sys
from time import sleep
from typing import Dict, List
from data_access import DataAccess, OTHER_PLAYLIST_IDS
from youtube import YouTube
# Base directory for everything this script writes: each save_* function
# below builds its output path as ROOT_DIR/db/<kind>/<id>.json.
# It is the working directory at import time, so output lands relative to
# wherever the script is launched from, not relative to this file.
ROOT_DIR = os.getcwd()
def save_all_playlist_items(
    youtube: YouTube, playlist_ids: List[str], dry_run: bool = True
):
    """Fetch the items of each playlist and store them as JSON, one file per playlist.

    For every id in ``playlist_ids`` the result of
    ``youtube.get_pitems_for_pid(pid)`` is serialized with ``json.dumps`` and
    written to ``ROOT_DIR/db/playlist_items/<pid>.json``, overwriting any
    existing file.

    Args:
        youtube: API client exposing ``get_pitems_for_pid``.
        playlist_ids: Playlist ids to fetch, processed in the given order.
        dry_run: When true (the default) nothing is fetched or written; only
            the progress messages are printed.

    NOTE(review): the nesting of this function was reconstructed from a
    listing whose indentation had been lost -- confirm that "Done." and the
    pause are meant to run once per playlist.
    """
    out_dir = os.path.join(ROOT_DIR, "db", "playlist_items")
    if not dry_run:
        # Make sure the target directory exists before any request is sent;
        # otherwise a missing folder would abort the run with
        # FileNotFoundError after the fetch had already been paid for.
        os.makedirs(out_dir, exist_ok=True)

    for pid in playlist_ids:
        print(f"Fetching {pid}")
        if dry_run:
            print("\t(Dry run)")
        else:
            data = youtube.get_pitems_for_pid(pid)
            with open(os.path.join(out_dir, f"{pid}.json"), mode="w") as f:
                f.write(json.dumps(data))
        print("Done.")
        # Short pause between playlists so requests are not fired back to back.
        sleep(0.5)
def save_all_videos(
    youtube: YouTube, playlist_item_dict: Dict[str, List], dry_run: bool = True,
):
    """Fetch the videos behind each playlist's items and store one JSON file per video.

    For every ``(pid, pitems)`` pair, ``youtube.get_videos_for_pitems(pitems)``
    is called and each returned video is serialized with ``json.dumps`` to
    ``ROOT_DIR/db/videos/<video['id']>.json``, overwriting any existing file.

    Args:
        youtube: API client exposing ``get_videos_for_pitems``.
        playlist_item_dict: Mapping of playlist id to that playlist's items.
        dry_run: When true (the default) nothing is fetched or written; only
            the progress messages are printed.

    NOTE(review): the nesting of this function was reconstructed from a
    listing whose indentation had been lost -- confirm that "Done." and the
    pause are meant to run once per playlist.
    """
    out_dir = os.path.join(ROOT_DIR, "db", "videos")
    if not dry_run:
        # Make sure the target directory exists before any request is sent;
        # otherwise a missing folder would abort the run with
        # FileNotFoundError after the fetch had already been paid for.
        os.makedirs(out_dir, exist_ok=True)

    for pid, pitems in playlist_item_dict.items():
        print(f"Fetching videos for {pid}")
        if dry_run:
            print("\t(Dry run)")
        else:
            videos = youtube.get_videos_for_pitems(pitems)
            for video in videos:
                # Each video gets its own file, named after its "id" field.
                video_path = os.path.join(out_dir, f"{video['id']}.json")
                with open(video_path, mode="w") as f:
                    f.write(json.dumps(video))
        print("Done.")
        # Short pause between playlists so requests are not fired back to back.
        sleep(0.5)
def save_threads(youtube: YouTube, da: DataAccess, from_vid: str, dry_run: bool = True):
    """Download and store the comment threads of each video not yet saved.

    Iterates ``da.gen_all_videos_in_order(from_vid)``. A video for which
    ``da.have_comments_for_video`` returns true is skipped (without the
    trailing pause). Otherwise the result of
    ``youtube.get_comment_threads_for_video(vid)`` is serialized with
    ``json.dumps`` to ``ROOT_DIR/db/commentThreads/<vid>.json``.

    Args:
        youtube: API client exposing ``get_comment_threads_for_video``.
        da: Data-access object that yields the videos and reports which ones
            already have comments.
        from_vid: Video id handed to ``da.gen_all_videos_in_order``;
            presumably the video to start from -- verify against DataAccess.
        dry_run: When true (the default) nothing is fetched or written; only
            the progress messages are printed.

    NOTE(review): the nesting of this function was reconstructed from a
    listing whose indentation had been lost -- confirm that the "saved"
    message, separator and pause are meant to run for every non-skipped
    video, dry runs included.
    """
    out_dir = os.path.join(ROOT_DIR, "db", "commentThreads")
    if not dry_run:
        # Make sure the target directory exists before any request is sent;
        # otherwise a missing folder would abort the run with
        # FileNotFoundError after the fetch had already been paid for.
        os.makedirs(out_dir, exist_ok=True)

    for video in da.gen_all_videos_in_order(from_vid):
        vid = video["id"]
        vtitle = video["snippet"]["title"]

        print()
        print(f"Processing {vtitle}...")

        # Never re-download threads that are already stored.
        if da.have_comments_for_video(vid):
            print(f'We\'ve already got comments for "{vtitle}".')
            print("Skipping...")
            continue

        if dry_run:
            print("\t(Dry run)")
        else:
            threads = youtube.get_comment_threads_for_video(vid)
            with open(os.path.join(out_dir, f"{vid}.json"), mode="w") as f:
                f.write(json.dumps(threads))

        print(f'Threads for "{vtitle}" saved.')
        print()
        print("------------------------------------------------------------")

        # Give a little delay between batches.
        # - DOS paranoia.
        sleep(1)
def main():
    """Script entry point: build the API client and run the selected download step.

    The API key is read from the first command-line argument.

    Raises:
        Exception: If no API key argument was supplied, or it is empty.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    # Get the API key as a CLI arg. sys.argv[0] is the script name, so the key
    # only exists when there are at least two entries; indexing blindly would
    # raise IndexError before the friendly error below could ever be reached.
    api_key = sys.argv[1] if len(sys.argv) > 1 else ""
    if not api_key:
        raise Exception("No API key provided.")

    # Get credentials and create an API client
    youtube = YouTube(api_key)

    da = DataAccess()
    pitems_dict = da.get_pitems_dict(OTHER_PLAYLIST_IDS)
    # Only used by the (currently disabled) save_threads step below.
    current_vid = "lM28rfsHge0"

    # The download steps are toggled by hand: uncomment the one to run.
    # save_threads(youtube, da, from_vid=current_vid, dry_run=False)
    # save_all_playlist_items(youtube, OTHER_PLAYLIST_IDS, dry_run=False)
    save_all_videos(youtube, pitems_dict, dry_run=False)
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()