Skip to content

Commit

Permalink
Configurable rate-limit!
Browse files Browse the repository at this point in the history
Instance changed to esmailelbob.xyz because the official one was breaking (likely due to scraping/bot protection)
Configurable rate-limit added - applies to downloader and scraper

Signed-off-by: nanometer5088 <[email protected]>
  • Loading branch information
nanometer5088 committed Jun 24, 2023
1 parent f951ce5 commit 5a7ba85
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 23 deletions.
12 changes: 3 additions & 9 deletions src/byuser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,14 @@ def proxitok_scraper(username: str) -> list[str]:
session = requests.Session()
direct_links = []
next_href = ""
rate_limit = 0
while True:
url = f"{OPTIONS['proxitok_instance']}/@{username}{next_href}"
response = session.get(url)
log(f"Scraping {url}")

if response.status_code == 429 or response.status_code == 403:
# may want to adjust this ratio
rate_limit += 1
sleep_time = 30 * rate_limit
print(f"{response.status_code} {response.reason} sleeping for {sleep_time}")
log(f"\n{response.status_code} {response.reason} sleeping for {sleep_time}")
time.sleep(sleep_time)
continue
if OPTIONS["ratelimit"] != 0:
log(f'Sleeping for {OPTIONS["ratelimit"]}s')
time.sleep(OPTIONS["ratelimit"])

if not response.ok:
error_msg = f"{response.status_code} {response.reason} getting {url}"
Expand Down
10 changes: 7 additions & 3 deletions src/constants.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# Do not change anything here
APP = {
"name": "CLI TikTok",
"version": 0.85
"version": 0.86
}


# Here are the app settings. You are free to configure this field
OPTIONS = {
# This controls the Proxitok instance that will be scraped to obtain the URLs.
"proxitok_instance": "https://proxitok.pabloferreiro.es",
"proxitok_instance": "https://proxitok.esmailelbob.xyz",

# This handles the command used to playback videos. It's heavily recommended to use MPV
# Make sure your player can launch through the CLI and exits after playback
"player_command": "mpv"
"player_command": "mpv",

# This is the wait time in seconds. It helps keep TikTok and Proxitok from rate-limiting the user
# It applies to both scraping and downloading
"ratelimit": 5
}
7 changes: 6 additions & 1 deletion src/downloader.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import time

from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadError

from log import logtofile as log
from src.functions import url_redirection

from src.constants import OPTIONS

def downloader(url):
ydl_opts = {
Expand Down Expand Up @@ -38,6 +39,10 @@ def downloadtiktoks(urls):
randomvideo = index = index + 1
url = url_redirection(urls[randomvideo])

if OPTIONS["ratelimit"] != 0:
log(f'Sleeping for {OPTIONS["ratelimit"]}s')
time.sleep(OPTIONS["ratelimit"])

try:
downloader(url)
log(f"Video {url} was downloaded")
Expand Down
14 changes: 4 additions & 10 deletions src/trending.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from bs4 import BeautifulSoup

from log import logtofile as log

from src.constants import OPTIONS

def streamtrending(amount:int = 24):
links = proxitok_trending(amount)
Expand All @@ -28,22 +28,16 @@ def proxitok_trending(amount: int = 24) -> list[str]:
session = requests.Session()
direct_links = []
next_href = ""
rate_limit = 0
while True:
# The "next" page url is always the same but loads different trending videos each time
url = f"{OPTIONS['proxitok_instance']}/trending{next_href}"

response = session.get(url)
log(f"Scraping {url}")

if response.status_code == 429 or response.status_code == 403:
# may want to adjust this ratio
rate_limit += 1
sleep_time = 30 * rate_limit
print(f"{response.status_code} {response.reason} sleeping for {sleep_time}")
log(f"\n{response.status_code} {response.reason} sleeping for {sleep_time}")
time.sleep(sleep_time)
continue
if OPTIONS["ratelimit"] != 0:
log(f'Sleeping for {OPTIONS["ratelimit"]}s')
time.sleep(OPTIONS["ratelimit"])

if not response.ok:
error_msg = f"{response.status_code} {response.reason} getting {url}"
Expand Down

0 comments on commit 5a7ba85

Please sign in to comment.