-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmart-downloader.py
180 lines (150 loc) · 5.21 KB
/
smart-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# Author: Kunal Kumar
# Social: twitter.com/l1v1n9h311, instagram.com/prokunal
# Website: procoder.in
import os
import sys
import threading
import urllib.error
import urllib.request
from http.cookies import SimpleCookie
from math import ceil
from urllib.request import Request, urlopen

import requests
import wget
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning
#disabling ssl warning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# --- CLI argument and authenticated-session setup ------------------------
# The course page URL is the first CLI argument; fail fast with a usage
# hint instead of an unhandled IndexError when it is missing.
if len(sys.argv) < 2:
    print("Usage: %s <course_page_url>" % sys.argv[0])
    sys.exit(1)
c_link = sys.argv[1]
# Browser-like User-Agent, sent with every request in this script.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
# Session cookies must be pasted from an authenticated request (captured
# e.g. in Burp Suite); without them the site redirects to the login page.
raw_cookie = input("Enter the cookies: ")
if not raw_cookie:
    print("Please Enter Cookies from burpsuite.")
    sys.exit()
# Parse the raw "k=v; k2=v2" Cookie header into a plain dict for requests.
cookie = SimpleCookie()
cookie.load(raw_cookie)
cookies = {key: morsel.value for key, morsel in cookie.items()}
# --- Collect per-video "accounting" links from the course page -----------
# Send the same browser User-Agent as the rest of the script (the original
# fetched this one page with no headers at all, inconsistently).
req = Request(c_link, headers=headers)
html_page = urlopen(req)
soup = BeautifulSoup(html_page, "lxml")
# Every anchor of the form "/video?id=<n>" identifies one course video;
# rewrite each id onto the "accounting" endpoint, which redirects to the
# actual signed video URL.
prefix = "https://www.pentesteracademy.com/accounting?id="
temp_links_1 = [
    prefix + href.split('=')[1]
    for href in (str(a.get('href')) for a in soup.find_all('a'))
    if href.startswith('/video?id=')
]
# De-duplicate and keep a stable sorted order in one step.
temp_links_1 = sorted(set(temp_links_1))
f_vid = len(temp_links_1)
print('Total videos found %s' % f_vid)
print('If your internet speed is less than 1 mbps, choose 15 videos max to download.')
# --- Ask for the (1-based, inclusive) range of videos to download --------
frm = int(input("Enter the number of videos to download:(from) "))
if frm == 0:
    print('Videos number start from 1.')
    sys.exit()
to = int(input("Enter the number of videos to download:(to) "))
temp_links_1 = temp_links_1[frm-1:to]
final_link = []
# Direct (signed) links from a previous run are cached in a text file
# named "<frm>to<to>_id_<courseid>_videos_links.txt"; reuse when present.
pre_fix = c_link.split('=')[1]
filename = str(frm) + 'to' + str(to) + '_' + 'id_' + pre_fix + '_videos_links.txt'
try:
    # Context manager closes the file; the original leaked the handle and
    # its bare "except:" silently swallowed every error, not just a
    # missing cache file.
    with open(filename, 'r') as f_open:
        # One link per line; drop the empty entry after the trailing \n.
        final_link = f_open.read().split('\n')[:-1]
except OSError:
    # No cache yet (or unreadable) -- links are fetched fresh below.
    pass
# --- Validate cached links (the signed URLs expire) ----------------------
# Guard against an empty cache: the original indexed final_link[0]
# unconditionally and relied on the resulting IndexError being swallowed.
if final_link:
    try:
        urllib.request.urlopen(final_link[0])
    except urllib.error.URLError as err:
        # Compare the HTTP status code directly instead of string-matching
        # the exception text ("HTTP Error 403: Forbidden") as before.
        if getattr(err, 'code', None) == 403:
            print('all previous links got expire.')
            final_link = []
            n = input("Do want to fetch/download all links again? (y/n): ")
            if n == 'y':
                pass
            else:
                print('\ntata bye bye')
                sys.exit()
if len(final_link) == 0:
    # Resolve each accounting URL to its direct signed video URL by
    # reading the redirect Location header (redirects not followed).
    with open(filename, 'w') as f1_open:
        final_link = []
        for link in temp_links_1:
            r = requests.get(link, headers=headers, cookies=cookies, allow_redirects=False, verify=False)
            if r.headers['location'] == "https://www.pentesteracademy.com/login":
                # Redirected to login => the pasted cookies are invalid.
                print("Enter cookies from burpsuite, Browser cookies will not work.")
                sys.exit(0)
            final_link.append(r.headers['location'])
            f1_open.write(str(r.headers['location'] + '\n'))
# --- Skip videos already present in the working directory ----------------
# The file wget will create is URL path segment 6 with the query string
# stripped; if that name already exists locally, drop the link.  A list
# comprehension replaces the original pattern of calling .remove() while
# iterating the same list (which skips elements) wrapped in a blind
# "repeat 9 times" loop to paper over the skips.
_dir = os.listdir(".")
final_link = [
    link for link in final_link
    if link.split("/")[6].split("?")[0] not in _dir
]
if len(final_link) == 0:
    print("All videos are already downloaded.")
    sys.exit(0)
# --- Report how much data is about to be downloaded ----------------------
size_count = 0
count = 0
print("Calculating Total size of all files, it may take upto 2 minute...")
for link in final_link:
    # One request per link just to read the response's Content-Length,
    # exposed by http.client as the .length attribute.
    resp = urllib.request.urlopen(link)
    size_count += resp.length
    count += 1
total_mb = size_count / 1024 / 1024
print("Downloading %d Videos and Total size of all files in MB is %.2fMB and in GB %.2fGB." % (count, total_mb, total_mb / 1024))
# --- Partition links into four chunks for the four download threads ------
print("Downloading start....")
# Per-thread chunk size scales with how many videos were requested.  The
# original chain had no else-branch (NameError on l1 for >= 24 links) and
# a bogus "24 > temp_links_1" int-vs-list comparison (TypeError when
# evaluated); both are fixed while keeping the same chunk sizes for the
# ranges that previously worked.
n_links = len(temp_links_1)
if n_links < 12:
    l1 = ceil(len(final_link) / 3)
elif n_links < 18:
    l1 = ceil(len(final_link) / 4)
else:
    l1 = ceil(len(final_link) / 5)
link1 = final_link[0:l1]
link2 = final_link[l1:2 * l1]
link3 = final_link[2 * l1:3 * l1]
link4 = final_link[3 * l1:4 * l1]
def bar_progress(current, total, width=80):
    """Render an in-place progress line for wget: percent plus MB so far/total."""
    pct = current / total * 100
    line = "Downloading: %d%% [%d MB / %d MB] " % (pct, current / 1024 / 1024, total / 1024 / 1024)
    # Carriage return rewinds to column 0 so the line overwrites itself.
    sys.stdout.write("\r" + line)
    sys.stdout.flush()
def downloader1():
    # Worker: sequentially download every URL in the first chunk.
    for url in link1:
        saved_name = wget.download(url, bar=bar_progress)
        print(saved_name + " Downloaded")
def downloader2():
    # Worker: sequentially download every URL in the second chunk.
    for url in link2:
        saved_name = wget.download(url, bar=bar_progress)
        print(saved_name + " Downloaded")
def downloader3():
    # Worker: sequentially download every URL in the third chunk.
    for url in link3:
        saved_name = wget.download(url, bar=bar_progress)
        print(saved_name + " Downloaded")
def downloader4():
    # Worker: sequentially download every URL in the fourth chunk.
    for url in link4:
        saved_name = wget.download(url, bar=bar_progress)
        print(saved_name + " Downloaded")
# Create all four worker threads first, then start them, preserving the
# original create-everything-then-start ordering.
workers = [
    threading.Thread(target=fn, name='d%d' % i)
    for i, fn in enumerate((downloader1, downloader2, downloader3, downloader4), 1)
]
for worker in workers:
    worker.start()