From 0271b36584bb097df7fe01ca298625bfc94de89a Mon Sep 17 00:00:00 2001 From: Tuhin Bepari Date: Fri, 24 Nov 2017 11:02:18 +0600 Subject: [PATCH] Working with Image Downloading --- Image/Download.py | 55 ++++++++---------- Image/__pycache__/Download.cpython-36.pyc | Bin 1722 -> 1474 bytes Models/Complete/Image.py | 3 +- Models/Complete/Link.py | 3 +- .../Complete/__pycache__/Image.cpython-36.pyc | Bin 541 -> 515 bytes test.py | 11 ++-- 6 files changed, 33 insertions(+), 39 deletions(-) diff --git a/Image/Download.py b/Image/Download.py index 39193c2..32488d4 100644 --- a/Image/Download.py +++ b/Image/Download.py @@ -1,51 +1,44 @@ import errno import os.path - +import Models.Queue.Image +import Models.Complete.Image from Image import Save class Download(): - def __init__(self, file_name, path=''): - self.links = set() - self.completed = set() - self.file_name = file_name - self.file_to_set() - self.path = path + path = 'storage/images' - def file_to_set(self) -> object: - """ - Load links from file and set to Set() - :return: object - """ - if not os.path.exists(self.file_name): - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.file_name) - with open(self.file_name, 'rt') as f: - for line in f: - self.links.add(line.replace('\n', '')) - return sorted(self.links) + def __init__(self, take=None): + self.queue = Models.Queue.Image.Image() + self.complete = Models.Complete.Image.Image() + self.limit = take + if not os.path.exists(self.path): + os.makedirs(self.path) def start(self) -> object: """ Start Downloading file :rtype: object """ - for file in self.links: + if isinstance(self.limit, int) and len(self.queue.links) >= self.limit: + links = sorted(self.queue.links)[0:self.limit] + else: + links = self.queue.links + + for file in links: try: - img = Save.SaveFile(file, self.path) + img = Save.Save(file, self.path) img.save() - except: - continue - self.completed.add(file) - self.set_to_file() + except Exception as e: + print(str(e)) + self.complete.add(file) + self.save() - def set_to_file(self) -> object: + def save(self) -> object: """ Update links txt file :return : None """ - remaining = self.links.difference(self.completed) - with open(self.file_name, 'w') as f: - if len(remaining) > 0: - for line in self.links: - f.write(line + "\n") - f.write("") + self.queue.links = self.queue.links.difference(self.complete.links) + self.queue.save() + self.complete.save() diff --git a/Image/__pycache__/Download.cpython-36.pyc b/Image/__pycache__/Download.cpython-36.pyc index da65400230b27f95cfe0c96c233a52a0bb2ccdb2..9de48f03bda9cdf0fce6afa39a6814101ef2244d 100644 GIT binary patch literal 1474 zcmZux&2Aev5GJ|g?rJS7iHjyp4}p5WsHY($?waV>L=L}erK*e*-_O-tIq?j`I&tZ>_(@yODrJ?6r&~!>MO|3OTI2XEx365N zt_ov~i|hPC$wHeEbKI!vEaes~zY~2MPW#Un$A_mz>DNl1&aUmNwmE%PpMTS~7G?-} zxih&HWm~xgxgO9v* z)N?{<-Lx)h%^0t;#_Q$|x))*WAB3^LoElzN;y&IuwvWQbhe>r^gR%ba5H5byWpji@ M^!GWY-PaHP1;1}upa1{> literal 1722 zcmZux&yV9Y6toC;kOayyv!2SxDp`&whEazwdkRVN0!}YT8vF&{kU>hVe^I;^L$r2J z6ZZ+M2$E4wAYPXca5%ho}83E!1nFu4cF9 zg{-f>oQ^+}(!f}Z#ntZoC$n1{<@uzZm^{ZQ8lw;z!5czb95$XiF90}H!d$C*NYNJ_ zygL5YUA=^fAtf#0f~B{n`UB&epbTNHh2t7vfOYRDY9gF*x@JEgujo8k(o^#N(F(t7 zm+_L79VBSY)|8T0{Y!EQbbn2rk>|v8(P!;wUp$SI?ei5dk~j#Hk&@E{0ZWRSCxBRD zWO5AoN0PCs^-CNK!f=q%QJ)%7t)A#A(K|{};IR3>8^67Ya29xAdMV03bQARFnJMMhk52}FjWh7)Dd*H161aa19;sXY(5&+n@q=V0A~Y@ zCwdn?)c2qxL6;U9ycg`JL82BTN#_V zc@smIdPzSS$XYZXS$d-|8x>x2tK8N`aWS8aMnZ{txW(}yx?rwbuM)KlZq&qx)OQlh z+Dc3dCTunpu0EVn3 k^$C_sbqJOXTm^Jub6)=!)JC6dPWKo=)tI?Z5n0#!2jhQ)0RR91 diff --git a/Models/Complete/Image.py b/Models/Complete/Image.py index 50e6785..af38a12 100644 --- a/Models/Complete/Image.py +++ b/Models/Complete/Image.py @@ -5,6 +5,5 @@ class Image(Model): file_path = 'storage/complete/images.txt' def __init__(self): - self.links.clear() - self.fetch() Model.__init__(self) + self.fetch() diff --git a/Models/Complete/Link.py b/Models/Complete/Link.py index 13a20e8..9df62ae 100644 --- a/Models/Complete/Link.py +++ b/Models/Complete/Link.py @@ -5,7 +5,6 @@ class Link(Model): file_path = 'storage/complete/links.txt' def __init__(self): - self.links.clear() - self.fetch() Model.__init__(self) + self.fetch() diff --git a/Models/Complete/__pycache__/Image.cpython-36.pyc b/Models/Complete/__pycache__/Image.cpython-36.pyc index 3149d9db82811b4d945877b9a8bba6455473c13c..78f525c5daf4c7a3ff6297ec1c6f8a95456b9f89 100644 GIT binary patch delta 175 zcmbQs(#*nX%*)F)VX1i3_KBRk9mNaYCGxIV_;^U)O(^5;4Gc*}*u@tA~q^)EqVw)_>XfD74RK*U&#UKU)BiH0K pMi)^KyNC^yV4u97(M$tf5UA!BhfQvNN@-529V3ue46>Go834ZIAwK{B delta 201 zcmZo>najdy%*)F)`&rye8u%KsC2GY;yBcN^?@} M7=gTEkc~Xd06GLH=Kufz diff --git a/test.py b/test.py index 0ea38e7..e744a7e 100644 --- a/test.py +++ b/test.py @@ -3,11 +3,14 @@ import os.path; from Crawler.Page import Page from Crawler.Image import Image +from Image.Download import Download if __name__ == '__main__': + # page=Page('https://gopostie.com') + # page.save_links() - #page=Page('https://gopostie.com') - #page.save_links() + # img = Image('https://gopostie.com') + # img.save_links() - img = Image('https://gopostie.com') - img.save_links() + down = Download() + down.start()