diff --git a/bin/m2emConverter.py b/bin/m2emConverter.py
index e9529ac..d330b58 100644
--- a/bin/m2emConverter.py
+++ b/bin/m2emConverter.py
@@ -22,14 +22,17 @@ def RecursiveConverter(config):
 
             # get relevant data of this Manga
             mangatitle = chapter[2]
+            manganame = chapter[11]
 
             # check if mangatitle contains ":" characters that OS can't handle as folders
-            if ":" in mangatitle:
-                mangatitle = mangatitle.replace(":", "_")
+            mangatitle = helper.sanetizeName(mangatitle)
 
-            imagefolder = str(saveloc + mangatitle + "/images/")
-            eblocation = str(saveloc + mangatitle + "/" + mangatitle + "." + ebformat.lower())
-            cbzlocation = str(saveloc + mangatitle + "/" + mangatitle + ".cbz")
+            # check if manganame contains ":" characters that OS can't handle as folders
+            manganame = helper.sanetizeName(manganame)
+
+            imagefolder = str(saveloc + manganame + "/" + mangatitle + "/images/")
+            eblocation = str(saveloc + manganame + "/" + mangatitle + "/" + mangatitle + "." + ebformat.lower())
+            cbzlocation = str(saveloc + manganame + "/" + mangatitle + "/" + mangatitle + ".cbz")
 
 
             # Create CBZ to make creation easier
@@ -43,7 +46,7 @@ def RecursiveConverter(config):
             try:
                 zf = zipfile.ZipFile(cbzlocation, "w")
             except Exception as e:
-                logging.warn("Failed opening archive! %s" % e)
+                logging.warning("Failed opening archive! %s" % e)
diff --git a/bin/m2emDownloader.py b/bin/m2emDownloader.py
index a3a059d..8b52803 100644
--- a/bin/m2emDownloader.py
+++ b/bin/m2emDownloader.py
@@ -1,9 +1,13 @@
 import logging
 import os
 import requests
+from shutil import move
 import bin.m2emHelper as helper
 import bin.sourceparser.m2emMangastream as msparser
-
+import bin.sourceparser.m2emMangafox as mxparser
+from PIL import Image
+from PIL import ImageOps
+from PIL import ImageFilter
 
 def ChapterDownloader(config):
 
@@ -25,14 +29,27 @@ def ChapterDownloader(config):
         mangastarturl = chapter[4]
         mangapages = chapter[9]
         mangatitle = chapter[2]
+        manganame = chapter[11]
 
         # check if mangatitle contains ":" characters that OS can't handle as folders
-        if ":" in mangatitle:
-            mangatitle = mangatitle.replace(":", "_")
+        mangatitle = helper.sanetizeName(mangatitle)
+
+        # check if manganame contains ":" characters that OS can't handle as folders
+        manganame = helper.sanetizeName(manganame)
+
+        # Old download folder layout from v0.1.0 (saveloc/<title>)
+        oldlocation = str(saveloc + mangatitle)
+        newlocation = str(saveloc + manganame)
 
-        downloadfolder = str(saveloc + mangatitle + "/images")
+        # Define download location
+        downloadfolder = str(saveloc + manganame + "/" + mangatitle + "/images")
 
+        # Check if the old download location is still being used
+        if os.path.isdir(oldlocation):
+            logging.info("Moving old download location to the new one")
+            helper.createFolder(newlocation)
+            move(oldlocation, newlocation)
 
         if os.path.isdir(downloadfolder):
@@ -59,9 +76,16 @@ def ChapterDownloader(config):
 
         # Mangafox Parser
-        elif origin == "mangafox.com":
-            #logging.info("Getting Mangadata from Mangafox.me")
-            pass
+        elif origin == "mangafox.me":
+            urllist = mxparser.getPagesUrl(mangastarturl, mangapages)
+
+            # Turn Manga pages into Image links!
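+            # Each entry in urllist is one chapter page; getImageUrl() fetches
+            # that page and extracts its image link, so this loop performs one
+            # HTTP request per page before the actual image downloads start.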
+            imageurls = []
+            for i in urllist:
+                imageurls.append(mxparser.getImageUrl(i))
+            logging.debug("List of all Images for %s" % mangatitle)
+            logging.debug(imageurls)
 
         else:
             pass
@@ -74,8 +98,32 @@ def ChapterDownloader(config):
         counter = 0
         for image in imageurls:
             counter = counter + 1
-            f = open(downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png", 'wb')
+
+            imagepath = downloadfolder + "/" + "{0:0=3d}".format(counter) + ".png"
+
+            f = open(imagepath, 'wb')
             f.write(requests.get(image).content)
-            f.close
+            f.close()
+
+            # Cleanse image, remove footer
+            #
+            # I have borrowed this code from the kmanga project.
+            # https://github.com/aplanas/kmanga/blob/master/mobi/mobi.py#L416
+            # Thanks a lot to Alberto Planas for coming up with it!
+            #
+            if origin == "mangafox.me":
+                logging.debug("Cleaning Mangafox image")
+                img = Image.open(imagepath)
+                _img = ImageOps.invert(img.convert(mode='L'))
+                _img = _img.point(lambda x: x and 255)
+                _img = _img.filter(ImageFilter.MinFilter(size=3))
+                _img = _img.filter(ImageFilter.GaussianBlur(radius=5))
+                _img = _img.point(lambda x: (x >= 48) and x)
+
+                cleaned = img.crop(_img.getbbox()) if _img.getbbox() else img
+                cleaned.save(imagepath)
+
+
+    logging.info("Finished download!")
\ No newline at end of file
diff --git a/bin/m2emHelper.py b/bin/m2emHelper.py
index d0fa530..faefd19 100644
--- a/bin/m2emHelper.py
+++ b/bin/m2emHelper.py
@@ -5,13 +5,9 @@ import texttable
 import requests
 import validators
-
-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse
-
+from urllib.parse import urlparse
 import bin.sourceparser.m2emMangastream as msparser
+import bin.sourceparser.m2emMangafox as mxparser
 
 
 '''
@@ -70,6 +66,7 @@ def createDB(config):
         logging.info(e)
     finally:
         conn.close()
+    logging.info("Created database %s" % database)
 
 
 '''
 Function set manga as sent
@@ -561,7 +558,7 @@ def switchChapterSend(chapterid,config):
 
 
 '''
-Function that gets feed data and display it nicely
+Function that prints the last 10 chapters
 Returns: N/A
 '''
 def printChapters(config):
@@ -589,6 +586,9 @@ def printChapters(config):
 
     # Reverse List to get newest first
     __tabledata.reverse()
 
+    # Cut the list down to the 10 most recent chapters
+    __cuttabledata = __tabledata[:10]
+
     table = texttable.Texttable(max_width=120)
     table.set_deco(texttable.Texttable.HEADER)
     table.set_cols_align(["l", "l", "l", "l", "l", "l"])
@@ -601,12 +601,14 @@ def printChapters(config):
     table.header (["ID", "MANGA", "CHAPTER", "CHAPTERNAME", "RSS ORIGIN", "SEND STATUS"])
 
     logging.info("Listing the last 10 chapters:")
-    for i in range(0,10):
-        if __tabledata[i][8] == 1:
+    for row in __cuttabledata:
+        # Translate the issent flag into a readable label
+        if row[8] == 1:
             sendstatus = "SENT"
         else:
             sendstatus = "NOT SENT"
-        table.add_row([__tabledata[i][0], __tabledata[i][11], __tabledata[i][10], __tabledata[i][5]+"\n", str(__tabledata[i][1]), sendstatus])
+        table.add_row([row[0], row[11], row[10], row[5]+"\n", str(row[1]), sendstatus])
+
     logging.info(table.draw())
@@ -711,16 +713,22 @@ def getSourceURL(url):
 Function that gets Manga Data from Chapter URL
 Returns: mangadata (array)
 '''
-def getMangaData(url):
+def getMangaData(url, entry):
 
     # Get source of to decide which parser to use
     origin = getSourceURL(url)
 
     # Mangastream Parser
     if origin == "mangastream.com":
         logging.debug("Getting Mangadata from Mangastream.com for %s" % url)
 
+        # Data that is available directly from the feed entry
+        title = entry.title
+        chapter_name = entry.description
+        chapter_pubDate = entry.published
+
+        # Load page once to hand it over to parser function
         logging.debug("Loading Page to gather data...")
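+        # The chapter page is fetched exactly once here; the response object
+        # is then handed to the parser helpers, so none of them needs to
+        # re-request the page.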
         page = requests.get(url)
 
@@ -732,11 +740,29 @@ def getMangaData(url):
 
         logging.debug("Mangadata succesfully loaded")
 
-        mangadata = [manganame, pages, chapter]
+        mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]
 
     # Mangafox Parser
-    elif origin == "mangafox.com":
-        logging.info("Getting Mangadata from Mangafox.me")
+    elif origin == "mangafox.me":
+        logging.debug("Getting Mangadata from Mangafox.me for %s" % url)
+
+        # Data that is available directly from the feed entry
+        title = entry.title
+        chapter_pubDate = entry.published
+
+        # Load page once to hand it over to parser function
+        logging.debug("Loading Page to gather data...")
+        page = requests.get(url)
+
+        # Getting the data
+        manganame = mxparser.getTitle(page)
+        pages = mxparser.getPages(page)
+        chapter = mxparser.getChapter(url)
+        chapter_name = mxparser.getChapterName(page)
+
+        logging.debug("Mangadata successfully loaded")
+
+        mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]
 
 
     else:
@@ -757,3 +783,14 @@ def createFolder(folder):
             logging.debug("Folder %s Created!" % folder)
     else:
         logging.debug("Folder %s Exists!" % folder)
+
+
+'''
+Function that returns a sanitized folder name
+'''
+def sanetizeName(name):
+    # Replace ":" characters, which the OS can't handle in folder names
+    return name.replace(":", "_")
diff --git a/bin/m2emRssParser.py b/bin/m2emRssParser.py
index ea5b091..20a9a93 100644
--- a/bin/m2emRssParser.py
+++ b/bin/m2emRssParser.py
@@ -34,5 +34,8 @@ def RssParser(config):
             current_manga = Manga()
             current_manga.database = database
             current_manga.load_from_feed(entry, str(i[1]))
-            current_manga.print_manga()
-            current_manga.save()
+
+            # No need to continue if it is already saved :)
+            if len(current_manga.duplicated) == 0:
+                current_manga.print_manga()
+                current_manga.save()
diff --git a/bin/models/m2emManga.py b/bin/models/m2emManga.py
index 7c8989a..ae0eb2d 100644
--- a/bin/models/m2emManga.py
+++ b/bin/models/m2emManga.py
@@ -27,23 +27,43 @@ def __init__(self):
     def load_from_feed(self, entry, parent_feed):
         self.chapter_link = entry.link
 
-        # Getting specific manga data
-        logging.debug("Fetching Data from Weblink")
-        mangadata = helper.getMangaData(self.chapter_link)
-        logging.debug("Finished Collecting Chapter Data!")
-
-        self.manga_name = mangadata[0]
-        self.title = entry.title
-        self.chapter = mangadata[2]
-        self.chapter_name = entry.description
-        self.chapter_pages = mangadata[1]
-        self.chapter_pubDate = entry.published
-        self.parent_feed = parent_feed
-
-        # Set some defaul values
-        self.ispulled = 0
-        self.isconverted = 0
-        self.issent = 0
+        # Open Database
+        try:
+            conn = sqlite3.connect(self.database)
+        except Exception as e:
+            logging.error("Could not connect to DB %s" % e)
+            return False
+        logging.debug("Successfully Connected to DB %s" % self.database)
+        c = conn.cursor()
+
+        # Check if the link is already in the DB so that only chapters that
+        # have not been downloaded yet are fetched
+        logging.debug("Checking if chapter is already saved...")
+        c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
+        self.duplicated = c.fetchall()
+        conn.close()
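+
+        # Note: self.duplicated also acts as a guard for callers; RssParser
+        # and save() both check it, and an empty list marks the chapter as new.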
+
+        if len(self.duplicated) != 0:
+            logging.debug("Manga is already in Database! Skipping...")
+            logging.debug("Duplicated Data: %s" % self.duplicated)
+        else:
+
+            # Getting specific manga data
+            logging.debug("Fetching Data from Weblink")
+            mangadata = helper.getMangaData(self.chapter_link, entry)
+            logging.debug("Finished Collecting Chapter Data!")
+
+            self.manga_name = mangadata[0]
+            self.title = mangadata[3]
+            self.chapter = mangadata[2]
+            self.chapter_name = mangadata[4]
+            self.chapter_pages = mangadata[1]
+            self.chapter_pubDate = mangadata[5]
+            self.parent_feed = parent_feed
+
+            # Set some default values
+            self.ispulled = 0
+            self.isconverted = 0
+            self.issent = 0
 
     def print_manga(self):
         logging.debug("Title: {}".format(self.title))
@@ -69,15 +89,12 @@ def save(self):
             return False
         logging.debug("Succesfully Connected to DB %s" % self.database)
         c = conn.cursor()
-        logging.debug("Checking if chapter is already saved...")
 
-        # Check if Feed is already saved in DB
-        c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
-        duplicated = c.fetchall()
-
+        # The duplicate check already ran in load_from_feed(), which stored
+        # its result in self.duplicated
 
-        logging.debug("Duplicated Data: %s" % duplicated)
-        if len(duplicated) != 0:
+        if len(self.duplicated) != 0:
             logging.debug("Manga is already in Database! Skipping...")
         else:
             logging.info("Saving Chapter Data for %s" % self.title)
diff --git a/bin/sourceparser/m2emMangafox.py b/bin/sourceparser/m2emMangafox.py
new file mode 100644
index 0000000..b033d33
--- /dev/null
+++ b/bin/sourceparser/m2emMangafox.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+import logging
+import re
+import requests
+from urllib.parse import urlparse
+from bs4 import BeautifulSoup
+
+'''
+
+    MangaFox Parser
+
+'''
+
+
+'''
+get Manga Title
+Returns: title
+'''
+def getTitle(page):
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Get the Manga title from the first <h2> heading and strip the trailing
+    # volume/chapter words
+    var = soup.findAll("h2")
+    step1 = ''.join(var[0].findAll(text=True))
+    step2 = step1.split()
+    step3 = step2[:-3]
+    title = ' '.join(step3)
+
+    return title
+
+
+'''
+get Manga Chapter name
+Returns: Chapter name
+'''
+def getChapterName(page):
+    soup = BeautifulSoup(page.content, 'html.parser')
+
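+    # Assumption: the page title has the form
+    # "<Manga> <Chapter>: <Chapter Name> at MangaFox.me"; the regex captures
+    # the text between ": " and ' at MangaFox.me"'. If the markup changes,
+    # re.search() returns None and search.group(1) raises an AttributeError.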
+    search = re.search(': (.*?) at MangaFox.me"', str(soup))
+    chaptername = search.group(1)
+    return chaptername
+
+
+'''
+get Manga Pages
+Returns: integer pages
+'''
+def getPages(page):
+    soup = BeautifulSoup(page.content, 'html.parser')
+
+    # Read the page count from the embedded "var total_pages=N;" script
+    search = re.search('var total_pages=(.*?);', str(soup))
+    pages = int(search.group(1))
+    return pages
+
+
+'''
+get Manga chapter
+Returns: chapter number (string)
+'''
+def getChapter(url):
+    # Read the chapter number from the "/cNNN/" part of the chapter URL
+    search = re.search('/c(.*?)/', str(url))
+    chapter = search.group(1)
+    return chapter
+
+
+'''
+get Manga Pages URL
+Returns: urllist
+'''
+def getPagesUrl(starturl, pages):
+    pagesurllist = []
+
+    # Split URL to create list
+    parsed = urlparse(starturl)
+
+    # get url loc
+    urlpath = parsed.path
+
+    # start url generator
+    for page in range(pages):
+        page = page + 1
+        urlpathsplit = urlpath.split("/")
+        urlpathsplit[-1] = str(page)
+        fullurllocation = "/".join(urlpathsplit)
+        fullurl = parsed.scheme + "://" + parsed.netloc + fullurllocation + ".html"
+        pagesurllist.append(fullurl)
+
+    logging.debug("All pages:")
+    logging.debug(pagesurllist)
+    return pagesurllist
+
+
+'''
+get Manga Image URL
+Returns: imageurl
+'''
+def getImageUrl(pageurl):
+    # Download Page
+    page = requests.get(pageurl)
+
+    # Pass page to parser
+    soup = BeautifulSoup(page.content, 'html.parser')
+    var1 = soup.find(id='image')
+
+    imageurl = var1['src']
+    return imageurl
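+
+
+if __name__ == '__main__':
+    # Minimal manual smoke test; a sketch only, not used by m2em itself.
+    # It needs network access, and the chapter URL below is a hypothetical
+    # placeholder, not a known live chapter.
+    logging.basicConfig(level=logging.DEBUG)
+    demourl = "http://mangafox.me/manga/example_manga/c001/1.html"
+    for pageurl in getPagesUrl(demourl, 3):
+        print(getImageUrl(pageurl))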