Skip to content

Commit

Permalink
Merge pull request #4 from schemen/devel
Browse files Browse the repository at this point in the history
Merge work from devel to master
  • Loading branch information
schemen authored Nov 18, 2017
2 parents 02b3b89 + d230a19 commit 77e198e
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 53 deletions.
15 changes: 9 additions & 6 deletions bin/m2emConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ def RecursiveConverter(config):

# get relevant data of this Manga
mangatitle = chapter[2]
manganame = chapter[11]

# check if mangatitle contains ":" characters that OS can't handle as folders
if ":" in mangatitle:
mangatitle = mangatitle.replace(":", "_")
mangatitle = helper.sanetizeName(mangatitle)

imagefolder = str(saveloc + mangatitle + "/images/")
eblocation = str(saveloc + mangatitle + "/" + mangatitle + "." + ebformat.lower())
cbzlocation = str(saveloc + mangatitle + "/" + mangatitle + ".cbz")
# check if manganame contains ":" characters that OS can't handle as folders
manganame = helper.sanetizeName(manganame)

imagefolder = str(saveloc + manganame + "/"+ mangatitle + "/images/")
eblocation = str(saveloc + manganame + "/"+ mangatitle + "/" + mangatitle + "." + ebformat.lower())
cbzlocation = str(saveloc + manganame + "/"+ mangatitle + "/" + mangatitle + ".cbz")


# Create CBZ to make creation easier
Expand All @@ -43,7 +46,7 @@ def RecursiveConverter(config):
try:
zf = zipfile.ZipFile(cbzlocation, "w")
except Exception as e:
logging.warn("Failed opening archive! %s" % e)
logging.warning("Failed opening archive! %s" % e)



Expand Down
64 changes: 56 additions & 8 deletions bin/m2emDownloader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import logging
import os
import requests
from shutil import move
import bin.m2emHelper as helper
import bin.sourceparser.m2emMangastream as msparser

import bin.sourceparser.m2emMangafox as mxparser
from PIL import Image
from PIL import ImageOps
from PIL import ImageFilter

def ChapterDownloader(config):

Expand All @@ -25,14 +29,27 @@ def ChapterDownloader(config):
mangastarturl = chapter[4]
mangapages = chapter[9]
mangatitle = chapter[2]
manganame = chapter[11]

# check if mangatitle contains ":" characters that OS can't handle as folders
if ":" in mangatitle:
mangatitle = mangatitle.replace(":", "_")
mangatitle = helper.sanetizeName(mangatitle)

# check if manganame contains ":" characters that OS can't handle as folders
manganame = helper.sanetizeName(manganame)

# Old Download folder from v0.1.0
oldlocation = str(saveloc + mangatitle)
newlocation = str(saveloc + manganame)

downloadfolder = str(saveloc + mangatitle + "/images")
# Define Download location
downloadfolder = str(saveloc + manganame + "/" + mangatitle + "/images")


# Check if the old DL location is being used
if os.path.isdir(oldlocation):
logging.info("Moving old DL location to new one")
helper.createFolder(newlocation)
move(oldlocation, newlocation)


if os.path.isdir(downloadfolder):
Expand All @@ -59,9 +76,16 @@ def ChapterDownloader(config):


# Mangafox Parser
elif origin == "mangafox.com":
#logging.info("Getting Mangadata from Mangafox.me")
pass
elif origin == "mangafox.me":
urllist = mxparser.getPagesUrl(mangastarturl,mangapages)


# Turn Manga pages into Image links!
imageurls=[]
for i in urllist:
imageurls.append(mxparser.getImageUrl(i))
logging.debug("List of all Images for %s" % mangatitle)
logging.debug(imageurls)

else:
pass
Expand All @@ -74,8 +98,32 @@ def ChapterDownloader(config):
counter = 0
for image in imageurls:
counter = counter + 1
f = open(downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png", 'wb')

imagepath = downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png"

f = open(imagepath, 'wb')
f.write(requests.get(image).content)
f.close


# Cleanse image, remove footer
#
# I have borrowed this code from the kmanga project.
# https://github.com/aplanas/kmanga/blob/master/mobi/mobi.py#L416
# Thanks a lot to Alberto Planas for coming up with it!
#
if origin == "mangafox.me":
logging.debug("Cleaning Mangafox")
img = Image.open(imagepath)
_img = ImageOps.invert(img.convert(mode='L'))
_img = _img.point(lambda x: x and 255)
_img = _img.filter(ImageFilter.MinFilter(size=3))
_img = _img.filter(ImageFilter.GaussianBlur(radius=5))
_img = _img.point(lambda x: (x >= 48) and x)

cleaned = img.crop(_img.getbbox()) if _img.getbbox() else img
cleaned.save(imagepath)



logging.info("Finished download!")
65 changes: 51 additions & 14 deletions bin/m2emHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
import texttable
import requests
import validators

try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse

from urllib.parse import urlparse
import bin.sourceparser.m2emMangastream as msparser
import bin.sourceparser.m2emMangafox as mxparser

'''
Expand Down Expand Up @@ -70,6 +66,7 @@ def createDB(config):
logging.info(e)
finally:
conn.close()
logging.info("Created database %s" % database)

'''
Function set manga as sent
Expand Down Expand Up @@ -561,7 +558,7 @@ def switchChapterSend(chapterid,config):


'''
Function that gets feed data and display it nicely
Function that prints the last 10 chapters
Returns: N/A
'''
def printChapters(config):
Expand Down Expand Up @@ -589,6 +586,9 @@ def printChapters(config):
# Reverse List to get newest first
__tabledata.reverse()

#Cut the list down to max 10 articles
__cuttabledata = __tabledata[:15]

table = texttable.Texttable(max_width=120)
table.set_deco(texttable.Texttable.HEADER)
table.set_cols_align(["l", "l", "l", "l", "l", "l"])
Expand All @@ -601,12 +601,14 @@ def printChapters(config):
table.header (["ID", "MANGA", "CHAPTER", "CHAPTERNAME", "RSS ORIGIN", "SEND STATUS"])

logging.info("Listing the last 10 chapters:")
for i in range(0,10):
if __tabledata[i][8] == 1:
for row in __cuttabledata:
# Rename row[8]
if row[8] == 1:
sendstatus = "SENT"
else:
sendstatus = "NOT SENT"
table.add_row([__tabledata[i][0], __tabledata[i][11], __tabledata[i][10], __tabledata[i][5]+"\n", str(__tabledata[i][1]), sendstatus])
table.add_row([row[0], row[11], row[10], row[5]+"\n", str(row[1]), sendstatus])

logging.info(table.draw())


Expand Down Expand Up @@ -711,16 +713,22 @@ def getSourceURL(url):
Function that gets Manga Data from Chapter URL
Returns: mangadata (array)
'''
def getMangaData(url):
def getMangaData(url,entry):

# Get source of to decide which parser to use
origin = getSourceURL(url)

print(origin)
# Mangastream Parser
if origin == "mangastream.com":

logging.debug("Getting Mangadata from Mangastream.com for %s" % url)

# Easy Stuff
title = entry.title
chapter_name = entry.description
chapter_pubDate = entry.published

# Load page once to hand it over to parser function
logging.debug("Loading Page to gather data...")
page = requests.get(url)
Expand All @@ -732,11 +740,29 @@ def getMangaData(url):

logging.debug("Mangadata succesfully loaded")

mangadata = [manganame, pages, chapter]
mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]

# Mangafox Parser
elif origin == "mangafox.com":
logging.info("Getting Mangadata from Mangafox.me")
elif origin == "mangafox.me":
logging.debug("Getting Mangadata from Mangafox.me for %s" % url)

# Easy Stuff
title = entry.title
chapter_pubDate = entry.published

# Load page once to hand it over to parser function
logging.debug("Loading Page to gather data...")
page = requests.get(url)

# Getting the data
manganame = mxparser.getTitle(page)
pages = mxparser.getPages(page)
chapter = mxparser.getChapter(url)
chapter_name = mxparser.getChapterName(page)

logging.debug("Mangadata succesfully loaded")

mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]


else:
Expand All @@ -757,3 +783,14 @@ def createFolder(folder):
logging.debug("Folder %s Created!" % folder)
else:
logging.debug("Folder %s Exists!" % folder)


'''
Function that returns sanetized folder name
Replaces ":" (not allowed in folder names on some OSes) with "_".
'''
def sanetizeName(name):
    # str.replace is a no-op when ":" is absent, so no branch is needed.
    return name.replace(":", "_")
7 changes: 5 additions & 2 deletions bin/m2emRssParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ def RssParser(config):
current_manga = Manga()
current_manga.database = database
current_manga.load_from_feed(entry, str(i[1]))
current_manga.print_manga()
current_manga.save()

# No need to continue if it is already saved :)
if len(current_manga.duplicated) == 0:
current_manga.print_manga()
current_manga.save()
63 changes: 40 additions & 23 deletions bin/models/m2emManga.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,43 @@ def __init__(self):
def load_from_feed(self, entry, parent_feed):
self.chapter_link = entry.link

# Getting specific manga data
logging.debug("Fetching Data from Weblink")
mangadata = helper.getMangaData(self.chapter_link)
logging.debug("Finished Collecting Chapter Data!")

self.manga_name = mangadata[0]
self.title = entry.title
self.chapter = mangadata[2]
self.chapter_name = entry.description
self.chapter_pages = mangadata[1]
self.chapter_pubDate = entry.published
self.parent_feed = parent_feed

# Set some defaul values
self.ispulled = 0
self.isconverted = 0
self.issent = 0
# Open Database
try:
conn = sqlite3.connect(self.database)
except Exception as e:
logging.error("Could not connect to DB %s" % e)
return False
logging.debug("Succesfully Connected to DB %s" % self.database)
c = conn.cursor()

# Check if link is already in DB to make sure only data gets downloaded that is not yet downloaded
logging.debug("Checking if chapter is already saved...")
c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
self.duplicated = c.fetchall()
conn.close()

if len(self.duplicated) != 0:
logging.debug("Manga is already in Database! Skipping...")
logging.debug("Duplicated Data: %s" % self.duplicated)
else:

# Getting specific manga data
logging.debug("Fetching Data from Weblink")
mangadata = helper.getMangaData(self.chapter_link, entry)
logging.debug("Finished Collecting Chapter Data!")

self.manga_name = mangadata[0]
self.title = mangadata[3]
self.chapter = mangadata[2]
self.chapter_name = mangadata[4]
self.chapter_pages = mangadata[1]
self.chapter_pubDate = mangadata[5]
self.parent_feed = parent_feed

# Set some defaul values
self.ispulled = 0
self.isconverted = 0
self.issent = 0

def print_manga(self):
logging.debug("Title: {}".format(self.title))
Expand All @@ -69,15 +89,12 @@ def save(self):
return False
logging.debug("Succesfully Connected to DB %s" % self.database)
c = conn.cursor()
logging.debug("Checking if chapter is already saved...")

# Check if Feed is already saved in DB
c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
duplicated = c.fetchall()

#c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
#duplicated = c.fetchall()

logging.debug("Duplicated Data: %s" % duplicated)
if len(duplicated) != 0:
if len(self.duplicated) != 0:
logging.debug("Manga is already in Database! Skipping...")
else:
logging.info("Saving Chapter Data for %s" % self.title)
Expand Down
Loading

0 comments on commit 77e198e

Please sign in to comment.