Skip to content

Commit

Permalink
Merge pull request #4 from schemen/devel
Browse files Browse the repository at this point in the history
Merge work from devel to master
  • Loading branch information
schemen authored Nov 18, 2017
2 parents 02b3b89 + d230a19 commit 77e198e
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 53 deletions.
15 changes: 9 additions & 6 deletions bin/m2emConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ def RecursiveConverter(config):

# get relevant data of this Manga
mangatitle = chapter[2]
manganame = chapter[11]

# check if mangatitle contains ":" characters that OS can't handle as folders
if ":" in mangatitle:
mangatitle = mangatitle.replace(":", "_")
mangatitle = helper.sanetizeName(mangatitle)

imagefolder = str(saveloc + mangatitle + "/images/")
eblocation = str(saveloc + mangatitle + "/" + mangatitle + "." + ebformat.lower())
cbzlocation = str(saveloc + mangatitle + "/" + mangatitle + ".cbz")
# check if manganame contains ":" characters that OS can't handle as folders
manganame = helper.sanetizeName(manganame)

imagefolder = str(saveloc + manganame + "/"+ mangatitle + "/images/")
eblocation = str(saveloc + manganame + "/"+ mangatitle + "/" + mangatitle + "." + ebformat.lower())
cbzlocation = str(saveloc + manganame + "/"+ mangatitle + "/" + mangatitle + ".cbz")


# Create CBZ to make creation easier
Expand All @@ -43,7 +46,7 @@ def RecursiveConverter(config):
try:
zf = zipfile.ZipFile(cbzlocation, "w")
except Exception as e:
logging.warn("Failed opening archive! %s" % e)
logging.warning("Failed opening archive! %s" % e)



Expand Down
64 changes: 56 additions & 8 deletions bin/m2emDownloader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import logging
import os
import requests
from shutil import move
import bin.m2emHelper as helper
import bin.sourceparser.m2emMangastream as msparser

import bin.sourceparser.m2emMangafox as mxparser
from PIL import Image
from PIL import ImageOps
from PIL import ImageFilter

def ChapterDownloader(config):

Expand All @@ -25,14 +29,27 @@ def ChapterDownloader(config):
mangastarturl = chapter[4]
mangapages = chapter[9]
mangatitle = chapter[2]
manganame = chapter[11]

# check if mangatitle contains ":" characters that OS can't handle as folders
if ":" in mangatitle:
mangatitle = mangatitle.replace(":", "_")
mangatitle = helper.sanetizeName(mangatitle)

# check if manganame contains ":" characters that OS can't handle as folders
manganame = helper.sanetizeName(manganame)

# Old Download folder from v0.1.0
oldlocation = str(saveloc + mangatitle)
newlocation = str(saveloc + manganame)

downloadfolder = str(saveloc + mangatitle + "/images")
# Define Download location
downloadfolder = str(saveloc + manganame + "/" + mangatitle + "/images")


# Check if the old DL location is being used
if os.path.isdir(oldlocation):
logging.info("Moving old DL location to new one")
helper.createFolder(newlocation)
move(oldlocation, newlocation)


if os.path.isdir(downloadfolder):
Expand All @@ -59,9 +76,16 @@ def ChapterDownloader(config):


# Mangafox Parser
elif origin == "mangafox.com":
#logging.info("Getting Mangadata from Mangafox.me")
pass
elif origin == "mangafox.me":
urllist = mxparser.getPagesUrl(mangastarturl,mangapages)


# Turn Manga pages into Image links!
imageurls=[]
for i in urllist:
imageurls.append(mxparser.getImageUrl(i))
logging.debug("List of all Images for %s" % mangatitle)
logging.debug(imageurls)

else:
pass
Expand All @@ -74,8 +98,32 @@ def ChapterDownloader(config):
counter = 0
for image in imageurls:
counter = counter + 1
f = open(downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png", 'wb')

imagepath = downloadfolder + "/" + str("{0:0=3d}".format(counter)) + ".png"

f = open(imagepath, 'wb')
f.write(requests.get(image).content)
f.close


# Cleanse image, remove footer
#
# I have borrowed this code from the kmanga project.
# https://github.com/aplanas/kmanga/blob/master/mobi/mobi.py#L416
# Thanks a lot to Alberto Planas for coming up with it!
#
if origin == "mangafox.me":
logging.debug("Cleaning Mangafox")
img = Image.open(imagepath)
_img = ImageOps.invert(img.convert(mode='L'))
_img = _img.point(lambda x: x and 255)
_img = _img.filter(ImageFilter.MinFilter(size=3))
_img = _img.filter(ImageFilter.GaussianBlur(radius=5))
_img = _img.point(lambda x: (x >= 48) and x)

cleaned = img.crop(_img.getbbox()) if _img.getbbox() else img
cleaned.save(imagepath)



logging.info("Finished download!")
65 changes: 51 additions & 14 deletions bin/m2emHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
import texttable
import requests
import validators

try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse

from urllib.parse import urlparse
import bin.sourceparser.m2emMangastream as msparser
import bin.sourceparser.m2emMangafox as mxparser

'''
Expand Down Expand Up @@ -70,6 +66,7 @@ def createDB(config):
logging.info(e)
finally:
conn.close()
logging.info("Created database %s" % database)

'''
Function set manga as sent
Expand Down Expand Up @@ -561,7 +558,7 @@ def switchChapterSend(chapterid,config):


'''
Function that gets feed data and display it nicely
Function that prints the last 10 chapters
Returns: N/A
'''
def printChapters(config):
Expand Down Expand Up @@ -589,6 +586,9 @@ def printChapters(config):
# Reverse List to get newest first
__tabledata.reverse()

#Cut the list down to max 10 articles
__cuttabledata = __tabledata[:15]

table = texttable.Texttable(max_width=120)
table.set_deco(texttable.Texttable.HEADER)
table.set_cols_align(["l", "l", "l", "l", "l", "l"])
Expand All @@ -601,12 +601,14 @@ def printChapters(config):
table.header (["ID", "MANGA", "CHAPTER", "CHAPTERNAME", "RSS ORIGIN", "SEND STATUS"])

logging.info("Listing the last 10 chapters:")
for i in range(0,10):
if __tabledata[i][8] == 1:
for row in __cuttabledata:
# Rename row[8]
if row[8] == 1:
sendstatus = "SENT"
else:
sendstatus = "NOT SENT"
table.add_row([__tabledata[i][0], __tabledata[i][11], __tabledata[i][10], __tabledata[i][5]+"\n", str(__tabledata[i][1]), sendstatus])
table.add_row([row[0], row[11], row[10], row[5]+"\n", str(row[1]), sendstatus])

logging.info(table.draw())


Expand Down Expand Up @@ -711,16 +713,22 @@ def getSourceURL(url):
Function that gets Manga Data from Chapter URL
Returns: mangadata (array)
'''
def getMangaData(url):
def getMangaData(url,entry):

# Get source of to decide which parser to use
origin = getSourceURL(url)

print(origin)
# Mangastream Parser
if origin == "mangastream.com":

logging.debug("Getting Mangadata from Mangastream.com for %s" % url)

# Easy Stuff
title = entry.title
chapter_name = entry.description
chapter_pubDate = entry.published

# Load page once to hand it over to parser function
logging.debug("Loading Page to gather data...")
page = requests.get(url)
Expand All @@ -732,11 +740,29 @@ def getMangaData(url):

logging.debug("Mangadata succesfully loaded")

mangadata = [manganame, pages, chapter]
mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]

# Mangafox Parser
elif origin == "mangafox.com":
logging.info("Getting Mangadata from Mangafox.me")
elif origin == "mangafox.me":
logging.debug("Getting Mangadata from Mangafox.me for %s" % url)

# Easy Stuff
title = entry.title
chapter_pubDate = entry.published

# Load page once to hand it over to parser function
logging.debug("Loading Page to gather data...")
page = requests.get(url)

# Getting the data
manganame = mxparser.getTitle(page)
pages = mxparser.getPages(page)
chapter = mxparser.getChapter(url)
chapter_name = mxparser.getChapterName(page)

logging.debug("Mangadata succesfully loaded")

mangadata = [manganame, pages, chapter, title, chapter_name, chapter_pubDate]


else:
Expand All @@ -757,3 +783,14 @@ def createFolder(folder):
logging.debug("Folder %s Created!" % folder)
else:
logging.debug("Folder %s Exists!" % folder)


'''
Function that returns sanetized folder name
Replaces ":" (not allowed in folder names on some OSes) with "_".
'''
def sanetizeName(name):
    # str.replace is a no-op when ":" is absent, so no branch is needed.
    return name.replace(":", "_")
7 changes: 5 additions & 2 deletions bin/m2emRssParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ def RssParser(config):
current_manga = Manga()
current_manga.database = database
current_manga.load_from_feed(entry, str(i[1]))
current_manga.print_manga()
current_manga.save()

# No need to continue if it is already saved :)
if len(current_manga.duplicated) == 0:
current_manga.print_manga()
current_manga.save()
63 changes: 40 additions & 23 deletions bin/models/m2emManga.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,43 @@ def __init__(self):
def load_from_feed(self, entry, parent_feed):
self.chapter_link = entry.link

# Getting specific manga data
logging.debug("Fetching Data from Weblink")
mangadata = helper.getMangaData(self.chapter_link)
logging.debug("Finished Collecting Chapter Data!")

self.manga_name = mangadata[0]
self.title = entry.title
self.chapter = mangadata[2]
self.chapter_name = entry.description
self.chapter_pages = mangadata[1]
self.chapter_pubDate = entry.published
self.parent_feed = parent_feed

# Set some defaul values
self.ispulled = 0
self.isconverted = 0
self.issent = 0
# Open Database
try:
conn = sqlite3.connect(self.database)
except Exception as e:
logging.error("Could not connect to DB %s" % e)
return False
logging.debug("Succesfully Connected to DB %s" % self.database)
c = conn.cursor()

# Check if link is already in DB to make sure only data gets downloaded that is not yet downloaded
logging.debug("Checking if chapter is already saved...")
c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
self.duplicated = c.fetchall()
conn.close()

if len(self.duplicated) != 0:
logging.debug("Manga is already in Database! Skipping...")
logging.debug("Duplicated Data: %s" % self.duplicated)
else:

# Getting specific manga data
logging.debug("Fetching Data from Weblink")
mangadata = helper.getMangaData(self.chapter_link, entry)
logging.debug("Finished Collecting Chapter Data!")

self.manga_name = mangadata[0]
self.title = mangadata[3]
self.chapter = mangadata[2]
self.chapter_name = mangadata[4]
self.chapter_pages = mangadata[1]
self.chapter_pubDate = mangadata[5]
self.parent_feed = parent_feed

# Set some defaul values
self.ispulled = 0
self.isconverted = 0
self.issent = 0

def print_manga(self):
logging.debug("Title: {}".format(self.title))
Expand All @@ -69,15 +89,12 @@ def save(self):
return False
logging.debug("Succesfully Connected to DB %s" % self.database)
c = conn.cursor()
logging.debug("Checking if chapter is already saved...")

# Check if Feed is already saved in DB
c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
duplicated = c.fetchall()

#c.execute("SELECT url FROM chapter WHERE url = ?", (str(self.chapter_link),))
#duplicated = c.fetchall()

logging.debug("Duplicated Data: %s" % duplicated)
if len(duplicated) != 0:
if len(self.duplicated) != 0:
logging.debug("Manga is already in Database! Skipping...")
else:
logging.info("Saving Chapter Data for %s" % self.title)
Expand Down
Loading

0 comments on commit 77e198e

Please sign in to comment.