Merge pull request #27 from schemen/devel
Version 0.6.0
schemen authored Mar 12, 2020
2 parents f88810c + bde3e25 commit edfa9f2
Showing 14 changed files with 374 additions and 37 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -11,3 +11,5 @@ kindlegen.exe
venv
main.db
log/*
docker-compose.yml
Dockerfile
39 changes: 20 additions & 19 deletions README.md
@@ -14,12 +14,23 @@ M2em lets you automatically download Mangas via RSS Feed that updates at a conf
## Supported Websites

* Mangastream
* MangaFox
* MangaFox (requires the Splash rendering container)
* Cdmnet

# Setup

M2em requires Python3 and I highly recommend working in a virtualenv. Some OS require the python-dev package!
M2em requires Python3 and I highly recommend working in a virtualenv. If you want to use manga sources that are JavaScript-heavy, I recommend using Docker to deploy the m2em binary and the rendering service together. Some OS require the python-dev package!

## Docker Setup
You can use the Dockerfile or the image schemen/m2em. All options in the config.ini are available as environment variables. Make sure you write them exactly the same!

Have a look at the example Compose file in the repository. It deploys two containers, m2em and splash; Splash renders websites that use JavaScript. The alias (which you can add to your bashrc if you want) lets you call the containerized application directly:

```x-sh
docker-compose up -d
alias m2em='sudo docker exec -it m2em_m2em_1 ./m2em.py'
m2em -h
```

## Create and install virtual environment
```x-sh
@@ -45,23 +45,6 @@ deactivate

Get Kindlegen here: https://www.amazon.com/gp/feature.html?docId=1000765211

## Docker Setup
You can use the Dockerfile or the image schemen/m2em. All options in the config.ini are available as environment variables. Make sure you write them exactly the same!

Example Compose file:
```
version: '2'
services:
m2em:
image: schemen/m2em:latest
environment:
- SMTPServer=mail.example.com
- [email protected]
- EmailAddressPw=verysecurepassword
volumes:
- <DATA_DIRECTORY>:/usr/src/app/data
```

## Concept
As a concept, M2em has different workers that run in a loop. All Chapter/user data is saved in a SQLite3 Database.
@@ -155,6 +149,11 @@ optional arguments:
--daemon Run as daemon
-d, --debug Debug Mode
-v, --version show program's version number and exit
-f "filter_regex", --filter "filter_regex"
Adds a filter (Python regex format) to filter the
title of any manga parsed. Example: "(?i)one-punch"
-fl, --filter-list Lists all filters
```
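The `--filter` option above accepts any Python regular expression. As a quick standalone sketch of how such a filter matches parsed manga titles (the titles below are made-up examples):

```python
import re

# Example filter in Python regex format, as in the usage text above;
# "(?i)" makes the match case-insensitive
filter_value = "(?i)one-punch"
titles = ["One-Punch Man 123", "Naruto 700"]

# Keep only the titles that match the filter
matching = [t for t in titles if re.search(filter_value, t)]
print(matching)
```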

@@ -185,6 +184,8 @@ EbookFormat = MOBI
# Ebook Profile setting, check
# https://github.com/ciromattia/kcc for more information
EbookProfile = KV
# If you want to run Splash independently, change this setting
SplashServer = http://splash:8050
# Sender Email Server Settings
SMTPServer = mail.example.com
ServerPort = 587
60 changes: 59 additions & 1 deletion bin/Helper.py
@@ -91,7 +91,46 @@ def printFeeds():

logging.info(table.draw())


'''
Function that writes a filter into the DB
Returns: N/A
'''
def writeFilter(filter_value):

# Connect to DB
db.connection()

# Insert data (Filter.create already persists the row)
new_filter = Filter.create(filtervalue=filter_value)
logging.info("Successfully added \"%s\" to the list of filters", filter_value)

# Close connection
db.close()

'''
Function that gets filter data and displays it nicely
Returns: N/A
'''
def printFilters():

table = texttable.Texttable()
table.set_deco(texttable.Texttable.HEADER)
table.set_cols_dtype(['i', # int
't',]) # text
table.header(["ID", "FILTER"])

# Connect
db.connection()

for row in Filter.select():
table.add_row([row.filterid, row.filtervalue])

# Close connection
db.close()

logging.info(table.draw())

'''
Function that gets feed data and displays it nicely
Returns: N/A
@@ -408,6 +447,22 @@ def getUsers():

return users

'''
Function that gets the current DB version for migrations
Returns: $dbversion
'''
def getMigrationVersion():

# Make the query
db.connection()

try:
version = Migratehistory.select().order_by(Migratehistory.id.desc()).get().name
except OperationalError:
version = ""

# Close connection
db.close()

return version



'''
@@ -523,6 +578,9 @@ def sanetizeName(name):
# Replace characters that are problematic in file names
name = name.replace(":", "_")
name = name.replace("/", "")
return name
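Restated outside the diff, the sanitization above simply swaps out characters that are awkward in file names — a minimal sketch:

```python
# Minimal sketch of the name sanitization above: replace colons
# and drop forward slashes (both are problematic in file names)
def sanetize_name(name):
    name = name.replace(":", "_")
    name = name.replace("/", "")
    return name

print(sanetize_name("Re:Zero 1/2"))
```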

23 changes: 23 additions & 0 deletions bin/Migrator.py
@@ -0,0 +1,23 @@
from peewee_migrate import Router
from peewee import SqliteDatabase

import bin.Config as Config


# Load config right at the start
config = Config.load_config()

db = SqliteDatabase(config['Database'])

def migrate():
router = Router(db)
router.run()

# Create migration
#router.create('initial')

# Run migration/migrations
#router.run('initial')

# Run all unapplied migrations

11 changes: 10 additions & 1 deletion bin/Models.py
@@ -37,6 +37,15 @@ class Feeds(ModelBase):
feedid = AutoField()
url = TextField()

class Migratehistory(ModelBase):
id = AutoField()
name = CharField()
migrated_at = DateTimeField()

class Filter(ModelBase):
filterid = AutoField()
filtervalue = TextField()

def create_tables():
db.connection()
db.create_tables([User, Chapter, Feeds])
db.create_tables([User, Chapter, Feeds, Filter])
17 changes: 15 additions & 2 deletions bin/RssParser.py
@@ -2,6 +2,7 @@
import logging
import ssl
import feedparser
import re
from bin.models.Manga import Manga
from bin.Models import *

@@ -34,5 +35,17 @@ def RssParser():

# No need to continue if it is already saved :)
if not current_manga.duplicated.exists():
current_manga.print_manga()
current_manga.save()

# Check if any filters are set, continue as usual if not.
if Filter.select().exists():

# Save the manga only if its title matches at least one filter
if any(re.search(f.filtervalue, current_manga.title) for f in Filter.select()):
current_manga.save()
current_manga.print_manga()
else:
current_manga.save()
current_manga.print_manga()

45 changes: 34 additions & 11 deletions bin/sourceparser/Mangafox.py
@@ -5,28 +5,32 @@
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
import bin.Config as Config

'''
MangaFox Parser
'''

# Splash Rendering Service address
config = Config.load_config()
splash_server = config["SplashServer"]

'''
get Manga Title
Returns: title
'''
def getTitle(page):
title = None
soup = BeautifulSoup(page.content, 'html.parser')

# Get Manga Title
var = soup.findAll("h2")
step1 = ''.join(var[0].findAll(text=True))
step2 = step1.split()
step3 = step2[:-3]
title = ' '.join(step3)
search = re.search('content="Read\s(.*?)\smanga online,', str(soup))
try:
title = search.group(1)
except AttributeError:
logging.error("No Title Found!")

return title

@@ -56,7 +60,7 @@ def getPages(page):
soup = BeautifulSoup(page.content, 'html.parser')

# Get the page count
search =re.search('var total_pages=(.*?);', str(soup))
search = re.search('var imagecount=(.*?);', str(soup))
pages = search.group(1)
return pages

@@ -108,11 +112,30 @@ def getPagesUrl(starturl,pages):
'''
def getImageUrl(pageurl):
# Download Page
page = requests.get(pageurl)

# Splash LUA script
script = """
splash.resource_timeout = 5
splash:add_cookie{"IsAdult", "1", "/", domain="fanfox.net"}
splash:on_request(function(request)
if string.find(request.url, "tenmanga.com") ~= nil then
request.abort()
end
end)
splash:go(args.url)
return splash:html()
"""

logging.debug("Sending rendering request to Splash")
resp = requests.post(str(splash_server + "/run"), json={
'lua_source': script,
'url': pageurl
})
page = resp.content

#Pass page to parser
soup = BeautifulSoup(page.content, 'html.parser')
var1 = soup.find(id='image')
var = re.search('style=\"cursor:pointer\" src=\"//(.*?)\"', str(page))

imageurl = var1['src']
logging.debug(var.group(1))
imageurl = "http://" + var.group(1)
return imageurl
2 changes: 2 additions & 0 deletions config.ini
@@ -4,6 +4,8 @@ Database = data/main.db
Sleep = 900
EbookFormat = MOBI
EbookProfile = KV
SplashServer = http://splash:8050
DisableMigrations = False
# Sender Email Server Settings
SMTPServer = mail.example.com
ServerPort = 587
17 changes: 17 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,17 @@
version: '2'
services:
m2em:
image: schemen/m2em:latest
environment:
- SMTPServer=mail.example.com
- [email protected]
- EmailAddressPw=verysecurepassword
volumes:
- m2em:/usr/src/app/data

splash:
image: scrapinghub/splash
command: --max-timeout 3600

volumes:
m2em: