diff --git a/.dockerignore b/.dockerignore index 27e9f85..11f1fae 100644 --- a/.dockerignore +++ b/.dockerignore @@ -11,3 +11,5 @@ kindlegen.exe venv main.db log/* +docker-compose.yml +Dockerfile diff --git a/README.md b/README.md index 24f4786..d455a8c 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,23 @@ M2em let's you automatically download Mangas via RSS Feed that updates at a conf ## Supported Websites * Mangastream -* MangaFox +* MangaFox (With Splash Rendering container) * Cdmnet # Setup -M2em requires Python3 and I highly recommend working in a virtualenv. Some OS require the python-dev package! +M2em requires Python3 and I highly recommend working in a virtualenv. If you want to use manga sources which are JavaScript heavy, I recommend using Docker to deploy the m2em binary and the rendering service together. Some OS require the python-dev package! + +## Docker Setup +You can use the Dockerfile or the image schemen/m2em. All options in the config.ini are available as environment variables. Make sure you write them exactly the same! + +Have a look at the example Compose file in the repository. This will deploy two containers, m2em and splash. Splash is used to render websites which use JavaScript. The alias (which you can add to your bashrc if you want) allows you to directly call the containerized application. + +``` +docker-compose up -d +alias m2em='sudo docker exec -it m2em_m2em_1 ./m2em.py' +m2em -h +``` ## Create and install virtual environment ```x-sh @@ -45,23 +56,6 @@ deactivate Get Kindlegen here: https://www.amazon.com/gp/feature.html?docId=1000765211 -## Docker Setup -You can use the Dockerfile or the image schemen/m2em. All options in the config.ini are available as environment variable. Make sure you write the exactly the same! 
- -Example Compose file: -``` -version: '2' -services: - m2em: - image: schemen/m2em:latest - environment: - - SMTPServer=mail.example.com - - EmailAddress=comic@example.com - - EmailAddressPw=verysecurepassword - volumes: - - :/usr/src/app/data - -``` ## Concept As a concept, M2em has different workers that run in a loop. All Chapter/user data is saved in a SQLite3 Database. @@ -155,6 +149,11 @@ optional arguments: --daemon Run as daemon -d, --debug Debug Mode -v, --version show program's version number and exit + -f "filter_regex", --filter "filter_regex" + Adds a filter(python regex format), to filter the + title of any manga parsed. Example: "(?i)one-punch" + -fl, --filter-list Lists all filters + ``` @@ -185,6 +184,8 @@ EbookFormat = MOBI # Ebook Profile setting, check # https://github.com/ciromattia/kcc for more information EbookProfile = KV +# If you want to run splash independently, change this setting +SplashServer = http://splash:8050 # Sender Email Server Settings SMTPServer = mail.example.com ServerPort = 587 diff --git a/bin/Helper.py b/bin/Helper.py index 7ee73e4..cc87b46 100644 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -91,7 +91,46 @@ def printFeeds(): logging.info(table.draw()) - +''' +Function write a filter into the DB +Returns: N/A +''' +def writeFilter(filter_value): + + # Connect to DB + db.connection() + + # Insert Data + feed = Filter.create(filtervalue=filter_value) + feed.save() + logging.info("Succesfully added \"%s\" to the List of Filters", (filter_value)) + + # Close connection + db.close() + +''' +Function that gets filter data and display it nicely +Returns: N/A +''' +def printFilters(): + + table = texttable.Texttable() + table.set_deco(texttable.Texttable.HEADER) + table.set_cols_dtype(['i', # int + 't',]) # text + table.header(["ID", "FILTER"]) + + # Connect + db.connection() + + for row in Filter.select(): + table.add_row([row.filterid, row.filtervalue]) + + # Close connection + db.close() + + logging.info(table.draw()) + ''' 
Function that gets feed data and display it nicely Returns: N/A @@ -408,6 +447,22 @@ def getUsers(): return users +''' +Function that gets the current DB version for migrations +Returns: $dbversion +''' +def getMigrationVersion(): + + # Make the query + db.connection() + + try: + version = Migratehistory.select().order_by(Migratehistory.id.desc()).get().name + except OperationalError as error: + version = "" + + return version + ''' @@ -523,6 +578,9 @@ def sanetizeName(name): if ":" in name: name = name.replace(":", "_") return name + elif "/" in name: + name = name.replace("/", "") + return name else: return name diff --git a/bin/Migrator.py b/bin/Migrator.py new file mode 100644 index 0000000..995b645 --- /dev/null +++ b/bin/Migrator.py @@ -0,0 +1,23 @@ +from peewee_migrate import Router +from peewee import SqliteDatabase + +import bin.Config as Config + + +# Load config right at the start +config = Config.load_config() + +db = SqliteDatabase(config['Database']) + +def migrate(): + router = Router(db) + router.run() + +# Create migration +#router.create('initial') + +# Run migration/migrations +#router.run('initial') + +# Run all unapplied migrations + diff --git a/bin/Models.py b/bin/Models.py index 9a33586..4319828 100644 --- a/bin/Models.py +++ b/bin/Models.py @@ -37,6 +37,15 @@ class Feeds(ModelBase): feedid = AutoField() url = TextField() +class Migratehistory(ModelBase): + id = AutoField() + name = CharField() + migrated_at = DateTimeField() + +class Filter(ModelBase): + filterid = AutoField() + filtervalue = TextField() + def create_tables(): db.connection() - db.create_tables([User, Chapter, Feeds]) + db.create_tables([User, Chapter, Feeds, Filter]) diff --git a/bin/RssParser.py b/bin/RssParser.py index 19dbf41..f6386a5 100644 --- a/bin/RssParser.py +++ b/bin/RssParser.py @@ -2,6 +2,7 @@ import logging import ssl import feedparser +import re from bin.models.Manga import Manga from bin.Models import * @@ -34,5 +35,17 @@ def RssParser(): # No need to 
continue if it is already saved :) if not current_manga.duplicated.exists(): - current_manga.print_manga() - current_manga.save() + + # Check if any filters are set, continue as usual if not. + if Filter.select().exists(): + filters = Filter.select().execute() + for filter_entry in filters.iterator(): + + # Save manga that match the filter + if re.search(filter_entry.filtervalue, current_manga.title): + current_manga.save() + current_manga.print_manga() + else: + current_manga.save() + current_manga.print_manga() + diff --git a/bin/sourceparser/Mangafox.py b/bin/sourceparser/Mangafox.py index 74bb704..9d170f3 100644 --- a/bin/sourceparser/Mangafox.py +++ b/bin/sourceparser/Mangafox.py @@ -5,6 +5,7 @@ from urllib.parse import urlparse import requests from bs4 import BeautifulSoup +import bin.Config as Config ''' @@ -12,21 +13,24 @@ ''' - +# Splash Rendering Service address +config = Config.load_config() +splash_server = config["SplashServer"] ''' get Manga Title Returns: title ''' def getTitle(page): + title = None soup = BeautifulSoup(page.content, 'html.parser') #Get Manga Titel - var = soup.findAll("h2") - step1 = ''.join(var[0].findAll(text=True)) - step2 = step1.split() - step3 = step2[:-3] - title = ' '.join(step3) + search = re.search('content="Read\s(.*?)\smanga online,', str(soup)) + try: + title = search.group(1) + except AttributeError: + logging.error("No Title Fount!") return title @@ -56,7 +60,7 @@ def getPages(page): soup = BeautifulSoup(page.content, 'html.parser') #Get Manga Titel - search =re.search('var total_pages=(.*?);', str(soup)) + search =re.search('var imagecount=(.*?);', str(soup)) pages = search.group(1) return pages @@ -108,11 +112,30 @@ def getPagesUrl(starturl,pages): ''' def getImageUrl(pageurl): # Download Page - page = requests.get(pageurl) + + # Splash LUA script + script = """ + splash.resource_timeout = 5 + splash:add_cookie{"IsAdult", "1", "/", domain="fanfox.net"} + splash:on_request(function(request) + if 
string.find(request.url, "tenmanga.com") ~= nil then + request.abort() + end + end) + splash:go(args.url) + return splash:html() + """ + + logging.debug("Sending rendering request to Splash") + resp = requests.post(str(splash_server + "/run"), json={ + 'lua_source': script, + 'url': pageurl + }) + page = resp.content #Pass page to parser - soup = BeautifulSoup(page.content, 'html.parser') - var1 = soup.find(id='image') + var =re.search('style=\"cursor:pointer\" src=\"//(.*?)\"', str(page)) - imageurl = var1['src'] + logging.debug(var.group(1)) + imageurl = "http://" + var.group(1) return imageurl diff --git a/config.ini b/config.ini index 698a0ad..d16129d 100644 --- a/config.ini +++ b/config.ini @@ -4,6 +4,8 @@ Database = data/main.db Sleep = 900 EbookFormat = MOBI EbookProfile = KV +SplashServer = http://splash:8050 +DisableMigrations = False # Sender Email Server Settings SMTPServer = mail.example.com ServerPort = 587 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1e3f939 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,17 @@ +version: '2' +services: + m2em: + image: schemen/m2em:latest + environment: + - SMTPServer=mail.example.com + - EmailAddress=comic@example.com + - EmailAddressPw=verysecurepassword + volumes: + - m2em:/usr/src/app/data + + splash: + image: scrapinghub/splash + command: --max-timeout 3600 + +volumes: + m2em: \ No newline at end of file diff --git a/m2em.py b/m2em.py index 183c212..de1ca02 100755 --- a/m2em.py +++ b/m2em.py @@ -15,6 +15,7 @@ import bin.DownloaderHandler as mdownloader import bin.ConverterHandler as mconverter import bin.SenderHandler as msender +import bin.Migrator as migrator class M2em: """ Main Class """ @@ -44,6 +45,16 @@ def __init__(self): helper.createFolder(self.config["SaveLocation"]) helper.createDB() + # Check whether there are some database migrations + mversion = helper.getMigrationVersion() + ".py" + if self.config["DisableMigrations"] == "True": + 
logging.debug("Migrations disabled! Current version: %s ", mversion) + else: + if mversion in os.listdir(os.getcwd() + "/migrations"): + logging.debug("No migrations required! Current version: %s ", mversion) + else: + migrator.migrate() + def read_arguments(self): """ function that reads all arguments """ @@ -97,6 +108,12 @@ def read_arguments(self): action="store_true") parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) + parser.add_argument('-f', '--filter', + help='Adds a filter(python regex format), to filter the title of any manga parsed. Example: "(?i)one-punch"', + nargs=1, metavar='"filter_regex"') + parser.add_argument('-fl', '--filter-list', + help='Lists all filters', + action='store_true') self.args = parser.parse_args() @@ -117,6 +134,8 @@ def read_arguments(self): and self.args.switch_send is None \ and self.args.add_user is False \ and self.args.list_manga is None \ + and self.args.filter is None \ + and self.args.filter_list is None \ and not any([self.args.add_user, self.args.create_db, self.args.daemon, @@ -144,6 +163,16 @@ def save_feed_to_db(self): else: logging.error("You need to enter an URL!") + ''' + Catch -f/--filter + ''' + def add_filter(self): + if len(self.args.filter) > 0: + filter_value = self.args.filter[0] + logging.debug("Entered filter: %s", filter_value) + helper.writeFilter(filter_value) + else: + logging.error("You need to enter a filter value!") ''' Catch -s/--switch-user @@ -171,12 +200,16 @@ def delete_feed(self): ''' - Catch --list-feeds + Catch -lf/--list-feeds ''' def list_feeds(self): helper.printFeeds() - + ''' + Catch -fl/--filter-list + ''' + def filter_list(self): + helper.printFilters() ''' Catch -L/--list-chapters-all ''' @@ -365,6 +398,14 @@ def run(self): self.process_chapter() return + if self.args.filter: + self.add_filter() + return + + if self.args.filter_list: + self.filter_list() + return + # Mainloop if self.args.start: daemon = True diff --git 
a/migrations/001_initial.py b/migrations/001_initial.py new file mode 100644 index 0000000..bd032d6 --- /dev/null +++ b/migrations/001_initial.py @@ -0,0 +1,43 @@ +"""Peewee migrations -- 001_initial.py. + +Some examples (model - class or model name):: + + > Model = migrator.orm['model_name'] # Return model in current state by name + + > migrator.sql(sql) # Run custom SQL + > migrator.python(func, *args, **kwargs) # Run python code + > migrator.create_model(Model) # Create a model (could be used as decorator) + > migrator.remove_model(model, cascade=True) # Remove a model + > migrator.add_fields(model, **fields) # Add fields to a model + > migrator.change_fields(model, **fields) # Change fields + > migrator.remove_fields(model, *field_names, cascade=True) + > migrator.rename_field(model, old_field_name, new_field_name) + > migrator.rename_table(model, new_table_name) + > migrator.add_index(model, *col_names, unique=False) + > migrator.drop_index(model, *col_names) + > migrator.add_not_null(model, *field_names) + > migrator.drop_not_null(model, *field_names) + > migrator.add_default(model, field_name, default) + +""" + +import datetime as dt +import peewee as pw +from decimal import ROUND_HALF_EVEN + +try: + import playhouse.postgres_ext as pw_pext +except ImportError: + pass + +SQL = pw.SQL + + +def migrate(migrator, database, fake=False, **kwargs): + """Write your migrations here.""" + + + +def rollback(migrator, database, fake=False, **kwargs): + """Write your rollback migrations here.""" + diff --git a/migrations/002_testmigration.py b/migrations/002_testmigration.py new file mode 100644 index 0000000..7c9de15 --- /dev/null +++ b/migrations/002_testmigration.py @@ -0,0 +1,51 @@ +"""Peewee migrations -- 002_testmigration.py. 
+ +Some examples (model - class or model name):: + + > Model = migrator.orm['model_name'] # Return model in current state by name + + > migrator.sql(sql) # Run custom SQL + > migrator.python(func, *args, **kwargs) # Run python code + > migrator.create_model(Model) # Create a model (could be used as decorator) + > migrator.remove_model(model, cascade=True) # Remove a model + > migrator.add_fields(model, **fields) # Add fields to a model + > migrator.change_fields(model, **fields) # Change fields + > migrator.remove_fields(model, *field_names, cascade=True) + > migrator.rename_field(model, old_field_name, new_field_name) + > migrator.rename_table(model, new_table_name) + > migrator.add_index(model, *col_names, unique=False) + > migrator.drop_index(model, *col_names) + > migrator.add_not_null(model, *field_names) + > migrator.drop_not_null(model, *field_names) + > migrator.add_default(model, field_name, default) + +""" + +import datetime as dt +import peewee as pw +from decimal import ROUND_HALF_EVEN + +try: + import playhouse.postgres_ext as pw_pext +except ImportError: + pass + +SQL = pw.SQL + + +def migrate(migrator, database, fake=False, **kwargs): + """Write your migrations here.""" + migrator.python(testmigration) + + + +def rollback(migrator, database, fake=False, **kwargs): + """Write your rollback migrations here.""" + migrator.python(testmigration_rollback) + + +def testmigration(): + print("Rolling the test migration") + +def testmigration_rollback(): + print("Reverting the test migration") diff --git a/migrations/003_filters.py b/migrations/003_filters.py new file mode 100644 index 0000000..4537b74 --- /dev/null +++ b/migrations/003_filters.py @@ -0,0 +1,53 @@ +"""Peewee migrations -- 003_filters.py. 
+ +Some examples (model - class or model name):: + + > Model = migrator.orm['model_name'] # Return model in current state by name + + > migrator.sql(sql) # Run custom SQL + > migrator.python(func, *args, **kwargs) # Run python code + > migrator.create_model(Model) # Create a model (could be used as decorator) + > migrator.remove_model(model, cascade=True) # Remove a model + > migrator.add_fields(model, **fields) # Add fields to a model + > migrator.change_fields(model, **fields) # Change fields + > migrator.remove_fields(model, *field_names, cascade=True) + > migrator.rename_field(model, old_field_name, new_field_name) + > migrator.rename_table(model, new_table_name) + > migrator.add_index(model, *col_names, unique=False) + > migrator.drop_index(model, *col_names) + > migrator.add_not_null(model, *field_names) + > migrator.drop_not_null(model, *field_names) + > migrator.add_default(model, field_name, default) + +""" + +import datetime as dt +from bin.Models import * +import peewee as pw +from decimal import ROUND_HALF_EVEN + +try: + import playhouse.postgres_ext as pw_pext +except ImportError: + pass + +SQL = pw.SQL + + +def migrate(migrator, database, fake=False, **kwargs): + """Write your migrations here.""" + + @migrator.create_model + class Filter(ModelBase): + filterid = AutoField() + filtervalue = TextField() + + + +def rollback(migrator, database, fake=False, **kwargs): + """Write your rollback migrations here.""" + + @migrator.remove_model + class Filter(ModelBase): + filterid = AutoField() + filtervalue = TextField() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7c075ab..3092c5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,9 @@ validators==0.12.0 texttable==1.1.1 requests==2.20.0 bs4==0.0.1 -urllib3==1.23 +urllib3==1.24.2 feedparser==5.2.1 KindleComicConverter==5.4.3 peewee==3.7.0 +peewee-migrate==1.1.6 python-dateutil==2.7.5