diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e810f42215..d312cdd45e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -853,6 +853,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Toonily + https://toonily.com/ + Chapters, Manga + + Toyhouse https://toyhou.se/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d624736211..4c2e90dff0 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -154,6 +154,7 @@ "telegraph", "tmohentai", "toyhouse", + "toonily", "tsumino", "tumblr", "tumblrgallery", diff --git a/gallery_dl/extractor/toonily.py b/gallery_dl/extractor/toonily.py new file mode 100644 index 0000000000..dc83ef3d3c --- /dev/null +++ b/gallery_dl/extractor/toonily.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://toonily.com/""" + +from .common import ChapterExtractor, MangaExtractor +from .. 
class ToonilyBase():
    """Base class for Toonily extractors

    Holds the 'category' and 'root' values shared by all Toonily
    extractors and a helper that turns a chapter heading such as
    "Manga - Chapter 12.5 - Title" into chapter metadata.
    """
    category = "toonily"
    root = "https://toonily.com"

    # "<manga> - Chapter <number>[.<minor>] - <title>"
    # The manga prefix and the title suffix are both optional.
    # Compiled once here instead of on every parse_chapter_string() call.
    _CHAPTER_RE = re.compile(
        r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?")

    @staticmethod
    def _split_chapter_string(chapter_string):
        """Split an already unescaped and stripped chapter heading

        Returns a (manga, chapter, chapter_minor, title) tuple of strings;
        components that are missing from the heading are returned as "".

        A heading without a recognizable "Chapter <number>" part
        (for example "Prologue") does not match the pattern at all.
        It is returned as ("", "", "", chapter_string) so that callers
        get a title-only result instead of an AttributeError from
        calling .groups() on None.
        """
        match = ToonilyBase._CHAPTER_RE.match(chapter_string)
        if match is None:
            return "", "", "", chapter_string

        manga, chapter, minor, title = match.groups()
        # the greedy manga group keeps the whitespace before the dash
        return (manga or "").strip(), chapter, minor or "", title or ""

    @staticmethod
    def parse_chapter_string(chapter_string, data):
        """Parse 'chapter_string' and store its parts in 'data'

        chapter_string: raw chapter heading; HTML entities are unescaped
            and surrounding whitespace is removed before parsing
        data: metadata dict that is updated in place

        Sets "chapter", "chapter_minor", "title", "lang" and "language".
        "manga" is only set when 'data' does not contain it yet, so a
        manga name supplied by the caller takes precedence over the one
        parsed from the heading.

        Nothing is returned. Headings that do not contain
        "Chapter <number>" no longer raise; they end up as the "title"
        with an empty chapter string handed to text.parse_int().
        """
        manga, chapter, minor, title = ToonilyBase._split_chapter_string(
            text.unescape(chapter_string).strip())

        # same value as the former 'data["manga"] = data.pop("manga", manga)'
        data.setdefault("manga", manga)
        data["chapter"] = text.parse_int(chapter)
        data["chapter_minor"] = minor
        data["title"] = title
        data["lang"] = "en"
        data["language"] = "English"

', "

") + if not info: + raise exception.NotFoundError("chapter") + self.parse_chapter_string(info, data) + return data + + def images(self, page): + page = text.extr( + page, '
', '
"): + url , pos = text.extract(chapter, '", "", pos) + self.parse_chapter_string(info, data) + result.append((url, data.copy())) + return result + + def metadata(self, page): + extr = text.extract_from(text.extr( + page, 'class="summary_content">', 'class="manga-action"')) + return { + "manga" : text.extr(page, "

", "

").strip(), + "description": text.unescape(text.remove_html(text.extract( + page, ">", "
", page.index("summary__content"))[0])), + "rating" : text.parse_float( + extr('total_votes">', "").strip()), + "manga_alt" : text.remove_html( + extr("Alternative \n
", "")).split("; "), + "author" : list(text.extract_iter( + extr('class="author-content">', ""), '"tag">', "")), + "artist" : list(text.extract_iter( + extr('class="artist-content">', ""), '"tag">', "")), + "genres" : list(text.extract_iter( + extr('class="genres-content">', ""), '"tag">', "")), + "type" : text.remove_html( + extr("Type \n", "")), + "release" : text.parse_int(text.remove_html( + extr("Release \n", ""))), + "status" : text.remove_html( + extr("Status \n", "")), + } diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 50b6e5d8ce..a622cd4d7d 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -132,6 +132,7 @@ "thatpervert" : "ThatPervert", "thebarchive" : "The /b/ Archive", "thecollection" : "The /co/llection", + "toonily" : "Toonily", "tumblrgallery" : "TumblrGallery", "vanillarock" : "もえぴりあ", "vidyart2" : "/v/idyart2", diff --git a/test/results/toonily.py b/test/results/toonily.py new file mode 100644 index 0000000000..414b2cc68d --- /dev/null +++ b/test/results/toonily.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
from gallery_dl.extractor import toonily
from gallery_dl import exception


# Expected results for the Toonily extractors.
# Keys starting with "#" presumably address the test runner itself
# (URL to run, extractor class, URL pattern, result count, exception);
# all other keys are metadata values to compare -- TODO confirm against
# the test-suite documentation.
__tests__ = (
# regular chapter: 11 image URLs below the WP-manga upload directory
# NOTE(review): "manga" is "Jinxed" here although the URL slug and the
# manga test further down use "Such a Cute Spy" -- confirm that this is
# the name the chapter page really reports.
{
    "#url"     : "https://toonily.com/webtoon/such-a-cute-spy/chapter-36/",
    "#category": ("", "toonily", "chapter"),
    "#class"   : toonily.ToonilyChapterExtractor,
    "#pattern" : r"https://toonily\.com/wp-content/uploads/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+",
    "#count"   : 11,

    "manga"    : "Jinxed",
    "title"    : "",
    "chapter"  : 36,
    "tags"     : ["harem"],
    "lang"     : "en",
    "language" : "English",
},

# chapter number that does not exist: NotFoundError is expected
{
    "#url"     : "https://toonily.com/webtoon/such-a-cute-spy/chapter-1000000/",
    "#category": ("", "toonily", "chapter"),
    "#class"   : toonily.ToonilyChapterExtractor,
    "#exception": exception.NotFoundError,
},

# manga overview: at least 13 chapter URLs plus manga-level metadata
{
    "#url"     : "https://toonily.com/webtoon/such-a-cute-spy",
    "#category": ("", "toonily", "manga"),
    "#class"   : toonily.ToonilyMangaExtractor,
    "#pattern" : r"https://toonily\.com/webtoon/such-a-cute-spy/chapter-\d+([_-].+)?/",
    "#count"   : ">= 13",

    "manga"    : "Such a Cute Spy",
    "author"   : ["Life of Ruin"],
    "artist"   : ["Ganghyeon Yeo"],
    "genres"   : [
        "Action",
        "Comedy",
        "Romance",
        "School Life",
    ],
    "rating"   : float,
    "status"   : "End",
    "lang"     : "en",
    "language" : "English",
    "manga_alt": list,
},

# manga slug that does not exist: HttpError is expected
{
    "#url"     : "https://toonily.com/webtoon/doesnotexist",
    "#category": ("", "toonily", "manga"),
    "#class"   : toonily.ToonilyMangaExtractor,
    "#exception": exception.HttpError,
},

)