Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[nudostar.tv] add support #6556

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,4 @@ target/

/*.snap
/*_source.tar.bz2
/gallery-dl
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"nitter",
"nozomi",
"nsfwalbum",
"nudostar",
"paheal",
"patreon",
"philomena",
Expand Down
95 changes: 95 additions & 0 deletions gallery_dl/extractor/nudostar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
import re
from .common import Extractor, Message, GalleryExtractor
from .. import text

# URL prefix (scheme optional) shared by the extractor patterns below.
BASE_PATTERN = r"(?:https?://)?nudostar\.tv"


class NudostarGalleryExtractor(GalleryExtractor):
    """Extractor for Nudostar model galleries.

    Walks the listing pages of a model and resolves every linked image
    page to its image URL by delegating to NudostarExtractor.
    """
    category = "nudostar"
    pattern = BASE_PATTERN + r"/models/([\w-]*)/$"
    directory_fmt = ("{category}", "{user_id}")
    filename_fmt = "{filename}.{extension}"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        self.gallery_url = match.group(0)
        GalleryExtractor.__init__(self, match, self.gallery_url)

    def images(self, page):
        """Return a list of (image-url, metadata) tuples.

        'page' is the HTML of the first listing page. Further pages are
        fetched by following the 'next' pagination link until there is
        none left or a link shows up a second time.
        """
        # Compile once instead of re-resolving the pattern for every item
        image_pattern = re.compile(NudostarExtractor.pattern)
        results = []
        visited = set()

        while True:
            for item_html in text.extract_iter(
                    page, '<div class="item">', 'title='):
                page_url = text.extract(item_html, '="', '"')[0]
                if not page_url:
                    # No quoted attribute value in this item; passing the
                    # empty result on to the regex would raise a TypeError
                    continue
                image_match = image_pattern.match(page_url)
                if image_match is None:
                    continue
                image = self._first_image(image_match)
                if image is not None:
                    results.append(image)

            next_page = text.extract(
                page, '<li class="next"><a href="', '"')[0]
            # Stop on the last page, and also when a 'next' link repeats,
            # so a self-referencing pagination link cannot loop forever
            if not next_page or next_page in visited:
                break
            visited.add(next_page)
            page = self.request(next_page).text

        return results

    def _first_image(self, image_match):
        """Return the first (url, metadata) pair of an image page or None"""
        extractor = NudostarExtractor(image_match)
        extractor.session = self.session  # share our session
        extractor.initialize()
        for message in extractor.items():
            if message[0] == Message.Url:
                # only the first URL of each image page is wanted
                return message[1], message[2]
        return None

    def metadata(self, page):
        """Return the metadata dictionary, derived from the gallery URL"""
        model = self.gallery_url.split("/models/")[1].split("/")[0]
        return {
            "gallery_id": model,
            "title": model,
            "user_id": model,
        }


class NudostarExtractor(Extractor):
    """Extractor for individual Nudostar image pages"""
    category = "nudostar"
    directory_fmt = ("{category}", "{user_id}")
    filename_fmt = "{filename}.{extension}"
    # Sample URL: "https://nudostar.tv/models/megan-bitchell/343/"
    # Both groups need at least one character. The user group is not
    # wrapped in a second quantifier, which avoids heavy backtracking on
    # non-matching input and a group that may come back as None.
    pattern = BASE_PATTERN + r"/models/([^&#/]+)/(\w+)/"

    # TODO: page head/title has some good metadata for alternate names?

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.user_id, self.image_id = match.groups()

    def items(self):
        """Yield a Directory and a Url message for the page's image.

        Yields nothing (after logging a warning) when no image link can be
        found in the page.
        """
        # NOTE(review): this class defines no 'subcategory' of its own, so
        # the 'notfound' label is whatever the base class provides; confirm
        # that value is a meaningful label.
        pagetext = self.request(self.url, notfound=self.subcategory).text
        url_regex = (
            r'<a href=\"https://nudostar\.tv/models/[^&#]+'
            r'\s+<img src=\"([^&\"]+)\"'
        )
        match = re.search(url_regex, pagetext)
        if match is None:
            # Page layout differs from what the regex expects; report it
            # instead of failing with an AttributeError on 'None.group'
            self.log.warning("Unable to find image URL on %s", self.url)
            return

        image_url = match.group(1)
        data = text.nameext_from_url(image_url, {"url": image_url})
        data["extension"] = text.ext_from_url(image_url)
        # Name files after model and image ID, not the remote file name
        data["filename"] = f"{self.user_id}-{self.image_id}"
        data["user_id"] = self.user_id

        yield Message.Directory, data
        yield Message.Url, image_url, data
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
"nijie" : "nijie",
"nozomi" : "Nozomi.la",
"nsfwalbum" : "NSFWalbum.com",
"nudostar" : "Nudostar.tv",
"paheal" : "rule #34",
"photovogue" : "PhotoVogue",
"pidgiwiki" : "PidgiWiki",
Expand Down
26 changes: 26 additions & 0 deletions test/results/nudostar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import nudostar


__tests__ = (
# Model gallery: every listing page is walked
{
    "#url"     : "https://nudostar.tv/models/eva-joys/",
    "#class"   : nudostar.NudostarGalleryExtractor,
    "#count"   : ">= 30",
    "category" : "nudostar",
},

# Single image page
{
    "#url"     : "https://nudostar.tv/models/thebigtittiecommittee/148/",
    "#class"   : nudostar.NudostarExtractor,
    "category" : "nudostar",
    "filename" : "thebigtittiecommittee-148",
    "extension": "jpg",
    "user_id"  : "thebigtittiecommittee",
},
)
Loading