diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 07ff0bea..70430bb4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,7 @@ repos: hooks: - id: ruff - repo: https://github.com/RobertCraigie/pyright-python - rev: v1.1.368 + rev: v1.1.371 hooks: - id: pyright name: pyright (system) diff --git a/CHANGELOG b/CHANGELOG index 58e73218..e043b50e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Validate if ZIM cannot be created at given output location (#204) - Add videos, subtitles, thumbnails and channel branding to the ZIM "on the fly" (#209) - Remove `--no-zim`, `--keep` CLI arguments +- Add support to index content from `zimui` JSON files in the ZIM using custom `IndexData` (#224) ## [2.3.0] - 2024-05-22 diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml index b42a4519..cddc3029 100644 --- a/scraper/pyproject.toml +++ b/scraper/pyproject.toml @@ -31,7 +31,7 @@ additional-keywords = ["youtube"] [project.optional-dependencies] scripts = ["invoke==2.2.0"] lint = ["black==24.4.2", "ruff==0.4.10"] -check = ["pyright==1.1.368"] +check = ["pyright==1.1.371"] test = ["pytest==8.2.2", "coverage==7.5.3"] dev = [ "pre-commit==3.7.1", diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index e5207920..b8c43ace 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -21,6 +21,7 @@ import yt_dlp from kiwixstorage import KiwixStorage +from libzim.writer import IndexData # type: ignore from pif import get_public_ip from zimscraperlib.download import stream_file from zimscraperlib.filesystem import delete_callback @@ -31,7 +32,7 @@ from zimscraperlib.image.transformation import resize_image from zimscraperlib.inputs import compute_descriptions from zimscraperlib.video.presets import VideoMp4Low, VideoWebmLow -from zimscraperlib.zim import Creator +from zimscraperlib.zim 
class CustomIndexData(IndexData):
    """Index data carrying an explicit title and free-text content.

    Overrides the accessor methods of ``libzim.writer.IndexData`` so that an
    item can be indexed under text that differs from the bytes it serves.
    """

    def __init__(self, title: str, content: str):
        """Store the title and content to expose through the accessors."""
        self.title = title
        self.content = content

    def has_indexdata(self) -> bool:
        """Always report that index data is available."""
        return True

    def get_title(self) -> str:
        """Return the title given at construction."""
        return self.title

    def get_content(self) -> str:
        """Return the content given at construction."""
        return self.content

    def get_keywords(self) -> str:
        """Return an empty string: no keywords are provided."""
        return ""

    def get_wordcount(self) -> int:
        """Return the number of whitespace-separated words in the content."""
        # Falsy content (empty string, None) counts as zero words.
        if not self.content:
            return 0
        return len(self.content.split())
def add_custom_item_to_zim_index(
    self, title: str, content: str, fname: str, zimui_redirect: str
):
    """Add a custom item to the ZIM index.

    A small HTML page is stored at ``index/<fname>``. Its index data is
    overridden so the entry is indexed under ``title`` / ``content`` instead
    of the page's own markup, and the page itself immediately redirects to
    the matching zimui route.

    Args:
        title: Title used for the ZIM entry, the page ``<title>`` and the
            index data.
        content: Free text handed to the index data (not written to the page).
        fname: File name of the entry under the ``index/`` path prefix.
        zimui_redirect: Route appended to ``../index.html#/`` to build the
            redirect target.
    """
    # Local import keeps this block self-contained; only this method needs it.
    from html import escape

    redirect_url = f"../index.html#/{zimui_redirect}"

    # The title is external metadata: escape it (and the URL, which sits
    # inside an attribute) so it cannot break out of the generated markup.
    safe_title = escape(title)
    safe_url = escape(redirect_url)

    # Minimal page whose only job is to bounce the reader to the zimui route.
    html_content = (
        f"<html><head><title>{safe_title}</title>"
        f'<meta http-equiv="refresh" content="0;URL=\'{safe_url}\'" />'
        f"</head><body></body></html>"
    )

    item = StaticItem(
        title=title,
        path="index/" + fname,
        content=html_content.encode("utf-8"),
        mimetype="text/html",
    )
    # Replace the item's index data provider on this instance only, so the
    # index receives the raw title and content rather than the redirect page.
    item.get_indexdata = lambda: CustomIndexData(title, content)

    logger.debug("Adding %s to ZIM index", fname)
    self.zim_file.add_item(item)