diff --git a/tests/unit/via/views/view_video_test.py b/tests/unit/via/views/view_video_test.py index 6cce9fc7..4debdfea 100644 --- a/tests/unit/via/views/view_video_test.py +++ b/tests/unit/via/views/view_video_test.py @@ -6,7 +6,7 @@ from pyramid.httpexceptions import HTTPUnauthorized from via.exceptions import BadURL -from via.views.view_video import view_youtube_video +from via.views.view_video import CAPTION_TRACK_PREFERENCES, view_youtube_video # webargs's kwargs injection into view functions falsely triggers Pylint's # no-value-for-parameter all the time so just disable it file-wide. @@ -22,7 +22,6 @@ def test_it( video_url, ViaSecurityPolicy, ): - # Override default `None` response. pyramid_request.params["via.video.lang"] = sentinel.transcript_id response = view_youtube_video(pyramid_request) @@ -61,11 +60,38 @@ def test_it( }, } - def test_it_defaults_to_en(self, pyramid_request, youtube_service): + def test_it_looks_up_transcripts(self, pyramid_request, youtube_service): pyramid_request.params.pop("via.video.lang", None) response = view_youtube_video(pyramid_request) + youtube_service.get_video_info.assert_called_once_with( + video_url=youtube_service.canonical_video_url.return_value + ) + video = youtube_service.get_video_info.return_value + video.caption.find_matching_track.assert_called_once_with( + CAPTION_TRACK_PREFERENCES + ) + caption_track = video.caption.find_matching_track.return_value + + assert response["api"]["transcript"]["url"] == pyramid_request.route_url( + "api.youtube.transcript", + video_id=youtube_service.get_video_id.return_value, + transcript_id=caption_track.id, + ) + + @pytest.mark.parametrize("has_captions,has_matches", ((True, False), (False, True))) + def test_it_defaults_to_en_a( + self, pyramid_request, youtube_service, has_captions, has_matches + ): + pyramid_request.params.pop("via.video.lang", None) + video = youtube_service.get_video_info.return_value + video.has_captions = has_captions + if not has_matches: + video.caption.find_matching_track.return_value = None + + response = view_youtube_video(pyramid_request) + assert response["api"]["transcript"]["url"] == pyramid_request.route_url( "api.youtube.transcript", video_id=youtube_service.get_video_id.return_value, @@ -82,7 +108,7 @@ def test_it_errors_if_the_url_is_invalid(self, pyramid_request): "query": {"url": ["Not a valid URL."]} } - def test_it_errors_if_the_url_is_not_a_YouTube_url( + def test_it_errors_if_the_url_is_not_a_youtube_url( self, pyramid_request, youtube_service ): # YouTubeService returns None if it can't extract the YouTube video ID diff --git a/via/views/view_video.py b/via/views/view_video.py index 908cf3d7..ffc8970d 100644 --- a/via/views/view_video.py +++ b/via/views/view_video.py @@ -1,4 +1,5 @@ import marshmallow +from h_matchers import Any from h_vialib import Configuration from pyramid.httpexceptions import HTTPUnauthorized from pyramid.view import view_config @@ -8,6 +9,24 @@ from via.exceptions import BadURL from via.security import ViaSecurityPolicy from via.services import YouTubeService +from via.services.youtube_api import CaptionTrack + +CAPTION_TRACK_PREFERENCES = ( + # Plain English first + CaptionTrack(language_code="en"), + # Plain varieties of English + CaptionTrack(language_code=Any.string.matching("^en-.*")), + # Sub-categories of plain English + CaptionTrack(language_code="en", name=Any()), + # English varieties with names + CaptionTrack(language_code=Any.string.matching("^en-.*"), name=Any()), + # Any auto generated English + CaptionTrack(language_code=Any.string.matching("^en-.*"), name=Any(), kind=Any()), + # Any track which can be translated into English + CaptionTrack( + language_code=Any(), name=Any(), kind=Any(), translated_language_code="en" + ), +) @view_config(renderer="via:templates/view_video.html.jinja2", route_name="youtube") @@ -15,18 +34,18 @@ {"url": fields.Url(required=True)}, location="query", unknown=marshmallow.INCLUDE ) def view_youtube_video(request, url, **kwargs): - youtube_service = request.find_service(YouTubeService) + youtube_service: YouTubeService = request.find_service(YouTubeService) if not youtube_service.enabled: raise HTTPUnauthorized() video_id = youtube_service.get_video_id(url) - video_url = youtube_service.canonical_video_url(video_id) - video_title = youtube_service.get_video_title(video_id) - if not video_id: raise BadURL(f"Unsupported video URL: {url}", url=url) + video_url = youtube_service.canonical_video_url(video_id) + video_title = youtube_service.get_video_title(video_id) + via_config, client_config = Configuration.extract_from_params(kwargs) return { @@ -41,7 +60,9 @@ def view_youtube_video(request, url, **kwargs): "url": request.route_url( "api.youtube.transcript", video_id=video_id, - transcript_id=via_config.get("video", {}).get("lang", "en.a"), + transcript_id=_get_transcript_id( + youtube_service, video_url, via_config + ), ), "method": "GET", "headers": { @@ -50,3 +71,20 @@ def view_youtube_video(request, url, **kwargs): } }, } + + +def _get_transcript_id(youtube_service, url, via_config): + # Try the `via.video.lang` param first + if transcript_id := via_config.get("video", {}).get("lang"): + return transcript_id + + # Then look up a transcript matching our preferences + video = youtube_service.get_video_info(video_url=url) + if video.has_captions and ( + caption_track := video.caption.find_matching_track(CAPTION_TRACK_PREFERENCES) + ): + return caption_track.id + + # This shouldn't be possible to get to if this was an option, but we need a + # fall-back. We could also fail here instead? + return "en.a"