Skip to content

Commit

Permalink
If no transcript is provided, search for a consistent transcript
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Betts committed Jul 19, 2023
1 parent 1832f1b commit 6f8b07a
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 9 deletions.
34 changes: 30 additions & 4 deletions tests/unit/via/views/view_video_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pyramid.httpexceptions import HTTPUnauthorized

from via.exceptions import BadURL
from via.views.view_video import view_youtube_video
from via.views.view_video import CAPTION_TRACK_PREFERENCES, view_youtube_video

# webargs's kwargs injection into view functions falsely triggers Pylint's
# no-value-for-parameter all the time so just disable it file-wide.
Expand All @@ -22,7 +22,6 @@ def test_it(
video_url,
ViaSecurityPolicy,
):
# Override default `None` response.
pyramid_request.params["via.video.lang"] = sentinel.transcript_id

response = view_youtube_video(pyramid_request)
Expand Down Expand Up @@ -61,11 +60,38 @@ def test_it(
},
}

def test_it_defaults_to_en(self, pyramid_request, youtube_service):
def test_it_looks_up_transcripts(self, pyramid_request, youtube_service):
pyramid_request.params.pop("via.video.lang", None)

response = view_youtube_video(pyramid_request)

youtube_service.get_video_info.assert_called_once_with(
video_url=youtube_service.canonical_video_url.return_value
)
video = youtube_service.get_video_info.return_value
video.caption.find_matching_track.assert_called_once_with(
CAPTION_TRACK_PREFERENCES
)
caption_track = video.caption.find_matching_track.return_value

assert response["api"]["transcript"]["url"] == pyramid_request.route_url(
"api.youtube.transcript",
video_id=youtube_service.get_video_id.return_value,
transcript_id=caption_track.id,
)

@pytest.mark.parametrize("has_captions,has_matches", ((True, False), (False, True)))
def test_it_defaults_to_en_a(
self, pyramid_request, youtube_service, has_captions, has_matches
):
pyramid_request.params.pop("via.video.lang", None)
video = youtube_service.get_video_info.return_value
video.has_captions = has_captions
if not has_matches:
video.caption.find_matching_track.return_value = None

response = view_youtube_video(pyramid_request)

assert response["api"]["transcript"]["url"] == pyramid_request.route_url(
"api.youtube.transcript",
video_id=youtube_service.get_video_id.return_value,
Expand All @@ -82,7 +108,7 @@ def test_it_errors_if_the_url_is_invalid(self, pyramid_request):
"query": {"url": ["Not a valid URL."]}
}

def test_it_errors_if_the_url_is_not_a_YouTube_url(
def test_it_errors_if_the_url_is_not_a_youtube_url(
self, pyramid_request, youtube_service
):
# YouTubeService returns None if it can't extract the YouTube video ID
Expand Down
48 changes: 43 additions & 5 deletions via/views/view_video.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import marshmallow
from h_matchers import Any
from h_vialib import Configuration
from pyramid.httpexceptions import HTTPUnauthorized
from pyramid.view import view_config
Expand All @@ -8,25 +9,43 @@
from via.exceptions import BadURL
from via.security import ViaSecurityPolicy
from via.services import YouTubeService
from via.services.youtube_api import CaptionTrack

CAPTION_TRACK_PREFERENCES = (
# Plain English first
CaptionTrack(language_code="en"),
# Plain varieties of English
CaptionTrack(language_code=Any.string.matching("^en-.*")),
# Sub-categories of plain English
CaptionTrack(language_code="en", name=Any()),
# English varieties with names
CaptionTrack(language_code=Any.string.matching("^en-.*"), name=Any()),
# Any auto generated English
CaptionTrack(language_code=Any.string.matching("^en-.*"), name=Any(), kind=Any()),
# Any track which can be translated into English
CaptionTrack(
language_code=Any(), name=Any(), kind=Any(), translated_language_code="en"
),
)


@view_config(renderer="via:templates/view_video.html.jinja2", route_name="youtube")
@use_kwargs(
{"url": fields.Url(required=True)}, location="query", unknown=marshmallow.INCLUDE
)
def view_youtube_video(request, url, **kwargs):
youtube_service = request.find_service(YouTubeService)
youtube_service: YouTubeService = request.find_service(YouTubeService)

if not youtube_service.enabled:
raise HTTPUnauthorized()

video_id = youtube_service.get_video_id(url)
video_url = youtube_service.canonical_video_url(video_id)
video_title = youtube_service.get_video_title(video_id)

if not video_id:
raise BadURL(f"Unsupported video URL: {url}", url=url)

video_url = youtube_service.canonical_video_url(video_id)
video_title = youtube_service.get_video_title(video_id)

via_config, client_config = Configuration.extract_from_params(kwargs)

return {
Expand All @@ -41,7 +60,9 @@ def view_youtube_video(request, url, **kwargs):
"url": request.route_url(
"api.youtube.transcript",
video_id=video_id,
transcript_id=via_config.get("video", {}).get("lang", "en.a"),
transcript_id=_get_transcript_id(
youtube_service, video_url, via_config
),
),
"method": "GET",
"headers": {
Expand All @@ -50,3 +71,20 @@ def view_youtube_video(request, url, **kwargs):
}
},
}


def _get_transcript_id(youtube_service, url, via_config):
# Try the `via.video.lang` param first
if transcript_id := via_config.get("video", {}).get("lang"):
return transcript_id

# Then look up a transcript matching our preferences
video = youtube_service.get_video_info(video_url=url)
if video.has_captions and (
caption_track := video.caption.find_matching_track(CAPTION_TRACK_PREFERENCES)
):
return caption_track.id

# This shouldn't be possible to get to if this was an option, but we need a
# fall-back. We could also fail here instead?
return "en.a"

0 comments on commit 6f8b07a

Please sign in to comment.