Skip to content

Commit

Permalink
Add an algorithm for matching a YouTube caption track
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Betts committed Aug 3, 2023
1 parent caee583 commit aebdea5
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
54 changes: 54 additions & 0 deletions tests/unit/via/services/youtube_api/models_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from h_matchers import Any
from pytest import param

from via.services.youtube_api import Captions, CaptionTrack, Video

Expand Down Expand Up @@ -68,6 +69,59 @@ def test_from_v1_json_minimal(self, CaptionTrack):
assert not captions.tracks
CaptionTrack.assert_not_called()

@pytest.mark.parametrize(
    ("preferences", "expected_label"),
    [
        # The first preference matches an available track exactly
        param([CaptionTrack("en")], "plain_en", id="direct_match"),
        # The first preference matches nothing, the second one does
        param(
            [CaptionTrack("de"), CaptionTrack("en-gb")],
            "plain_en_gb",
            id="miss_then_hit",
        ),
        # Matcher objects can stand in for concrete field values
        param(
            [
                CaptionTrack("de"),
                CaptionTrack(Any.string.matching("^en-.*"), name="Name"),
            ],
            "named_en_gb",
            id="wild_cards",
        ),
        # No manual "fr" track is available, so the "asr" one is picked
        param(
            [CaptionTrack("fr", kind=None), CaptionTrack("en", kind="asr")],
            "en_auto",
            id="fallback_to_auto",
        ),
        # Several tracks satisfy the same preference; one must win
        param(
            [CaptionTrack(Any(), name="Name")],
            "named_en_gb",
            id="same_level_sorting",
        ),
        # Nothing matches at all
        param([CaptionTrack("fr")], None, id="miss"),
    ],
)
def test_find_matching_track(self, preferences, expected_label):
    """Check which available track is selected for each preference list."""
    available_tracks = [
        CaptionTrack("en", label="plain_en"),
        CaptionTrack("en-gb", label="plain_en_gb"),
        CaptionTrack("en-us", name="Name", label="named_en_us"),
        CaptionTrack("en-gb", name="Name", label="named_en_gb"),
        CaptionTrack("en", kind="asr", label="en_auto"),
    ]
    captions = Captions(tracks=available_tracks)

    caption_track = captions.find_matching_track(preferences)

    if expected_label:
        assert caption_track.label == expected_label
    else:
        # A miss is reported with a falsy result
        assert not caption_track

@pytest.fixture
def CaptionTrack(self, patch):
    """Patch `CaptionTrack` in the models module and return the result."""
    patched_caption_track = patch("via.services.youtube_api.models.CaptionTrack")
    return patched_caption_track
Expand Down
41 changes: 41 additions & 0 deletions via/services/youtube_api/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import base64
from copy import deepcopy
from dataclasses import dataclass, field
from operator import attrgetter
from typing import List, Optional

from via.services.youtube_api._nested_data import safe_get
Expand Down Expand Up @@ -84,6 +86,45 @@ def from_v1_json(cls, data: dict):
]
)

def find_matching_track(
    self, preferences: List[CaptionTrack]
) -> Optional[CaptionTrack]:
    """
    Get the caption track which best matches the preferences, in order.

    This method takes the provided list of caption track objects and
    searches the available tracks for those with matching details:

     * language_code
     * kind
     * name

    Earlier items are higher priority. Details are compared by equality
    against each preference, so a preference may hold a matcher object in
    place of a concrete value. When several tracks satisfy the same
    preference, the one with the lowest `id` is chosen.

    :param preferences: List of partially filled out caption track objects
        which represent the caption track we would like.
    :return: A deep copy of the best matching track, or `None` if no
        available track matches any of the preferences.
    """

    def get_key(track: CaptionTrack):
        return track.language_code, track.kind, track.name

    search_keys = [get_key(preference) for preference in preferences]
    best_index, best_caption_track = None, None

    # Sort the tracks to keep the algorithm more stable! This only insulates
    # us from sorting changes, not metadata changes.
    for caption_track in sorted(self.tracks, key=attrgetter("id")):
        try:
            index = search_keys.index(get_key(caption_track))
        except ValueError:
            # This track matches none of the preferences
            continue

        # Items with lower indexes are first choices for the user. The
        # comparison is strict, so the earliest track in sorted order wins
        # among tracks matching the same preference.
        if best_index is None or index < best_index:
            best_index, best_caption_track = index, caption_track

            if index == 0:
                # Nothing can beat a match on the first preference
                break

    if best_caption_track is None:
        return None

    # Copy only the winner, so callers can't mutate the track held in
    # `self.tracks` and we don't copy candidates that are later discarded.
    return deepcopy(best_caption_track)


@dataclass
class Video:
Expand Down

0 comments on commit aebdea5

Please sign in to comment.