Skip to content

Commit

Permalink
Replace youtube-transcript-api library
Browse files Browse the repository at this point in the history
Replace the third-party `youtube-transcript-api` library with our own
code for getting transcripts from YouTube.
  • Loading branch information
seanh committed Aug 15, 2023
1 parent f431a6d commit f01d9fd
Show file tree
Hide file tree
Showing 9 changed files with 525 additions and 26 deletions.
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from tests.factories import TranscriptFactory, VideoFactory
from tests.factories import TranscriptFactory, TranscriptInfoFactory, VideoFactory
from tests.factories.factoryboy_sqlalchemy_session import (
clear_factoryboy_sqlalchemy_session,
set_factoryboy_sqlalchemy_session,
Expand All @@ -16,6 +16,7 @@

# Each factory has to be registered with pytest_factoryboy.
register(TranscriptFactory)
register(TranscriptInfoFactory)
register(VideoFactory)


Expand Down
1 change: 1 addition & 0 deletions tests/factories/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from tests.factories.transcript import TranscriptFactory
from tests.factories.transcript_info import TranscriptInfoFactory
from tests.factories.video import VideoFactory
13 changes: 13 additions & 0 deletions tests/factories/transcript_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from factory import Factory, Sequence

from via.services.youtube_transcript import TranscriptInfo


# Test factory for TranscriptInfo objects (the model named in Meta), with fixed
# default field values and a URL that differs for each object built.
# NOTE(review): leading indentation is missing in this view; `model` presumably
# belongs to the nested Meta class and the four fields below to the factory
# class itself — confirm against the real file.
class TranscriptInfoFactory(Factory):
class Meta:
model = TranscriptInfo

language_code = "en-us"
name = "English (United States)"
# Sequence passes an incrementing counter `n` to the lambda, so each object
# gets a distinct `v` query value in its URL.
url = Sequence(lambda n: f"https://example.com/api/timedtext?v={n}")
autogenerated = False
6 changes: 6 additions & 0 deletions tests/unit/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
URLDetailsService,
ViaClientService,
YouTubeService,
YouTubeTranscriptService,
)


Expand Down Expand Up @@ -69,3 +70,8 @@ def youtube_service(mock_service):
youtube_service.get_video_id.return_value = None

return youtube_service


# Fixture giving unit tests a stand-in for YouTubeTranscriptService.
# It returns whatever the `mock_service` fixture produces for that class.
# NOTE(review): `mock_service` is defined outside this view — presumably it
# builds (and registers) a mock of the given service class; confirm.
@pytest.fixture
def youtube_transcript_service(mock_service):
return mock_service(YouTubeTranscriptService)
51 changes: 31 additions & 20 deletions tests/unit/via/services/youtube_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_enabled(self, db_session, enabled, api_key, expected):
enabled=enabled,
api_key=api_key,
http_service=sentinel.http_service,
youtube_transcript_service=sentinel.youtube_transcript_service,
).enabled
== expected
)
Expand Down Expand Up @@ -94,38 +95,45 @@ def test_get_video_title_raises_YouTubeDataAPIError(self, svc, http_service):

assert exc_info.value.__cause__ == http_service.get.side_effect

def test_get_transcript(self, db_session, svc, YouTubeTranscriptApi):
YouTubeTranscriptApi.get_transcript.return_value = [
{"text": "foo", "start": 0.0, "duration": 1.0},
{"text": "bar", "start": 1.0, "duration": 2.0},
]
def test_get_transcript(
self, db_session, svc, youtube_transcript_service, transcript_info
):
youtube_transcript_service.pick_default_transcript.return_value = (
transcript_info
)
youtube_transcript_service.get_transcript.return_value = "test_transcript"

returned_transcript = svc.get_transcript("test_video_id")

YouTubeTranscriptApi.get_transcript.assert_called_once_with(
"test_video_id", languages=("en",)
youtube_transcript_service.get_transcript_infos.assert_called_once_with(
"test_video_id"
)
youtube_transcript_service.pick_default_transcript.assert_called_once_with(
youtube_transcript_service.get_transcript_infos.return_value
)
assert returned_transcript == YouTubeTranscriptApi.get_transcript.return_value
youtube_transcript_service.get_transcript.assert_called_once_with(
transcript_info
)
assert returned_transcript == "test_transcript"
# It should have cached the transcript in the DB.
assert db_session.scalars(select(Transcript)).all() == [
Any.instance_of(Transcript).with_attrs(
{
"video_id": "test_video_id",
"transcript": YouTubeTranscriptApi.get_transcript.return_value,
"transcript_id": transcript_info.id,
"transcript": "test_transcript",
}
)
]

@pytest.mark.usefixtures("db_session")
def test_get_transcript_returns_cached_transcripts(
self, transcript, svc, YouTubeTranscriptApi
self, svc, transcript, youtube_transcript_service
):
returned_transcript = svc.get_transcript(transcript.video_id)

YouTubeTranscriptApi.get_transcript.assert_not_called()
youtube_transcript_service.get_transcript.assert_not_called()
assert returned_transcript == transcript.transcript

@pytest.mark.usefixtures("db_session")
def test_get_transcript_returns_oldest_cached_transcript(
self, transcript_factory, svc
):
Expand Down Expand Up @@ -155,18 +163,25 @@ def test_canonical_video_url(self, video_id, expected_url, svc):
assert expected_url == svc.canonical_video_url(video_id)

@pytest.fixture
def svc(self, db_session, http_service):
def svc(self, db_session, http_service, youtube_transcript_service):
return YouTubeService(
db_session=db_session,
enabled=True,
api_key=sentinel.api_key,
http_service=http_service,
youtube_transcript_service=youtube_transcript_service,
)


class TestFactory:
def test_it(
self, YouTubeService, youtube_service, pyramid_request, http_service, db_session
self,
YouTubeService,
youtube_service,
pyramid_request,
http_service,
db_session,
youtube_transcript_service,
):
returned = factory(sentinel.context, pyramid_request)

Expand All @@ -175,6 +190,7 @@ def test_it(
enabled=pyramid_request.registry.settings["youtube_transcripts"],
api_key="test_youtube_api_key",
http_service=http_service,
youtube_transcript_service=youtube_transcript_service,
)
assert returned == youtube_service

Expand All @@ -185,8 +201,3 @@ def YouTubeService(self, patch):
@pytest.fixture
def youtube_service(self, YouTubeService):
return YouTubeService.return_value


# Autouse fixture: applied to every test in its scope without being requested,
# it passes the dotted path of the YouTubeTranscriptApi name in
# via.services.youtube to the `patch` fixture and returns the result.
# NOTE(review): `patch` is defined outside this view — presumably it replaces
# the target with a mock for the duration of the test; confirm.
@pytest.fixture(autouse=True)
def YouTubeTranscriptApi(patch):
return patch("via.services.youtube.YouTubeTranscriptApi")
Loading

0 comments on commit f01d9fd

Please sign in to comment.