Skip to content

Commit

Permalink
Cache YouTube video titles in the DB
Browse files Browse the repository at this point in the history
  • Loading branch information
seanh committed Jul 18, 2023
1 parent 1e9ba1a commit 1d2075d
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 10 deletions.
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@
import httpretty
import pytest
from h_matchers import Any
from pytest_factoryboy import register
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from tests.factories import VideoFactory
from tests.factories.factoryboy_sqlalchemy_session import (
clear_factoryboy_sqlalchemy_session,
set_factoryboy_sqlalchemy_session,
)
from via.db import Base

# Each factory has to be registered with pytest_factoryboy.
register(VideoFactory)


@pytest.fixture
def pyramid_settings():
Expand Down
1 change: 1 addition & 0 deletions tests/factories/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from tests.factories.video import VideoFactory
13 changes: 13 additions & 0 deletions tests/factories/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from factory import Faker, Sequence
from factory.alchemy import SQLAlchemyModelFactory

from via.models import Video, VideoType


class VideoFactory(SQLAlchemyModelFactory):
    """Test factory that produces `Video` model instances.

    Every generated object gets a randomly chosen `VideoType` and a
    sequence-numbered `video_id` and `title`.
    """

    class Meta:
        model = Video

    # Any member of the VideoType enum may be picked.
    type = Faker("random_element", elements=VideoType)

    # The sequence counter makes each generated value distinct.
    video_id = Sequence(lambda index: f"video_id_{index}")
    title = Sequence(lambda index: f"video_title_{index}")
36 changes: 29 additions & 7 deletions tests/unit/via/services/youtube_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
from unittest.mock import sentinel

import pytest
from h_matchers import Any
from requests import Response
from sqlalchemy import select

from via.models import Video, VideoType
from via.services.youtube import YouTubeDataAPIError, YouTubeService, factory


Expand All @@ -20,6 +23,7 @@ class TestYouTubeService:
def test_enabled(self, enabled, api_key, expected):
assert (
YouTubeService(
db_session=sentinel.db,
enabled=enabled,
api_key=api_key,
http_service=sentinel.http_service,
Expand Down Expand Up @@ -54,28 +58,40 @@ def test_enabled(self, enabled, api_key, expected):
def test_get_video_id(self, url, expected_video_id, svc):
assert expected_video_id == svc.get_video_id(url)

def test_get_video_title(self, svc, http_service):
def test_get_video_title(self, svc, db_session, http_service):
response = http_service.get.return_value = Response()
response.raw = BytesIO(b'{"items": [{"snippet": {"title": "video_title"}}]}')

title = svc.get_video_title(sentinel.video_id)
title = svc.get_video_title("test_video_id")

http_service.get.assert_called_once_with(
"https://www.googleapis.com/youtube/v3/videos",
params={
"id": sentinel.video_id,
"id": "test_video_id",
"key": sentinel.api_key,
"part": "snippet",
"maxResults": "1",
},
)
assert title == "video_title"
# It should have cached the video in the DB.
assert db_session.scalars(
select(Video).where(
Video.video_id == "test_video_id", Video.type == VideoType.YOUTUBE
)
).all() == [Any.instance_of(Video).with_attrs({"title": "video_title"})]

def test_get_video_title_uses_cached_videos(self, svc, http_service, video):
    """The title of the `video` fixture is returned without any HTTP request."""
    returned_title = svc.get_video_title(video.video_id)

    # The stored title is handed back as-is...
    assert returned_title == video.title
    # ...and the HTTP service's get() was never invoked.
    http_service.get.assert_not_called()

def test_get_video_title_raises_YouTubeDataAPIError(self, svc, http_service):
http_service.get.side_effect = RuntimeError()

with pytest.raises(YouTubeDataAPIError) as exc_info:
svc.get_video_title(sentinel.video_id)
svc.get_video_title("test_video_id")

assert exc_info.value.__cause__ == http_service.get.side_effect

Expand All @@ -100,9 +116,12 @@ def test_canonical_video_url(self, video_id, expected_url, svc):
assert expected_url == svc.canonical_video_url(video_id)

@pytest.fixture
def svc(self, http_service):
def svc(self, db_session, http_service):
return YouTubeService(
enabled=True, api_key=sentinel.api_key, http_service=http_service
db_session=db_session,
enabled=True,
api_key=sentinel.api_key,
http_service=http_service,
)


Expand All @@ -111,7 +130,10 @@ def test_it(self, YouTubeService, youtube_service, http_service, pyramid_request
returned = factory(sentinel.context, pyramid_request)

YouTubeService.assert_called_once_with(
enabled=True, api_key="test_youtube_api_key", http_service=http_service
db_session=pyramid_request.db,
enabled=True,
api_key="test_youtube_api_key",
http_service=http_service,
)
assert returned == youtube_service

Expand Down
3 changes: 3 additions & 0 deletions via/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from via.models.video import Video, VideoType


def includeme(_config):  # pragma: no cover
    """Accept a config object and do nothing with it.

    NOTE(review): presumably this exists so the package can be passed to
    Pyramid's ``config.include()`` -- confirm against the app setup.
    """
    return None
20 changes: 20 additions & 0 deletions via/models/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import enum

from sqlalchemy import Enum, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column

from via.db import Base


class VideoType(enum.Enum):
    """The kinds of video a `Video` row can refer to."""

    # Currently the only member.
    YOUTUBE = 1


class Video(Base):
    """A video and its title, stored in the ``video`` table.

    A row is identified by the combination of its ``type`` and
    ``video_id``; the unique constraint below forbids duplicates of that
    pair.
    """

    __tablename__ = "video"
    __table_args__ = (UniqueConstraint("type", "video_id"),)

    # Auto-incrementing primary key. init=False keeps it out of the
    # generated constructor, so callers never pass it.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True, init=False)

    # The column type is Enum(VideoType), so the attribute holds VideoType
    # members (not ints) and is annotated accordingly.
    type: Mapped[VideoType] = mapped_column(Enum(VideoType))

    # Identifier of the video, unique within its type.
    video_id: Mapped[str]

    # The video's title; repr=False leaves it out of the object's repr.
    title: Mapped[str] = mapped_column(repr=False)
36 changes: 33 additions & 3 deletions via/services/youtube.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from urllib.parse import parse_qs, quote_plus, urlparse

from sqlalchemy import select
from sqlalchemy.orm import Session
from youtube_transcript_api import YouTubeTranscriptApi

from via.models import Video, VideoType
from via.services.http import HTTPService


Expand All @@ -10,7 +13,14 @@ class YouTubeDataAPIError(Exception):


class YouTubeService:
def __init__(self, enabled: bool, api_key: str, http_service: HTTPService):
def __init__(
self,
db_session: Session,
enabled: bool,
api_key: str,
http_service: HTTPService,
):
self._db = db_session
self._enabled = enabled
self._api_key = api_key
self._http_service = http_service
Expand Down Expand Up @@ -61,9 +71,18 @@ def get_video_id(self, url):

def get_video_title(self, video_id):
"""Call the YouTube API and return the title for the given video_id."""
# https://developers.google.com/youtube/v3/docs/videos/list
video = self._db.scalars(
select(Video).where(
Video.type == VideoType.YOUTUBE, Video.video_id == video_id
)
).one_or_none()

if video:
return video.title

try:
return self._http_service.get(
# https://developers.google.com/youtube/v3/docs/videos/list
title = self._http_service.get(
"https://www.googleapis.com/youtube/v3/videos",
params={
"id": video_id,
Expand All @@ -75,6 +94,16 @@ def get_video_title(self, video_id):
except Exception as exc:
raise YouTubeDataAPIError("getting the video title failed") from exc

self._db.add(
Video(
type=VideoType.YOUTUBE,
video_id=video_id,
title=title,
)
)

return title

def get_transcript(self, video_id):
"""
Call the YouTube API and return the transcript for the given video_id.
Expand All @@ -87,6 +116,7 @@ def get_transcript(self, video_id):

def factory(_context, request):
return YouTubeService(
db_session=request.db,
enabled=request.registry.settings["youtube_transcripts"],
api_key=request.registry.settings["youtube_api_key"],
http_service=request.find_service(HTTPService),
Expand Down

0 comments on commit 1d2075d

Please sign in to comment.