Skip to content

Commit

Permalink
added broadcast / audiospace cards #191
Browse files Browse the repository at this point in the history
  • Loading branch information
vladkens committed Jun 29, 2024
1 parent 3c4bbe1 commit fe18dd1
Show file tree
Hide file tree
Showing 13 changed files with 1,587 additions and 14,752 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ update-mocks:
twscrape list_timeline --raw --limit 10 1494877848087187461 | jq > ./tests/mocked-data/raw_list_timeline.json
@# twscrape favoriters --raw --limit 10 1649191520250245121 | jq > ./tests/mocked-data/raw_favoriters.json
@# twscrape liked_tweets --raw --limit 10 2244994945 | jq > ./tests/mocked-data/raw_likes.json

# Dump the raw `tweet_details` response for two specific tweet ids through jq
# into tests/mocked-data/ as card_broadcast.json and card_audiospace.json.
x:
twscrape tweet_details --raw 1790441814857826439 | jq > ./tests/mocked-data/card_broadcast.json
twscrape tweet_details --raw 1789054061729173804 | jq > ./tests/mocked-data/card_audiospace.json
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from twscrape.accounts_pool import AccountsPool
from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient

set_log_level("ERROR")


@pytest.fixture
def pool_mock(tmp_path):
Expand Down
433 changes: 433 additions & 0 deletions tests/mocked-data/card_audiospace.json

Large diffs are not rendered by default.

1,064 changes: 1,064 additions & 0 deletions tests/mocked-data/card_broadcast.json

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
1,360 changes: 0 additions & 1,360 deletions tests/mocked-data/raw_favoriters.json

This file was deleted.

13,373 changes: 0 additions & 13,373 deletions tests/mocked-data/raw_likes.json

This file was deleted.

3 changes: 0 additions & 3 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@

from twscrape.accounts_pool import NoAccountError
from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.utils import gather, get_env_bool

set_log_level("DEBUG")


class MockedError(Exception):
pass
Expand Down
44 changes: 36 additions & 8 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
from typing import Callable

from twscrape import API, gather
from twscrape.logger import set_log_level
from twscrape.models import PollCard, SummaryCard, Tweet, User, UserRef, parse_tweet
from twscrape.models import (
AudiospaceCard,
BroadcastCard,
PollCard,
SummaryCard,
Tweet,
User,
UserRef,
parse_tweet,
)

BASE_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(BASE_DIR, "mocked-data")
os.makedirs(DATA_DIR, exist_ok=True)

set_log_level("DEBUG")


class FakeRep:
text: str
Expand Down Expand Up @@ -419,9 +425,13 @@ async def test_issue_56():
assert len(doc.links) == 5


async def test_issue_72():
async def test_cards():
# Issues:
# - https://github.com/vladkens/twscrape/issues/72
# - https://github.com/vladkens/twscrape/issues/191

# Check SummaryCard
raw = fake_rep("_issue_72").json()
raw = fake_rep("card_summary").json()
doc = parse_tweet(raw, 1696922210588410217)
assert doc is not None
assert doc.card is not None
Expand All @@ -431,8 +441,8 @@ async def test_issue_72():
assert doc.card.description is not None
assert doc.card.url is not None

# Check PoolCard
raw = fake_rep("_issue_72_poll").json()
# Check PollCard
raw = fake_rep("card_poll").json()
doc = parse_tweet(raw, 1780666831310877100)
assert doc is not None
assert doc.card is not None
Expand All @@ -444,3 +454,21 @@ async def test_issue_72():
for x in doc.card.options:
assert x.label is not None
assert x.votesCount is not None

# Check BroadcastCard
raw = fake_rep("card_broadcast").json()
doc = parse_tweet(raw, 1790441814857826439)
assert doc is not None and doc.card is not None
assert doc.card._type == "broadcast"
assert isinstance(doc.card, BroadcastCard)
assert doc.card.title is not None
assert doc.card.url is not None
assert doc.card.photo is not None

# Check AudiospaceCard
raw = fake_rep("card_audiospace").json()
doc = parse_tweet(raw, 1789054061729173804)
assert doc is not None and doc.card is not None
assert doc.card._type == "audiospace"
assert isinstance(doc.card, AudiospaceCard)
assert doc.card.url is not None
3 changes: 0 additions & 3 deletions tests/test_queue_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@
from pytest_httpx import HTTPXMock

from twscrape.accounts_pool import AccountsPool
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient

set_log_level("ERROR")

DB_FILE = "/tmp/twscrape_test_queue_client.db"
URL = "https://example.com/api"
CF = tuple[AccountsPool, QueueClient]
Expand Down
6 changes: 5 additions & 1 deletion twscrape/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,9 @@ def set_log_level(level: _LEVELS):
_LOG_LEVEL = level


def _filter(r):
    """Decide whether a log record should be emitted.

    Used as the ``filter=`` callable of the stderr sink registered with
    ``logger.add``. The threshold is looked up on every call, so changing
    the module-level ``_LOG_LEVEL`` takes effect for subsequent records.

    Args:
        r: Log record mapping; only ``r["level"].no`` is read.

    Returns:
        True when the record's numeric level is at or above the numeric
        level of ``_LOG_LEVEL``, False otherwise.
    """
    threshold = logger.level(_LOG_LEVEL).no
    return threshold <= r["level"].no


logger.remove()
logger.add(sys.stderr, filter=lambda r: r["level"].no >= logger.level(_LOG_LEVEL).no)
logger.add(sys.stderr, filter=_filter)
46 changes: 42 additions & 4 deletions twscrape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
import random
import re
import string
import sys
import traceback
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Generator, Optional
from typing import Generator, Optional, Union

import httpx

Expand Down Expand Up @@ -187,7 +188,7 @@ class Tweet(JSONTrait):
sourceUrl: str | None = None
sourceLabel: str | None = None
media: Optional["Media"] = None
card: Optional["SummaryCard"] | Optional["PollCard"] = None
card: Union[None, "SummaryCard", "PollCard", "BroadcastCard", "AudiospaceCard"] = None
_type: str = "snscrape.modules.twitter.Tweet"

# todo:
Expand Down Expand Up @@ -381,14 +382,28 @@ class PollCard(Card):
_type: str = "poll"


@dataclass
class BroadcastCard(Card):
    """Card model produced for tweets whose card name is a ``broadcast`` card."""

    # Filled by the card parser from the "broadcast_title" card value.
    title: str
    # Filled by the card parser from the "broadcast_url" card value.
    url: str
    # Photo returned by `_parse_card_extract_largest_photo`; None when absent.
    photo: MediaPhoto | None = None
    # Discriminator string identifying this card kind.
    _type: str = "broadcast"


@dataclass
class AudiospaceCard(Card):
    """Card model produced for tweets whose card name is an ``audiospace`` card.

    Only the card URL is carried; the card object holds no further data.
    """

    # Filled by the card parser from the "card_url" card value.
    url: str
    # Discriminator string identifying this card kind.
    _type: str = "audiospace"


def _parse_card_get_bool(values: list[dict], key: str):
for x in values:
if x["key"] == key:
return x["value"]["boolean_value"]
return False


def _parse_card_get_str(values: list[dict], key: str, defaultVal=None):
def _parse_card_get_str(values: list[dict], key: str, defaultVal=None) -> str | None:
for x in values:
if x["key"] == key:
return x["value"]["string_value"]
Expand Down Expand Up @@ -501,8 +516,31 @@ def _parse_card(obj: dict, url: str):
# print(json.dumps(val, indent=2))
return PollCard(options=options, finished=finished)

if name == "745291183405076480:broadcast":
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "broadcast_url")
card_title = _parse_card_get_str(val, "broadcast_title")
photo, _ = _parse_card_extract_largest_photo(val)
if card_url is None or card_title is None:
return None

return BroadcastCard(title=card_title, url=card_url, photo=photo)

if name == "3691233323:audiospace":
# no more data in this object; an extra API call is possibly needed to get card info
val = _parse_card_prepare_values(obj)
card_url = _parse_card_get_str(val, "card_url")
if card_url is None:
return None

# print(json.dumps(val, indent=2))
return AudiospaceCard(url=card_url)

logger.warning(f"Unknown card type '{name}' on {url}")
# print(json.dumps(obj["card"]["legacy"], indent=2))
if "PYTEST_CURRENT_TEST" in os.environ: # help debugging tests
print(f"Unknown card type '{name}' on {url}", file=sys.stderr)
# print(json.dumps(obj["card"]["legacy"], indent=2))
return None


# internal helpers
Expand Down

0 comments on commit fe18dd1

Please sign in to comment.