Skip to content

Commit

Permalink
Update `limit` handling in the non-`_raw` functions
Browse files Browse the repository at this point in the history
  • Loading branch information
vladkens committed Jul 15, 2023
1 parent a3bb5d2 commit f43bf3c
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 32 deletions.
46 changes: 17 additions & 29 deletions twscrape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .accounts_pool import AccountsPool
from .constants import *
from .logger import set_log_level
from .models import Tweet, User
from .models import Tweet, User, get_tweets, get_users
from .queue_client import QueueClient
from .utils import encode_params, find_obj, get_by_path, to_old_obj, to_old_rep

Expand Down Expand Up @@ -93,14 +93,9 @@ async def search_raw(self, q: str, limit=-1, kv=None):
yield x

async def search(self, q: str, limit=-1, kv=None):
    """Search tweets matching query *q*, yielding unique Tweet objects.

    Pagination is delegated to search_raw; parsing, de-duplication and the
    *limit* cut-off (counted in unique tweets, -1 = unlimited) are handled
    by models.get_tweets. *kv* is passed through as extra request params.
    """
    async for rep in self.search_raw(q, limit=limit, kv=kv):
        for x in get_tweets(rep.json(), limit):
            yield x

# user_by_id

Expand Down Expand Up @@ -181,9 +176,8 @@ async def followers_raw(self, uid: int, limit=-1, kv=None):

async def followers(self, uid: int, limit=-1, kv=None):
    """Yield unique User objects following account *uid*.

    Pagination comes from followers_raw; parsing, de-duplication and the
    *limit* cut-off (-1 = unlimited) are handled by models.get_users.
    """
    async for rep in self.followers_raw(uid, limit=limit, kv=kv):
        for x in get_users(rep.json(), limit):
            yield x

# following

Expand All @@ -195,9 +189,8 @@ async def following_raw(self, uid: int, limit=-1, kv=None):

async def following(self, uid: int, limit=-1, kv=None):
    """Yield unique User objects that account *uid* follows.

    Pagination comes from following_raw; parsing, de-duplication and the
    *limit* cut-off (-1 = unlimited) are handled by models.get_users.
    """
    async for rep in self.following_raw(uid, limit=limit, kv=kv):
        for x in get_users(rep.json(), limit):
            yield x

# retweeters

Expand All @@ -209,9 +202,8 @@ async def retweeters_raw(self, twid: int, limit=-1, kv=None):

async def retweeters(self, twid: int, limit=-1, kv=None):
    """Yield unique User objects that retweeted tweet *twid*.

    Pagination comes from retweeters_raw; parsing, de-duplication and the
    *limit* cut-off (-1 = unlimited) are handled by models.get_users.
    """
    async for rep in self.retweeters_raw(twid, limit=limit, kv=kv):
        for x in get_users(rep.json(), limit):
            yield x

# favoriters

Expand All @@ -223,9 +215,8 @@ async def favoriters_raw(self, twid: int, limit=-1, kv=None):

async def favoriters(self, twid: int, limit=-1, kv=None):
    """Yield unique User objects that liked tweet *twid*.

    Pagination comes from favoriters_raw; parsing, de-duplication and the
    *limit* cut-off (-1 = unlimited) are handled by models.get_users.
    """
    async for rep in self.favoriters_raw(twid, limit=limit, kv=kv):
        for x in get_users(rep.json(), limit):
            yield x

# user_tweets

Expand All @@ -245,9 +236,8 @@ async def user_tweets_raw(self, uid: int, limit=-1, kv=None):

async def user_tweets(self, uid: int, limit=-1, kv=None):
    """Yield unique Tweet objects from account *uid*'s timeline.

    Pagination comes from user_tweets_raw; parsing, de-duplication and the
    *limit* cut-off (-1 = unlimited) are handled by models.get_tweets.
    """
    async for rep in self.user_tweets_raw(uid, limit=limit, kv=kv):
        for x in get_tweets(rep.json(), limit):
            yield x

# user_tweets_and_replies

Expand All @@ -267,9 +257,8 @@ async def user_tweets_and_replies_raw(self, uid: int, limit=-1, kv=None):

async def user_tweets_and_replies(self, uid: int, limit=-1, kv=None):
    """Yield unique Tweet objects (tweets and replies) from account *uid*.

    Pagination comes from user_tweets_and_replies_raw; parsing,
    de-duplication and the *limit* cut-off (-1 = unlimited) are handled by
    models.get_tweets.
    """
    async for rep in self.user_tweets_and_replies_raw(uid, limit=limit, kv=kv):
        for x in get_tweets(rep.json(), limit):
            yield x

# list timeline

Expand All @@ -285,6 +274,5 @@ async def list_timeline_raw(self, list_id: int, limit=-1, kv=None):

async def list_timeline(self, list_id: int, limit=-1, kv=None):
    """Yield unique Tweet objects from the timeline of list *list_id*.

    Pagination comes from list_timeline_raw; parsing, de-duplication and
    the *limit* cut-off (-1 = unlimited) are handled by models.get_tweets.
    """
    async for rep in self.list_timeline_raw(list_id, limit=limit, kv=kv):
        # Pass the decoded JSON dict, not the raw Response: every sibling
        # call site does rep.json(), and get_items's Response fallback is
        # unreliable ("json" in rep does not detect a Response).
        for x in get_tweets(rep.json(), limit):
            yield x
41 changes: 38 additions & 3 deletions twscrape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import re
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Optional
from typing import Generator, Optional

import httpx

from .logger import logger
from .utils import find_item, get_or, int_or_none
from .utils import find_item, get_or, int_or_none, to_old_rep


@dataclass
Expand Down Expand Up @@ -115,7 +117,7 @@ class User(JSONTrait):
# label: typing.Optional["UserLabel"] = None

@staticmethod
def parse(obj: dict):
def parse(obj: dict, res=None):
return User(
id=int(obj["id_str"]),
id_str=obj["id_str"],
Expand Down Expand Up @@ -373,3 +375,36 @@ def _get_views(obj: dict, rt_obj: dict):
if k is not None:
return k
return None


# reply parsing


def get_items(rep: "httpx.Response | dict", kind: str, limit: int = -1):
    """Yield unique parsed User or Tweet objects from an API reply.

    Args:
        rep: an httpx.Response (its ``.json()`` is used) or an
            already-decoded response dict.
        kind: ``"user"`` or ``"tweet"`` — selects the model class and the
            key in the old-style representation produced by to_old_rep.
        limit: stop after yielding this many unique items; -1 = no limit.

    Raises:
        ValueError: if *kind* is not ``"user"`` or ``"tweet"`` (raised on
            first iteration, since this is a generator).
    """
    if kind == "user":
        Cls, key = User, "users"
    elif kind == "tweet":
        Cls, key = Tweet, "tweets"
    else:
        raise ValueError(f"Invalid kind: {kind}")

    # Accept both a Response and a plain dict. The previous check,
    # `"json" in rep`, never detected a Response (httpx.Response has no
    # mapping __contains__; `in` fell back to iterating the body bytes and
    # returned False), so a Response reached to_old_rep unconverted.
    # Duck-type on a callable `.json` attribute instead.
    json_fn = getattr(rep, "json", None)
    obj = to_old_rep(json_fn() if callable(json_fn) else rep)

    ids = set()  # de-dup: the API can repeat the same entity across pages
    for x in obj[key].values():
        if limit != -1 and len(ids) >= limit:
            break

        tmp = Cls.parse(x, obj)
        if tmp.id not in ids:
            ids.add(tmp.id)
            yield tmp


def get_tweets(rep: "httpx.Response", limit: int = -1) -> "Generator[Tweet, None, None]":
    """Yield unique Tweet objects parsed from *rep*, up to *limit* (-1 = all)."""
    yield from get_items(rep, "tweet", limit)  # type: ignore


def get_users(rep: "httpx.Response", limit: int = -1) -> "Generator[User, None, None]":
    """Yield unique User objects parsed from *rep*, up to *limit* (-1 = all)."""
    yield from get_items(rep, "user", limit)  # type: ignore
5 changes: 5 additions & 0 deletions twscrape/queue_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ async def _check_rep(self, rep: httpx.Response):
await self._close_ctx(-1, banned=True, msg=msg)
raise BannedError(msg)

# possible banned by old api flow
if rep.status_code in (401, 403):
await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours
raise RateLimitError(msg)

# content not found
if rep.status_code == 200 and "_Missing: No status found with that ID." in msg:
return # ignore this error
Expand Down

0 comments on commit f43bf3c

Please sign in to comment.