diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2f5287c..7c070ab 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..1569bf5 --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +python 3.12.0 diff --git a/.vscode/settings.json b/.vscode/settings.json index 1d23fc8..0948849 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,7 +8,7 @@ }, "[python]": { "editor.formatOnSave": true, - "editor.codeActionsOnSave": ["source.organizeImports"] + "editor.codeActionsOnSave": ["source.organizeImports"], + "editor.defaultFormatter": "ms-python.black-formatter" }, - "python.formatting.provider": "black" } diff --git a/Dockerfile.python b/Dockerfile.python new file mode 100644 index 0000000..8f86239 --- /dev/null +++ b/Dockerfile.python @@ -0,0 +1,11 @@ +ARG VER=3.12 + +FROM python:${VER}-alpine +RUN apk add git + +WORKDIR /app +COPY pyproject.toml readme.md /app/ +RUN pip install -e .[dev] +COPY . /app + +CMD python --version; pytest tests/ diff --git a/sqlite.dockerfile b/Dockerfile.sqlite similarity index 100% rename from sqlite.dockerfile rename to Dockerfile.sqlite diff --git a/Makefile b/Makefile index baa10d1..d7d0427 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,5 @@ .PHONY: all build -SQTEST = docker -l warning build -f sqlite.dockerfile - all: @echo "hi" @@ -38,27 +36,33 @@ show-cov: @coverage html @open htmlcov/index.html -act: - @act --container-architecture linux/amd64 - changelog: @git pull origin --tags > /dev/null @git log $(shell git describe --tags --abbrev=0 HEAD)^..HEAD --pretty=format:'- %s' -test34: +test-py: + $(eval name=twscrape_py$(v)) + @docker -l warning build -f Dockerfile.python --build-arg VER=$(v) -t $(name) . + @docker run $(name) + +test-sq: + $(eval name=twscrape_sq$(v)) + @docker -l warning build -f Dockerfile.sqlite --build-arg SQLY=$(y) --build-arg SQLV=$(v) -t $(name) . + @docker run $(name) + +test-py-matrix: + @make test-py v=3.10 + @make test-py v=3.11 + @make test-py v=3.12 + +test-sq-matrix: @# https://www.sqlite.org/chronology.html - @$(SQTEST) --build-arg SQLY=2018 --build-arg SQLV=3240000 -t twscrape_sq24 . - @$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3270200 -t twscrape_sq27 . - @$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3300100 -t twscrape_sq30 . - @$(SQTEST) --build-arg SQLY=2020 --build-arg SQLV=3330000 -t twscrape_sq33 . - @$(SQTEST) --build-arg SQLY=2021 --build-arg SQLV=3340100 -t twscrape_sq34 . - @$(SQTEST) --build-arg SQLY=2023 --build-arg SQLV=3430000 -t twscrape_sq43 . - @docker run twscrape_sq24 - @docker run twscrape_sq27 - @docker run twscrape_sq30 - @docker run twscrape_sq33 - @docker run twscrape_sq34 - @docker run twscrape_sq43 + @make test-sq y=2018 v=3240000 + @make test-sq y=2019 v=3270200 + @make test-sq y=2019 v=3300100 + @make test-sq y=2020 v=3330000 + @make test-sq y=2021 v=3340100 + @make test-sq y=2023 v=3430000 update-mocks: twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json diff --git a/pyproject.toml b/pyproject.toml index ebb5d1d..2a62189 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,22 +16,23 @@ classifiers = [ 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ] dependencies = [ - "aiosqlite==0.17.0", - "fake-useragent==1.2.1", - "httpx==0.24.0", - "loguru==0.7.0", + "aiosqlite>=0.17.0", + "fake-useragent>=1.3.0", + "httpx>=0.24.0", + "loguru>=0.7.0", ] [project.optional-dependencies] dev = [ - "pylint==2.17.3", - "pytest-asyncio==0.21.0", - "pytest-cov==4.0.0", - "pytest-httpx==0.22.0", - "pytest==7.3.1", - "ruff==0.0.263", + "pylint>=2.17.3", + "pytest-asyncio>=0.21.0", + "pytest-cov>=4.0.0", + "pytest-httpx>=0.22.0", + "pytest>=7.4.0", + "ruff" ] [project.urls] @@ -46,13 +47,13 @@ packages = ['twscrape'] [tool.pylint] max-line-length = 99 disable = [ - "C0103", # invalid-name - "C0114", # missing-module-docstring - "C0115", # missing-class-docstring - "C0116", # missing-function-docstring - "R0903", # too-few-public-methods - "R0913", # too-many-arguments - "W0105", # pointless-string-statement + "C0103", # invalid-name + "C0114", # missing-module-docstring + "C0115", # missing-class-docstring + "C0116", # missing-function-docstring + "R0903", # too-few-public-methods + "R0913", # too-many-arguments + "W0105", # pointless-string-statement ] [tool.pytest.ini_options] @@ -67,3 +68,6 @@ line-length = 99 [tool.ruff] line-length = 99 + +[tool.hatch.metadata] +allow-direct-references = true \ No newline at end of file diff --git a/readme.md b/readme.md index 86d3ee3..52794b3 100644 --- a/readme.md +++ b/readme.md @@ -10,9 +10,9 @@ test status - + license diff --git a/tests/test_parser.py b/tests/test_parser.py index fddbec1..4ff9f6a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -252,6 +252,7 @@ async def test_user_tweets_and_replies(): for doc in tweets: check_tweet(doc) + async def test_list_timeline(): api = API() mock_gen(api, "list_timeline_raw") diff --git a/tests/test_pool.py b/tests/test_pool.py index 19e5095..3f1f0a9 100644 --- a/tests/test_pool.py +++ b/tests/test_pool.py @@ -1,5 +1,5 @@ from twscrape.accounts_pool import AccountsPool -from twscrape.utils import utc_ts +from twscrape.utils import utc async def test_add_accounts(pool_mock: AccountsPool): @@ -102,7 +102,7 @@ async def test_account_unlock(pool_mock: AccountsPool): assert acc.locks[Q] is not None # should update lock time - end_time = utc_ts() + 60 # + 1 minute + end_time = utc.ts() + 60 # + 1 minute await pool_mock.lock_until(acc.username, Q, end_time) acc = await pool_mock.get(acc.username) diff --git a/twscrape/account.py b/twscrape/account.py index aab9b77..d567d7c 100644 --- a/twscrape/account.py +++ b/twscrape/account.py @@ -7,7 +7,7 @@ from .constants import TOKEN from .models import JSONTrait -from .utils import from_utciso +from .utils import utc @dataclass @@ -30,12 +30,12 @@ class Account(JSONTrait): @staticmethod def from_rs(rs: sqlite3.Row): doc = dict(rs) - doc["locks"] = {k: from_utciso(v) for k, v in json.loads(doc["locks"]).items()} + doc["locks"] = {k: utc.from_iso(v) for k, v in json.loads(doc["locks"]).items()} doc["stats"] = {k: v for k, v in json.loads(doc["stats"]).items() if isinstance(v, int)} doc["headers"] = json.loads(doc["headers"]) doc["cookies"] = json.loads(doc["cookies"]) doc["active"] = bool(doc["active"]) - doc["last_used"] = from_utciso(doc["last_used"]) if doc["last_used"] else None + doc["last_used"] = utc.from_iso(doc["last_used"]) if doc["last_used"] else None return Account(**doc) def to_rs(self): diff --git a/twscrape/accounts_pool.py b/twscrape/accounts_pool.py index 6733ecd..0f6e189 100644 --- a/twscrape/accounts_pool.py +++ b/twscrape/accounts_pool.py @@ -11,7 +11,7 @@ from .db import execute, fetchall, fetchone from .logger import logger from .login import login -from .utils import parse_cookies, utc_ts +from .utils import parse_cookies, utc class AccountInfo(TypedDict): @@ -197,7 +197,7 @@ async def lock_until(self, username: str, queue: str, unlock_at: int, req_count= UPDATE accounts SET locks = json_set(locks, '$.{queue}', datetime({unlock_at}, 'unixepoch')), stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}), - last_used = datetime({utc_ts()}, 'unixepoch') + last_used = datetime({utc.ts()}, 'unixepoch') WHERE username = :username """ await execute(self._db_file, qs, {"username": username}) @@ -207,7 +207,7 @@ async def unlock(self, username: str, queue: str, req_count=0): UPDATE accounts SET locks = json_remove(locks, '$.{queue}'), stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}), - last_used = datetime({utc_ts()}, 'unixepoch') + last_used = datetime({utc.ts()}, 'unixepoch') WHERE username = :username """ await execute(self._db_file, qs, {"username": username}) @@ -228,7 +228,7 @@ async def get_for_queue(self, queue: str): qs = f""" UPDATE accounts SET locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')), - last_used = datetime({utc_ts()}, 'unixepoch') + last_used = datetime({utc.ts()}, 'unixepoch') WHERE username = ({q1}) RETURNING * """ @@ -238,7 +238,7 @@ async def get_for_queue(self, queue: str): qs = f""" UPDATE accounts SET locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')), - last_used = datetime({utc_ts()}, 'unixepoch'), + last_used = datetime({utc.ts()}, 'unixepoch'), _tx = '{tx}' WHERE username = ({q1}) """ @@ -277,8 +277,7 @@ async def next_available_at(self, queue: str): """ rs = await fetchone(self._db_file, qs) if rs: - now = datetime.utcnow().replace(tzinfo=timezone.utc) - trg = datetime.fromisoformat(rs[0]).replace(tzinfo=timezone.utc) + now, trg = utc.now(), utc.from_iso(rs[0]) if trg < now: return "now" diff --git a/twscrape/api.py b/twscrape/api.py index 166b291..1827e22 100644 --- a/twscrape/api.py +++ b/twscrape/api.py @@ -109,7 +109,7 @@ async def user_by_id_raw(self, uid: int, kv=None): "hidden_profile_likes_enabled": True, "highlights_tweets_tab_ui_enabled": True, "creator_subscriptions_tweet_preview_api_enabled": True, - "hidden_profile_subscriptions_enabled": True + "hidden_profile_subscriptions_enabled": True, } return await self._gql_item(op, kv, ft) @@ -128,7 +128,7 @@ async def user_by_login_raw(self, login: str, kv=None): "creator_subscriptions_tweet_preview_api_enabled": True, "subscriptions_verification_info_verified_since_enabled": True, "hidden_profile_subscriptions_enabled": True, - "subscriptions_verification_info_is_identity_verified_enabled": False + "subscriptions_verification_info_is_identity_verified_enabled": False, } return await self._gql_item(op, kv, ft) diff --git a/twscrape/login.py b/twscrape/login.py index b99b9dc..4da3c87 100644 --- a/twscrape/login.py +++ b/twscrape/login.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta, timezone +from datetime import timedelta from httpx import AsyncClient, HTTPStatusError, Response @@ -6,7 +6,7 @@ from .constants import LOGIN_URL from .imap import imap_get_email_code, imap_login from .logger import logger -from .utils import raise_for_status +from .utils import raise_for_status, utc async def get_guest_token(client: AsyncClient): @@ -120,7 +120,7 @@ async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict if not imap: imap = await imap_login(acc.email, acc.email_password) - now_time = datetime.now(timezone.utc) - timedelta(seconds=30) + now_time = utc.now() - timedelta(seconds=30) value = await imap_get_email_code(imap, acc.email, now_time) payload = { diff --git a/twscrape/models.py b/twscrape/models.py index dd77091..c3706c3 100644 --- a/twscrape/models.py +++ b/twscrape/models.py @@ -12,7 +12,7 @@ import httpx from .logger import logger -from .utils import find_item, get_or, int_or, to_old_rep +from .utils import find_item, get_or, int_or, to_old_rep, utc @dataclass @@ -407,7 +407,7 @@ def _get_views(obj: dict, rt_obj: dict): def _write_dump(kind: str, e: Exception, x: dict, obj: dict): uniq = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) - time = datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S") + time = utc.now().strftime("%Y-%m-%d_%H-%M-%S") dumpfile = f"/tmp/twscrape/twscrape_parse_error_{time}_{uniq}.txt" os.makedirs(os.path.dirname(dumpfile), exist_ok=True) diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index 454cd50..e40356e 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -1,16 +1,15 @@ import json import os -from datetime import datetime from typing import Any import httpx from .accounts_pool import Account, AccountsPool from .logger import logger -from .utils import utc_ts +from .utils import utc ReqParams = dict[str, str | int] | None -TMP_TS = datetime.utcnow().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16] +TMP_TS = utc.now().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16] class Ctx: @@ -39,6 +38,7 @@ class RateLimitError(Exception): class BannedError(Exception): pass + class DependencyError(Exception): pass @@ -151,7 +151,7 @@ async def _check_rep(self, rep: httpx.Response): # possible new limits for tweets view per account if msg.startswith("(88) Rate limit exceeded") or rep.status_code == 429: - await self._close_ctx(utc_ts() + 60 * 60 * 4) # lock for 4 hours + await self._close_ctx(utc.ts() + 60 * 60 * 4) # lock for 4 hours raise RateLimitError(msg) if msg.startswith("(326) Authorization: Denied by access control"): @@ -163,7 +163,7 @@ async def _check_rep(self, rep: httpx.Response): # possible banned by old api flow if rep.status_code in (401, 403): - await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours + await self._close_ctx(utc.ts() + 60 * 60 * 12) # lock for 12 hours raise RateLimitError(msg) # content not found @@ -196,7 +196,7 @@ async def req(self, method: str, url: str, params: ReqParams = None): except (RateLimitError, BannedError): # already handled continue - except (DependencyError): + except DependencyError: logger.error(f"Dependency error, returnning: {url}") return except (httpx.ReadTimeout, httpx.ProxyError): @@ -206,4 +206,4 @@ async def req(self, method: str, url: str, params: ReqParams = None): retry_count += 1 if retry_count >= 3: logger.warning(f"Unknown error {type(e)}: {e}") - await self._close_ctx(utc_ts() + 60 * 15) # 15 minutes + await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes diff --git a/twscrape/utils.py b/twscrape/utils.py index 5e1fd09..51eaa1a 100644 --- a/twscrape/utils.py +++ b/twscrape/utils.py @@ -11,6 +11,20 @@ T = TypeVar("T") +class utc: + @staticmethod + def now() -> datetime: + return datetime.now(timezone.utc) + + @staticmethod + def from_iso(iso: str) -> datetime: + return datetime.fromisoformat(iso).replace(tzinfo=timezone.utc) + + @staticmethod + def ts() -> int: + return int(utc.now().timestamp()) + + async def gather(gen: AsyncGenerator[T, None]) -> list[T]: items = [] async for x in gen: @@ -147,14 +161,6 @@ def to_old_rep(obj: dict) -> dict[str, dict]: return {"tweets": {**tw1, **tw2}, "users": users} -def utc_ts() -> int: - return int(datetime.utcnow().replace(tzinfo=timezone.utc).timestamp()) - - -def from_utciso(iso: str) -> datetime: - return datetime.fromisoformat(iso).replace(tzinfo=timezone.utc) - - def print_table(rows: list[dict], hr_after=False): if not rows: return