diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2f5287c..7c070ab 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.10", "3.11"]
+ python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
diff --git a/.tool-versions b/.tool-versions
new file mode 100644
index 0000000..1569bf5
--- /dev/null
+++ b/.tool-versions
@@ -0,0 +1 @@
+python 3.12.0
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 1d23fc8..0948849 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -8,7 +8,7 @@
},
"[python]": {
"editor.formatOnSave": true,
- "editor.codeActionsOnSave": ["source.organizeImports"]
+ "editor.codeActionsOnSave": ["source.organizeImports"],
+ "editor.defaultFormatter": "ms-python.black-formatter"
- },
- "python.formatting.provider": "black"
+ }
}
diff --git a/Dockerfile.python b/Dockerfile.python
new file mode 100644
index 0000000..8f86239
--- /dev/null
+++ b/Dockerfile.python
@@ -0,0 +1,16 @@
+# Test image: runs the pytest suite against a selectable CPython release.
+# Pick the interpreter with `--build-arg VER=<major.minor>`.
+ARG VER=3.12
+
+FROM python:${VER}-alpine
+# --no-cache keeps the apk package index out of the image layer.
+RUN apk add --no-cache git
+
+WORKDIR /app
+# Copy only the packaging metadata before installing so Docker can reuse the
+# dependency layer when just the sources change.
+COPY pyproject.toml readme.md /app/
+RUN pip install --no-cache-dir -e .[dev]
+COPY . /app
+
+CMD python --version; pytest tests/
diff --git a/sqlite.dockerfile b/Dockerfile.sqlite
similarity index 100%
rename from sqlite.dockerfile
rename to Dockerfile.sqlite
diff --git a/Makefile b/Makefile
index baa10d1..d7d0427 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,5 @@
.PHONY: all build
-SQTEST = docker -l warning build -f sqlite.dockerfile
-
all:
@echo "hi"
@@ -38,27 +36,33 @@ show-cov:
@coverage html
@open htmlcov/index.html
-act:
- @act --container-architecture linux/amd64
-
changelog:
@git pull origin --tags > /dev/null
@git log $(shell git describe --tags --abbrev=0 HEAD)^..HEAD --pretty=format:'- %s'
-test34:
+test-py:  # build the Dockerfile.python image with VER=v, tag it twscrape_py<v>, then run it
+ $(eval name=twscrape_py$(v))
+ @docker -l warning build -f Dockerfile.python --build-arg VER=$(v) -t $(name) .
+ @docker run $(name)
+
+test-sq:  # build the Dockerfile.sqlite image with SQLY=y and SQLV=v, tag it twscrape_sq<v>, then run it
+ $(eval name=twscrape_sq$(v))
+ @docker -l warning build -f Dockerfile.sqlite --build-arg SQLY=$(y) --build-arg SQLV=$(v) -t $(name) .
+ @docker run $(name)
+
+test-py-matrix:  # run test-py once per supported Python version; $(MAKE) forwards flags to the sub-make
+	@$(MAKE) test-py v=3.10
+	@$(MAKE) test-py v=3.11
+	@$(MAKE) test-py v=3.12
+
+test-sq-matrix:  # run test-sq once per pinned SQLite release; $(MAKE) forwards flags to the sub-make
@# https://www.sqlite.org/chronology.html
- @$(SQTEST) --build-arg SQLY=2018 --build-arg SQLV=3240000 -t twscrape_sq24 .
- @$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3270200 -t twscrape_sq27 .
- @$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3300100 -t twscrape_sq30 .
- @$(SQTEST) --build-arg SQLY=2020 --build-arg SQLV=3330000 -t twscrape_sq33 .
- @$(SQTEST) --build-arg SQLY=2021 --build-arg SQLV=3340100 -t twscrape_sq34 .
- @$(SQTEST) --build-arg SQLY=2023 --build-arg SQLV=3430000 -t twscrape_sq43 .
- @docker run twscrape_sq24
- @docker run twscrape_sq27
- @docker run twscrape_sq30
- @docker run twscrape_sq33
- @docker run twscrape_sq34
- @docker run twscrape_sq43
+	@$(MAKE) test-sq y=2018 v=3240000
+	@$(MAKE) test-sq y=2019 v=3270200
+	@$(MAKE) test-sq y=2019 v=3300100
+	@$(MAKE) test-sq y=2020 v=3330000
+	@$(MAKE) test-sq y=2021 v=3340100
+	@$(MAKE) test-sq y=2023 v=3430000
update-mocks:
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
diff --git a/pyproject.toml b/pyproject.toml
index ebb5d1d..2a62189 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,22 +16,23 @@ classifiers = [
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
+ 'Programming Language :: Python :: 3.12',
]
dependencies = [
- "aiosqlite==0.17.0",
- "fake-useragent==1.2.1",
- "httpx==0.24.0",
- "loguru==0.7.0",
+ "aiosqlite>=0.17.0",
+ "fake-useragent>=1.3.0",
+ "httpx>=0.24.0",
+ "loguru>=0.7.0",
]
[project.optional-dependencies]
dev = [
- "pylint==2.17.3",
- "pytest-asyncio==0.21.0",
- "pytest-cov==4.0.0",
- "pytest-httpx==0.22.0",
- "pytest==7.3.1",
- "ruff==0.0.263",
+ "pylint>=2.17.3",
+ "pytest-asyncio>=0.21.0",
+ "pytest-cov>=4.0.0",
+ "pytest-httpx>=0.22.0",
+ "pytest>=7.4.0",
+ "ruff>=0.0.263", # floor is the previously pinned release, matching the other entries
]
[project.urls]
@@ -46,13 +47,13 @@ packages = ['twscrape']
[tool.pylint]
max-line-length = 99
disable = [
- "C0103", # invalid-name
- "C0114", # missing-module-docstring
- "C0115", # missing-class-docstring
- "C0116", # missing-function-docstring
- "R0903", # too-few-public-methods
- "R0913", # too-many-arguments
- "W0105", # pointless-string-statement
+ "C0103", # invalid-name
+ "C0114", # missing-module-docstring
+ "C0115", # missing-class-docstring
+ "C0116", # missing-function-docstring
+ "R0903", # too-few-public-methods
+ "R0913", # too-many-arguments
+ "W0105", # pointless-string-statement
]
[tool.pytest.ini_options]
@@ -67,3 +68,6 @@ line-length = 99
[tool.ruff]
line-length = 99
+
+[tool.hatch.metadata]
+allow-direct-references = true
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 86d3ee3..52794b3 100644
--- a/readme.md
+++ b/readme.md
@@ -10,9 +10,9 @@
-
+
diff --git a/tests/test_parser.py b/tests/test_parser.py
index fddbec1..4ff9f6a 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -252,6 +252,7 @@ async def test_user_tweets_and_replies():
for doc in tweets:
check_tweet(doc)
+
async def test_list_timeline():
api = API()
mock_gen(api, "list_timeline_raw")
diff --git a/tests/test_pool.py b/tests/test_pool.py
index 19e5095..3f1f0a9 100644
--- a/tests/test_pool.py
+++ b/tests/test_pool.py
@@ -1,5 +1,5 @@
from twscrape.accounts_pool import AccountsPool
-from twscrape.utils import utc_ts
+from twscrape.utils import utc
async def test_add_accounts(pool_mock: AccountsPool):
@@ -102,7 +102,7 @@ async def test_account_unlock(pool_mock: AccountsPool):
assert acc.locks[Q] is not None
# should update lock time
- end_time = utc_ts() + 60 # + 1 minute
+ end_time = utc.ts() + 60 # + 1 minute
await pool_mock.lock_until(acc.username, Q, end_time)
acc = await pool_mock.get(acc.username)
diff --git a/twscrape/account.py b/twscrape/account.py
index aab9b77..d567d7c 100644
--- a/twscrape/account.py
+++ b/twscrape/account.py
@@ -7,7 +7,7 @@
from .constants import TOKEN
from .models import JSONTrait
-from .utils import from_utciso
+from .utils import utc
@dataclass
@@ -30,12 +30,12 @@ class Account(JSONTrait):
@staticmethod
def from_rs(rs: sqlite3.Row):
doc = dict(rs)
- doc["locks"] = {k: from_utciso(v) for k, v in json.loads(doc["locks"]).items()}
+ doc["locks"] = {k: utc.from_iso(v) for k, v in json.loads(doc["locks"]).items()}
doc["stats"] = {k: v for k, v in json.loads(doc["stats"]).items() if isinstance(v, int)}
doc["headers"] = json.loads(doc["headers"])
doc["cookies"] = json.loads(doc["cookies"])
doc["active"] = bool(doc["active"])
- doc["last_used"] = from_utciso(doc["last_used"]) if doc["last_used"] else None
+ doc["last_used"] = utc.from_iso(doc["last_used"]) if doc["last_used"] else None
return Account(**doc)
def to_rs(self):
diff --git a/twscrape/accounts_pool.py b/twscrape/accounts_pool.py
index 6733ecd..0f6e189 100644
--- a/twscrape/accounts_pool.py
+++ b/twscrape/accounts_pool.py
@@ -11,7 +11,7 @@
from .db import execute, fetchall, fetchone
from .logger import logger
from .login import login
-from .utils import parse_cookies, utc_ts
+from .utils import parse_cookies, utc
class AccountInfo(TypedDict):
@@ -197,7 +197,7 @@ async def lock_until(self, username: str, queue: str, unlock_at: int, req_count=
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime({unlock_at}, 'unixepoch')),
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
- last_used = datetime({utc_ts()}, 'unixepoch')
+ last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = :username
"""
await execute(self._db_file, qs, {"username": username})
@@ -207,7 +207,7 @@ async def unlock(self, username: str, queue: str, req_count=0):
UPDATE accounts SET
locks = json_remove(locks, '$.{queue}'),
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
- last_used = datetime({utc_ts()}, 'unixepoch')
+ last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = :username
"""
await execute(self._db_file, qs, {"username": username})
@@ -228,7 +228,7 @@ async def get_for_queue(self, queue: str):
qs = f"""
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
- last_used = datetime({utc_ts()}, 'unixepoch')
+ last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = ({q1})
RETURNING *
"""
@@ -238,7 +238,7 @@ async def get_for_queue(self, queue: str):
qs = f"""
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
- last_used = datetime({utc_ts()}, 'unixepoch'),
+ last_used = datetime({utc.ts()}, 'unixepoch'),
_tx = '{tx}'
WHERE username = ({q1})
"""
@@ -277,8 +277,7 @@ async def next_available_at(self, queue: str):
"""
rs = await fetchone(self._db_file, qs)
if rs:
- now = datetime.utcnow().replace(tzinfo=timezone.utc)
- trg = datetime.fromisoformat(rs[0]).replace(tzinfo=timezone.utc)
+ now, trg = utc.now(), utc.from_iso(rs[0])
if trg < now:
return "now"
diff --git a/twscrape/api.py b/twscrape/api.py
index 166b291..1827e22 100644
--- a/twscrape/api.py
+++ b/twscrape/api.py
@@ -109,7 +109,7 @@ async def user_by_id_raw(self, uid: int, kv=None):
"hidden_profile_likes_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
- "hidden_profile_subscriptions_enabled": True
+ "hidden_profile_subscriptions_enabled": True,
}
return await self._gql_item(op, kv, ft)
@@ -128,7 +128,7 @@ async def user_by_login_raw(self, login: str, kv=None):
"creator_subscriptions_tweet_preview_api_enabled": True,
"subscriptions_verification_info_verified_since_enabled": True,
"hidden_profile_subscriptions_enabled": True,
- "subscriptions_verification_info_is_identity_verified_enabled": False
+ "subscriptions_verification_info_is_identity_verified_enabled": False,
}
return await self._gql_item(op, kv, ft)
diff --git a/twscrape/login.py b/twscrape/login.py
index b99b9dc..4da3c87 100644
--- a/twscrape/login.py
+++ b/twscrape/login.py
@@ -1,4 +1,4 @@
-from datetime import datetime, timedelta, timezone
+from datetime import timedelta
from httpx import AsyncClient, HTTPStatusError, Response
@@ -6,7 +6,7 @@
from .constants import LOGIN_URL
from .imap import imap_get_email_code, imap_login
from .logger import logger
-from .utils import raise_for_status
+from .utils import raise_for_status, utc
async def get_guest_token(client: AsyncClient):
@@ -120,7 +120,7 @@ async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict
if not imap:
imap = await imap_login(acc.email, acc.email_password)
- now_time = datetime.now(timezone.utc) - timedelta(seconds=30)
+ now_time = utc.now() - timedelta(seconds=30)
value = await imap_get_email_code(imap, acc.email, now_time)
payload = {
diff --git a/twscrape/models.py b/twscrape/models.py
index dd77091..c3706c3 100644
--- a/twscrape/models.py
+++ b/twscrape/models.py
@@ -12,7 +12,7 @@
import httpx
from .logger import logger
-from .utils import find_item, get_or, int_or, to_old_rep
+from .utils import find_item, get_or, int_or, to_old_rep, utc
@dataclass
@@ -407,7 +407,7 @@ def _get_views(obj: dict, rt_obj: dict):
def _write_dump(kind: str, e: Exception, x: dict, obj: dict):
uniq = "".join(random.choice(string.ascii_lowercase) for _ in range(5))
- time = datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S")
+ time = utc.now().strftime("%Y-%m-%d_%H-%M-%S")
dumpfile = f"/tmp/twscrape/twscrape_parse_error_{time}_{uniq}.txt"
os.makedirs(os.path.dirname(dumpfile), exist_ok=True)
diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py
index 454cd50..e40356e 100644
--- a/twscrape/queue_client.py
+++ b/twscrape/queue_client.py
@@ -1,16 +1,15 @@
import json
import os
-from datetime import datetime
from typing import Any
import httpx
from .accounts_pool import Account, AccountsPool
from .logger import logger
-from .utils import utc_ts
+from .utils import utc
ReqParams = dict[str, str | int] | None
-TMP_TS = datetime.utcnow().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]
+TMP_TS = utc.now().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]
class Ctx:
@@ -39,6 +38,7 @@ class RateLimitError(Exception):
class BannedError(Exception):
pass
+
class DependencyError(Exception):
pass
@@ -151,7 +151,7 @@ async def _check_rep(self, rep: httpx.Response):
# possible new limits for tweets view per account
if msg.startswith("(88) Rate limit exceeded") or rep.status_code == 429:
- await self._close_ctx(utc_ts() + 60 * 60 * 4) # lock for 4 hours
+ await self._close_ctx(utc.ts() + 60 * 60 * 4) # lock for 4 hours
raise RateLimitError(msg)
if msg.startswith("(326) Authorization: Denied by access control"):
@@ -163,7 +163,7 @@ async def _check_rep(self, rep: httpx.Response):
# possible banned by old api flow
if rep.status_code in (401, 403):
- await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours
+ await self._close_ctx(utc.ts() + 60 * 60 * 12) # lock for 12 hours
raise RateLimitError(msg)
# content not found
@@ -196,7 +196,7 @@ async def req(self, method: str, url: str, params: ReqParams = None):
except (RateLimitError, BannedError):
# already handled
continue
- except (DependencyError):
+ except DependencyError:
logger.error(f"Dependency error, returnning: {url}")
return
except (httpx.ReadTimeout, httpx.ProxyError):
@@ -206,4 +206,4 @@ async def req(self, method: str, url: str, params: ReqParams = None):
retry_count += 1
if retry_count >= 3:
logger.warning(f"Unknown error {type(e)}: {e}")
- await self._close_ctx(utc_ts() + 60 * 15) # 15 minutes
+ await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
diff --git a/twscrape/utils.py b/twscrape/utils.py
index 5e1fd09..51eaa1a 100644
--- a/twscrape/utils.py
+++ b/twscrape/utils.py
@@ -11,6 +11,32 @@
T = TypeVar("T")
+class utc:
+    """Namespace of helpers that produce timezone-aware UTC datetimes."""
+
+    @staticmethod
+    def now() -> datetime:
+        """Return the current time as an aware datetime in UTC."""
+        return datetime.now(timezone.utc)
+
+    @staticmethod
+    def from_iso(iso: str) -> datetime:
+        """Parse an ISO-8601 string into an aware UTC datetime.
+
+        Naive values are taken to already be in UTC. Values that carry an
+        explicit offset are converted to UTC instead of being relabelled.
+        """
+        dt = datetime.fromisoformat(iso)
+        if dt.tzinfo is None:
+            return dt.replace(tzinfo=timezone.utc)
+        return dt.astimezone(timezone.utc)
+
+    @staticmethod
+    def ts() -> int:
+        """Return the current Unix timestamp in whole seconds."""
+        return int(utc.now().timestamp())
+
+
async def gather(gen: AsyncGenerator[T, None]) -> list[T]:
items = []
async for x in gen:
@@ -147,14 +161,6 @@ def to_old_rep(obj: dict) -> dict[str, dict]:
return {"tweets": {**tw1, **tw2}, "users": users}
-def utc_ts() -> int:
- return int(datetime.utcnow().replace(tzinfo=timezone.utc).timestamp())
-
-
-def from_utciso(iso: str) -> datetime:
- return datetime.fromisoformat(iso).replace(tzinfo=timezone.utc)
-
-
def print_table(rows: list[dict], hr_after=False):
if not rows:
return