Skip to content

Commit

Permalink
add python 3.12 support
Browse files Browse the repository at this point in the history
  • Loading branch information
vladkens committed Nov 1, 2023
1 parent fd64ce2 commit 6a232da
Show file tree
Hide file tree
Showing 17 changed files with 100 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand Down
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python 3.12.0
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": ["source.organizeImports"]
"editor.codeActionsOnSave": ["source.organizeImports"],
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "black"
}
11 changes: 11 additions & 0 deletions Dockerfile.python
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Python version of the base image; override with --build-arg VER=<version>.
ARG VER=3.12

FROM python:${VER}-alpine
# --no-cache fetches the package index on the fly and keeps it out of the image layer.
RUN apk add --no-cache git

WORKDIR /app
# Only the packaging metadata is copied before the install, so Docker can reuse
# the install layer for as long as these two files are unchanged.
COPY pyproject.toml readme.md /app/
# Quoted so the shell cannot expand .[dev] as a glob pattern;
# --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir -e ".[dev]"
COPY . /app

# Shell form is required here because two commands are chained with ';'.
CMD python --version; pytest tests/
File renamed without changes.
40 changes: 22 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
.PHONY: all build

SQTEST = docker -l warning build -f sqlite.dockerfile

all:
@echo "hi"

Expand Down Expand Up @@ -38,27 +36,33 @@ show-cov:
@coverage html
@open htmlcov/index.html

act:
@act --container-architecture linux/amd64

changelog:
@git pull origin --tags > /dev/null
@git log $(shell git describe --tags --abbrev=0 HEAD)^..HEAD --pretty=format:'- %s'

test34:
# Build the Dockerfile.python image with VER=$(v) and run the resulting container.
# Usage: make test-py v=3.12
.PHONY: test-py
test-py:
	$(if $(strip $(v)),,$(error usage: make test-py v=<python-version>))
	@docker -l warning build -f Dockerfile.python --build-arg VER=$(v) -t twscrape_py$(v) .
	@docker run twscrape_py$(v)

# Build the Dockerfile.sqlite image with SQLY=$(y) and SQLV=$(v) and run the resulting container.
# NOTE(review): y/v look like the release year and numeric version from the SQLite
# chronology page - confirm against Dockerfile.sqlite.
# Usage: make test-sq y=2023 v=3430000
.PHONY: test-sq
test-sq:
	$(if $(strip $(y)),,$(error usage: make test-sq y=<year> v=<sqlite-version>))
	$(if $(strip $(v)),,$(error usage: make test-sq y=<year> v=<sqlite-version>))
	@docker -l warning build -f Dockerfile.sqlite --build-arg SQLY=$(y) --build-arg SQLV=$(v) -t twscrape_sq$(v) .
	@docker run twscrape_sq$(v)

# Run the test-py target once each for Python 3.10, 3.11 and 3.12.
# $(MAKE) is used instead of a literal `make` so that -n, -j/jobserver and
# command-line flags are passed on to the sub-make.
.PHONY: test-py-matrix
test-py-matrix:
	@$(MAKE) test-py v=3.10
	@$(MAKE) test-py v=3.11
	@$(MAKE) test-py v=3.12

test-sq-matrix:
@# https://www.sqlite.org/chronology.html
@$(SQTEST) --build-arg SQLY=2018 --build-arg SQLV=3240000 -t twscrape_sq24 .
@$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3270200 -t twscrape_sq27 .
@$(SQTEST) --build-arg SQLY=2019 --build-arg SQLV=3300100 -t twscrape_sq30 .
@$(SQTEST) --build-arg SQLY=2020 --build-arg SQLV=3330000 -t twscrape_sq33 .
@$(SQTEST) --build-arg SQLY=2021 --build-arg SQLV=3340100 -t twscrape_sq34 .
@$(SQTEST) --build-arg SQLY=2023 --build-arg SQLV=3430000 -t twscrape_sq43 .
@docker run twscrape_sq24
@docker run twscrape_sq27
@docker run twscrape_sq30
@docker run twscrape_sq33
@docker run twscrape_sq34
@docker run twscrape_sq43
@# recurse through the MAKE variable so -n, -j/jobserver and command-line flags reach each sub-make
@$(MAKE) test-sq y=2018 v=3240000
@$(MAKE) test-sq y=2019 v=3270200
@$(MAKE) test-sq y=2019 v=3300100
@$(MAKE) test-sq y=2020 v=3330000
@$(MAKE) test-sq y=2021 v=3340100
@$(MAKE) test-sq y=2023 v=3430000

update-mocks:
twscrape user_by_id --raw 2244994945 | jq > ./tests/mocked-data/user_by_id_raw.json
Expand Down
38 changes: 21 additions & 17 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,23 @@ classifiers = [
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
]
dependencies = [
"aiosqlite==0.17.0",
"fake-useragent==1.2.1",
"httpx==0.24.0",
"loguru==0.7.0",
"aiosqlite>=0.17.0",
"fake-useragent>=1.3.0",
"httpx>=0.24.0",
"loguru>=0.7.0",
]

[project.optional-dependencies]
dev = [
"pylint==2.17.3",
"pytest-asyncio==0.21.0",
"pytest-cov==4.0.0",
"pytest-httpx==0.22.0",
"pytest==7.3.1",
"ruff==0.0.263",
"pylint>=2.17.3",
"pytest-asyncio>=0.21.0",
"pytest-cov>=4.0.0",
"pytest-httpx>=0.22.0",
"pytest>=7.4.0",
"ruff"
]

[project.urls]
Expand All @@ -46,13 +47,13 @@ packages = ['twscrape']
[tool.pylint]
max-line-length = 99
disable = [
"C0103", # invalid-name
"C0114", # missing-module-docstring
"C0115", # missing-class-docstring
"C0116", # missing-function-docstring
"R0903", # too-few-public-methods
"R0913", # too-many-arguments
"W0105", # pointless-string-statement
"C0103", # invalid-name
"C0114", # missing-module-docstring
"C0115", # missing-class-docstring
"C0116", # missing-function-docstring
"R0903", # too-few-public-methods
"R0913", # too-many-arguments
"W0105", # pointless-string-statement
]

[tool.pytest.ini_options]
Expand All @@ -67,3 +68,6 @@ line-length = 99

[tool.ruff]
line-length = 99

[tool.hatch.metadata]
allow-direct-references = true
4 changes: 2 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
<a href="https://github.com/vladkens/twscrape/actions">
<img src="https://github.com/vladkens/twscrape/workflows/test/badge.svg" alt="test status" />
</a>
<!-- <a href="https://pypi.org/project/twscrape">
<a href="https://pypi.org/project/twscrape">
<img src="https://badgen.net/pypi/dm/twscrape" alt="downloads" />
</a> -->
</a>
<a href="https://github.com/vladkens/twscrape/blob/main/LICENSE">
<img src="https://badgen.net/github/license/vladkens/twscrape" alt="license" />
</a>
Expand Down
1 change: 1 addition & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ async def test_user_tweets_and_replies():
for doc in tweets:
check_tweet(doc)


async def test_list_timeline():
api = API()
mock_gen(api, "list_timeline_raw")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pool.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from twscrape.accounts_pool import AccountsPool
from twscrape.utils import utc_ts
from twscrape.utils import utc


async def test_add_accounts(pool_mock: AccountsPool):
Expand Down Expand Up @@ -102,7 +102,7 @@ async def test_account_unlock(pool_mock: AccountsPool):
assert acc.locks[Q] is not None

# should update lock time
end_time = utc_ts() + 60 # + 1 minute
end_time = utc.ts() + 60 # + 1 minute
await pool_mock.lock_until(acc.username, Q, end_time)

acc = await pool_mock.get(acc.username)
Expand Down
6 changes: 3 additions & 3 deletions twscrape/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from .constants import TOKEN
from .models import JSONTrait
from .utils import from_utciso
from .utils import utc


@dataclass
Expand All @@ -30,12 +30,12 @@ class Account(JSONTrait):
@staticmethod
def from_rs(rs: sqlite3.Row):
doc = dict(rs)
doc["locks"] = {k: from_utciso(v) for k, v in json.loads(doc["locks"]).items()}
doc["locks"] = {k: utc.from_iso(v) for k, v in json.loads(doc["locks"]).items()}
doc["stats"] = {k: v for k, v in json.loads(doc["stats"]).items() if isinstance(v, int)}
doc["headers"] = json.loads(doc["headers"])
doc["cookies"] = json.loads(doc["cookies"])
doc["active"] = bool(doc["active"])
doc["last_used"] = from_utciso(doc["last_used"]) if doc["last_used"] else None
doc["last_used"] = utc.from_iso(doc["last_used"]) if doc["last_used"] else None
return Account(**doc)

def to_rs(self):
Expand Down
13 changes: 6 additions & 7 deletions twscrape/accounts_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .db import execute, fetchall, fetchone
from .logger import logger
from .login import login
from .utils import parse_cookies, utc_ts
from .utils import parse_cookies, utc


class AccountInfo(TypedDict):
Expand Down Expand Up @@ -197,7 +197,7 @@ async def lock_until(self, username: str, queue: str, unlock_at: int, req_count=
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime({unlock_at}, 'unixepoch')),
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
last_used = datetime({utc_ts()}, 'unixepoch')
last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = :username
"""
await execute(self._db_file, qs, {"username": username})
Expand All @@ -207,7 +207,7 @@ async def unlock(self, username: str, queue: str, req_count=0):
UPDATE accounts SET
locks = json_remove(locks, '$.{queue}'),
stats = json_set(stats, '$.{queue}', COALESCE(json_extract(stats, '$.{queue}'), 0) + {req_count}),
last_used = datetime({utc_ts()}, 'unixepoch')
last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = :username
"""
await execute(self._db_file, qs, {"username": username})
Expand All @@ -228,7 +228,7 @@ async def get_for_queue(self, queue: str):
qs = f"""
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
last_used = datetime({utc_ts()}, 'unixepoch')
last_used = datetime({utc.ts()}, 'unixepoch')
WHERE username = ({q1})
RETURNING *
"""
Expand All @@ -238,7 +238,7 @@ async def get_for_queue(self, queue: str):
qs = f"""
UPDATE accounts SET
locks = json_set(locks, '$.{queue}', datetime('now', '+15 minutes')),
last_used = datetime({utc_ts()}, 'unixepoch'),
last_used = datetime({utc.ts()}, 'unixepoch'),
_tx = '{tx}'
WHERE username = ({q1})
"""
Expand Down Expand Up @@ -277,8 +277,7 @@ async def next_available_at(self, queue: str):
"""
rs = await fetchone(self._db_file, qs)
if rs:
now = datetime.utcnow().replace(tzinfo=timezone.utc)
trg = datetime.fromisoformat(rs[0]).replace(tzinfo=timezone.utc)
now, trg = utc.now(), utc.from_iso(rs[0])
if trg < now:
return "now"

Expand Down
4 changes: 2 additions & 2 deletions twscrape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ async def user_by_id_raw(self, uid: int, kv=None):
"hidden_profile_likes_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"hidden_profile_subscriptions_enabled": True
"hidden_profile_subscriptions_enabled": True,
}
return await self._gql_item(op, kv, ft)

Expand All @@ -128,7 +128,7 @@ async def user_by_login_raw(self, login: str, kv=None):
"creator_subscriptions_tweet_preview_api_enabled": True,
"subscriptions_verification_info_verified_since_enabled": True,
"hidden_profile_subscriptions_enabled": True,
"subscriptions_verification_info_is_identity_verified_enabled": False
"subscriptions_verification_info_is_identity_verified_enabled": False,
}
return await self._gql_item(op, kv, ft)

Expand Down
6 changes: 3 additions & 3 deletions twscrape/login.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from datetime import datetime, timedelta, timezone
from datetime import timedelta

from httpx import AsyncClient, HTTPStatusError, Response

from .account import Account
from .constants import LOGIN_URL
from .imap import imap_get_email_code, imap_login
from .logger import logger
from .utils import raise_for_status
from .utils import raise_for_status, utc


async def get_guest_token(client: AsyncClient):
Expand Down Expand Up @@ -120,7 +120,7 @@ async def login_confirm_email_code(client: AsyncClient, acc: Account, prev: dict
if not imap:
imap = await imap_login(acc.email, acc.email_password)

now_time = datetime.now(timezone.utc) - timedelta(seconds=30)
now_time = utc.now() - timedelta(seconds=30)
value = await imap_get_email_code(imap, acc.email, now_time)

payload = {
Expand Down
4 changes: 2 additions & 2 deletions twscrape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import httpx

from .logger import logger
from .utils import find_item, get_or, int_or, to_old_rep
from .utils import find_item, get_or, int_or, to_old_rep, utc


@dataclass
Expand Down Expand Up @@ -407,7 +407,7 @@ def _get_views(obj: dict, rt_obj: dict):

def _write_dump(kind: str, e: Exception, x: dict, obj: dict):
uniq = "".join(random.choice(string.ascii_lowercase) for _ in range(5))
time = datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S")
time = utc.now().strftime("%Y-%m-%d_%H-%M-%S")
dumpfile = f"/tmp/twscrape/twscrape_parse_error_{time}_{uniq}.txt"
os.makedirs(os.path.dirname(dumpfile), exist_ok=True)

Expand Down
14 changes: 7 additions & 7 deletions twscrape/queue_client.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import json
import os
from datetime import datetime
from typing import Any

import httpx

from .accounts_pool import Account, AccountsPool
from .logger import logger
from .utils import utc_ts
from .utils import utc

ReqParams = dict[str, str | int] | None
TMP_TS = datetime.utcnow().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]
TMP_TS = utc.now().isoformat().split(".")[0].replace("T", "_").replace(":", "-")[0:16]


class Ctx:
Expand Down Expand Up @@ -39,6 +38,7 @@ class RateLimitError(Exception):
class BannedError(Exception):
pass


class DependencyError(Exception):
pass

Expand Down Expand Up @@ -151,7 +151,7 @@ async def _check_rep(self, rep: httpx.Response):

# possible new limits for tweets view per account
if msg.startswith("(88) Rate limit exceeded") or rep.status_code == 429:
await self._close_ctx(utc_ts() + 60 * 60 * 4) # lock for 4 hours
await self._close_ctx(utc.ts() + 60 * 60 * 4) # lock for 4 hours
raise RateLimitError(msg)

if msg.startswith("(326) Authorization: Denied by access control"):
Expand All @@ -163,7 +163,7 @@ async def _check_rep(self, rep: httpx.Response):

# possible banned by old api flow
if rep.status_code in (401, 403):
await self._close_ctx(utc_ts() + 60 * 60 * 12) # lock for 12 hours
await self._close_ctx(utc.ts() + 60 * 60 * 12) # lock for 12 hours
raise RateLimitError(msg)

# content not found
Expand Down Expand Up @@ -196,7 +196,7 @@ async def req(self, method: str, url: str, params: ReqParams = None):
except (RateLimitError, BannedError):
# already handled
continue
except (DependencyError):
except DependencyError:
logger.error(f"Dependency error, returnning: {url}")
return
except (httpx.ReadTimeout, httpx.ProxyError):
Expand All @@ -206,4 +206,4 @@ async def req(self, method: str, url: str, params: ReqParams = None):
retry_count += 1
if retry_count >= 3:
logger.warning(f"Unknown error {type(e)}: {e}")
await self._close_ctx(utc_ts() + 60 * 15) # 15 minutes
await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes
Loading

0 comments on commit 6a232da

Please sign in to comment.