From ce4ae955bf8bcbbc8a107fbac6f2a0cb5cedace2 Mon Sep 17 00:00:00 2001 From: anubhav Date: Wed, 29 Jan 2025 22:51:32 +0530 Subject: [PATCH 1/2] Change url from required to optional parameter for SplashRequest and require scrapy >= 2.4 --- scrapy_splash/request.py | 4 +++- scrapy_splash/utils.py | 7 +------ setup.py | 2 +- tests/test_fingerprints.py | 6 +++--- tests/test_middleware.py | 15 +++++++++++++++ tox.ini | 2 +- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/scrapy_splash/request.py b/scrapy_splash/request.py index 4471054..b3e5819 100644 --- a/scrapy_splash/request.py +++ b/scrapy_splash/request.py @@ -31,7 +31,7 @@ class SplashRequest(scrapy.Request): It requires SplashMiddleware to work. """ def __init__(self, - url, + url=None, callback=None, method='GET', endpoint='render.html', @@ -48,6 +48,8 @@ def __init__(self, meta=None, **kwargs): + if url is None: + url = 'about:blank' url = to_unicode(url) meta = copy.deepcopy(meta) or {} diff --git a/scrapy_splash/utils.py b/scrapy_splash/utils.py index 3f192f2..03c805c 100644 --- a/scrapy_splash/utils.py +++ b/scrapy_splash/utils.py @@ -5,12 +5,7 @@ import six from scrapy.http import Headers -import scrapy -if scrapy.version_info >= (2, ): - from scrapy.utils.python import to_unicode -else: - from scrapy.utils.python import to_native_str as to_unicode -from scrapy.utils.python import to_bytes +from scrapy.utils.python import to_unicode, to_bytes def dict_hash(obj, start=''): diff --git a/setup.py b/setup.py index 9d1981c..7513d24 100755 --- a/setup.py +++ b/setup.py @@ -30,5 +30,5 @@ 'Topic :: Software Development :: Libraries :: Application Frameworks', 'Topic :: Software Development :: Libraries :: Python Modules', ], - install_requires=['scrapy', 'six'], + install_requires=['scrapy>=2.4', 'six'], ) diff --git a/tests/test_fingerprints.py b/tests/test_fingerprints.py index 22d4b06..09edb19 100644 --- a/tests/test_fingerprints.py +++ b/tests/test_fingerprints.py @@ -142,9 +142,9 @@ def 
requests(): dict(url=url2, args={'wait': 0.5}), # 5 dict(url=url3), # 6 dict(url=url2, method='POST'), # 7 - dict(url=url3, args={'wait': 0.5}), # 8 - dict(url=url3, args={'wait': 0.5}), # 9 - dict(url=url3, args={'wait': 0.7}), # 10 + dict(args={'wait': 0.5}), # 8 + dict(args={'wait': 0.5}), # 9 + dict(args={'wait': 0.7}), # 10 dict(url=url4), # 11 ] splash_requests = [SplashRequest(**kwargs) for kwargs in request_kwargs] diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 76f8c6b..3f53858 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -630,6 +630,21 @@ def test_cache_args(): assert mw._remote_keys == {} +def test_splash_request_no_url(): + mw = _get_mw() + lua_source = "function main(splash) return {result='ok'} end" + req1 = SplashRequest(meta={'splash': { + 'args': {'lua_source': lua_source}, + 'endpoint': 'execute', + }}) + req = mw.process_request(req1, None) + assert req.url == 'http://127.0.0.1:8050/execute' + assert json.loads(to_unicode(req.body)) == { + 'url': 'about:blank', + 'lua_source': lua_source + } + + def test_post_request(): mw = _get_mw() for body in [b'', b'foo=bar']: diff --git a/tox.ini b/tox.ini index c624590..effb228 100644 --- a/tox.ini +++ b/tox.ini @@ -20,7 +20,7 @@ deps = passenv = SPLASH_URL deps = {[common]deps} - scrapy + scrapy >= 2.4.0 commands = pip install -e . py.test --doctest-modules --doctest-glob '*.py,*.rst' --cov=scrapy_splash {posargs:README.rst scrapy_splash tests} From 2ea59d9fcbe340f569e3b47e6edb935321120a15 Mon Sep 17 00:00:00 2001 From: anubhav Date: Wed, 29 Jan 2025 23:26:14 +0530 Subject: [PATCH 2/2] Removed version restriction for scrapy in tox.ini --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index effb228..c624590 100644 --- a/tox.ini +++ b/tox.ini @@ -20,7 +20,7 @@ deps = passenv = SPLASH_URL deps = {[common]deps} - scrapy >= 2.4.0 + scrapy commands = pip install -e . 
py.test --doctest-modules --doctest-glob '*.py,*.rst' --cov=scrapy_splash {posargs:README.rst scrapy_splash tests}