From dbd9fb9fc376fd64444b7bc101cdd31d1c15f7e5 Mon Sep 17 00:00:00 2001 From: Ziloka <50429450+ziloka@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:47:10 -0500 Subject: [PATCH 1/3] prevent asyncio from hanging --- proxybroker/api.py | 2 +- proxybroker/checker.py | 2 +- proxybroker/cli.py | 3 ++- proxybroker/judge.py | 2 +- proxybroker/providers.py | 2 +- proxybroker/resolver.py | 2 +- proxybroker/server.py | 2 +- 7 files changed, 8 insertions(+), 7 deletions(-) diff --git a/proxybroker/api.py b/proxybroker/api.py index 71b61fe0..135b61e0 100644 --- a/proxybroker/api.py +++ b/proxybroker/api.py @@ -64,7 +64,7 @@ def __init__( stop_broker_on_sigint=True, **kwargs, ): - self._loop = loop or asyncio.get_event_loop_policy().get_event_loop() + self._loop = loop self._proxies = queue or asyncio.Queue() self._resolver = Resolver(loop=self._loop) self._timeout = timeout diff --git a/proxybroker/checker.py b/proxybroker/checker.py index bedad3f5..a510db37 100644 --- a/proxybroker/checker.py +++ b/proxybroker/checker.py @@ -43,7 +43,7 @@ def __init__( self._strict = strict self._dnsbl = dnsbl or [] self._types = types or {} - self._loop = loop or asyncio.get_event_loop() + self._loop = loop self._resolver = Resolver(loop=self._loop) self._req_http_proto = not types or bool( diff --git a/proxybroker/cli.py b/proxybroker/cli.py index 22bb3bd8..1ab4e1e1 100644 --- a/proxybroker/cli.py +++ b/proxybroker/cli.py @@ -387,7 +387,7 @@ def cli(args=sys.argv[1:]): ns.types.remove('HTTP') ns.types.append(('HTTP', ns.anon_lvl)) - loop = asyncio.get_event_loop_policy().get_event_loop() + loop = asyncio.new_event_loop() proxies = asyncio.Queue() broker = Broker( proxies, @@ -443,6 +443,7 @@ def cli(args=sys.argv[1:]): try: if tasks: + asyncio.set_event_loop(loop) loop.run_until_complete(asyncio.gather(*tasks)) if ns.show_stats: broker.show_stats(verbose=True) diff --git a/proxybroker/judge.py b/proxybroker/judge.py index d7dfe84d..4f1bbe8a 100644 --- a/proxybroker/judge.py +++ b/proxybroker/judge.py @@ -29,7 +29,7 @@ def __init__(self, url, timeout=8, verify_ssl=False, loop=None): self.marks = {'via': 0, 'proxy': 0} self.timeout = timeout self.verify_ssl = verify_ssl - self._loop = loop or asyncio.get_event_loop() + self._loop = loop self._resolver = Resolver(loop=self._loop) def __repr__(self): diff --git a/proxybroker/providers.py b/proxybroker/providers.py index 17b67ab5..6e9adf85 100644 --- a/proxybroker/providers.py +++ b/proxybroker/providers.py @@ -45,7 +45,7 @@ def __init__( self._proxies = set() # concurrent connections on the current provider self._sem_provider = asyncio.Semaphore(max_conn) - self._loop = loop or asyncio.get_event_loop() + self._loop = loop @property def proxies(self): diff --git a/proxybroker/resolver.py b/proxybroker/resolver.py index 10afb42e..5f945828 100644 --- a/proxybroker/resolver.py +++ b/proxybroker/resolver.py @@ -44,7 +44,7 @@ class Resolver: def __init__(self, timeout=5, loop=None): self._timeout = timeout - self._loop = loop or asyncio.get_event_loop() + self._loop = loop self._resolver = aiodns.DNSResolver(loop=self._loop) @staticmethod diff --git a/proxybroker/server.py b/proxybroker/server.py index b969c5e2..16c2c2c9 100644 --- a/proxybroker/server.py +++ b/proxybroker/server.py @@ -131,7 +131,7 @@ def __init__( ): self.host = host self.port = int(port) - self._loop = loop or asyncio.get_event_loop() + self._loop = loop self._timeout = timeout self._max_tries = max_tries self._backlog = backlog From ad18be4fdf329d0cfa4058b19848b531f1b8d568 Mon Sep 17 00:00:00 2001 From: Ziloka <50429450+ziloka@users.noreply.github.com> Date: Wed, 17 Jan 2024 20:16:31 -0500 Subject: [PATCH 2/3] add asyncio WindowsSelectorEventLoopPolicy workaround --- examples/basic.py | 9 +++++++-- examples/find_and_save.py | 8 ++++++-- examples/find_and_use.py | 6 +++++- examples/only_grab.py | 8 +++++++- examples/proxy_server.py | 6 +++++- examples/proxy_smtp_port.py | 8 ++++++-- examples/use_existing_proxy.py | 6 +++++- proxybroker/cli.py | 2 +- proxybroker/server.py | 3 +-- 9 files changed, 43 insertions(+), 13 deletions(-) diff --git a/examples/basic.py b/examples/basic.py index 12e2accb..ac9fa4d2 100644 --- a/examples/basic.py +++ b/examples/basic.py @@ -1,5 +1,6 @@ """Find and show 10 working HTTP(S) proxies.""" +import sys import asyncio from proxybroker import Broker @@ -12,10 +13,14 @@ async def show(proxies): break print('Found proxy: %s' % proxy) +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + +loop = asyncio.new_event_loop() +asyncio.set_event_loop(loop) proxies = asyncio.Queue() -broker = Broker(proxies) +broker = Broker(proxies, loop=loop) tasks = asyncio.gather(broker.find(types=['HTTP', 'HTTPS'], limit=10), show(proxies)) -loop = asyncio.get_event_loop() loop.run_until_complete(tasks) diff --git a/examples/find_and_save.py b/examples/find_and_save.py index 96adbf41..21300d24 100644 --- a/examples/find_and_save.py +++ b/examples/find_and_save.py @@ -1,9 +1,12 @@ """Find 10 working HTTP(S) proxies and save them to a file.""" +import sys import asyncio from proxybroker import Broker +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def save(proxies, filename): """Save proxies to a file.""" @@ -18,13 +21,14 @@ async def save(proxies, filename): def main(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) proxies = asyncio.Queue() - broker = Broker(proxies) + broker = Broker(proxies, loop=loop) tasks = asyncio.gather( broker.find(types=['HTTP', 'HTTPS'], limit=10), save(proxies, filename='proxies.txt'), ) - loop = asyncio.get_event_loop() loop.run_until_complete(tasks) diff --git a/examples/find_and_use.py b/examples/find_and_use.py index 72a3578e..1228fd7d 100644 --- a/examples/find_and_use.py +++ b/examples/find_and_use.py @@ -4,6 +4,7 @@ Perhaps it will be much useful and friendlier. """ +import sys import asyncio from urllib.parse import urlparse @@ -12,6 +13,8 @@ from proxybroker import Broker, ProxyPool from proxybroker.errors import NoProxyError +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def fetch(url, proxy_pool, timeout, loop): resp, proxy = None, None @@ -47,7 +50,8 @@ async def get_pages(urls, proxy_pool, timeout=10, loop=None): def main(): - loop = asyncio.get_event_loop() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) proxies = asyncio.Queue() proxy_pool = ProxyPool(proxies) diff --git a/examples/only_grab.py b/examples/only_grab.py index 9df2faa8..b5942be4 100644 --- a/examples/only_grab.py +++ b/examples/only_grab.py @@ -1,10 +1,13 @@ """Gather proxies from the providers without checking and save them to a file.""" +import sys import asyncio from proxybroker import Broker +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def save(proxies, filename): """Save proxies to a file.""" @@ -17,8 +20,11 @@ async def save(proxies, filename): def main(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + proxies = asyncio.Queue() - broker = Broker(proxies) + broker = Broker(proxies, loop=loop) tasks = asyncio.gather( broker.grab(countries=['US', 'GB'], limit=10), save(proxies, filename='proxies.txt'), diff --git a/examples/proxy_server.py b/examples/proxy_server.py index 212d02ab..1c8faddd 100644 --- a/examples/proxy_server.py +++ b/examples/proxy_server.py @@ -1,12 +1,15 @@ """Run a local proxy server that distributes incoming requests to external proxies.""" +import sys import asyncio import aiohttp from proxybroker import Broker +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def fetch(url, proxy_url): resp = None @@ -35,7 +38,8 @@ async def get_pages(urls, proxy_url): def main(): host, port = '127.0.0.1', 8888 # by default - loop = asyncio.get_event_loop() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80'] codes = [200, 301, 302] diff --git a/examples/proxy_smtp_port.py b/examples/proxy_smtp_port.py index ace344a0..53ee7373 100644 --- a/examples/proxy_smtp_port.py +++ b/examples/proxy_smtp_port.py @@ -1,10 +1,13 @@ """Find 10 working proxies supporting CONNECT method to 25 port (SMTP) and save them to a file.""" +import sys import asyncio from proxybroker import Broker +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def save(proxies, filename): """Save proxies to a file.""" @@ -17,8 +20,10 @@ async def save(proxies, filename): def main(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) proxies = asyncio.Queue() - broker = Broker(proxies, judges=['smtp://smtp.gmail.com'], max_tries=1) + broker = Broker(proxies, judges=['smtp://smtp.gmail.com'], max_tries=1, loop=loop) # Check proxy in spam databases (DNSBL). By default is disabled. # more databases: http://www.dnsbl.info/dnsbl-database-check.php @@ -35,7 +40,6 @@ def main(): broker.find(types=['CONNECT:25'], dnsbl=dnsbl, limit=10), save(proxies, filename='proxies.txt'), ) - loop = asyncio.get_event_loop() loop.run_until_complete(tasks) diff --git a/examples/use_existing_proxy.py b/examples/use_existing_proxy.py index f65fb537..27e2d730 100644 --- a/examples/use_existing_proxy.py +++ b/examples/use_existing_proxy.py @@ -1,10 +1,13 @@ """Run a local proxy server that distributes incoming requests to external proxies.""" +import sys import asyncio import aiohttp +if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) async def fetch(url, proxy_url): resp = None @@ -33,7 +36,8 @@ async def get_pages(urls, proxy_url): def main(): host, port = '127.0.0.1', 8888 # by default - loop = asyncio.get_event_loop() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) # types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80'] # codes = [200, 301, 302] diff --git a/proxybroker/cli.py b/proxybroker/cli.py index 1ab4e1e1..fa7d6f80 100644 --- a/proxybroker/cli.py +++ b/proxybroker/cli.py @@ -388,6 +388,7 @@ def cli(args=sys.argv[1:]): ns.types.append(('HTTP', ns.anon_lvl)) loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) proxies = asyncio.Queue() broker = Broker( proxies, @@ -443,7 +444,6 @@ def cli(args=sys.argv[1:]): try: if tasks: - asyncio.set_event_loop(loop) loop.run_until_complete(asyncio.gather(*tasks)) if ns.show_stats: broker.show_stats(verbose=True) diff --git a/proxybroker/server.py b/proxybroker/server.py index 16c2c2c9..5c8a19b9 100644 --- a/proxybroker/server.py +++ b/proxybroker/server.py @@ -151,8 +151,7 @@ def start(self): self._accept, host=self.host, port=self.port, - backlog=self._backlog, - loop=self._loop, + backlog=self._backlog ) self._server = self._loop.run_until_complete(srv) From 26fa1de70140da57b029e79f75309197f733a31d Mon Sep 17 00:00:00 2001 From: Ziloka <50429450+ziloka@users.noreply.github.com> Date: Mon, 25 Mar 2024 17:57:54 -0400 Subject: [PATCH 3/3] Update proxybroker/api.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- proxybroker/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxybroker/api.py b/proxybroker/api.py index 135b61e0..04f3c7ba 100644 --- a/proxybroker/api.py +++ b/proxybroker/api.py @@ -64,7 +64,7 @@ def __init__( stop_broker_on_sigint=True, **kwargs, ): - self._loop = loop + self._loop = loop if loop is not None else asyncio.get_event_loop() self._proxies = queue or asyncio.Queue() self._resolver = Resolver(loop=self._loop) self._timeout = timeout