From 13dec101c26800c341f7008e2baf595cd563db6f Mon Sep 17 00:00:00 2001 From: monosans Date: Fri, 19 Jan 2024 20:23:24 +0300 Subject: [PATCH] a --- README.md | 2 +- .../proxy_scraper_checker.py | 47 ++++++++++--------- proxy_scraper_checker/settings.py | 5 +- proxy_scraper_checker/sort.py | 8 +++- 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index af09c072a..d1c2a8219 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ You can get proxies obtained using this script in [monosans/proxy-list](https:// This is the easiest way, but it is only available for x64 Windows, macOS and Linux. Just download the archive for your OS from , unzip it, edit `config.toml` and run the executable. -If Windows Defender detects an executable file as a trojan, please read [this](https://github.com/Nuitka/Nuitka/issues/2495#issuecomment-1762836583). +If Windows Defender detects an executable file as a virus, please read [this](https://github.com/Nuitka/Nuitka/issues/2495#issuecomment-1762836583). ### Running from source code diff --git a/proxy_scraper_checker/proxy_scraper_checker.py b/proxy_scraper_checker/proxy_scraper_checker.py index 35a2d270e..dd76481be 100644 --- a/proxy_scraper_checker/proxy_scraper_checker.py +++ b/proxy_scraper_checker/proxy_scraper_checker.py @@ -123,7 +123,7 @@ def create_proxy_list_str( ) class ProxyScraperChecker: console: Console - proxies_count: Dict[ProxyType, int] = attrs.field(init=False, factory=dict) + proxies_count: Dict[ProxyType, int] = attrs.field(init=False) proxies: Set[Proxy] = attrs.field(init=False, factory=set) session: ClientSession settings: Settings @@ -243,9 +243,11 @@ async def fetch_all_sources(self, progress: Progress) -> None: async def check_all_proxies(self, progress: Progress) -> None: tasks = { proto: progress.add_task( - f"[yellow]Checker [red]:: [green]{proto.name}", total=count + f"[yellow]Checker [red]:: [green]{proto.name}", + total=self.proxies_count[proto], ) - for proto, count in self.get_current_proxies_count().items() + for proto in sort.PROTOCOL_ORDER + if proto in self.proxies_count } coroutines = [ self.check_proxy( @@ -319,15 +321,15 @@ def save_proxies(self) -> None: include_protocol=True, ) (folder / "all.txt").write_text(text, encoding="utf-8") - for proto, proxies in grouped_proxies: + for (_, proto), proxies in grouped_proxies: text = create_proxy_list_str( proxies=proxies, anonymous_only=anonymous_only, include_protocol=False, ) - ( - folder / f"{ProxyType(proto).name.lower()}.txt" - ).write_text(text, encoding="utf-8") + (folder / f"{proto.name.lower()}.txt").write_text( + text, encoding="utf-8" + ) logger.info( "Proxies have been saved at %s.", self.settings.output_path.absolute(), @@ -340,7 +342,7 @@ async def run(self) -> None: fetch = self.fetch_all_sources(progress) if self.settings.enable_geolocation: await asyncio.gather( - fetch, download_geodb(self.session, progress) + download_geodb(self.session, progress), fetch ) else: await fetch @@ -361,14 +363,12 @@ def _get_results_table(self) -> Table: table.add_column("Protocol", style="cyan") table.add_column("Working", style="magenta") table.add_column("Total", style="green") - for proto, proxies in self.get_grouped_proxies().items(): - working = len(tuple(proxies)) - total = self.proxies_count[ProxyType(proto)] - percentage = working / total if total else 0 + current_count = self.get_current_proxies_count() + for proto, total in self.proxies_count.items(): + working = current_count.get(proto, 0) + percentage = working / total table.add_row( - ProxyType(proto).name, - f"{working} ({percentage:.1%})", - str(total), + proto.name, f"{working} ({percentage:.1%})", str(total) ) return table @@ -382,15 +382,16 @@ def _get_progress_bar(self) -> Progress: def get_grouped_proxies(self) -> Dict[ProxyType, Tuple[Proxy, ...]]: key = sort.protocol_sort_key - return { - **{proto: () for proto in self.settings.sources}, - **{ - ProxyType(k): tuple(v) - for k, v in itertools.groupby( - sorted(self.proxies, key=key), key=key - ) - }, + d: Dict[ProxyType, Tuple[Proxy, ...]] = { + proto: () + for proto in sort.PROTOCOL_ORDER + if proto in self.proxies_count } + for (_, proto), v in itertools.groupby( + sorted(self.proxies, key=key), key=key + ): + d[proto] = tuple(v) + return d def get_sorted_proxies( self, diff --git a/proxy_scraper_checker/settings.py b/proxy_scraper_checker/settings.py index 4aedfcbeb..cbcd270c4 100644 --- a/proxy_scraper_checker/settings.py +++ b/proxy_scraper_checker/settings.py @@ -196,7 +196,10 @@ class Settings: attrs.validators.and_( attrs.validators.min_len(1), attrs.validators.deep_iterable( - attrs.validators.instance_of(str) + attrs.validators.and_( + attrs.validators.instance_of(str), + attrs.validators.min_len(1), + ) ), ), ), diff --git a/proxy_scraper_checker/sort.py b/proxy_scraper_checker/sort.py index af81c6dd9..fc98bc57e 100644 --- a/proxy_scraper_checker/sort.py +++ b/proxy_scraper_checker/sort.py @@ -2,11 +2,15 @@ from typing import Tuple +from aiohttp_socks import ProxyType + from .proxy import Proxy +PROTOCOL_ORDER = (ProxyType.HTTP, ProxyType.SOCKS4, ProxyType.SOCKS5) + -def protocol_sort_key(proxy: Proxy) -> int: - return proxy.protocol.value # type: ignore[no-any-return] +def protocol_sort_key(proxy: Proxy) -> Tuple[int, ProxyType]: + return (PROTOCOL_ORDER.index(proxy.protocol), proxy.protocol) def natural_sort_key(proxy: Proxy) -> Tuple[int, ...]: