Commit 13dec10

a

monosans committed Jan 19, 2024
1 parent 1f9236a commit 13dec10

Showing 4 changed files with 35 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md

@@ -22,7 +22,7 @@ You can get proxies obtained using this script in [monosans/proxy-list](https://
 
 This is the easiest way, but it is only available for x64 Windows, macOS and Linux. Just download the archive for your OS from <https://nightly.link/monosans/proxy-scraper-checker/workflows/ci/main?preview>, unzip it, edit `config.toml` and run the executable.
 
-If Windows Defender detects an executable file as a trojan, please read [this](https://github.com/Nuitka/Nuitka/issues/2495#issuecomment-1762836583).
+If Windows Defender detects an executable file as a virus, please read [this](https://github.com/Nuitka/Nuitka/issues/2495#issuecomment-1762836583).
 
 ### Running from source code
 
47 changes: 24 additions & 23 deletions proxy_scraper_checker/proxy_scraper_checker.py

@@ -123,7 +123,7 @@ def create_proxy_list_str(
 )
 class ProxyScraperChecker:
     console: Console
-    proxies_count: Dict[ProxyType, int] = attrs.field(init=False, factory=dict)
+    proxies_count: Dict[ProxyType, int] = attrs.field(init=False)
     proxies: Set[Proxy] = attrs.field(init=False, factory=set)
     session: ClientSession
     settings: Settings
@@ -243,9 +243,11 @@ async def fetch_all_sources(self, progress: Progress) -> None:
     async def check_all_proxies(self, progress: Progress) -> None:
         tasks = {
             proto: progress.add_task(
-                f"[yellow]Checker [red]:: [green]{proto.name}", total=count
+                f"[yellow]Checker [red]:: [green]{proto.name}",
+                total=self.proxies_count[proto],
             )
-            for proto, count in self.get_current_proxies_count().items()
+            for proto in sort.PROTOCOL_ORDER
+            if proto in self.proxies_count
         }
         coroutines = [
             self.check_proxy(
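The rewritten comprehension in the hunk above creates one checker progress bar per protocol, walking `sort.PROTOCOL_ORDER` so the bars always appear in a fixed order and skipping protocols for which nothing was scraped. A minimal standalone sketch of that pattern, assuming `rich` is installed; the `ProxyType` stand-in, the counts and the sleep-based "check" below are made up for illustration and are not the project's real code:

```python
import enum
import time

from rich.progress import Progress


class ProxyType(enum.Enum):  # stand-in for aiohttp_socks.ProxyType
    HTTP = "http"
    SOCKS4 = "socks4"
    SOCKS5 = "socks5"


PROTOCOL_ORDER = (ProxyType.HTTP, ProxyType.SOCKS4, ProxyType.SOCKS5)

# Pretend only SOCKS5 and HTTP proxies were scraped, in that insertion order.
proxies_count = {ProxyType.SOCKS5: 10, ProxyType.HTTP: 25}

with Progress() as progress:
    # One task per protocol, in a fixed display order, skipping protocols
    # for which nothing was scraped -- the same shape as the new comprehension.
    tasks = {
        proto: progress.add_task(
            f"Checker :: {proto.name}", total=proxies_count[proto]
        )
        for proto in PROTOCOL_ORDER
        if proto in proxies_count
    }
    for proto, task_id in tasks.items():
        for _ in range(proxies_count[proto]):
            time.sleep(0.01)  # stand-in for an actual proxy check
            progress.advance(task_id)
```

Even though the counts dict is built SOCKS5-first, the HTTP bar is created first because the iteration follows PROTOCOL_ORDER.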
@@ -319,15 +321,15 @@ def save_proxies(self) -> None:
                 include_protocol=True,
             )
             (folder / "all.txt").write_text(text, encoding="utf-8")
-            for proto, proxies in grouped_proxies:
+            for (_, proto), proxies in grouped_proxies:
                 text = create_proxy_list_str(
                     proxies=proxies,
                     anonymous_only=anonymous_only,
                     include_protocol=False,
                 )
-                (
-                    folder / f"{ProxyType(proto).name.lower()}.txt"
-                ).write_text(text, encoding="utf-8")
+                (folder / f"{proto.name.lower()}.txt").write_text(
+                    text, encoding="utf-8"
+                )
         logger.info(
             "Proxies have been saved at %s.",
             self.settings.output_path.absolute(),
@@ -340,7 +342,7 @@ async def run(self) -> None:
             fetch = self.fetch_all_sources(progress)
             if self.settings.enable_geolocation:
                 await asyncio.gather(
-                    fetch, download_geodb(self.session, progress)
+                    download_geodb(self.session, progress), fetch
                 )
             else:
                 await fetch
@@ -361,14 +363,12 @@ def _get_results_table(self) -> Table:
         table.add_column("Protocol", style="cyan")
         table.add_column("Working", style="magenta")
         table.add_column("Total", style="green")
-        for proto, proxies in self.get_grouped_proxies().items():
-            working = len(tuple(proxies))
-            total = self.proxies_count[ProxyType(proto)]
-            percentage = working / total if total else 0
+        current_count = self.get_current_proxies_count()
+        for proto, total in self.proxies_count.items():
+            working = current_count.get(proto, 0)
+            percentage = working / total
             table.add_row(
-                ProxyType(proto).name,
-                f"{working} ({percentage:.1%})",
-                str(total),
+                proto.name, f"{working} ({percentage:.1%})", str(total)
             )
         return table
 
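The summary table now iterates `self.proxies_count` (scraped totals per protocol) and looks up the number of working proxies, so the old `if total else 0` guard disappears; presumably `proxies_count` only ever holds protocols with a nonzero total. A rough, self-contained sketch of the same table-building logic with `rich`, using made-up protocol names and counts:

```python
from rich.console import Console
from rich.table import Table

# Made-up numbers: totals scraped per protocol, and how many passed the check.
proxies_count = {"HTTP": 250, "SOCKS4": 40, "SOCKS5": 60}
current_count = {"HTTP": 31, "SOCKS5": 9}

table = Table()
table.add_column("Protocol", style="cyan")
table.add_column("Working", style="magenta")
table.add_column("Total", style="green")
for proto, total in proxies_count.items():
    working = current_count.get(proto, 0)  # protocols with no working proxies show 0
    percentage = working / total  # safe only while totals are never 0
    table.add_row(proto, f"{working} ({percentage:.1%})", str(total))

Console().print(table)
```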
@@ -382,15 +382,16 @@ def _get_progress_bar(self) -> Progress:
 
     def get_grouped_proxies(self) -> Dict[ProxyType, Tuple[Proxy, ...]]:
         key = sort.protocol_sort_key
-        return {
-            **{proto: () for proto in self.settings.sources},
-            **{
-                ProxyType(k): tuple(v)
-                for k, v in itertools.groupby(
-                    sorted(self.proxies, key=key), key=key
-                )
-            },
+        d: Dict[ProxyType, Tuple[Proxy, ...]] = {
+            proto: ()
+            for proto in sort.PROTOCOL_ORDER
+            if proto in self.proxies_count
         }
+        for (_, proto), v in itertools.groupby(
+            sorted(self.proxies, key=key), key=key
+        ):
+            d[proto] = tuple(v)
+        return d
 
     def get_sorted_proxies(
         self,
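Because `protocol_sort_key` now returns an `(index, ProxyType)` tuple (see `sort.py` below), `itertools.groupby` over the sorted proxies yields that tuple as each group key, which is why both `get_grouped_proxies` and `save_proxies` unpack it as `(_, proto)`. A self-contained sketch of the grouping, using simplified stand-ins for the project's `Proxy` and `ProxyType` classes:

```python
import enum
import itertools
from dataclasses import dataclass
from typing import Dict, Tuple


class ProxyType(enum.Enum):  # stand-in for aiohttp_socks.ProxyType
    HTTP = "http"
    SOCKS4 = "socks4"
    SOCKS5 = "socks5"


PROTOCOL_ORDER = (ProxyType.HTTP, ProxyType.SOCKS4, ProxyType.SOCKS5)


@dataclass(frozen=True)
class Proxy:  # stand-in with only the fields this sketch needs
    protocol: ProxyType
    host: str


def protocol_sort_key(proxy: Proxy) -> Tuple[int, ProxyType]:
    return (PROTOCOL_ORDER.index(proxy.protocol), proxy.protocol)


proxies = [
    Proxy(ProxyType.SOCKS5, "1.2.3.4"),
    Proxy(ProxyType.HTTP, "5.6.7.8"),
    Proxy(ProxyType.HTTP, "9.10.11.12"),
]

grouped: Dict[ProxyType, Tuple[Proxy, ...]] = {}
# groupby only merges adjacent items, so the input must be sorted by the same
# key; each group key is the (index, ProxyType) tuple, hence the unpacking.
for (_, proto), group in itertools.groupby(
    sorted(proxies, key=protocol_sort_key), key=protocol_sort_key
):
    grouped[proto] = tuple(group)

print({proto.name: len(group) for proto, group in grouped.items()})
# {'HTTP': 2, 'SOCKS5': 1}
```

The resulting dict also follows PROTOCOL_ORDER, since insertion happens in sorted order.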
5 changes: 4 additions & 1 deletion proxy_scraper_checker/settings.py

@@ -196,7 +196,10 @@ class Settings:
                 attrs.validators.and_(
                     attrs.validators.min_len(1),
                     attrs.validators.deep_iterable(
-                        attrs.validators.instance_of(str)
+                        attrs.validators.and_(
+                            attrs.validators.instance_of(str),
+                            attrs.validators.min_len(1),
+                        )
                     ),
                 ),
             ),
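With the extra `attrs.validators.and_(instance_of(str), min_len(1))`, every configured source must now be a non-empty string, not merely an element of a non-empty collection. A minimal sketch of that validator combination, assuming attrs >= 22.1 (which provides `attrs.validators.min_len`); the `Example` class and its `urls` field are hypothetical and not the project's real `Settings` model:

```python
from typing import Tuple

import attrs


@attrs.define
class Example:
    urls: Tuple[str, ...] = attrs.field(
        validator=attrs.validators.and_(
            attrs.validators.min_len(1),  # the collection itself must not be empty
            attrs.validators.deep_iterable(
                attrs.validators.and_(
                    attrs.validators.instance_of(str),
                    attrs.validators.min_len(1),  # each entry must be a non-empty str
                )
            ),
        )
    )


Example(urls=("https://example.com/proxies.txt",))  # passes
try:
    Example(urls=("",))  # an empty string is now rejected, not just an empty tuple
except ValueError as exc:
    print(exc)
```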
8 changes: 6 additions & 2 deletions proxy_scraper_checker/sort.py

@@ -2,11 +2,15 @@
 
 from typing import Tuple
 
+from aiohttp_socks import ProxyType
+
 from .proxy import Proxy
 
+PROTOCOL_ORDER = (ProxyType.HTTP, ProxyType.SOCKS4, ProxyType.SOCKS5)
+
 
-def protocol_sort_key(proxy: Proxy) -> int:
-    return proxy.protocol.value  # type: ignore[no-any-return]
+def protocol_sort_key(proxy: Proxy) -> Tuple[int, ProxyType]:
+    return (PROTOCOL_ORDER.index(proxy.protocol), proxy.protocol)
 
 
 def natural_sort_key(proxy: Proxy) -> Tuple[int, ...]:
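Keying the sort on `PROTOCOL_ORDER.index(...)` instead of the enum's numeric value makes the output order an explicit choice (HTTP, then SOCKS4, then SOCKS5) rather than whatever the underlying enum values happen to be, while keeping the `ProxyType` itself in the tuple preserves a distinct key per protocol. A tiny illustration of the idea, using plain strings in place of the real enum:

```python
# Plain strings stand in for aiohttp_socks.ProxyType members here.
PROTOCOL_ORDER = ("http", "socks4", "socks5")

scraped = ["socks5", "http", "socks4", "http"]
print(sorted(scraped, key=lambda proto: (PROTOCOL_ORDER.index(proto), proto)))
# ['http', 'http', 'socks4', 'socks5']
```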
