From ca41bb1afc89e792d5b69b5e7897bf311f79c074 Mon Sep 17 00:00:00 2001 From: Max Dallabetta Date: Mon, 20 Jan 2025 14:25:57 +0100 Subject: [PATCH] add `guard` function and fix string representation --- scripts/publisher_coverage.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/scripts/publisher_coverage.py b/scripts/publisher_coverage.py index 27c168c1..7a8dd45a 100644 --- a/scripts/publisher_coverage.py +++ b/scripts/publisher_coverage.py @@ -6,7 +6,7 @@ """ import sys import traceback -from typing import List, Optional +from typing import Any, Callable, List, Optional, Union from fundus import Crawler, PublisherCollection from fundus.publishers.base_objects import Publisher, PublisherGroup @@ -15,7 +15,7 @@ def main() -> None: failed: int = 0 - timeout_in_seconds: int = 30 + timeout_in_seconds: int = 10 publisher_regions: List[PublisherGroup] = sorted( PublisherCollection.get_subgroup_mapping().values(), key=lambda region: region.__name__ @@ -68,15 +68,28 @@ def main() -> None: ) else: + + def guard(field, fnc: Callable[[Any], bool] = lambda x: x is not None) -> Union[bool, str]: + """Makes a boolean evaluation of `field` based on `fnc` and guards exceptions + + Args: + field: The article field to evaluate + fnc: The evaluation function + + Returns: + Either True, False or Exception if isinstance(field, Exception) = True + """ + return fnc(field) if not isinstance(field, Exception) else repr(field) + print( f"❌ FAILED: {publisher_name!r} - No complete articles received " f"(URL of an incomplete article: {incomplete_article.html.requested_url}) with attributes:\n" - f"title: {incomplete_article.title is not None}\n" - f"plaintext: {bool(incomplete_article.body)}\n" - f"publishing_date: {incomplete_article.publishing_date is not None}\n" - f"authors: {incomplete_article.authors is not None and not len(incomplete_article.authors) == 0}\n" - f"topics: {incomplete_article.topics is not None and not len(incomplete_article.topics) == 
0}\n" - f"images: {incomplete_article.images is not None and not len(incomplete_article.images) == 0}\n" + f"title: {guard(incomplete_article.title)}\n" + f"plaintext: {guard(incomplete_article.body, bool)}\n" + f"publishing_date: {guard(incomplete_article.publishing_date)}\n" + f"authors: {guard(incomplete_article.authors, bool)}\n" + f"topics: {guard(incomplete_article.topics, bool)}\n" + f"images: {guard(incomplete_article.images, bool)}\n" ) failed += 1 continue