Skip to content

Commit

Permalink
Merge pull request #687 from flairNLP/fix-publisher-coverage
Browse files Browse the repository at this point in the history
Fix a bug in `publisher_coverage.py`
  • Loading branch information
MaxDall authored Jan 20, 2025
2 parents 9f1ecda + ca41bb1 commit 76e2153
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions scripts/publisher_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
import sys
import traceback
from typing import List, Optional
from typing import Any, Callable, List, Optional, Union

from fundus import Crawler, PublisherCollection
from fundus.publishers.base_objects import Publisher, PublisherGroup
Expand All @@ -15,7 +15,7 @@

def main() -> None:
failed: int = 0
timeout_in_seconds: int = 30
timeout_in_seconds: int = 10

publisher_regions: List[PublisherGroup] = sorted(
PublisherCollection.get_subgroup_mapping().values(), key=lambda region: region.__name__
Expand Down Expand Up @@ -68,15 +68,28 @@ def main() -> None:
)

else:

def guard(field, fnc: Callable[[Any], bool] = lambda x: x is not None) -> Union[bool, str]:
    """Evaluate an article field with <fnc> unless the field is an exception.

    Args:
        field: The article field to evaluate; may itself be an Exception instance.
        fnc: Predicate applied to <field>; defaults to an "is not None" check.

    Returns:
        repr(field) if <field> is an Exception instance, otherwise the result of fnc(field).
    """
    if isinstance(field, Exception):
        # Report the exception itself rather than running the predicate on it.
        return repr(field)
    return fnc(field)

print(
f"❌ FAILED: {publisher_name!r} - No complete articles received "
f"(URL of an incomplete article: {incomplete_article.html.requested_url}) with attributes:\n"
f"title: {incomplete_article.title is not None}\n"
f"plaintext: {bool(incomplete_article.body)}\n"
f"publishing_date: {incomplete_article.publishing_date is not None}\n"
f"authors: {incomplete_article.authors is not None and not len(incomplete_article.authors) == 0}\n"
f"topics: {incomplete_article.topics is not None and not len(incomplete_article.topics) == 0}\n"
f"images: {incomplete_article.images is not None and not len(incomplete_article.images) == 0}\n"
f"title: {guard(incomplete_article.title)}\n"
f"plaintext: {guard(incomplete_article.body, bool)}\n"
f"publishing_date: {guard(incomplete_article.publishing_date)}\n"
f"authors: {guard(incomplete_article.authors, bool)}\n"
f"topics: {guard(incomplete_article.topics, bool)}\n"
f"images: {guard(incomplete_article.images, bool)}\n"
)
failed += 1
continue
Expand Down

0 comments on commit 76e2153

Please sign in to comment.