From 7e69bcdc74bfe239f8d142e69e0c643225e9e522 Mon Sep 17 00:00:00 2001 From: Chris Hokamp Date: Mon, 27 Nov 2023 16:29:50 +0000 Subject: [PATCH] Aylien signal returns media field --- VERSION | 2 +- news_signals/signals.py | 13 +++++++++---- news_signals/signals_dataset.py | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/VERSION b/VERSION index 0d91a54..9e11b32 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.0 +0.3.1 diff --git a/news_signals/signals.py b/news_signals/signals.py index cf9cea3..846d303 100644 --- a/news_signals/signals.py +++ b/news_signals/signals.py @@ -3,7 +3,7 @@ import sys from abc import abstractmethod from collections import Counter, defaultdict -from typing import List +from typing import List, Optional import json import base64 from pathlib import Path @@ -916,8 +916,13 @@ def add_wikimedia_pageviews_timeseries( granularity='daily', wikidata_client=wikidata_client, wikimedia_endpoint=wikimedia_endpoint, - ) - self.timeseries_df['wikimedia_pageviews'] = pageviews_df['wikimedia_pageviews'].values + ) + try: + self.timeseries_df['wikimedia_pageviews'] = pageviews_df['wikimedia_pageviews'].values + except TypeError as e: + logger.error(e) + logger.warning('Retrieved wikimedia pageviews dataframe is None, not adding to signal') + return self @@ -926,7 +931,7 @@ def __init__( self, name: str, components: List[Signal], - metadata: dict = None + metadata: Optional[dict] = None ): super().__init__(name, metadata=metadata) self.components = components diff --git a/news_signals/signals_dataset.py b/news_signals/signals_dataset.py index 1afc5b9..b127d11 100644 --- a/news_signals/signals_dataset.py +++ b/news_signals/signals_dataset.py @@ -25,7 +25,7 @@ logger = create_logger(__name__, level=logging.INFO) -MAX_BODY_TOKENS = 500 +MAX_BODY_TOKENS = 1000 DEFAULT_METADATA = { 'name': 'News Signals Dataset' } @@ -328,6 +328,7 @@ def reduce_aylien_story( "categories": s["categories"], "industries": s["industries"], "smart_tagger_categories": smart_cats, + "media": s["media"], "clusters": s["clusters"] }, **{f: s[f] for f in additional_fields} )