diff --git a/VERSION b/VERSION
index 0d91a54..9e11b32 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.3.0
+0.3.1
diff --git a/news_signals/signals.py b/news_signals/signals.py
index cf9cea3..846d303 100644
--- a/news_signals/signals.py
+++ b/news_signals/signals.py
@@ -3,7 +3,7 @@
 import sys
 from abc import abstractmethod
 from collections import Counter, defaultdict
-from typing import List
+from typing import List, Optional
 import json
 import base64
 from pathlib import Path
@@ -916,8 +916,13 @@ def add_wikimedia_pageviews_timeseries(
             granularity='daily',
             wikidata_client=wikidata_client,
             wikimedia_endpoint=wikimedia_endpoint,
-        )
-        self.timeseries_df['wikimedia_pageviews'] = pageviews_df['wikimedia_pageviews'].values
+        )
+        try:
+            self.timeseries_df['wikimedia_pageviews'] = pageviews_df['wikimedia_pageviews'].values
+        except TypeError as e:
+            logger.error(e)
+            logger.warning('Retrieved wikimedia pageviews dataframe is None, not adding to signal')
+        return self
@@ -926,7 +931,7 @@ def __init__(
         self,
         name: str,
         components: List[Signal],
-        metadata: dict = None
+        metadata: Optional[dict] = None
     ):
         super().__init__(name, metadata=metadata)
         self.components = components
diff --git a/news_signals/signals_dataset.py b/news_signals/signals_dataset.py
index 1afc5b9..b127d11 100644
--- a/news_signals/signals_dataset.py
+++ b/news_signals/signals_dataset.py
@@ -25,7 +25,7 @@
 logger = create_logger(__name__, level=logging.INFO)
 
-MAX_BODY_TOKENS = 500
+MAX_BODY_TOKENS = 1000
 DEFAULT_METADATA = {
     'name': 'News Signals Dataset'
 }
@@ -328,6 +328,7 @@ def reduce_aylien_story(
             "categories": s["categories"],
             "industries": s["industries"],
             "smart_tagger_categories": smart_cats,
+            "media": s["media"],
             "clusters": s["clusters"]
         },
         **{f: s[f] for f in additional_fields}
     )