-
Notifications
You must be signed in to change notification settings - Fork 1
Article matching
martinthenext edited this page Oct 10, 2014
·
4 revisions
Given: `article`, coming from `article.source_url`. The goal is to get an article about the same event/topic, but from a source with the opposite bias.
def get_matching_article(article):
    """Find an article on the same event/topic from an opposing source.

    Candidate sources are collected in order of preference:

    1. Sources associated with every known tag that also appears among the
       article's keywords (both sides are stemmed before comparison).
    2. Otherwise, the source associated with the known tag that is mentioned
       most frequently in the article text.
    3. Otherwise, a source of the opposite wing to the article's own source.
    4. If none of the above produced a candidate, every source except the
       article's own.

    Args:
        article: The article to match. This function reads ``article.text``
            and ``article.root_url`` and passes the article itself to the
            keyword-extraction and similarity helpers.

    Returns:
        Whatever ``get_max_similarity_article`` returns for the article and
        the selected sources (presumably the single most similar article --
        confirm against that helper).
    """
    known_tags = Source.objects.get_all_known_tags()  # ['Russia', 'India', 'China'...]
    relevant_sources = []

    # Which tags we know are in the article's keywords?
    article_keywords = newspaper.get_article_keywords(article)
    # NOTE(review): the intersection is computed on stemmed forms, so the
    # tags passed to get_source_for_tag() below are presumably stemmed too --
    # confirm that the lookup accepts stemmed tags.
    relevant_tags = intersect(stem(known_tags), stem(article_keywords))

    if relevant_tags:
        relevant_sources.extend(
            Source.objects.get_source_for_tag(tag) for tag in relevant_tags
        )
    else:
        # Which tag we know is mentioned in the article the most?
        relevant_tag = get_most_frequent_tag(article.text, known_tags)
        if relevant_tag:
            # There is at least one known tag mentioned.
            relevant_sources.append(
                Source.objects.get_source_for_tag(relevant_tag)
            )
        else:
            # There are no tags we know in the article; check whether we
            # know if the article's source is right or left wing.
            # NOTE(review): if Source is a Django model, get() raises when
            # there is no match or more than one -- confirm that is intended
            # here and for the wing lookups below.
            article_source = Source.objects.get(root_url=article.root_url)
            if article_source.is_left_wing:
                relevant_sources.append(Source.objects.get(wing='right'))
            if article_source.is_right_wing:
                relevant_sources.append(Source.objects.get(wing='left'))

    if not relevant_sources:
        # Search for the most relevant article across everything but the
        # article's own source. `except` is a reserved word in Python, so the
        # queryset method is exclude(); the source is identified by root_url,
        # the same way it is looked up above.
        relevant_sources = Source.objects.all().exclude(
            root_url=article.root_url
        )

    return get_max_similarity_article(article, relevant_sources)