Skip to content
martinthenext edited this page Oct 10, 2014 · 4 revisions

Given an article that comes from article.source_url, the goal is to find an article about the same event/topic, but from a source with the opposite bias.

def get_matching_article(article):
    """Return the article most similar to `article` among a set of candidate sources.

    The intent is to surface coverage of the same event/topic from a source
    with a different bias. Candidate sources are picked by the first strategy
    that yields anything:

    1. The sources registered for every known tag found in the article's keywords.
    2. The source registered for the known tag mentioned most often in the text.
    3. A source from the opposite wing, if the article's own source is known
       and flagged as left or right wing.
    4. Every source except the article's own.

    The final choice among the candidates is delegated to
    `get_max_similarity_article`.
    """
    known_tags = Source.objects.get_all_known_tags()  # ['Russia', 'India', 'China'...]

    # Which tags we know are in the article's keywords?
    article_keywords = newspaper.get_article_keywords(article)
    relevant_tags = intersect(stem(known_tags), stem(article_keywords))

    if relevant_tags:
        relevant_sources = [
            Source.objects.get_source_for_tag(tag) for tag in relevant_tags
        ]
    else:
        relevant_sources = []
        # Which tag we know is mentioned in the article the most?
        relevant_tag = get_most_frequent_tag(article.text, known_tags)
        if relevant_tag:
            # There is at least one known tag mentioned.
            relevant_sources.append(Source.objects.get_source_for_tag(relevant_tag))
        else:
            # There are no tags we know in the article; check whether we know
            # if the source is right or left wing. `filter(...).first()` yields
            # None for an unknown source instead of raising, so the catch-all
            # fallback below stays reachable.
            article_source = Source.objects.filter(root_url=article.root_url).first()
            if article_source is not None:
                # NOTE(review): `get` expects exactly one source per wing; if
                # several sources can share a wing this needs `filter` - confirm.
                if article_source.is_left_wing:
                    relevant_sources.append(Source.objects.get(wing='right'))
                if article_source.is_right_wing:
                    relevant_sources.append(Source.objects.get(wing='left'))

    if not relevant_sources:
        # Search for the most relevant article across everything but the
        # article's own source. The source is identified by `root_url`, the
        # same key used for the lookup above - TODO confirm this matches the
        # model's notion of `article.source`.
        relevant_sources = Source.objects.exclude(root_url=article.root_url)

    return get_max_similarity_article(article, relevant_sources)
Clone this wiki locally