Skip to content

Commit

Permalink
add RSSFeed and subheadline selector
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxDall committed Apr 19, 2024
1 parent ccefb98 commit 98513f4
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/fundus/publishers/de/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ class DE(PublisherEnum):
RheinischePost = PublisherSpec(
name="Rheinische Post",
domain="https://rp-online.de/",
-sources=[NewsMap("https://rp-online.de/sitemap-news.xml"), Sitemap("https://rp-online.de/sitemap.xml")],
+sources=[
+RSSFeed("https://rp-online.de/feed.rss"),
+NewsMap("https://rp-online.de/sitemap-news.xml"),
+Sitemap("https://rp-online.de/sitemap.xml"),
+],
parser=RheinischePostParser,
)
2 changes: 2 additions & 0 deletions src/fundus/publishers/de/rheinische_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ class RheinischePostParser(ParserProxy):
class V1(BaseParser):
_summary_selector = CSSSelector("strong[data-cy='intro']")
_paragraph_selector = CSSSelector("div[data-cy='article-content'] p")
+_subheadline_selector = CSSSelector("div[data-cy='article-content'] h2")

@attribute
def body(self) -> ArticleBody:
return extract_article_body_with_selector(
self.precomputed.doc,
summary_selector=self._summary_selector,
paragraph_selector=self._paragraph_selector,
+subheadline_selector=self._subheadline_selector,
)

@attribute
Expand Down

0 comments on commit 98513f4

Please sign in to comment.