Skip to content

Commit

Permalink
add author_selector
Browse files (browse the repository at this point in the history)
  • Loading branch information
MaxDall committed Jan 20, 2025
1 parent 6d57368 commit 32849de
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/fundus/publishers/jp/nikkei.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
+import re
from typing import List, Optional

from lxml.cssselect import CSSSelector
@@ -37,5 +38,7 @@ def images(self) -> List[Image]:
return image_extraction(
doc=self.precomputed.doc,
paragraph_selector=self._paragraph_selector,
-lower_boundary_selector=CSSSelector("div.paywall_pzomzzc"),
+lower_boundary_selector=CSSSelector("p.title_thchiij"),
+# https://regex101.com/r/qjEM41/1
+author_selector=re.compile(r"=(?P<credits>[^=]*?)\s*$"),
)

0 comments on commit 32849de

Please sign in to comment.