Skip to content

Commit

Permalink
Support WeChat clipper
Browse files (browse the repository at this point in the history)
  • Loading branch information
selfboot committed Apr 28, 2023
1 parent dac2a73 commit fb00490
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 6 deletions.
3 changes: 3 additions & 0 deletions demos/Test Case E.html

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions html2notion/translate/html2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def _is_yinxiang_export_html(html_soup):

"""
<meta name="source-application" content="webclipper.evernote" />
<meta name="source-application" content="微信" />
"""
def _is_yinxiang_clipper_html(html_soup):
exporter_version_meta = html_soup.select_one('html > head > meta[name="exporter-version"]')
Expand All @@ -45,6 +46,8 @@ def _is_yinxiang_clipper_html(html_soup):
clipper_source_content = clipper_source_meta.get('content', "") if isinstance(clipper_source_meta, Tag) else ""
if isinstance(clipper_source_content, str) and clipper_source_content.endswith("evernote"):
return True
if isinstance(clipper_source_content, str) and clipper_source_content in ("微信"):
return True
return False


Expand Down
12 changes: 9 additions & 3 deletions html2notion/translate/html2json_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def get_notion_data(self):
@staticmethod
def extract_text_and_parents(tag: PageElement, parents=[]):
results = []
if isinstance(tag, NavigableString):
# Filter empty content
if isinstance(tag, NavigableString) and tag.text:
results.append((tag.text, parents))
return results
elif isinstance(tag, Tag):
Expand Down Expand Up @@ -328,7 +329,7 @@ def convert_divider(self, soup):
"type": "divider",
"divider": {}
}

def convert_heading(self, soup):
heading_map = {"h1": "heading_1", "h2": "heading_2", "h3": "heading_3",
"h4": "heading_3", "h5": "heading_3", "h6": "heading_3"}
Expand All @@ -345,7 +346,8 @@ def convert_heading(self, soup):
text_obj = self.generate_inline_obj(soup)
if text_obj:
rich_text.extend(text_obj)
return json_obj
return json_obj
return None

# <ol><li><div>first</div></li><li><div>second</div></li><li><div>third</div></li></ol>
def convert_numbered_list_item(self, soup):
Expand All @@ -356,6 +358,10 @@ def convert_bulleted_list_item(self, soup):
return self.convert_list_items(soup, 'bulleted_list_item')

def convert_list_items(self, soup, list_type):
# Remove heading tags in li
for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
heading.unwrap()

items = soup.find_all('li', recursive=True)
if not items:
logger.warning("No list items found in {soup}")
Expand Down
2 changes: 1 addition & 1 deletion html2notion/translate/html2json_clipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,5 +109,5 @@ def convert_code(self, soup):
json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text)
return json_obj


Html2JsonBase.register(YinXiangClipper_Type, Html2JsonYinXiang)
5 changes: 3 additions & 2 deletions html2notion/translate/notion_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ async def create_new_page(self, notion_data):
blocks = notion_data.get("children", [])
limit_size = 100
chunks = [blocks[i: i + limit_size] for i in range(0, len(blocks), limit_size)]
notion_data.pop("children")
if blocks:
notion_data.pop("children")
first_chunk = chunks[0] if chunks else []
created_page = await self.notion_client.pages.create(**notion_data, children=first_chunk)
page_id = created_page["id"]
Expand All @@ -63,7 +64,7 @@ async def main(file_path, notion_api_key):

if __name__ == "__main__":
test_prepare_conf()
file = Path("./demos/Test Case D.html")
file = Path("./demos/Test Case E.html")
notion_api_key = ""
if 'GITHUB_ACTIONS' in os.environ:
notion_api_key = os.environ['notion_api_key']
Expand Down

0 comments on commit fb00490

Please sign in to comment.