Skip to content

Commit

Permalink
try to escape them then
Browse files Browse the repository at this point in the history
  • Loading branch information
Jo Kristian Bergum committed Nov 15, 2023
1 parent 08a2b6e commit 4465ea3
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions feed-split.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def is_selfhosted_doc(doc):
return True
return False

def remove_escape(text: str) -> str:
    """Return *text* with backslash-escaped underscores unescaped.

    Every two-character sequence of a backslash followed by an underscore
    is replaced with a single plain underscore. All other characters,
    including backslashes that are not directly followed by an underscore,
    are left untouched.

    Args:
        text: The string to clean up.

    Returns:
        A new string in which each ``\\_`` has become ``_``.
    """
    # "\\_" is the two-character literal: one backslash, one underscore.
    return text.replace("\\_", "_")

def create_text_doc(doc, paragraph, paragraph_id, header):
id = doc['put']
Expand All @@ -65,7 +67,7 @@ def create_text_doc(doc, paragraph, paragraph_id, header):
"path": fields['path'],
"doc_id": fields['path'],
"namespace": new_namespace,
"content": paragraph,
"content": remove_escape(paragraph),
"content_tokens": n_tokens,
"base_uri": sys.argv[2],
"selfhosted": is_selfhosted_doc(doc)
Expand All @@ -75,12 +77,12 @@ def create_text_doc(doc, paragraph, paragraph_id, header):
if header:
title = fields['title']
new_title = title + " - " + header
new_doc["fields"]["title"] = new_title
new_doc["fields"]["title"] = remove_escape(new_title)

if paragraph_id is None:
paragraph_id = str(random.randint(0,1000))

new_doc['fields']['path'] = new_doc['fields']['path'] + "#" + paragraph_id.replace("?","")
new_doc['fields']['path'] = remove_escape(new_doc['fields']['path'] + "#" + paragraph_id.replace("?",""))
new_doc['put'] = new_doc['put'] + "-" + urllib.parse.quote(paragraph_id)

return new_doc
Expand Down

0 comments on commit 4465ea3

Please sign in to comment.