-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmd2tghtml.py
196 lines (139 loc) · 5.32 KB
/
md2tghtml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import re
def escape_html(text: str) -> str:
    """Escape the HTML-significant characters in a string.

    Replaces ``&``, ``<`` and ``>`` with their character entities so the text
    cannot be interpreted as markup when it is embedded in HTML output.

    Args:
        text (str): The text to escape.

    Returns:
        str: The text with ``&``, ``<`` and ``>`` replaced by entities.
    """
    # The ampersand has to be handled first: otherwise the "&" introduced by
    # the "&lt;" / "&gt;" entities would itself be escaped a second time.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    return text
def apply_hand_points(text: str) -> str:
    """Swap leading markdown bullet markers for a bullet character.

    A ``*`` followed by one whitespace character is replaced with ``"• "``
    when it sits at the very start of the text or directly after a newline,
    provided the character after the whitespace is not another ``*``.

    Arguments:
        text (str): The text to modify.

    Returns:
        str: The text with leading bullet markers replaced by ``"• "``.
    """
    bullet_marker = re.compile(r"(?<=\n)\*\s(?!\*)|^\*\s(?!\*)")
    return bullet_marker.sub("• ", text)
def apply_bold(text: str) -> str:
    """Convert ``**bold**`` markdown spans into HTML ``<b>`` elements.

    Each shortest run of text enclosed by a pair of double asterisks is
    wrapped in ``<b>...</b>``; the asterisks themselves are dropped.

    Arguments:
        text (str): The text to modify.

    Returns:
        str: The text with double-asterisk spans wrapped in ``<b>`` tags.
    """
    return re.sub(
        r"\*\*(.*?)\*\*",
        lambda span: "<b>" + span.group(1) + "</b>",
        text,
    )
def apply_italic(text: str) -> str:
    """Convert ``*italic*`` markdown spans into HTML ``<i>`` elements.

    Only single asterisks act as delimiters: an asterisk that is directly
    preceded or followed by another asterisk is not treated as an opening or
    closing marker.

    Arguments:
        text (str): The text to modify.

    Returns:
        str: The text with single-asterisk spans wrapped in ``<i>`` tags.
    """
    italic_span = re.compile(r"(?<!\*)\*(?!\*)(?!\*\*)(.*?)(?<!\*)\*(?!\*)")
    return italic_span.sub(r"<i>\1</i>", text)
def apply_code(text: str) -> str:
    """Convert fenced markdown code blocks into HTML ``<pre>`` elements.

    A block opens with three backticks, an optional run of word characters
    (the language tag) and a newline, and closes at the next three backticks.
    The language tag is emitted as the ``lang`` attribute of the ``<pre>``
    element and the enclosed text becomes its content.

    Arguments:
        text (str): The text to modify.

    Returns:
        str: The text with fenced code blocks replaced by ``<pre>`` tags.
    """
    fenced_block = re.compile(r"```([\w]*?)\n([\s\S]*?)```", re.DOTALL)
    return fenced_block.sub(r"<pre lang='\1'>\2</pre>", text)
def apply_monospace(text: str) -> str:
    """Convert single-backtick markdown spans into HTML ``<code>`` elements.

    Only lone backticks act as delimiters: a backtick that is directly
    preceded or followed by another backtick is ignored.

    Arguments:
        text (str): The input text containing markdown monospace formatting.

    Returns:
        str: The text with single-backtick spans wrapped in ``<code>`` tags.
    """
    return re.sub(
        r"(?<!`)`(?!`)(.*?)(?<!`)`(?!`)",
        lambda span: "<code>" + span.group(1) + "</code>",
        text,
    )
def apply_link(text: str) -> str:
    """Convert ``[label](target)`` markdown links into HTML anchors.

    The bracketed part becomes the anchor's content and the parenthesised
    part becomes the value of its ``href`` attribute.

    Arguments:
        text (str): The input text containing markdown links.

    Returns:
        str: The text with markdown links replaced by ``<a>`` tags.
    """
    markdown_link = re.compile(r"\[(.*?)\]\((.*?)\)")
    return markdown_link.sub(r'<a href="\2">\1</a>', text)
def apply_underline(text: str) -> str:
    """Convert ``__underlined__`` markdown spans into HTML ``<u>`` elements.

    Each shortest run of text enclosed by a pair of double underscores is
    wrapped in ``<u>...</u>``; the underscores themselves are dropped.

    Arguments:
        text (str): The input text to modify.

    Returns:
        str: The text with double-underscore spans wrapped in ``<u>`` tags.
    """
    return re.sub(
        r"__(.*?)__",
        lambda span: "<u>" + span.group(1) + "</u>",
        text,
    )
def apply_strikethrough(text: str) -> str:
    """Convert ``~~struck~~`` markdown spans into HTML ``<s>`` elements.

    Each shortest run of text enclosed by a pair of double tildes is wrapped
    in ``<s>...</s>``; the tildes themselves are dropped.

    Arguments:
        text (str): The input text to modify.

    Returns:
        str: The text with double-tilde spans wrapped in ``<s>`` tags.
    """
    struck_span = re.compile(r"~~(.*?)~~")
    return struck_span.sub(r"<s>\1</s>", text)
def apply_header(text: str) -> str:
    """Replace markdown ``#`` headers with bold, underlined HTML text.

    A header is a line that starts with one to six ``#`` characters followed
    by whitespace. The markers are dropped and the rest of the line is
    wrapped in ``<b><u>...</u></b>``; the header level is not preserved.

    Arguments:
        text (str): The input text to modify. May contain several lines.

    Returns:
        str: The text with markdown headers replaced with HTML tags.
    """
    # MULTILINE lets "^" match at the start of every line, so headers are
    # found anywhere in the text, and without DOTALL the captured title stops
    # at the end of its own line. "[^\S\n]" is "whitespace except newline",
    # which keeps the separator from spilling onto the next line.
    pattern = r"^(#{1,6})[^\S\n]+(.*)"
    return re.sub(pattern, r"<b><u>\2</u></b>", text, flags=re.MULTILINE)
def apply_exclude_code(text: str) -> str:
    """Apply inline markdown formatting to every line outside fenced code.

    The text is processed line by line. A line starting with three backticks
    is a fence: it opens a code block when outside one and closes the block
    otherwise. Fence lines and the lines between them are passed through
    unchanged. Every other line is run through the header, link, bold,
    italic, underline, strikethrough, monospace and bullet-point
    conversions, in that order.

    Arguments:
        text (str): The input text to modify.

    Returns:
        str: The text with non-code lines converted and code lines untouched.
    """
    # Order matters: later conversions see the output of earlier ones.
    transforms = (
        apply_header,
        apply_link,
        apply_bold,
        apply_italic,
        apply_underline,
        apply_strikethrough,
        apply_monospace,
        apply_hand_points,
    )

    result_lines = []
    in_code_block = False
    for line in text.split("\n"):
        if line.startswith("```"):
            # Toggle rather than latch, so that formatting resumes after the
            # closing fence instead of staying disabled for the rest of the
            # text. The fence line itself is never formatted.
            in_code_block = not in_code_block
            result_lines.append(line)
            continue
        if in_code_block:
            result_lines.append(line)
            continue
        for transform in transforms:
            line = transform(line)
        result_lines.append(line)
    return "\n".join(result_lines)
def format_message(text: str) -> str:
    """Convert a markdown message into an HTML string.

    The conversion runs in three stages: HTML escaping of the raw text,
    inline formatting of every line outside fenced code, and finally the
    conversion of the fenced code blocks themselves.

    Args:
        text (str): The plain text message to format.

    Returns:
        str: The formatted HTML string.
    """
    return apply_code(apply_exclude_code(escape_html(text)))