-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
142 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from bs4 import BeautifulSoup, NavigableString\n", | ||
"\n", | ||
"html_doc = \"\"\"\n", | ||
" <pre\n", | ||
" style=\"\n", | ||
" box-sizing: border-box;\n", | ||
" font-family: 'Roboto Mono', sfmono-regular,\n", | ||
" consolas, 'liberation mono', menlo, courier,\n", | ||
" monospace;\n", | ||
" background: rgb(32, 33, 35) none repeat scroll\n", | ||
" 0% 0% / auto padding-box border-box;\n", | ||
" color: rgb(255, 255, 255);\n", | ||
" overflow-x: auto;\n", | ||
" border-bottom-left-radius: 4px;\n", | ||
" border-bottom-right-radius: 4px;\n", | ||
" margin: 0px;\n", | ||
" min-height: 44px;\n", | ||
" padding: 12px 16px;\n", | ||
" font-size: 15px;\n", | ||
" line-height: 24px;\n", | ||
" border-top-left-radius: 4px;\n", | ||
" border-top-right-radius: 4px;\n", | ||
" \"\n", | ||
" ><code style=\"box-sizing:border-box;font-family:&quot;Roboto Mono&quot;, sfmono-regular, consolas, &quot;liberation mono&quot;, menlo, courier, monospace;white-space:pre;\">\n", | ||
" <code style=\"box-sizing:border-box;float:left;font-family:&quot;Roboto Mono&quot;, sfmono-regular, consolas, &quot;liberation mono&quot;, menlo, courier, monospace;padding-right:16px;\"><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">1\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">2\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">3\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">4\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">5\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">6\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">7\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">8\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">9\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">10\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">11\n", | ||
"</span><span style=\"box-sizing:border-box;color:rgb(110, 110, 128);\">12\n", | ||
"</span></code>\n", | ||
"<span style=\"box-sizing:border-box;color:rgba(255, 255, 255, 0.5);\"># Note: you need to be using OpenAI Python v0.27.0 for the code below to work</span><span style=\"box-sizing:border-box;\"\n", | ||
"/><span style=\"box-sizing:border-box;\"/><span style=\"box-sizing:border-box;color:rgb(46, 149, 211);\">import</span><span style=\"box-sizing:border-box;\"> openai\n", | ||
"</span>\n", | ||
"openai.ChatCompletion.create(\n", | ||
"<span style=\"box-sizing:border-box;\"> model=</span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"gpt-3.5-turbo\"</span><span style=\"box-sizing:border-box;\">,\n", | ||
"</span> messages=[\n", | ||
"<span style=\"box-sizing:border-box;\"> {</span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"role\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"system\"</span><span style=\"box-sizing:border-box;\">, </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"content\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"You are a helpful assistant.\"</span><span style=\"box-sizing:border-box;\">},\n", | ||
"</span><span style=\"box-sizing:border-box;\"> {</span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"role\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"user\"</span><span style=\"box-sizing:border-box;\">, </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"content\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"Who won the world series in 2020?\"</span><span style=\"box-sizing:border-box;\">},\n", | ||
"</span><span style=\"box-sizing:border-box;\"> {</span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"role\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"assistant\"</span><span style=\"box-sizing:border-box;\">, </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"content\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"The Los Angeles Dodgers won the World Series in 2020.\"</span><span style=\"box-sizing:border-box;\">},\n", | ||
"</span><span style=\"box-sizing:border-box;\"> {</span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"role\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"user\"</span><span style=\"box-sizing:border-box;\">, </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"content\"</span><span style=\"box-sizing:border-box;\">: </span><span style=\"box-sizing:border-box;color:rgb(0, 166, 125);\">\"Where was it played?\"</span><span style=\"box-sizing:border-box;\">}\n", | ||
"</span> ]\n", | ||
")</code></pre>\n", | ||
"\"\"\"\n", | ||
"soup = BeautifulSoup(html_doc, 'html.parser')\n", | ||
"\n", | ||
"# 找到所有的<pre>标签\n", | ||
"pre_tags = soup.find_all('pre')\n", | ||
"\n", | ||
"for pre in pre_tags:\n", | ||
" # 在每个<pre>标签中找到<code>标签\n", | ||
" code_tags = pre.find_all('code')\n", | ||
" \n", | ||
" for code in code_tags:\n", | ||
" # 检查<code>标签是否包含行号,这里假设行号是在<span>标签中的数字\n", | ||
" span_tags = code.find_all('span')\n", | ||
" \n", | ||
" for span in span_tags:\n", | ||
" if span.string and span.string.strip().isdigit():\n", | ||
" # 如果是行号,则删除这个<span>标签\n", | ||
" span.decompose()\n", | ||
"\n", | ||
"# 这时,soup中的HTML已经没有行号了\n", | ||
"print(soup.prettify())\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "notion", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.2" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters