Skip to content

Commit

Permalink
added convert_to_html
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Staar <[email protected]>
  • Loading branch information
PeterStaar-IBM committed Jun 6, 2024
1 parent dcbbf00 commit 4857504
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 15 deletions.
64 changes: 54 additions & 10 deletions examples/document_conversion_quick_start/convert_documents.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,18 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"id": "b01a4fd1",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Project key: 1234567890abcdefghijklmnopqrstvwyz123456\n"
]
}
],
"source": [
"from dsnotebooks.settings import ProjectNotebookSettings\n",
"\n",
Expand All @@ -56,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "502cdef8",
"metadata": {
"ExecuteTime": {
Expand All @@ -72,7 +80,7 @@
"from pathlib import Path\n",
"from zipfile import ZipFile\n",
"\n",
"from deepsearch.documents.core.export import export_to_markdown\n",
"from deepsearch.documents.core.export import export_to_markdown, export_to_html\n",
"from IPython.display import display, Markdown, HTML, display_html"
]
},
Expand All @@ -86,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "f44fbf08",
"metadata": {},
"outputs": [],
Expand All @@ -96,15 +104,32 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "ec83eb0b",
"metadata": {
"ExecuteTime": {
"end_time": "2022-08-02T12:14:49.216045Z",
"start_time": "2022-08-02T12:14:25.380757Z"
}
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:00<00:00, 39.65it/s]\u001b[38;2;15;98;254m \u001b[0m\n",
"Submitting input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:04<00:00, 4.48s/it]\u001b[38;2;15;98;254m \u001b[0m\n",
"Converting input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:36<00:00, 36.90s/it]\u001b[38;2;15;98;254m \u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Total documents': 1, 'Successfully converted documents': 1}\n"
]
}
],
"source": [
"output_dir = Path(\"./converted_docs\")\n",
"\n",
Expand All @@ -121,10 +146,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "382c4869-cca9-43fc-8052-c0ab7e9c175d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing converted_docs/2206.01062.md\n",
"writing converted_docs/2206.01062.html\n"
]
}
],
"source": [
"# Iterate over the output files and visualize the output\n",
"for output_file in output_dir.rglob(\"json*.zip\"):\n",
Expand All @@ -136,13 +170,23 @@
"\n",
" # Drop the literal \".json\" suffix. str.rstrip(\".json\") strips any trailing\n",
" # '.', 'j', 's', 'o', 'n' characters (e.g. \"person.json\" -> \"per\"), not the suffix.\n",
" basename = name[: -len(\".json\")] if name.endswith(\".json\") else name\n",
" doc_json = json.loads(archive.read(f\"{basename}.json\"))\n",
" \n",
" doc_md = export_to_markdown(doc_json)\n",
"\n",
" ofile = output_dir / f\"{basename}.md\"\n",
" print(f\"writing {ofile}\")\n",
" \n",
" with ofile.open(\"w\") as fw:\n",
" fw.write(doc_md)\n",
"\n",
" print(f\"writing {ofile}\")"
" doc_html = export_to_html(doc_json)\n",
"\n",
" ofile = output_dir / f\"{basename}.html\"\n",
" print(f\"writing {ofile}\")\n",
" \n",
" with ofile.open(\"w\") as fw:\n",
" fw.write(doc_html)\n",
" "
]
},
{
Expand Down
10 changes: 6 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ packages = [

[tool.poetry.dependencies]
python = ">= 3.8, <3.11"
deepsearch-toolkit = "^0.46.0"
deepsearch-toolkit = "0.47.0"
jupyter = "^1.0.0"
ipywidgets = "^7" # previous major release is needed because of mols2grid
numpy = "^1.23.4"
Expand Down

0 comments on commit 4857504

Please sign in to comment.