Skip to content

Commit

Permalink
update env vars & repin toolkit
Browse files Browse the repository at this point in the history
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed May 31, 2024
1 parent 861b26b commit 84ea7e1
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 102 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/nb_runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ jobs:
DEEPSEARCH_VERIFY_SSL: ${{ vars.DEEPSEARCH_VERIFY_SSL }}
DS_NB_PROJ_KEY: ${{ vars.DS_NB_PROJ_KEY }}
DS_NR_EXCLUDED: ${{ vars.DS_NR_EXCLUDED }}
DS_NB_SEM_ON_IDX_KEY: ${{ vars.DS_NB_SEM_ON_IDX_KEY }}
DS_NB_RAISE_ON_SEM_ERR: ${{ vars.DS_NB_RAISE_ON_SEM_ERR }}
DS_NB_SEM_ON_IDX_DOC_HASH: ${{ vars.DS_NB_SEM_ON_IDX_DOC_HASH }}
DS_NB_SEM_OFF_IDX_KEY: ${{ vars.DS_NB_SEM_OFF_IDX_KEY }}
DS_NB_SEM_OFF_IDX_DOC_HASH: ${{ vars.DS_NB_SEM_OFF_IDX_DOC_HASH }}
DS_NB_QA_IDX_KEY: ${{ vars.DS_NB_QA_IDX_KEY }}
DS_NB_QA_DOC_HASH: ${{ vars.DS_NB_QA_DOC_HASH }}
DS_NB_QUESTION: ${{ vars.DS_NB_QUESTION }}
DS_NB_GEN_TIMEOUT: ${{ vars.DS_NB_GEN_TIMEOUT }}
DS_NB_INDEX_KEY: ${{ vars.DS_NB_INDEX_KEY }}
run: poetry run python -m nbrunner.nb_runner
16 changes: 0 additions & 16 deletions dsnotebooks/settings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from datetime import datetime
from typing import Optional

from deepsearch.cps.queries import ConstrainedWeight
from dotenv import find_dotenv
from pydantic.v1 import BaseSettings, validator

Expand Down Expand Up @@ -36,18 +35,3 @@ def set_kg_key(cls, v):
class CollOptionalNotebookSettings(NotebookSettings):
proj_key: Optional[str] = None
index_key: Optional[str] = None


class CollQANotebookSettings(ProjectNotebookSettings):
sem_on_idx_key: str
retr_k: int = 5
text_weight: ConstrainedWeight = 0.1
rerank: bool = False
skip_ingested_docs: bool = True
raise_on_sem_err: bool = True


class DocQANotebookSettings(CollQANotebookSettings):
sem_on_idx_doc_hash: str
sem_off_idx_key: str
sem_off_idx_doc_hash: str
67 changes: 31 additions & 36 deletions examples/qa/qa_deep_dive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,22 @@
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from pydantic import TypeAdapter\n",
"\n",
"load_dotenv()\n",
"\n",
"# profile to use (user's active profile will be used by default):\n",
"PROFILE_NAME = os.environ.get(\"DS_NB_PROFILE\")\n",
"\n",
"PROFILE_NAME = os.environ.get(\"DS_NB_PROFILE\") # profile to use; defaults to active one\n",
"PROJ_KEY = os.environ[\"DS_NB_PROJ_KEY\"] # project to use\n",
"INDEX_KEY = os.environ[\"DS_NB_SEM_ON_IDX_KEY\"]\n",
"\n",
"# set the doc hash when targeting a specific document\n",
"DOC_HASH = os.environ.get(\"DS_NB_SEM_ON_IDX_DOC_HASH\")\n",
"QUESTION = os.environ[\"DS_NB_SEM_ON_QUESTION\"]\n",
"\n",
"from pydantic import TypeAdapter\n",
"INDEX_KEY = os.environ[\"DS_NB_QA_IDX_KEY\"]\n",
"DOC_HASH = os.environ.get(\"DS_NB_QA_DOC_HASH\") # set only when targeting a specific doc\n",
"QUESTION = os.environ[\"DS_NB_QUESTION\"]\n",
"\n",
"# whether to skip any already semantically ingested docs:\n",
"# whether to ingest incrementally:\n",
"SKIP_INGESTED_DOCS = TypeAdapter(bool).validate_python(\n",
" os.environ.get(\"DS_NB_SKIP_INGESTED_DOCS\", True)\n",
")\n",
"RETR_K = os.environ.get(\"DS_NB_RETR_K\", 3) # number of search results to retrieve\n",
"GEN_TIMEOUT = os.environ.get(\"DB_NB_GEN_TIMEOUT\") # generation timeout in secs"
"GEN_TIMEOUT = os.environ.get(\"DS_NB_GEN_TIMEOUT\", 10) # generation timeout in seconds"
]
},
{
Expand Down Expand Up @@ -634,16 +629,16 @@
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">QueryTimings</span><span style=\"font-weight: bold\">(</span>\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.1071437392383814</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.606383173726499</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">tasks</span>=<span style=\"font-weight: bold\">{</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'QA'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">TaskTimings</span><span style=\"font-weight: bold\">(</span>\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.106700461357832</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.6060180617496371</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">details</span>=<span style=\"font-weight: bold\">{</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'pipeline'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.7934164367616177</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'encode'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.02867589332163334</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'search'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.01751641556620598</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'pipeline'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.115321146324277</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'encode'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.028653355315327644</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'search'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.02463858388364315</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'rerank'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'generate'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.7073831260204315</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'generate'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0093672648072243</span>\n",
" <span style=\"font-weight: bold\">}</span>\n",
" <span style=\"font-weight: bold\">)</span>\n",
" <span style=\"font-weight: bold\">}</span>\n",
Expand All @@ -652,16 +647,16 @@
],
"text/plain": [
"\u001b[1;35mQueryTimings\u001b[0m\u001b[1m(\u001b[0m\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.1071437392383814\u001b[0m,\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.606383173726499\u001b[0m,\n",
" \u001b[33mtasks\u001b[0m=\u001b[1m{\u001b[0m\n",
" \u001b[32m'QA'\u001b[0m: \u001b[1;35mTaskTimings\u001b[0m\u001b[1m(\u001b[0m\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.106700461357832\u001b[0m,\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.6060180617496371\u001b[0m,\n",
" \u001b[33mdetails\u001b[0m=\u001b[1m{\u001b[0m\n",
" \u001b[32m'pipeline'\u001b[0m: \u001b[1;36m0.7934164367616177\u001b[0m,\n",
" \u001b[32m'encode'\u001b[0m: \u001b[1;36m0.02867589332163334\u001b[0m,\n",
" \u001b[32m'search'\u001b[0m: \u001b[1;36m0.01751641556620598\u001b[0m,\n",
" \u001b[32m'pipeline'\u001b[0m: \u001b[1;36m1.115321146324277\u001b[0m,\n",
" \u001b[32m'encode'\u001b[0m: \u001b[1;36m0.028653355315327644\u001b[0m,\n",
" \u001b[32m'search'\u001b[0m: \u001b[1;36m0.02463858388364315\u001b[0m,\n",
" \u001b[32m'rerank'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n",
" \u001b[32m'generate'\u001b[0m: \u001b[1;36m0.7073831260204315\u001b[0m\n",
" \u001b[32m'generate'\u001b[0m: \u001b[1;36m1.0093672648072243\u001b[0m\n",
" \u001b[1m}\u001b[0m\n",
" \u001b[1m)\u001b[0m\n",
" \u001b[1m}\u001b[0m\n",
Expand Down Expand Up @@ -861,15 +856,15 @@
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">QueryTimings</span><span style=\"font-weight: bold\">(</span>\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.1048420211300254</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.1053222483024001</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">tasks</span>=<span style=\"font-weight: bold\">{</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'QA'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">TaskTimings</span><span style=\"font-weight: bold\">(</span>\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.1045362269505858</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">overall</span>=<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.1050042947754264</span>,\n",
" <span style=\"color: #808000; text-decoration-color: #808000\">details</span>=<span style=\"font-weight: bold\">{</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'pipeline'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.6429227869957685</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'encode'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0342963021248579</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'search'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.026565583422780037</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'rerank'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.5808815937489271</span>\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'pipeline'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.6090765688568354</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'encode'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.026338623836636543</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'search'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.025418167933821678</span>,\n",
" <span style=\"color: #008000; text-decoration-color: #008000\">'rerank'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.5562568940222263</span>\n",
" <span style=\"font-weight: bold\">}</span>\n",
" <span style=\"font-weight: bold\">)</span>\n",
" <span style=\"font-weight: bold\">}</span>\n",
Expand All @@ -878,15 +873,15 @@
],
"text/plain": [
"\u001b[1;35mQueryTimings\u001b[0m\u001b[1m(\u001b[0m\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.1048420211300254\u001b[0m,\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.1053222483024001\u001b[0m,\n",
" \u001b[33mtasks\u001b[0m=\u001b[1m{\u001b[0m\n",
" \u001b[32m'QA'\u001b[0m: \u001b[1;35mTaskTimings\u001b[0m\u001b[1m(\u001b[0m\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.1045362269505858\u001b[0m,\n",
" \u001b[33moverall\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1;36m.1050042947754264\u001b[0m,\n",
" \u001b[33mdetails\u001b[0m=\u001b[1m{\u001b[0m\n",
" \u001b[32m'pipeline'\u001b[0m: \u001b[1;36m0.6429227869957685\u001b[0m,\n",
" \u001b[32m'encode'\u001b[0m: \u001b[1;36m0.0342963021248579\u001b[0m,\n",
" \u001b[32m'search'\u001b[0m: \u001b[1;36m0.026565583422780037\u001b[0m,\n",
" \u001b[32m'rerank'\u001b[0m: \u001b[1;36m0.5808815937489271\u001b[0m\n",
" \u001b[32m'pipeline'\u001b[0m: \u001b[1;36m0.6090765688568354\u001b[0m,\n",
" \u001b[32m'encode'\u001b[0m: \u001b[1;36m0.026338623836636543\u001b[0m,\n",
" \u001b[32m'search'\u001b[0m: \u001b[1;36m0.025418167933821678\u001b[0m,\n",
" \u001b[32m'rerank'\u001b[0m: \u001b[1;36m0.5562568940222263\u001b[0m\n",
" \u001b[1m}\u001b[0m\n",
" \u001b[1m)\u001b[0m\n",
" \u001b[1m}\u001b[0m\n",
Expand Down
44 changes: 24 additions & 20 deletions examples/qa/qa_quick_start.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,29 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 1,
"id": "5b244bdd-1b52-41ff-b63e-9a203570d210",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from pydantic import TypeAdapter\n",
"\n",
"load_dotenv()\n",
"\n",
"# profile to use (user's active profile will be used by default):\n",
"PROFILE_NAME = os.environ.get(\"DS_NB_PROFILE\")\n",
"\n",
"PROFILE_NAME = os.environ.get(\"DS_NB_PROFILE\") # profile to use; defaults to active one\n",
"PROJ_KEY = os.environ[\"DS_NB_PROJ_KEY\"] # project to use\n",
"INDEX_KEY = os.environ[\"DS_NB_SEM_ON_IDX_KEY\"]\n",
"INDEX_KEY = os.environ[\"DS_NB_QA_IDX_KEY\"]\n",
"DOC_HASH = os.environ.get(\"DS_NB_QA_DOC_HASH\") # set only when targeting a specific doc\n",
"QUESTION = os.environ[\"DS_NB_QUESTION\"]\n",
"\n",
"# set the doc hash when targeting a specific document\n",
"DOC_HASH = os.environ.get(\"DS_NB_SEM_ON_IDX_DOC_HASH\")\n",
"QUESTION = os.environ[\"DS_NB_SEM_ON_QUESTION\"]\n",
"\n",
"from pydantic import TypeAdapter\n",
"\n",
"# whether to skip any already semantically ingested docs:\n",
"# whether to ingest incrementally:\n",
"SKIP_INGESTED_DOCS = TypeAdapter(bool).validate_python(\n",
" os.environ.get(\"DS_NB_SKIP_INGESTED_DOCS\", True)\n",
")\n",
"RETR_K = os.environ.get(\"DS_NB_RETR_K\", 3) # number of search results to retrieve\n",
"GEN_TIMEOUT = os.environ.get(\"DB_NB_GEN_TIMEOUT\") # generation timeout in secs"
"GEN_TIMEOUT = os.environ.get(\"DS_NB_GEN_TIMEOUT\", 10) # generation timeout in seconds"
]
},
{
Expand All @@ -78,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 2,
"id": "5d236ea0-db1c-4171-8e11-cdd0bad69d66",
"metadata": {},
"outputs": [],
Expand All @@ -100,7 +95,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 3,
"id": "9c108432-a285-4c7b-a996-008ac3ff3d7a",
"metadata": {},
"outputs": [],
Expand All @@ -122,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"id": "adc0f7a4",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -150,17 +145,26 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 5,
"id": "947e87c5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/pva/work/github.com/DS4SD/deepsearch-examples/.venv/lib/python3.10/site-packages/pydantic/main.py:347: UserWarning: Pydantic serializer warnings:\n",
" Expected `list[str]` but got `_LiteralGenericAlias` - serialized value may not be as expected\n",
" return self.__pydantic_serializer__.to_python(\n"
]
},
{
"data": {
"text/plain": [
"{'ing_out': {}}"
]
},
"execution_count": 12,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -190,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 6,
"id": "b960b309",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -403,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 7,
"id": "492fd066",
"metadata": {},
"outputs": [
Expand Down
41 changes: 18 additions & 23 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ packages = [

[tool.poetry.dependencies]
python = ">= 3.8, <3.11"
deepsearch-toolkit = { git = "https://github.com/DS4SD/deepsearch-toolkit.git", rev = "e27a294322ebee27645afb3bffa57052d4677941" }
deepsearch-toolkit = "^0.46.0"
jupyter = "^1.0.0"
ipywidgets = "^7" # previous major release is needed because of mols2grid
numpy = "^1.23.4"
Expand Down
Loading

0 comments on commit 84ea7e1

Please sign in to comment.