Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify template variables setting #2600

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci_code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ jobs:
# TODO: We currently need a default plugin to run tests using MongoDB.
# Once the local file database is complete, we may need to update this section.
python -m pip install plugins/mongodb
python -m pip install plugins/openai
python -m pip install plugins/ibis
python -m pip install plugins/sqlalchemy
Expand All @@ -85,5 +86,6 @@ jobs:
- name: Usecase Testing
run: |
cp -r templates/* superduper/templates/
make usecase_testing SUPERDUPER_CONFIG=test/configs/default.yaml
make usecase_testing SUPERDUPER_CONFIG=test/configs/sql.yaml
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

**Before you create a Pull Request, remember to update the Changelog with your changes.**

## Changes Since Last Release
## Changes Since Last Release

#### Changed defaults / behaviours

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ ignore = [
"D102",
"E402",
]
exclude = ["templates", "superduper/templates"]

[tool.ruff.lint.isort]
combine-as-imports = true
Expand Down
4 changes: 3 additions & 1 deletion superduper/base/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,9 @@ def decode(
:param db: The datalayer to use.
"""
if '_variables' in r:
variables = {**r['_variables'], 'output_prefix': CFG.output_prefix}
r = _replace_variables(
{k: v for k, v in r.items() if k != '_variables'}, **r['_variables']
{k: v for k, v in r.items() if k != '_variables'}, **variables
)
schema = schema or r.get(KEY_SCHEMA)
schema = get_schema(db, schema)
Expand All @@ -216,6 +217,7 @@ def decode(

if not isinstance(getters, _Getters):
getters = _Getters(getters)
assert isinstance(getters, _Getters)

# Prioritize using the local artifact storage getter,
# and then use the DB read getter.
Expand Down
1 change: 1 addition & 0 deletions superduper/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def _apply(name: str, variables: str | None = None, data_backend: str | None = N
variables = variables or '{}'
variables = json.loads(variables)

# TODO remove all of this template logic
def _build_from_template(t):
assert variables is not None, 'Variables must be provided for templates'
all_values = variables.copy()
Expand Down
1 change: 0 additions & 1 deletion superduper/components/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def default_values(self):
def form_template(self):
"""Form to be diplayed to user."""
return {
'identifier': '<enter-a-unique-identifier>',
'_variables': {
k: (
f'<value-{i}>'
Expand Down
28 changes: 1 addition & 27 deletions superduper/rest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from superduper import logging
from superduper.backends.base.query import Query
from superduper.base.document import Document
from superduper.components.component import Component
from superduper.components.template import Template
from superduper.rest.base import DatalayerDependency, SuperDuperApp

Expand Down Expand Up @@ -169,39 +168,14 @@ def _process_db_apply(db, component, id: str | None = None):
else:
db.apply(component, force=True)

def _process_apply_info(db, info):
    """Build a component from the payload posted to the apply endpoint.

    :param db: Datalayer forwarded to the decoding / template call.
    :param info: Request payload. When it carries a ``'_variables'`` key it
                 is treated as a template form and is mutated in place
                 (``'_variables'``, ``'identifier'`` and ``'_template_name'``
                 are popped from it); otherwise it is decoded as-is.
    :return: The component built from ``info``.
    """
    # Plain payload: decode it directly, nothing to substitute.
    if '_variables' not in info:
        component = Document.decode(info, db=db).unpack()
        # TODO this shouldn't be necessary to do twice
        component.unpack()
        return component

    # Template payload: both keys are required.
    assert {'_variables', 'identifier'}.issubset(info.keys())
    variables = info.pop('_variables')

    # String values must not contain angle brackets.
    # NOTE(review): presumably this rejects unfilled '<value-i>' form
    # placeholders -- confirm against Template.form_template.
    for key in variables:
        value = variables[key]
        if isinstance(value, str):
            assert '<' not in value
            assert '>' not in value

    identifier = info.pop('identifier')
    template_name = info.pop('_template_name', None)

    # Whatever is left in ``info`` is handed over as the template body.
    return Component.from_template(
        identifier=identifier,
        template_body=info,
        template_name=template_name,
        db=db,
        **variables,
    )

# POST /db/apply: decode the posted payload into a component and schedule its
# application as a background task, so the request returns immediately.
#
# info             -- request body; decoded with ``Document.decode``.
# background_tasks -- task queue the apply job is added to.
# id               -- forwarded unchanged to ``_process_db_apply``.
# db               -- datalayer injected through ``DatalayerDependency``.
#
# Returns ``{'status': 'ok'}`` once the task has been queued.
@app.add('/db/apply', method='post')
async def db_apply(
    info: t.Dict,
    background_tasks: BackgroundTasks,
    id: str | None = 'test',
    db: 'Datalayer' = DatalayerDependency(),
):
    # Decode exactly once. A preceding ``_process_apply_info(db, info)`` call
    # had its result overwritten by this line and popped '_variables' out of
    # ``info`` beforehand, so the decode never saw the variables.
    component = Document.decode(info, db=db).unpack()
    background_tasks.add_task(_process_db_apply, db, component, id)
    return {'status': 'ok'}

Expand Down
5 changes: 1 addition & 4 deletions templates/pdf_rag/streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@ def init_db():


def load_questions():
    """Return the list of example question strings."""
    # A single return: the block previously held two return statements
    # producing the same list, the second of which was unreachable.
    return ["What is sparse-vector retrieval?", "How to perform Query Optimization?"]


db, model_rag = st.cache_resource(init_db)()
Expand Down
22 changes: 22 additions & 0 deletions test/integration/usecase/test_build_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import json

import pytest

from superduper import Application, Document


@pytest.mark.skip
def test_build_from_template(db):
    """Decode the ``simple_rag`` template form and expect an ``Application``.

    Currently skipped unconditionally via ``pytest.mark.skip``.

    :param db: Datalayer fixture the template is applied to and decoded with.
    """
    from superduper import templates

    db.apply(templates.simple_rag)

    # NOTE(review): the parsed fixture is replaced by ``form_template`` on the
    # very next statement, so only the file read itself has any effect here.
    # Confirm which of the two payloads the test is meant to decode.
    with open('test/material/sample_app/component.json') as fixture:
        component = json.loads(fixture.read())

    component = templates.simple_rag.form_template
    form_variables = component['_variables']
    form_variables['output_prefix'] = '_output__'

    decoded = Document.decode(component, db=db)
    built = decoded.unpack()

    assert isinstance(built, Application)
246 changes: 246 additions & 0 deletions test/material/sample_app/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
{
"_variables": {
"table_name": "sample_simple_rag",
"id_field": "_id",
"databackend": "mongodb",
"base_url": null,
"api_key": null,
"embedding_model": "text-embedding-ada-002",
"llm_model": "gpt-3.5-turbo"
},
"types": {
"id_field": {
"type": "str",
"default": "_id"
},
"embedding_model": {
"type": "str",
"default": "text-embedding-ada-002"
},
"llm_model": {
"type": "str",
"default": "gpt-3.5-turbo"
},
"table_name": {
"type": "str",
"default": "sample_simple_rag"
},
"databackend": {
"type": "str",
"default": "mongodb"
},
"base_url": {
"type": "str",
"optional": true,
"default": null
},
"api_key": {
"type": "str",
"optional": true,
"default": null
}
},
"_base": "?simple-rag-app",
"_builds": {
"datatype:dill": {
"_path": "superduper.components.datatype.get_serializer",
"method": "dill",
"encodable": "artifact"
},
"727d3bb560939e1211f9cac189d56e07e9622eeb": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:727d3bb560939e1211f9cac189d56e07e9622eeb"
},
"model:chunker": {
"_object": "?727d3bb560939e1211f9cac189d56e07e9622eeb",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"signature": "singleton",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
"compute_kwargs": {},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"chunk_size": 200
},
"var-table-name-select-var-id-field-x": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:table_name>.select(\"<var:id_field>\", \"x\")"
},
"listener:chunker": {
"_path": "superduper.components.listener.Listener",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"cdc_table": "<var:table_name>",
"key": "x",
"model": "?model:chunker",
"predict_kwargs": {},
"select": "?var-table-name-select-var-id-field-x",
"flatten": true
},
"datatype:sqlvector[1536]": {
"_path": "superduper.components.vector_index.sqlvector",
"shape": [
1536
]
},
"model:<var:embedding_model>": {
"_path": "superduper_openai.model.OpenAIEmbedding",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"signature": "singleton",
"datatype": "?datatype:sqlvector[1536]",
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
"compute_kwargs": {},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "<var:embedding_model>",
"max_batch_size": 8,
"openai_api_key": null,
"openai_api_base": null,
"client_kwargs": {
"base_url": null,
"api_key": null
},
"shape": [
1536
],
"batch_size": 100
},
"outputs-chunker-?(listener:chunker.uuid)-select-id-source-outputs-chunker-?(listener:chunker.uuid)": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:output_prefix>chunker__?(listener:chunker.uuid).select(\"id\", \"_source\", \"<var:output_prefix>chunker__?(listener:chunker.uuid)\")"
},
"listener:embeddinglistener": {
"_path": "superduper.components.listener.Listener",
"upstream": [
"?listener:chunker",
"?listener:chunker"
],
"plugins": null,
"cache": true,
"status": null,
"cdc_table": "<var:output_prefix>chunker__?(listener:chunker.uuid)",
"key": "<var:output_prefix>chunker__?(listener:chunker.uuid)",
"model": "?model:<var:embedding_model>",
"predict_kwargs": {},
"select": "?outputs-chunker-?(listener:chunker.uuid)-select-id-source-outputs-chunker-?(listener:chunker.uuid)",
"flatten": false
},
"vector_index:vectorindex": {
"_path": "superduper.components.vector_index.VectorIndex",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"cdc_table": "<var:output_prefix>embeddinglistener__?(listener:embeddinglistener.uuid)",
"indexing_listener": "?listener:embeddinglistener",
"compatible_listener": null,
"measure": "cosine",
"metric_values": {}
},
"outputs-chunker-?(listener:chunker.uuid)-select-like-outputs-chunker-?(listener:chunker.uuid)-var-query-vector-index-vectorindex-n-5": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [
{
"<var:output_prefix>chunker__?(listener:chunker.uuid)": "<var:query>"
}
],
"query": "<var:output_prefix>chunker__?(listener:chunker.uuid).select().like(documents[0], vector_index=\"vectorindex\", n=5)"
},
"model:llm-model": {
"_path": "superduper_openai.model.OpenAIChatCompletion",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"signature": "singleton",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
"compute_kwargs": {},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "<var:llm_model>",
"max_batch_size": 8,
"openai_api_key": null,
"openai_api_base": null,
"client_kwargs": {
"base_url": null,
"api_key": null
},
"batch_size": 1,
"prompt": ""
},
"model:simple_rag": {
"_path": "superduper.components.model.RAGModel",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"signature": "singleton",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
"compute_kwargs": {},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"prompt_template": "Use the following context snippets, these snippets are not ordered!, Answer the question based on this context.\nThese snippets are samples from our internal data-repositories, and should be used exclusively and as a matter of priority to answer the question\n\n{context}\n\nHere's the question: {query}",
"select": "?outputs-chunker-?(listener:chunker.uuid)-select-like-outputs-chunker-?(listener:chunker.uuid)-var-query-vector-index-vectorindex-n-5",
"key": "<var:output_prefix>chunker__?(listener:chunker.uuid)",
"llm": "?model:llm-model"
},
"simple-rag-app": {
"_path": "superduper.components.application.Application",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"components": [
"?listener:chunker",
"?vector_index:vectorindex",
"?model:simple_rag"
],
"namespace": null,
"link": null,
"_literals": [
"template"
]
}
},
"_blobs": {},
"_files": {},
"_template_name": "simple_rag"
}
Loading