Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Es vector store basic implementation #21

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
24a59bc
update required packages
noamoss Jan 31, 2025
f7c2433
add es vectorstore class and method
noamoss Jan 31, 2025
f0fd72e
register the new es vector store
noamoss Jan 31, 2025
40f86ab
update sync flow, suppoer es vector store
noamoss Jan 31, 2025
6ed0a48
add cli interface for es vetorstore sync
noamoss Jan 31, 2025
ae39290
update readme.md
noamoss Jan 31, 2025
a8d2b03
remove query method from es vectorstore
noamoss Jan 31, 2025
b6379d9
update .env.example defintions for es vector store
noamoss Jan 31, 2025
ca13b4f
add tests
noamoss Jan 31, 2025
2b7752e
update requirements.txt
noamoss Jan 31, 2025
838c9cb
fix tools initialize condition logic
noamoss Feb 9, 2025
725c228
replace hard-coded embedding with config variable
noamoss Feb 9, 2025
e19d797
refactor: simplify file upload batch processing
noamoss Feb 9, 2025
1604d25
include context name in the index name
noamoss Feb 9, 2025
8c41833
replace file_search with semantic search function tool and add tests
noamoss Feb 9, 2025
503e735
add embedding constants to the right config file...
noamoss Feb 9, 2025
cdf0a9f
remove OpenAI-specific vector store IDs from ES implementation
noamoss Feb 9, 2025
f5eee44
update tests with constant embedding model
noamoss Feb 9, 2025
f66d424
create a local cli tool for communication with the assistants
noamoss Feb 14, 2025
9d14e0d
ui improvements
noamoss Feb 14, 2025
fd84e36
add 'requires_actions' detilas to output
noamoss Feb 14, 2025
40cffc9
Revert "add 'requires_actions' detilas to output"
noamoss Feb 14, 2025
374b8d9
Revert "ui improvements"
noamoss Feb 14, 2025
8c7dfae
Revert "create a local cli tool for communication with the assistants"
noamoss Feb 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .env.sample
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
AIRTABLE_API_KEY=
OPENAI_API_KEY=
OPENAI_API_KEY=
ES_USERNAME=
ES_PASSWORD=
ES_HOST=
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ $ pip install -U -e .
$ botnim --help
```

For development:
```bash
$ pip install -U -e .[dev]
```

## Directory Structure

- `.env.sample`: Sample environment file for the benchmarking scripts.
Expand All @@ -27,6 +32,9 @@ $ botnim --help
- `__init__.py`: Package initialization.
- `vector_store_base.py`: Abstract base class for vector store implementations.
- `vector_store_openai.py`: OpenAI Vector Store implementation.
- `vector_store_es.py`: Elasticsearch Vector Store implementation.
- See the `backend/es` directory for examples.
- Run `pytest` to test the Elasticsearch Vector Store.
- `benchmark/`: Benchmarking scripts for the bots.
Copy this file to `.env` and fill in the necessary values.
- `run-benchmark.py`: Main benchmarking script.
Expand All @@ -52,7 +60,7 @@ $ botnim --help
- Configure the source URL in the bot's `config.yaml`
- The content will be automatically downloaded during sync
Either:
3. `botnim sync {staging/production} {budgetkey/takanon}` to sync the specifications with the OpenAI account.
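3. `botnim sync {staging/production} {budgetkey/takanon} --backend {openai/es}` to sync the specifications with the OpenAI account; `--backend` selects the vector store used for the bot's context (`openai` by default, or `es` for Elasticsearch).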
- Use `--replace-context` flag to force a complete rebuild of the vector store (useful when context files have been modified)
Or
3. Commit the changes to the repository
Expand Down
18 changes: 11 additions & 7 deletions botnim/cli.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,41 @@
import click
from .sync import sync_agents
from .benchmark.runner import run_benchmarks
from .config import SPECS


@click.group()
def cli():
"""A simple CLI tool."""
pass

# Sync command, receives two arguments: production/staging and the bots to sync ('budgetkey'/'takanon' or 'all'), plus the --replace-context flag and the --backend option ('openai'/'es')
@cli.command()
@cli.command(name='sync')
@click.argument('environment', type=click.Choice(['production', 'staging']))
@click.argument('bots', type=click.Choice(['budgetkey', 'takanon', 'all']))
@click.option('--replace-context', is_flag=True, help='Replace existing context')
def sync(environment, bots, replace_context):
@click.option('--backend', type=click.Choice(['es', 'openai']), default='openai', help='Vector store backend')
def sync(environment, bots, replace_context, backend):
"""Sync bots to Airtable."""
click.echo(f"Syncing {bots} to {environment}")
sync_agents(environment, bots, replace_context=replace_context)
sync_agents(environment, bots, backend=backend,replace_context=replace_context)

# Run benchmarks command, receives two arguments: production/staging and the bots to run benchmarks on ('budgetkey'/'takanon' or 'all'), plus the --local and --reuse-answers flags and the --select and --concurrency options
@cli.command()
@cli.command(name='benchmarks')
@click.argument('environment', type=click.Choice(['production', 'staging']))
@click.argument('bots', type=click.Choice(['budgetkey', 'takanon', 'all']))
@click.argument('local', type=click.BOOL)
@click.option('--reuse-answers', type=click.BOOL, default=False)
@click.option('--local', is_flag=True, default=False, help='Run benchmarks locally')
@click.option('--reuse-answers', is_flag=True, default=False)
@click.option('--select', type=click.STRING, default='failed', help='failed/all/AirTable record ID')
@click.option('--concurrency', type=click.INT, default=None)
def benchmarks(environment, bots, local, reuse_answers, select, concurrency):
"""Run benchmarks on bots."""
click.echo(f"Running benchmarks on {bots} in {environment} (save results locally: {local}, reuse answers: {reuse_answers}, select: {select})")
run_benchmarks(environment, bots, local, reuse_answers, select, concurrency)


def main() -> None:
"""Invoke the `cli` click command group."""
cli()

if __name__ == '__main__':
main()
main()
15 changes: 15 additions & 0 deletions botnim/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
from pathlib import Path
import dotenv
import logging

ROOT = Path(__file__).parent.parent
SPECS = ROOT / 'specs'

dotenv.load_dotenv(ROOT / '.env')

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def get_logger(name: str) -> logging.Logger:
"""Return the logger registered under ``name``.

Thin wrapper around ``logging.getLogger``.
"""
return logging.getLogger(name)


# Embedding model settings
DEFAULT_EMBEDDING_MODEL = 'text-embedding-3-small'
DEFAULT_EMBEDDING_SIZE = 1536
DEFAULT_BATCH_SIZE = 50
31 changes: 21 additions & 10 deletions botnim/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
import json
import io
from pathlib import Path

import yaml

from openai import OpenAI

from .config import SPECS
from .vector_store import VectorStoreOpenAI
from .vector_store import VectorStoreOpenAI, VectorStoreES


api_key = os.environ['OPENAI_API_KEY']
# Elasticsearch settings are read leniently: they are only needed when the
# 'es' backend is selected, so a missing variable must not raise KeyError
# while importing this module for OpenAI-only setups.
es_username = os.environ.get('ES_USERNAME')
es_password = os.environ.get('ES_PASSWORD')
es_host = os.environ.get('ES_HOST')

# Create openai client and get completion for prompt with the 'gpt4-o' model:
client = OpenAI(api_key=api_key)

Expand Down Expand Up @@ -52,25 +53,35 @@ def openapi_to_tools(openapi_spec):
ret.append(func)
return ret

def update_assistant(config, config_dir, production, replace_context=False):
def update_assistant(config, config_dir, production, backend, replace_context=False):
tool_resources = None
tools = None
print(f'Updating assistant: {config["name"]}')
# Load context, if necessary
if config.get('context'):
vs = VectorStoreOpenAI(config, config_dir, production, client)
if config.get('context'):
## create vector store based on backend parameter
if backend == 'openai':
vs = VectorStoreOpenAI(config, config_dir, production, client)
## Elasticsearch
elif backend == 'es':
vs = VectorStoreES(config, config_dir, production, es_host, es_username, es_password)
# Update the vector store with the context
tools, tool_resources = vs.vector_store_update(config['context'], replace_context)

# List all the assistants in the organization:
assistants = client.beta.assistants.list()
assistant_id = None
assistant_name = config['name']
if not production:
assistant_name += ' - פיתוח'

print(f'Looking for assistant named: {assistant_name}')
for assistant in assistants:
print(f'Found assistant: {assistant.name} (ID: {assistant.id})')
if assistant.name == assistant_name:
assistant_id = assistant.id
break

print(f'Assistant ID: {assistant_id}')
asst_params = dict(
name=assistant_name,
Expand Down Expand Up @@ -109,7 +120,7 @@ def update_assistant(config, config_dir, production, replace_context=False):
# ...


def sync_agents(environment, bots, replace_context=False):
def sync_agents(environment, bots, backend='openai', replace_context=False):
production = environment == 'production'
for config_fn in SPECS.glob('*/config.yaml'):
config_dir = config_fn.parent
Expand All @@ -118,4 +129,4 @@ def sync_agents(environment, bots, replace_context=False):
with config_fn.open() as config_f:
config = yaml.safe_load(config_f)
config['instructions'] = (config_dir / config['instructions']).read_text()
update_assistant(config, config_dir, production, replace_context=replace_context)
update_assistant(config, config_dir, production, backend, replace_context=replace_context)
4 changes: 2 additions & 2 deletions botnim/vector_store/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .vector_store_openai import VectorStoreOpenAI
from .vector_store_es import VectorStoreES

__all__ = [VectorStoreOpenAI]

# __all__ must list the exported names as strings, not the objects
# themselves; otherwise `from botnim.vector_store import *` raises TypeError.
__all__ = ['VectorStoreOpenAI', 'VectorStoreES']
Loading