whiletrue-industries · noamoss · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025
diff --git a/.env.sample b/.env.sample
@@ -1,2 +1,5 @@
 AIRTABLE_API_KEY=
-OPENAI_API_KEY=
+OPENAI_API_KEY=
+ES_USERNAME=
+ES_PASSWORD=
+ES_HOST=
diff --git a/README.md b/README.md
@@ -15,6 +15,11 @@ $ pip install -U -e .
 $ botnim --help
 ```
 
+For development:
+```bash
+$ pip install -U -e .[dev]
+```
+
 ## Directory Structure
 
 - `.env.sample`: Sample environment file for the benchmarking scripts.
@@ -27,6 +32,9 @@ $ botnim --help
     - `__init__.py`: Package initialization.
     - `vector_store_base.py`: Abstract base class for vector store implementations.
     - `vector_store_openai.py`: OpenAI Vector Store implementation.
+    - `vector_store_es.py`: Elasticsearch Vector Store implementation.
+        - See the `backend/es` directory for examples.
+        - Run `pytest` to test the Elasticsearch Vector Store.
   - `benchmark/`: Benchmarking scripts for the bots.
       Copy this file to `.env` and fill in the necessary values.
     - `run-benchmark.py`: Main benchmarking script.
@@ -52,7 +60,7 @@ $ botnim --help
    - Configure the source URL in the bot's `config.yaml`
    - The content will be automatically downloaded during sync
 Either:
-3. `botnim sync {staging/production} {budgetkey/takanon}` to sync the specifications with the OpenAI account.
+3. `botnim sync {staging/production} {budgetkey/takanon} --backend {openai/es}` to sync the specifications with the OpenAI account. `--backend` selects the vector store backend and defaults to `openai`.
    - Use `--replace-context` flag to force a complete rebuild of the vector store (useful when context files have been modified)
 Or
 3. Commit the changes to the repository

diff --git a/botnim/cli.py b/botnim/cli.py
@@ -1,37 +1,41 @@
 import click
 from .sync import sync_agents
 from .benchmark.runner import run_benchmarks
+from .config import SPECS
+
 
 @click.group()
 def cli():
     """A simple CLI tool."""
     pass
 
 # Sync command, receives two arguments: production/staging and a list of bots to sync ('budgetkey'/'takanon' or 'all')
-@cli.command()
+@cli.command(name='sync')
 @click.argument('environment', type=click.Choice(['production', 'staging']))
 @click.argument('bots', type=click.Choice(['budgetkey', 'takanon', 'all']))
 @click.option('--replace-context', is_flag=True, help='Replace existing context')
-def sync(environment, bots, replace_context):
+@click.option('--backend', type=click.Choice(['es', 'openai']), default='openai', help='Vector store backend')
+def sync(environment, bots, replace_context, backend):
     """Sync bots to Airtable."""
     click.echo(f"Syncing {bots} to {environment}")
-    sync_agents(environment, bots, replace_context=replace_context)
+    sync_agents(environment, bots, backend=backend,replace_context=replace_context)
 
 # Run benchmarks command, receives three arguments: production/staging, a list of bots to run benchmarks on ('budgetkey'/'takanon' or 'all') and whether to run benchmarks on the production environment to work locally (true/false)
-@cli.command()
+@cli.command(name='benchmarks')
 @click.argument('environment', type=click.Choice(['production', 'staging']))
 @click.argument('bots', type=click.Choice(['budgetkey', 'takanon', 'all']))
-@click.argument('local', type=click.BOOL)
-@click.option('--reuse-answers', type=click.BOOL, default=False)
+@click.option('--local', is_flag=True, default=False, help='Run benchmarks locally')
+@click.option('--reuse-answers', is_flag=True, default=False)
 @click.option('--select', type=click.STRING, default='failed', help='failed/all/AirTable record ID')
 @click.option('--concurrency', type=click.INT, default=None)
 def benchmarks(environment, bots, local, reuse_answers, select, concurrency):
     """Run benchmarks on bots."""
     click.echo(f"Running benchmarks on {bots} in {environment} (save results locally: {local}, reuse answers: {reuse_answers}, select: {select})")
     run_benchmarks(environment, bots, local, reuse_answers, select, concurrency)
 
+
 def main():
     cli()
 
 if __name__ == '__main__':
-    main()
+    main()
diff --git a/botnim/config.py b/botnim/config.py
@@ -1,7 +1,22 @@
 from pathlib import Path
 import dotenv
+import logging
 
 ROOT = Path(__file__).parent.parent
 SPECS = ROOT / 'specs'
 
 dotenv.load_dotenv(ROOT / '.env')
+
+# Logging configuration
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def get_logger(name: str) -> logging.Logger:
+    """Get a logger instance for the given name"""
+    return logging.getLogger(name)
+
+
+# Embedding model settings
+DEFAULT_EMBEDDING_MODEL = 'text-embedding-3-small'
+DEFAULT_EMBEDDING_SIZE = 1536
+DEFAULT_BATCH_SIZE = 50
diff --git a/botnim/sync.py b/botnim/sync.py
@@ -2,16 +2,17 @@
 import json
 import io
 from pathlib import Path
-
 import yaml
-
 from openai import OpenAI
-
 from .config import SPECS
-from .vector_store import VectorStoreOpenAI
+from .vector_store import VectorStoreOpenAI, VectorStoreES
 
 
 api_key = os.environ['OPENAI_API_KEY']
+# ES settings are read leniently (None when unset) so the default 'openai' backend still imports without them
+es_username = os.environ.get('ES_USERNAME')
+es_password = os.environ.get('ES_PASSWORD')
+es_host = os.environ.get('ES_HOST')
 # Create openai client and get completion for prompt with the 'gpt4-o' model:
 client = OpenAI(api_key=api_key)
 
@@ -52,25 +53,35 @@ def openapi_to_tools(openapi_spec):
             ret.append(func)
     return ret
 
-def update_assistant(config, config_dir, production, replace_context=False):
+def update_assistant(config, config_dir, production, backend, replace_context=False):
     tool_resources = None
     tools = None
     print(f'Updating assistant: {config["name"]}')
     # Load context, if necessary
-    if config.get('context'):
-        vs = VectorStoreOpenAI(config, config_dir, production, client)
+    if config.get('context'):  
+        ## create vector store based on backend parameter
+        if backend == 'openai':
+            vs = VectorStoreOpenAI(config, config_dir, production, client)
+        ## Elasticsearch
+        elif backend == 'es':
+            vs = VectorStoreES(config, config_dir, production, es_host, es_username, es_password)
+        # Update the vector store with the context
         tools, tool_resources = vs.vector_store_update(config['context'], replace_context)
-
+    
     # List all the assistants in the organization:
     assistants = client.beta.assistants.list()
     assistant_id = None
     assistant_name = config['name']
     if not production:
         assistant_name += ' - פיתוח'
+
+    print(f'Looking for assistant named: {assistant_name}')
     for assistant in assistants:
+        print(f'Found assistant: {assistant.name} (ID: {assistant.id})')
         if assistant.name == assistant_name:
             assistant_id = assistant.id
             break
+
     print(f'Assistant ID: {assistant_id}')
     asst_params = dict(
         name=assistant_name,
@@ -109,7 +120,7 @@ def update_assistant(config, config_dir, production, replace_context=False):
         # ...
 
 
-def sync_agents(environment, bots, replace_context=False):
+def sync_agents(environment, bots, backend='openai', replace_context=False):
     production = environment == 'production'
     for config_fn in SPECS.glob('*/config.yaml'):
         config_dir = config_fn.parent
@@ -118,4 +129,4 @@ def sync_agents(environment, bots, replace_context=False):
             with config_fn.open() as config_f:
                 config = yaml.safe_load(config_f)
                 config['instructions'] = (config_dir / config['instructions']).read_text()
-                update_assistant(config, config_dir, production, replace_context=replace_context)
+                update_assistant(config, config_dir, production, backend, replace_context=replace_context)
diff --git a/botnim/vector_store/__init__.py b/botnim/vector_store/__init__.py
@@ -1,4 +1,6 @@
 from .vector_store_openai import VectorStoreOpenAI
+from .vector_store_es import VectorStoreES
 
-__all__ = [VectorStoreOpenAI]
-
+# __all__ must contain names as strings; listing the class objects makes
+# `from botnim.vector_store import *` raise TypeError.
+__all__ = ['VectorStoreOpenAI', 'VectorStoreES']