From 328d7fa125c86d1ff7e2d4f8ebaff2ed21a2114b Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:08:24 -0700 Subject: [PATCH 01/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 287 ++++++++++++++++++++++----------- 1 file changed, 193 insertions(+), 94 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 15d20711e..df597a2a3 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -1,25 +1,20 @@ import json import os -from typing import List, Union, Any, Dict +from importlib.util import find_spec +from typing import Any, Dict, List, Optional, Union + import requests + import dspy from dspy.primitives.prediction import Prediction +_databricks_sdk_installed = find_spec("databricks.sdk") is not None + class DatabricksRM(dspy.Retrieve): """ A retrieval module that uses Databricks Vector Search Endpoint to return the top-k embeddings for a given query. - Args: - databricks_index_name (str): Databricks vector search index to query - databricks_endpoint (str): Databricks index endpoint url - databricks_token (str): Databricks authentication token - columns (list[str]): Column names to include in response - filters_json (str, optional): JSON string for query filters - k (int, optional): Number of top embeddings to retrieve. Defaults to 3. - docs_id_column_name (str, optional): Column name for retrieved doc_ids to return. - text_column_name (str, optional): Column name for retrieved text to return. - Examples: Below is a code snippet that shows how to configure Databricks Vector Search endpoints: @@ -62,46 +57,56 @@ class DatabricksRM(dspy.Retrieve): Below is a code snippet that shows how to query the Databricks Direct Vector Access Index using the forward() function. ```python - self.retrieve = DatabricksRM(query=[1, 2, 3], query_type = 'vector') + self.retrieve = DatabricksRM(query=[1, 2, 3]) ``` """ def __init__( self, - databricks_index_name=None, - databricks_endpoint=None, - databricks_token=None, - columns=None, - filters_json=None, - k=3, - docs_id_column_name="id", - text_column_name="text", + databricks_index_name: str, + databricks_endpoint: Optional[str] = None, + databricks_token: Optional[str] = None, + columns: List[str] = None, + filters_json: str = None, + k: int = 3, + docs_id_column_name: str = "id", + text_column_name: str = "text", ): + """ + Args: + databricks_index_name (str): The name of the Databricks Vector Search Index to query. + databricks_endpoint (str, optional): The Databricks workspace URL containing the + Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` environment + variable. If unspecified, the Databricks SDK is used to identify the endpoint + based on the current environment. + databricks_token (str, optional): The Databricks workspace authentication token to use + when querying the Vector Search Index. Defaults to the value of the + ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is + used to identify the token based on the current environment. + columns (list[str], optional): Extra column names to include in response, + in addition to the document id and text columns specified by + ``docs_id_column_name`` and ``text_column_name``. + filters_json (str, optional): A JSON string specifying additional query filters. + k (int): The number of documents to retrieve. + docs_id_column_name (str): The name of the column in the Databricks Vector Search Index + containing document IDs. 
+ text_column_name (str): The name of the column in the Databricks Vector Search Index + containing document text to retrieve. + """ super().__init__(k=k) - if not databricks_token and not os.environ.get("DATABRICKS_TOKEN"): - raise ValueError( - "You must supply databricks_token or set environment variable DATABRICKS_TOKEN" - ) - if not databricks_endpoint and not os.environ.get("DATABRICKS_HOST"): - raise ValueError( - "You must supply databricks_endpoint or set environment variable DATABRICKS_HOST" - ) - if not databricks_index_name: - raise ValueError("You must supply vector index name") - if not columns: - raise ValueError( - "You must specify a list of column names to be included in the response" - ) - self.databricks_token = ( - databricks_token if databricks_token else os.environ["DATABRICKS_TOKEN"] - ) + self.databricks_token = databricks_token if databricks_token is not None else os.environ.get("DATABRICKS_TOKEN") self.databricks_endpoint = ( - databricks_endpoint - if databricks_endpoint - else os.environ["DATABRICKS_HOST"] + databricks_endpoint if databricks_endpoint is not None else os.environ.get("DATABRICKS_HOST") ) + if not _databricks_sdk_installed and (self.databricks_token, self.databricks_endpoint).count(None) > 0: + raise ValueError( + "To retrieve documents with Databricks Vector Search, you must install the" + " databricks-sdk Python library, supply the databricks_token and" + " databricks_endpoint parameters, or set the DATABRICKS_TOKEN and DATABRICKS_HOST" + " environment variables." + ) self.databricks_index_name = databricks_index_name - self.columns = columns + self.columns = list({docs_id_column_name, text_column_name, *(columns or [])}) self.filters_json = filters_json self.k = k self.docs_id_column_name = docs_id_column_name @@ -128,71 +133,64 @@ def _get_extra_columns(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: Dict[str, Any]: Search result column values, excluding the "text" and not "id" columns. """ - extra_columns = { - k: v - for k, v in item.items() - if k not in [self.docs_id_column_name, self.text_column_name] - } + extra_columns = {k: v for k, v in item.items() if k not in [self.docs_id_column_name, self.text_column_name]} if self.docs_id_column_name == "metadata": extra_columns = { **extra_columns, - **{ - "metadata": { - k: v - for k, v in json.loads(item["metadata"]).items() - if k != "document_id" - } - }, + **{"metadata": {k: v for k, v in json.loads(item["metadata"]).items() if k != "document_id"}}, } return extra_columns def forward( self, query: Union[str, List[float]], - query_type: str = "text", + query_type: str = "ANN", filters_json: str = None, ) -> dspy.Prediction: """Search with Databricks Vector Search Client for self.k top results for query Args: - query (Union[str, List[float]]): query to search for. - query_type (str): 'vector' for Direct Vector Access Index and Delta Sync Index using self-managed vectors or 'text' for Delta Sync Index using model endpoint. + query (Union[str, List[float]]): Query text or numeric query vector to for which to + find relevant documents. + query_type (str): The type of search query to perform against the Databricks Vector + Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' + (hybrid search). Returns: dspy.Prediction: An object containing the retrieved results. 
""" - headers = { - "Authorization": f"Bearer {self.databricks_token}", - "Content-Type": "application/json", - } - payload = { - "columns": self.columns, - "num_results": self.k, - } - if query_type == "vector": - if not isinstance(query, list): - raise ValueError("Query must be a list of floats for query_vector") - payload["query_vector"] = query - elif query_type == "text": - if not isinstance(query, str): - raise ValueError("Query must be a string for query_text") - payload["query_text"] = query + if isinstance(query, str): + query_text = query + query_vector = None + elif isinstance(query, list): + query_vector = query + query_text = None else: - raise ValueError("Invalid query type specified. Use 'vector' or 'text'.") - - payload["filters_json"] = filters_json if filters_json else self.filters_json + raise ValueError("Query must be a string or a list of floats.") - response = requests.post( - f"{self.databricks_endpoint}/api/2.0/vector-search/indexes/{self.databricks_index_name}/query", - json=payload, - headers=headers, - ) - results = response.json() - - # Check for errors from REST API call - if response.json().get("error_code", None) != None: - raise Exception( - f"ERROR: {response.json()['error_code']} -- {response.json()['message']}" + if _databricks_sdk_installed: + results = self._query_via_databricks_sdk( + index_name=self.databricks_index_name, + k=self.k, + columns=self.columns, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + databricks_token=self.databricks_token, + databricks_endpoint=self.databricks_endpoint, + filters_json=filters_json, + ) + else: + results = self._query_via_requests( + index_name=self.databricks_index_name, + k=self.k, + columns=self.columns, + databricks_token=self.databricks_token, + databricks_endpoint=self.databricks_endpoint, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + filters_json=filters_json, ) # Checking if defined columns are present in the index columns @@ -204,9 +202,7 @@ def forward( ) if self.text_column_name not in col_names: - raise Exception( - f"text_column_name: '{self.text_column_name}' is not in the index columns: \n {col_names}" - ) + raise Exception(f"text_column_name: '{self.text_column_name}' is not in the index columns: \n {col_names}") # Extracting the results items = [] @@ -217,14 +213,117 @@ def forward( items += [item] # Sorting results by score in descending order - sorted_docs = sorted(items, key=lambda x: x["score"], reverse=True)[:self.k] + sorted_docs = sorted(items, key=lambda x: x["score"], reverse=True)[: self.k] # Returning the prediction return Prediction( docs=[doc[self.text_column_name] for doc in sorted_docs], - doc_ids=[ - self._extract_doc_ids(doc) - for doc in sorted_docs - ], + doc_ids=[self._extract_doc_ids(doc) for doc in sorted_docs], extra_columns=[self._get_extra_columns(item) for item in sorted_docs], ) + + @staticmethod + def _query_via_requests( + index_name: str, + k: int, + columns: List[str], + databricks_token: str, + databricks_endpoint: str, + query_type: str, + query_text: Optional[str], + query_vector: Optional[List[float]], + filters_json: Optional[str], + ) -> List[str]: + """ + Query a Databricks Vector Search Index via the Python requests library. + + Args: + index_name (str): Name of the Databricks vector search index to query + k (int): Number of relevant documents to retrieve. + columns (List[str]): Column names to include in response. + databricks_token (str): Databricks authentication token. 
+ databricks_endpoint (str): Databricks index endpoint url. + query_text (str, optional): Text query for which to find relevant documents. Exactly + one of query_text or query_vector must be specified. + query_vector (List[float], optional): Numeric query vector for which to find relevant + documents. Exactly one of query_text or query_vector must be specified. + filters_json (str, optional): JSON string representing additional query filters. + + Returns: + List[str]: List of top-k retrieved documents. TODO - UPDATE THIS + """ + if (query_text, query_vector).count(None) != 1: + raise ValueError("Exactly one of query_text or query_vector must be specified.") + + headers = { + "Authorization": f"Bearer {databricks_token}", + "Content-Type": "application/json", + } + payload = { + "columns": columns, + "num_results": k, + "query_type": query_type, + } + if filters_json is not None: + payload["filters_json"] = filters_json + if query_text is not None: + payload["query_text"] = query_text + elif query_vector is not None: + payload["query_vector"] = query_vector + response = requests.post( + f"{databricks_endpoint}/api/2.0/vector-search/indexes/{index_name}/query", + json=payload, + headers=headers, + ) + results = response.json() + if "error_code" in results: + raise Exception(f"ERROR: {results['error_code']} -- {results['message']}") + return results + + @staticmethod + def _query_via_databricks_sdk( + index_name: str, + k: int, + columns: List[str], + query_type: str, + query_text: Optional[str], + query_vector: Optional[List[float]], + databricks_token: Optional[str], + databricks_endpoint: Optional[str], + filters_json: Optional[str], + ) -> List[str]: + """ + Query a Databricks Vector Search Index via the Databricks SDK. + Assumes that the databricks-sdk Python library is installed. + + Args: + index_name (str): Name of the Databricks vector search index to query + k (int): Number of relevant documents to retrieve. + columns (List[str]): Column names to include in response. + query_text (str, optional): Text query for which to find relevant documents. Exactly + one of query_text or query_vector must be specified. + query_vector (List[float], optional): Numeric query vector for which to find relevant + documents. Exactly one of query_text or query_vector must be specified. + filters_json (str, optional): JSON string representing additional query filters. + databricks_token (str): Databricks authentication token. If not specified, + the token is resolved from the current environment. + databricks_endpoint (str): Databricks index endpoint url. If not specified, + the endpoint is resolved from the current environment. + Returns: + List[str]: List of top-k retrieved documents. 
TODO - UPDATE THIS + """ + from databricks.sdk import WorkspaceClient + + if (query_text, query_vector).count(None) != 1: + raise ValueError("Exactly one of query_text or query_vector must be specified.") + + databricks_client = WorkspaceClient(host=databricks_endpoint, token=databricks_token) + return databricks_client.vector_search_indexes.query_index( + index_name=index_name, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + columns=columns, + filters_json=filters_json, + num_results=k, + ).as_dict() From 2cd97d15b5e4d16248bc07f38341b6d81b16ec90 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:24:29 -0700 Subject: [PATCH 02/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index df597a2a3..b42f96594 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -16,7 +16,8 @@ class DatabricksRM(dspy.Retrieve): A retrieval module that uses Databricks Vector Search Endpoint to return the top-k embeddings for a given query. Examples: - Below is a code snippet that shows how to configure Databricks Vector Search endpoints: + Below is a code snippet that shows how to set up a Databricks Vector Search Index + and configure a DatabricksRM DSPy retrieval module to query the index. (example adapted from "Databricks: How to create and query a Vector Search Index: https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) @@ -24,18 +25,14 @@ class DatabricksRM(dspy.Retrieve): ```python from databricks.vector_search.client import VectorSearchClient - #Creating Vector Search Client - + # Create a Databricks Vector Search Endpoint client = VectorSearchClient() - client.create_endpoint( name="your_vector_search_endpoint_name", endpoint_type="STANDARD" ) - #Creating Vector Search Index using Python SDK - #Example for Direct Vector Access Index - + # Create a Databricks Direct Access Vector Search Index index = client.create_direct_access_index( endpoint_name="your_databricks_host_url", index_name="your_index_name", @@ -50,12 +47,18 @@ class DatabricksRM(dspy.Retrieve): ) llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = DatabricksRM(databricks_index_name = "your_index_name", - databricks_endpoint = "your_databricks_host_url", databricks_token = "your_databricks_token", columns= ["id", "field2", "field3", "text_vector"], k=3) + retriever_model = DatabricksRM( + databricks_index_name = "your_index_name", + docs_id_column_name="your_id_column", + text_column_name="your_text_column", + k=3 + ) dspy.settings.configure(lm=llm, rm=retriever_model) ``` - Below is a code snippet that shows how to query the Databricks Direct Vector Access Index using the forward() function. + Below is a code snippet that shows how to query the Databricks Direct Access Vector + Search Index using the ``forward()`` function: + ```python self.retrieve = DatabricksRM(query=[1, 2, 3]) ``` @@ -87,6 +90,9 @@ def __init__( in addition to the document id and text columns specified by ``docs_id_column_name`` and ``text_column_name``. filters_json (str, optional): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. 
k (int): The number of documents to retrieve. docs_id_column_name (str): The name of the column in the Databricks Vector Search Index containing document IDs. @@ -178,7 +184,7 @@ def forward( query_vector=query_vector, databricks_token=self.databricks_token, databricks_endpoint=self.databricks_endpoint, - filters_json=filters_json, + filters_json=filters_json or self.filters_json, ) else: results = self._query_via_requests( @@ -190,7 +196,7 @@ def forward( query_type=query_type, query_text=query_text, query_vector=query_vector, - filters_json=filters_json, + filters_json=filters_json or self.filters_json, ) # Checking if defined columns are present in the index columns From c6b83d394db0fa5a7fbae1c2806da892b62af937 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:26:42 -0700 Subject: [PATCH 03/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index b42f96594..cd955abc4 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -69,8 +69,8 @@ def __init__( databricks_index_name: str, databricks_endpoint: Optional[str] = None, databricks_token: Optional[str] = None, - columns: List[str] = None, - filters_json: str = None, + columns: Optional[List[str]] = None, + filters_json: Optional[str] = None, k: int = 3, docs_id_column_name: str = "id", text_column_name: str = "text", From 0310e72f499ed0cb56f055150687a0fefc3b8155 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:28:58 -0700 Subject: [PATCH 04/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index cd955abc4..febec9c1c 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -57,7 +57,7 @@ class DatabricksRM(dspy.Retrieve): ``` Below is a code snippet that shows how to query the Databricks Direct Access Vector - Search Index using the ``forward()`` function: + Search Index using the DatabricksRM retrieval module: ```python self.retrieve = DatabricksRM(query=[1, 2, 3]) From 9b040a88d1ea235c05569197c79722847d34c3f6 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:34:48 -0700 Subject: [PATCH 05/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index febec9c1c..16eef96c1 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -165,6 +165,15 @@ def forward( Returns: dspy.Prediction: An object containing the retrieved results. """ + if query_type in ["vector", "text"]: + # Older versions of DSPy used a `query_type` argument to disambiguate between text + # and vector queries, rather than checking the type of the `query` argument. This + # differs from the Databricks Vector Search definition of `query_type`, which + # specifies the search algorithm to use (e.g. "ANN" or "HYBRID"). To maintain + # backwards compatibility with older versions of DSPy, we map the old `query_type` + # values to the Databricks Vector Search default query type of "ANN". 
+ query_type = "ANN" + if isinstance(query, str): query_text = query query_vector = None From e8b3e35b23e8a9bcfc9f25032e2d657b38134b90 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:43:57 -0700 Subject: [PATCH 06/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 16eef96c1..232341715 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -221,7 +221,7 @@ def forward( # Extracting the results items = [] - for idx, data_row in enumerate(results["result"]["data_array"]): + for _, data_row in enumerate(results["result"]["data_array"]): item = {} for col_name, val in zip(col_names, data_row): item[col_name] = val @@ -248,7 +248,7 @@ def _query_via_requests( query_text: Optional[str], query_vector: Optional[List[float]], filters_json: Optional[str], - ) -> List[str]: + ) -> str: """ Query a Databricks Vector Search Index via the Python requests library. @@ -265,7 +265,7 @@ def _query_via_requests( filters_json (str, optional): JSON string representing additional query filters. Returns: - List[str]: List of top-k retrieved documents. TODO - UPDATE THIS + str: JSON response from the Databricks Vector Search Index query. """ if (query_text, query_vector).count(None) != 1: raise ValueError("Exactly one of query_text or query_vector must be specified.") @@ -306,7 +306,7 @@ def _query_via_databricks_sdk( databricks_token: Optional[str], databricks_endpoint: Optional[str], filters_json: Optional[str], - ) -> List[str]: + ) -> str: """ Query a Databricks Vector Search Index via the Databricks SDK. Assumes that the databricks-sdk Python library is installed. @@ -325,7 +325,7 @@ def _query_via_databricks_sdk( databricks_endpoint (str): Databricks index endpoint url. If not specified, the endpoint is resolved from the current environment. Returns: - List[str]: List of top-k retrieved documents. TODO - UPDATE THIS + str: JSON response from the Databricks Vector Search Index query. """ from databricks.sdk import WorkspaceClient From 255e6758e8854462ac09946bfaa85f2dff233bae Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:44:51 -0700 Subject: [PATCH 07/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 232341715..76877d24f 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -248,7 +248,7 @@ def _query_via_requests( query_text: Optional[str], query_vector: Optional[List[float]], filters_json: Optional[str], - ) -> str: + ) -> Dict[str, Any]: """ Query a Databricks Vector Search Index via the Python requests library. @@ -265,7 +265,7 @@ def _query_via_requests( filters_json (str, optional): JSON string representing additional query filters. Returns: - str: JSON response from the Databricks Vector Search Index query. + Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. """ if (query_text, query_vector).count(None) != 1: raise ValueError("Exactly one of query_text or query_vector must be specified.") @@ -306,7 +306,7 @@ def _query_via_databricks_sdk( databricks_token: Optional[str], databricks_endpoint: Optional[str], filters_json: Optional[str], - ) -> str: + ) -> Dict[str, Any]: """ Query a Databricks Vector Search Index via the Databricks SDK. 
Assumes that the databricks-sdk Python library is installed. @@ -325,7 +325,7 @@ def _query_via_databricks_sdk( databricks_endpoint (str): Databricks index endpoint url. If not specified, the endpoint is resolved from the current environment. Returns: - str: JSON response from the Databricks Vector Search Index query. + Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. """ from databricks.sdk import WorkspaceClient From c5389965a5fa37bc8ac39c5025edc19ad11c3289 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:46:06 -0700 Subject: [PATCH 08/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 96 +++++++++++++++++----------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 76877d24f..70a0e1cb4 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -237,6 +237,54 @@ def forward( extra_columns=[self._get_extra_columns(item) for item in sorted_docs], ) + @staticmethod + def _query_via_databricks_sdk( + index_name: str, + k: int, + columns: List[str], + query_type: str, + query_text: Optional[str], + query_vector: Optional[List[float]], + databricks_token: Optional[str], + databricks_endpoint: Optional[str], + filters_json: Optional[str], + ) -> Dict[str, Any]: + """ + Query a Databricks Vector Search Index via the Databricks SDK. + Assumes that the databricks-sdk Python library is installed. + + Args: + index_name (str): Name of the Databricks vector search index to query + k (int): Number of relevant documents to retrieve. + columns (List[str]): Column names to include in response. + query_text (str, optional): Text query for which to find relevant documents. Exactly + one of query_text or query_vector must be specified. + query_vector (List[float], optional): Numeric query vector for which to find relevant + documents. Exactly one of query_text or query_vector must be specified. + filters_json (str, optional): JSON string representing additional query filters. + databricks_token (str): Databricks authentication token. If not specified, + the token is resolved from the current environment. + databricks_endpoint (str): Databricks index endpoint url. If not specified, + the endpoint is resolved from the current environment. + Returns: + Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. 
+ """ + from databricks.sdk import WorkspaceClient + + if (query_text, query_vector).count(None) != 1: + raise ValueError("Exactly one of query_text or query_vector must be specified.") + + databricks_client = WorkspaceClient(host=databricks_endpoint, token=databricks_token) + return databricks_client.vector_search_indexes.query_index( + index_name=index_name, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + columns=columns, + filters_json=filters_json, + num_results=k, + ).as_dict() + @staticmethod def _query_via_requests( index_name: str, @@ -294,51 +342,3 @@ def _query_via_requests( if "error_code" in results: raise Exception(f"ERROR: {results['error_code']} -- {results['message']}") return results - - @staticmethod - def _query_via_databricks_sdk( - index_name: str, - k: int, - columns: List[str], - query_type: str, - query_text: Optional[str], - query_vector: Optional[List[float]], - databricks_token: Optional[str], - databricks_endpoint: Optional[str], - filters_json: Optional[str], - ) -> Dict[str, Any]: - """ - Query a Databricks Vector Search Index via the Databricks SDK. - Assumes that the databricks-sdk Python library is installed. - - Args: - index_name (str): Name of the Databricks vector search index to query - k (int): Number of relevant documents to retrieve. - columns (List[str]): Column names to include in response. - query_text (str, optional): Text query for which to find relevant documents. Exactly - one of query_text or query_vector must be specified. - query_vector (List[float], optional): Numeric query vector for which to find relevant - documents. Exactly one of query_text or query_vector must be specified. - filters_json (str, optional): JSON string representing additional query filters. - databricks_token (str): Databricks authentication token. If not specified, - the token is resolved from the current environment. - databricks_endpoint (str): Databricks index endpoint url. If not specified, - the endpoint is resolved from the current environment. - Returns: - Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. - """ - from databricks.sdk import WorkspaceClient - - if (query_text, query_vector).count(None) != 1: - raise ValueError("Exactly one of query_text or query_vector must be specified.") - - databricks_client = WorkspaceClient(host=databricks_endpoint, token=databricks_token) - return databricks_client.vector_search_indexes.query_index( - index_name=index_name, - query_type=query_type, - query_text=query_text, - query_vector=query_vector, - columns=columns, - filters_json=filters_json, - num_results=k, - ).as_dict() From 279f5485e47a04707f6507dafaca00f06f93f5eb Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:47:22 -0700 Subject: [PATCH 09/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 70a0e1cb4..d383d920c 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -78,11 +78,11 @@ def __init__( """ Args: databricks_index_name (str): The name of the Databricks Vector Search Index to query. - databricks_endpoint (str, optional): The Databricks workspace URL containing the - Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` environment - variable. If unspecified, the Databricks SDK is used to identify the endpoint - based on the current environment. 
- databricks_token (str, optional): The Databricks workspace authentication token to use + databricks_endpoint (str, optional): The URL of the Databricks Workspace containing + the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` + environment variable. If unspecified, the Databricks SDK is used to identify the + endpoint based on the current environment. + databricks_token (str, optional): The Databricks Workspace authentication token to use when querying the Vector Search Index. Defaults to the value of the ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is used to identify the token based on the current environment. From fc6fc6d6fc0675f29f08cd454608f51b12b0edae Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 20:52:57 -0700 Subject: [PATCH 10/23] fix Signed-off-by: dbczumar --- .../AzureCognitiveSearch.md | 2 +- docs/api/retrieval_model_clients/ChromadbRM.md | 5 ++++- docs/api/retrieval_model_clients/FaissRM.md | 6 ++++-- docs/api/retrieval_model_clients/MilvusRM.md | 10 ++++++---- docs/api/retrieval_model_clients/MyScaleRM.md | 14 +++++++++++--- docs/api/retrieval_model_clients/Neo4jRM.md | 12 +++++++----- docs/api/retrieval_model_clients/RAGatouilleRM.md | 8 +++++--- docs/api/retrieval_model_clients/SnowflakeRM.md | 2 +- .../api/retrieval_model_clients/WatsonDiscovery.md | 5 ++++- docs/api/retrieval_model_clients/YouRM.md | 10 ++++------ 10 files changed, 47 insertions(+), 27 deletions(-) diff --git a/docs/api/retrieval_model_clients/AzureCognitiveSearch.md b/docs/api/retrieval_model_clients/AzureCognitiveSearch.md index 832184563..bb210e830 100644 --- a/docs/api/retrieval_model_clients/AzureCognitiveSearch.md +++ b/docs/api/retrieval_model_clients/AzureCognitiveSearch.md @@ -1,5 +1,5 @@ --- -sidebar_position: 2 +sidebar_position: 3 --- # retrieve.AzureCognitiveSearch diff --git a/docs/api/retrieval_model_clients/ChromadbRM.md b/docs/api/retrieval_model_clients/ChromadbRM.md index cfff233d2..97fbe26d4 100644 --- a/docs/api/retrieval_model_clients/ChromadbRM.md +++ b/docs/api/retrieval_model_clients/ChromadbRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 3 +sidebar_position: 4 --- # retrieve.ChromadbRM @@ -18,6 +18,7 @@ ChromadbRM( ``` **Parameters:** + - `collection_name` (_str_): The name of the chromadb collection. - `persist_directory` (_str_): Path to the directory where chromadb data is persisted. - `embedding_function` (_Optional[EmbeddingFunction[Embeddable]]_, _optional_): The function used for embedding documents and queries. Defaults to `DefaultEmbeddingFunction()` if not specified. @@ -30,10 +31,12 @@ ChromadbRM( Search the chromadb collection for the top `k` passages matching the given query or queries, using embeddings generated via the specified `embedding_function`. **Parameters:** + - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. 
**Returns:** + - `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"id": str, "score": float, "long_text": str, "metadatas": dict }]` ### Quickstart with OpenAI Embeddings diff --git a/docs/api/retrieval_model_clients/FaissRM.md b/docs/api/retrieval_model_clients/FaissRM.md index 634904521..89aed38e7 100644 --- a/docs/api/retrieval_model_clients/FaissRM.md +++ b/docs/api/retrieval_model_clients/FaissRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 4 +sidebar_position: 5 --- # retrieve.FaissRM @@ -17,6 +17,7 @@ FaissRM( ``` **Parameters:** + - `document_chunks` (_List[str]_): a list of strings that comprises the corpus to search. You cannot add/insert/upsert to this list after creating this FaissRM object. - `vectorizer` (_dsp.modules.sentence_vectorizer.BaseSentenceVectorizer_, _optional_): If not provided, a dsp.modules.sentence_vectorizer.SentenceTransformersVectorizer object is created and used. - `k` (_int_, _optional_): The number of top passages to retrieve. Defaults to 3. @@ -28,16 +29,17 @@ FaissRM( Search the FaissRM vector database for the top `k` passages matching the given query or queries, using embeddings generated via the vectorizer specified at FaissRM construction time **Parameters:** + - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. **Returns:** + - `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with a `long_text` attribute and an `index` attribute. The `index` attribute is the index in the document_chunks array provided to this FaissRM object at construction time. ### Quickstart with the default vectorizer The **FaissRM** module provides a retriever that uses an in-memory Faiss vector database. This module does not include a vectorizer; instead it supports any subclass of **dsp.modules.sentence_vectorizer.BaseSentenceVectorizer**. If a vectorizer is not provided, an instance of **dsp.modules.sentence_vectorizer.SentenceTransformersVectorizer** is created and used by **FaissRM**. Note that the default embedding model for **SentenceTransformersVectorizer** is **all-MiniLM-L6-v2** - ```python import dspy from dspy.retrieve.faiss_rm import FaissRM diff --git a/docs/api/retrieval_model_clients/MilvusRM.md b/docs/api/retrieval_model_clients/MilvusRM.md index e8fb41e42..2f10b77c5 100644 --- a/docs/api/retrieval_model_clients/MilvusRM.md +++ b/docs/api/retrieval_model_clients/MilvusRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 5 +sidebar_position: 6 --- # retrieve.MilvusRM @@ -20,13 +20,14 @@ MilvusRM( ``` **Parameters:** + - `collection_name (str)`: The name of the Milvus collection to query against. - `uri (str, optional)`: The Milvus connection uri. Defaults to "http://localhost:19530". - `token (str, optional)`: The Milvus connection token. Defaults to None. - `db_name (str, optional)`: The Milvus database name. Defaults to "default". - `embedding_function (callable, optional)`: The function to convert a list of text to embeddings. - The embedding function should take a list of text strings as input and output a list of embeddings. - Defaults to None. By default, it will get OpenAI client by the environment variable OPENAI_API_KEY and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. + The embedding function should take a list of text strings as input and output a list of embeddings. + Defaults to None. 
By default, it will get OpenAI client by the environment variable OPENAI_API_KEY and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. - `k (int, optional)`: The number of top passages to retrieve. Defaults to 3. ### Methods @@ -36,10 +37,12 @@ MilvusRM( Search the Milvus collection for the top `k` passages matching the given query or queries, using embeddings generated via the default OpenAI embedding or the specified `embedding_function`. **Parameters:** + - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. **Returns:** + - `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"id": str, "score": float, "long_text": str, "metadatas": dict }]` ### Quickstart @@ -71,7 +74,6 @@ for result in results: print("Document:", result.long_text, "\n") ``` - #### Customized Embedding Function ```python diff --git a/docs/api/retrieval_model_clients/MyScaleRM.md b/docs/api/retrieval_model_clients/MyScaleRM.md index d45144e1f..5fa8b87d1 100644 --- a/docs/api/retrieval_model_clients/MyScaleRM.md +++ b/docs/api/retrieval_model_clients/MyScaleRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 6 +sidebar_position: 7 --- # retrieve.MyScaleRM @@ -9,6 +9,7 @@ sidebar_position: 6 Initializes an instance of the `MyScaleRM` class, which is designed to use MyScaleDB (a ClickHouse fork optimized for vector similarity and full-text search) to retrieve documents based on query embeddings. This class supports embedding generation using either local models or OpenAI's API and manages database interactions efficiently. ### Syntax + ```python MyScaleRM( client: clickhouse_connect.driver.client.Client, @@ -22,7 +23,9 @@ MyScaleRM( local_embed_model: Optional[str] = None ) ``` + ## Parameters for `MyScaleRM` Constructor + - `client` (_clickhouse_connect.driver.client.Client_): A client connection to the MyScaleDB database, used to execute queries and manage interactions with the database. - `table` (_str_): Specifies the table within MyScaleDB from which data will be retrieved. This table should be equipped with a vector column for conducting similarity searches. - `database` (_str_, optional): The name of the database where the table is located, defaulting to `"default"`. @@ -34,21 +37,26 @@ MyScaleRM( - `local_embed_model` (_str, optional_): Specifies a local model for embedding generation, chosen if local computation is preferred. ## Methods + ### `forward` + Executes a retrieval operation based on a user's query and returns the top `k` relevant results using the embeddings generated by the specified method. ### Syntax + ```python def forward(self, user_query: str, k: Optional[int] = None) -> dspy.Prediction ``` ## Parameters + - `user_query` (_str_): The query to retrieve matching passages. - `k` (_Optional[int], optional_): The number of top matches to retrieve. If not provided, it defaults to the `k` value set during class initialization. ## Returns + - `dspy.Prediction`: Contains the retrieved passages, formatted as a list of `dotdict` objects. Each entry includes: - - **long_text (str)**: The text content of the retrieved passage. + - **long_text (str)**: The text content of the retrieved passage. 
## Description @@ -77,4 +85,4 @@ passages = results.passages for passage in passages: print(passage['long_text'], "\n") -``` \ No newline at end of file +``` diff --git a/docs/api/retrieval_model_clients/Neo4jRM.md b/docs/api/retrieval_model_clients/Neo4jRM.md index ca153431d..ea7908aaa 100644 --- a/docs/api/retrieval_model_clients/Neo4jRM.md +++ b/docs/api/retrieval_model_clients/Neo4jRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 7 +sidebar_position: 8 --- # retrieve.neo4j_rm @@ -34,6 +34,7 @@ You need to define the credentials as environment variables: - `OPENAI_API_KEY` (_str_): Specifies the API key required for authenticiating with OpenAI's services. **Parameters:** + - `index_name` (_str_): Specifies the name of the vector index to be used within Neo4j for organizing and querying data. - `text_node_property` (_str_, _optional_): Defines the specific property of nodes that will be returned. - `k` (_int_, _optional_): The number of top results to return from the retrieval operation. It defaults to 5 if not explicitly specified. @@ -41,7 +42,6 @@ You need to define the credentials as environment variables: - `embedding_provider` (_str_, _optional_): The name of the service provider for generating embeddings. Defaults to "openai" if not specified. - `embedding_model` (_str_, _optional_): The specific embedding model to use from the provider. By default, it uses the "text-embedding-ada-002" model from OpenAI. - ### Methods #### `forward(self, query: [str], k: Optional[int] = None) -> dspy.Prediction` @@ -49,21 +49,23 @@ You need to define the credentials as environment variables: Search the neo4j vector index for the top `k` passages matching the given query or queries, using embeddings generated via the specified `embedding_model`. **Parameters:** -- `query` (str_): The query. + +- `query` (str\_): The query. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. **Returns:** + - `dspy.Prediction`: Contains the retrieved passages as a list of string with the prediction signature. ex: + ```python Prediction( passages=['Passage 1 Lorem Ipsum awesome', 'Passage 2 Lorem Ipsum Youppidoo', 'Passage 3 Lorem Ipsum Yassssss'] ) ``` -### Quick Example how to use Neo4j in a local environment. - +### Quick Example how to use Neo4j in a local environment. ```python from dspy.retrieve.neo4j_rm import Neo4jRM diff --git a/docs/api/retrieval_model_clients/RAGatouilleRM.md b/docs/api/retrieval_model_clients/RAGatouilleRM.md index de7a73399..498047a29 100644 --- a/docs/api/retrieval_model_clients/RAGatouilleRM.md +++ b/docs/api/retrieval_model_clients/RAGatouilleRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 8 +sidebar_position: 9 --- # retrieve.RAGatouilleRM @@ -13,12 +13,13 @@ class RAGatouilleRM(dspy.Retrieve): def __init__( self, index_root: str, - index_name: str, + index_name: str, k: int = 3, ): ``` **Parameters:** + - `index_root` (_str_): Folder path where your index is stored. - `index_name` (_str_): Name of the index you want to retrieve from. - `k` (_int_): The default number of passages to retrieve. Defaults to `3`. @@ -30,9 +31,10 @@ class RAGatouilleRM(dspy.Retrieve): Enables making queries to the RAGatouille-made index for retrieval. Internally, the method handles the specifics of preparing the query to obtain the response. The function handles the retrieval of the top-k passages based on the provided query. **Parameters:** + - `query_or_queries` (Union[str, List[str]]): Query string used for retrieval. 
- `k` (_int_, _optional_): Number of passages to retrieve. Defaults to 3. **Returns:** -- `dspy.Prediction`: List of k passages +- `dspy.Prediction`: List of k passages diff --git a/docs/api/retrieval_model_clients/SnowflakeRM.md b/docs/api/retrieval_model_clients/SnowflakeRM.md index c38619888..e94e98b32 100644 --- a/docs/api/retrieval_model_clients/SnowflakeRM.md +++ b/docs/api/retrieval_model_clients/SnowflakeRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 9 +sidebar_position: 10 --- # retrieve.SnowflakeRM diff --git a/docs/api/retrieval_model_clients/WatsonDiscovery.md b/docs/api/retrieval_model_clients/WatsonDiscovery.md index 3bb358ce8..198fbfdb8 100644 --- a/docs/api/retrieval_model_clients/WatsonDiscovery.md +++ b/docs/api/retrieval_model_clients/WatsonDiscovery.md @@ -1,5 +1,5 @@ --- -sidebar_position: 10 +sidebar_position: 11 --- # retrieve.WatsonDiscoveryRM @@ -22,6 +22,7 @@ class WatsonDiscoveryRM: ``` **Parameters:** + - `apikey` (str): apikey for authentication purposes, - `url` (str): endpoint URL that includes the service instance ID - `version` (str): Release date of the version of the API you want to use. Specify dates in YYYY-MM-DD format. @@ -36,10 +37,12 @@ class WatsonDiscoveryRM: Search the Watson Discovery collection for the top `k` passages matching the given query or queries. **Parameters:** + - `query_or_queries` (_Union[str, list[str]]_): The query or list of queries to search for. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. **Returns:** + - `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"title":str, "long_text": str, "passage_score": float, "document_id": str, "collection_id": str, "start_offset": int, "end_offset": int, "field": str}]` ### Quickstart diff --git a/docs/api/retrieval_model_clients/YouRM.md b/docs/api/retrieval_model_clients/YouRM.md index df29b8a51..ac3f67746 100644 --- a/docs/api/retrieval_model_clients/YouRM.md +++ b/docs/api/retrieval_model_clients/YouRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: 11 +sidebar_position: 12 --- # retrieve.YouRM @@ -25,15 +25,15 @@ YouRM( **Parameters:** - `ydc_api_key` (Optional[str]): you.com API key, if `YDC_API_KEY` is not set in the environment -- `k` (int): If ``endpoint="search"``, the max snippets to return per search hit. - If ``endpoint="news"``, the max articles to return. +- `k` (int): If `endpoint="search"`, the max snippets to return per search hit. + If `endpoint="news"`, the max articles to return. - `endpoint` (Literal["search", "news"]): you.com endpoints - `num_web_results` (Optional[int]): The max number of web results to return, must be under 20 - `safesearch` (Optional[Literal["off", "moderate", "strict"]]): Safesearch settings, one of "off", "moderate", "strict", defaults to moderate - `country` (Optional[str]): Country code, ex: 'US' for United States, see API reference for more info - `search_lang` (Optional[str]): (News API) Language codes, ex: 'en' for English, see API reference for more info - `ui_lang` (Optional[str]): (News API) User interface language for the response, ex: 'en' for English. 
- See API reference for more info + See API reference for more info - `spellcheck` (Optional[bool]): (News API) Whether to spell check query or not, defaults to True ### Methods @@ -44,7 +44,6 @@ If `endpoint="search"`, search the web for the top `k` snippets matching the giv If `endpoint="news"`, search the web for the top `k` articles matching the given query or queries. - **Parameters:** - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. @@ -60,7 +59,6 @@ Obtain a You.com API key from [https://api.you.com/](https://api.you.com/). Export this key to an environment variable `YDC_API_KEY`. - ```python from dspy.retrieve.you_rm import YouRM import os From 70da6ac948692422a259bf85078ca64b3658fe02 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:21:57 -0700 Subject: [PATCH 11/23] fix Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 65 +++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index d383d920c..b8cc77ec6 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -13,11 +13,12 @@ class DatabricksRM(dspy.Retrieve): """ - A retrieval module that uses Databricks Vector Search Endpoint to return the top-k embeddings for a given query. + A retriever module that uses a Databricks Mosaic AI Vector Search Index to return the top-k + embeddings for a given query. Examples: Below is a code snippet that shows how to set up a Databricks Vector Search Index - and configure a DatabricksRM DSPy retrieval module to query the index. + and configure a DatabricksRM DSPy retriever module to query the index. (example adapted from "Databricks: How to create and query a Vector Search Index: https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) @@ -34,33 +35,34 @@ class DatabricksRM(dspy.Retrieve): # Create a Databricks Direct Access Vector Search Index index = client.create_direct_access_index( - endpoint_name="your_databricks_host_url", + endpoint_name="your_vector_search_endpoint_name", index_name="your_index_name", primary_key="id", embedding_dimension=1024, embedding_vector_column="text_vector", schema={ - "id": "int", - "field2": "str", - "field3": "float", - "text_vector": "array"} + "id": "int", + "field2": "str", + "field3": "float", + "text_vector": "array" + } ) - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = DatabricksRM( + # Create a DatabricksRM retriever module to query the Databricks Direct Access Vector + # Search Index + retriever = DatabricksRM( databricks_index_name = "your_index_name", - docs_id_column_name="your_id_column", - text_column_name="your_text_column", + docs_id_column_name="id", + text_column_name="field2", k=3 ) - dspy.settings.configure(lm=llm, rm=retriever_model) ``` Below is a code snippet that shows how to query the Databricks Direct Access Vector - Search Index using the DatabricksRM retrieval module: + Search Index using the DatabricksRM retriever module: ```python - self.retrieve = DatabricksRM(query=[1, 2, 3]) + retrieved_results = DatabricksRM(query="Example query text")) ``` """ @@ -78,18 +80,18 @@ def __init__( """ Args: databricks_index_name (str): The name of the Databricks Vector Search Index to query. 
- databricks_endpoint (str, optional): The URL of the Databricks Workspace containing + databricks_endpoint (Optional[str]): The URL of the Databricks Workspace containing the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` environment variable. If unspecified, the Databricks SDK is used to identify the endpoint based on the current environment. - databricks_token (str, optional): The Databricks Workspace authentication token to use + databricks_token (Optional[str]): The Databricks Workspace authentication token to use when querying the Vector Search Index. Defaults to the value of the ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is used to identify the token based on the current environment. - columns (list[str], optional): Extra column names to include in response, + columns (Optional[List[str]]): Extra column names to include in response, in addition to the document id and text columns specified by ``docs_id_column_name`` and ``text_column_name``. - filters_json (str, optional): A JSON string specifying additional query filters. + filters_json (Optional[str]): A JSON string specifying additional query filters. Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` column value greater than or equal to 5 and less than 10. @@ -151,16 +153,23 @@ def forward( self, query: Union[str, List[float]], query_type: str = "ANN", - filters_json: str = None, + filters_json: Optional[str] = None, ) -> dspy.Prediction: - """Search with Databricks Vector Search Client for self.k top results for query + """ + Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the + specified query. Args: - query (Union[str, List[float]]): Query text or numeric query vector to for which to - find relevant documents. + query (Union[str, List[float]]): Query text or numeric query vector for which to + retrieve relevant documents. query_type (str): The type of search query to perform against the Databricks Vector Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' (hybrid search). + filters_json (Optional[str]): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. If specified, this + parameter overrides the `filters_json` parameter passed to the constructor. Returns: dspy.Prediction: An object containing the retrieved results. @@ -257,11 +266,11 @@ def _query_via_databricks_sdk( index_name (str): Name of the Databricks vector search index to query k (int): Number of relevant documents to retrieve. columns (List[str]): Column names to include in response. - query_text (str, optional): Text query for which to find relevant documents. Exactly + query_text (Optional[str]): Text query for which to find relevant documents. Exactly one of query_text or query_vector must be specified. - query_vector (List[float], optional): Numeric query vector for which to find relevant + query_vector (Optional[List[float]]): Numeric query vector for which to find relevant documents. Exactly one of query_text or query_vector must be specified. - filters_json (str, optional): JSON string representing additional query filters. 
+ filters_json (Optional[str]): JSON string representing additional query filters. databricks_token (str): Databricks authentication token. If not specified, the token is resolved from the current environment. databricks_endpoint (str): Databricks index endpoint url. If not specified, @@ -306,11 +315,11 @@ def _query_via_requests( columns (List[str]): Column names to include in response. databricks_token (str): Databricks authentication token. databricks_endpoint (str): Databricks index endpoint url. - query_text (str, optional): Text query for which to find relevant documents. Exactly + query_text (Optional[str]): Text query for which to find relevant documents. Exactly one of query_text or query_vector must be specified. - query_vector (List[float], optional): Numeric query vector for which to find relevant + query_vector (Optional[List[float]]): Numeric query vector for which to find relevant documents. Exactly one of query_text or query_vector must be specified. - filters_json (str, optional): JSON string representing additional query filters. + filters_json (Optional[str]): JSON string representing additional query filters. Returns: Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. From af8a285a28cde1dcc3ea4f44196c087f00ecf822 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:22:32 -0700 Subject: [PATCH 12/23] docs Signed-off-by: dbczumar --- .../retrieval_model_clients/DatabricksRM.md | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 docs/api/retrieval_model_clients/DatabricksRM.md diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md new file mode 100644 index 000000000..8a0400953 --- /dev/null +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -0,0 +1,115 @@ +--- +sidebar_position: 2 +--- + +# retrieve.DatabricksRM + +### Constructor + +Initialize an instance of the `DatabricksRM` retriever class, which enables DSPy programs to query +[Databricks Mosaic AI Vector Search](https://docs.databricks.com/en/generative-ai/vector-search.html#mosaic-ai-vector-search) +indexes for document retrieval. + +```python +DatabricksRM( + databricks_index_name: str, + databricks_endpoint: Optional[str] = None, + databricks_token: Optional[str] = None, + columns: Optional[List[str]] = None, + filters_json: Optional[str] = None, + k: int = 3, + docs_id_column_name: str = "id", + text_column_name: str = "text", +) +``` + +**Parameters:** + +- `databricks_index_name (str)`: The name of the Databricks Vector Search Index to query. +- `databricks_endpoint (Optional[str])`: The URL of the Databricks Workspace containing + the Vector Search Index. Defaults to the value of the `DATABRICKS_HOST` environment variable. + If unspecified, the Databricks SDK is used to identify the endpoint based on the current + environment. +- `databricks_token (Optional[str])`: The Databricks Workspace authentication token to use + when querying the Vector Search Index. Defaults to the value of the `DATABRICKS_TOKEN` + environment variable. If unspecified, the Databricks SDK is used to identify the token based on + the current environment. +- `columns (Optional[List[str]])`: Extra column names to include in response, in addition to the + document id and text columns specified by `docs_id_column_name` and `text_column_name`. +- `filters_json (Optional[str])`: A JSON string specifying additional query filters. 
+ Example filters: `{"id <": 5}` selects records that have an `id` column value + less than 5, and `{"id >=": 5, "id <": 10}` selects records that have an `id` + column value greater than or equal to 5 and less than 10. +- `k (int)`: The number of documents to retrieve. +- `docs_id_column_name (str)`: The name of the column in the Databricks Vector Search Index + containing document IDs. +- `text_column_name (str)`: The name of the column in the Databricks Vector Search Index + containing document text to retrieve. + +### Methods + +#### `def forward(self, query: Union[str, List[float]], query_type: str = "ANN",filters_json: Optional[str] = None) -> dspy.Prediction:` + +Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the +specified query. + +**Parameters:** + +- `query (Union[str, List[float]])`: The Query text or numeric query vector + for which to retrieve relevant documents. +- `query_type (str)`: The type of search query to perform against the + Databricks Vector Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' + (hybrid search). +- `filters_json (Optional[str])`: A JSON string specifying additional query filters. + Example filters: `{"id <": 5}` selects records that have an `id` column value + less than 5, and `{"id >=": 5, "id <": 10}` selects records that have an `id` + column value greater than or equal to 5 and less than 10. If specified, this + parameter overrides the `filters_json` parameter passed to the constructor. + +**Returns:** + +- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"long_text": str}]` + +### Quickstart + +To retrieve documents using Databricks Mosaic AI Vector Search, you must [create a +Databricks Mosaic AI Vector Search Index](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html) +first. + +The following example code demonstrates how to set up a Databricks Mosaic AI Direct Access Vector +Search Index and use the `DatabricksRM` DSPy retriever module to query the index. 
+
+```python
+from databricks.vector_search.client import VectorSearchClient
+
+# Create a Databricks Vector Search Endpoint
+client = VectorSearchClient()
+client.create_endpoint(
+    name="your_vector_search_endpoint_name",
+    endpoint_type="STANDARD"
+)
+
+# Create a Databricks Direct Access Vector Search Index
+index = client.create_direct_access_index(
+    endpoint_name="your_vector_search_endpoint_name",
+    index_name="your_index_name",
+    primary_key="id",
+    embedding_dimension=1024,
+    embedding_vector_column="text_vector",
+    schema={
+      "id": "int",
+      "field2": "str",
+      "field3": "float",
+      "text_vector": "array"
+    }
+)
+
+# Create a DatabricksRM retriever and query the Databricks Direct Access Vector Search Index
+retriever = DatabricksRM(
+    databricks_index_name="your_index_name",
+    docs_id_column_name="id",
+    text_column_name="field2",
+    k=3
+)
+retrieved_results = retriever(query="Example query text", query_type="HYBRID")
+```

From 5fab088fe2729fa108bec74a35a8bc04c7993fca Mon Sep 17 00:00:00 2001
From: dbczumar
Date: Sun, 29 Sep 2024 21:25:01 -0700
Subject: [PATCH 13/23] fix

Signed-off-by: dbczumar
---
 docs/api/retrieval_model_clients/DatabricksRM.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md
index 8a0400953..7902b313e 100644
--- a/docs/api/retrieval_model_clients/DatabricksRM.md
+++ b/docs/api/retrieval_model_clients/DatabricksRM.md
@@ -48,7 +48,7 @@ DatabricksRM(
 
 ### Methods
 
-#### `def forward(self, query: Union[str, List[float]], query_type: str = "ANN",filters_json: Optional[str] = None) -> dspy.Prediction:`
+#### `def forward(self, query: Union[str, List[float]], query_type: str = "ANN", filters_json: Optional[str] = None) -> dspy.Prediction:`
 
 Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the
 specified query.

From facdf5b708d49e79794c52ef26b30a2573ced19f Mon Sep 17 00:00:00 2001
From: dbczumar
Date: Sun, 29 Sep 2024 21:25:36 -0700
Subject: [PATCH 14/23] fix

Signed-off-by: dbczumar
---
 docs/api/retrieval_model_clients/DatabricksRM.md | 2 +-
 dspy/retrieve/databricks_rm.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md
index 7902b313e..4cc7ec376 100644
--- a/docs/api/retrieval_model_clients/DatabricksRM.md
+++ b/docs/api/retrieval_model_clients/DatabricksRM.md
@@ -55,7 +55,7 @@ specified query.
 
 **Parameters:**
 
-- `query (Union[str, List[float]])`: The Query text or numeric query vector
+- `query (Union[str, List[float]])`: The query text or numeric query vector
   for which to retrieve relevant documents.
 - `query_type (str)`: The type of search query to perform against the
   Databricks Vector Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID'
   (hybrid search).

diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py
index b8cc77ec6..153c4e926 100644
--- a/dspy/retrieve/databricks_rm.py
+++ b/dspy/retrieve/databricks_rm.py
@@ -160,7 +160,7 @@ def forward(
         specified query.
 
         Args:
-            query (Union[str, List[float]]): Query text or numeric query vector for which to
+            query (Union[str, List[float]]): The query text or numeric query vector for which to
                 retrieve relevant documents.
             query_type (str): The type of search query to perform against the Databricks Vector
                 Search Index.
Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' From ce2b4d2ec20da03c0b7ebd59690a04fb6e3d5b41 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:26:56 -0700 Subject: [PATCH 15/23] fix Signed-off-by: dbczumar --- docs/api/retrieval_model_clients/DatabricksRM.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md index 4cc7ec376..23a4c6024 100644 --- a/docs/api/retrieval_model_clients/DatabricksRM.md +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -76,8 +76,9 @@ To retrieve documents using Databricks Mosaic AI Vector Search, you must [create Databricks Mosaic AI Vector Search Index](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html) first. -The following example code demonstrates how to set up a Databricks Mosaic AI Direct Access Vector -Search Index and use the `DatabricksRM` DSPy retriever module to query the index. +The following example code demonstrates how to set up a Databricks Mosaic AI +[Direct Access Vector Search Index](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) +and use the `DatabricksRM` DSPy retriever module to query the index. ```python from databricks.vector_search.client import VectorSearchClient From 1c5b5775b28e598a3656de4a4fea9a2e192d1530 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:28:27 -0700 Subject: [PATCH 16/23] fix Signed-off-by: dbczumar --- docs/api/retrieval_model_clients/DatabricksRM.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md index 23a4c6024..bc85896d6 100644 --- a/docs/api/retrieval_model_clients/DatabricksRM.md +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -105,7 +105,8 @@ index = client.create_direct_access_index( } ) -# Create a DatabricksRM retriever and query the Databricks Direct Access Vector Search Index +# Create a DatabricksRM retriever and retrieve the top-3 most relevant documents from the +# Databricks Direct Access Vector Search Index corresponding to an example query retriever = DatabricksRM( databricks_index_name = "your_index_name", docs_id_column_name="id", From c898777a0a6a264f68f49dc445ac608a72196023 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:30:00 -0700 Subject: [PATCH 17/23] fix Signed-off-by: dbczumar --- docs/api/retrieval_model_clients/DatabricksRM.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md index bc85896d6..f19c1dbc1 100644 --- a/docs/api/retrieval_model_clients/DatabricksRM.md +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -78,7 +78,8 @@ first. The following example code demonstrates how to set up a Databricks Mosaic AI [Direct Access Vector Search Index](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) -and use the `DatabricksRM` DSPy retriever module to query the index. +and use the `DatabricksRM` DSPy retriever module to query the index. The example requires +the `databricks-vectorsearch` Python library to be installed. 
```python from databricks.vector_search.client import VectorSearchClient From 628ec2f147972c393f9d79f21332be1fc50ede61 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:36:26 -0700 Subject: [PATCH 18/23] fix Signed-off-by: dbczumar --- docs/api/retrieval_model_clients/DatabricksRM.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md index f19c1dbc1..ea3d1a242 100644 --- a/docs/api/retrieval_model_clients/DatabricksRM.md +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -68,7 +68,8 @@ specified query. **Returns:** -- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"long_text": str}]` +- `dspy.Prediction`: A `dotdict` containg retrieved documents. The schema is + `{'docs': List[str], 'doc_ids': List[Int], extra_columns: List[Dict[str, Any]]}`. ### Quickstart From afcb597eb739d943ee62047ad7a82c4769e422bf Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 29 Sep 2024 21:38:08 -0700 Subject: [PATCH 19/23] fix Signed-off-by: dbczumar --- docs/api/retrieval_model_clients/DatabricksRM.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/api/retrieval_model_clients/DatabricksRM.md b/docs/api/retrieval_model_clients/DatabricksRM.md index ea3d1a242..fafe66ffb 100644 --- a/docs/api/retrieval_model_clients/DatabricksRM.md +++ b/docs/api/retrieval_model_clients/DatabricksRM.md @@ -68,8 +68,9 @@ specified query. **Returns:** -- `dspy.Prediction`: A `dotdict` containg retrieved documents. The schema is - `{'docs': List[str], 'doc_ids': List[Int], extra_columns: List[Dict[str, Any]]}`. +- `dspy.Prediction`: A `dotdict` containing retrieved documents. The schema is + `{'docs': List[str], 'doc_ids': List[Any], extra_columns: List[Dict[str, Any]]}`. + The `docs` entry contains the retrieved document content. ### Quickstart From 1778fdccbcac5cfb64a06633143f8dbbab76f0cf Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 30 Sep 2024 11:01:37 -0700 Subject: [PATCH 20/23] RM Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 353 ++++++++++++++++++++++----------- 1 file changed, 238 insertions(+), 115 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 15d20711e..153c4e926 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -1,27 +1,24 @@ import json import os -from typing import List, Union, Any, Dict +from importlib.util import find_spec +from typing import Any, Dict, List, Optional, Union + import requests + import dspy from dspy.primitives.prediction import Prediction +_databricks_sdk_installed = find_spec("databricks.sdk") is not None + class DatabricksRM(dspy.Retrieve): """ - A retrieval module that uses Databricks Vector Search Endpoint to return the top-k embeddings for a given query. - - Args: - databricks_index_name (str): Databricks vector search index to query - databricks_endpoint (str): Databricks index endpoint url - databricks_token (str): Databricks authentication token - columns (list[str]): Column names to include in response - filters_json (str, optional): JSON string for query filters - k (int, optional): Number of top embeddings to retrieve. Defaults to 3. - docs_id_column_name (str, optional): Column name for retrieved doc_ids to return. - text_column_name (str, optional): Column name for retrieved text to return. 
+ A retriever module that uses a Databricks Mosaic AI Vector Search Index to return the top-k + embeddings for a given query. Examples: - Below is a code snippet that shows how to configure Databricks Vector Search endpoints: + Below is a code snippet that shows how to set up a Databricks Vector Search Index + and configure a DatabricksRM DSPy retriever module to query the index. (example adapted from "Databricks: How to create and query a Vector Search Index: https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) @@ -29,79 +26,95 @@ class DatabricksRM(dspy.Retrieve): ```python from databricks.vector_search.client import VectorSearchClient - #Creating Vector Search Client - + # Create a Databricks Vector Search Endpoint client = VectorSearchClient() - client.create_endpoint( name="your_vector_search_endpoint_name", endpoint_type="STANDARD" ) - #Creating Vector Search Index using Python SDK - #Example for Direct Vector Access Index - + # Create a Databricks Direct Access Vector Search Index index = client.create_direct_access_index( - endpoint_name="your_databricks_host_url", + endpoint_name="your_vector_search_endpoint_name", index_name="your_index_name", primary_key="id", embedding_dimension=1024, embedding_vector_column="text_vector", schema={ - "id": "int", - "field2": "str", - "field3": "float", - "text_vector": "array"} + "id": "int", + "field2": "str", + "field3": "float", + "text_vector": "array" + } ) - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = DatabricksRM(databricks_index_name = "your_index_name", - databricks_endpoint = "your_databricks_host_url", databricks_token = "your_databricks_token", columns= ["id", "field2", "field3", "text_vector"], k=3) - dspy.settings.configure(lm=llm, rm=retriever_model) + # Create a DatabricksRM retriever module to query the Databricks Direct Access Vector + # Search Index + retriever = DatabricksRM( + databricks_index_name = "your_index_name", + docs_id_column_name="id", + text_column_name="field2", + k=3 + ) ``` - Below is a code snippet that shows how to query the Databricks Direct Vector Access Index using the forward() function. + Below is a code snippet that shows how to query the Databricks Direct Access Vector + Search Index using the DatabricksRM retriever module: + ```python - self.retrieve = DatabricksRM(query=[1, 2, 3], query_type = 'vector') + retrieved_results = DatabricksRM(query="Example query text")) ``` """ def __init__( self, - databricks_index_name=None, - databricks_endpoint=None, - databricks_token=None, - columns=None, - filters_json=None, - k=3, - docs_id_column_name="id", - text_column_name="text", + databricks_index_name: str, + databricks_endpoint: Optional[str] = None, + databricks_token: Optional[str] = None, + columns: Optional[List[str]] = None, + filters_json: Optional[str] = None, + k: int = 3, + docs_id_column_name: str = "id", + text_column_name: str = "text", ): + """ + Args: + databricks_index_name (str): The name of the Databricks Vector Search Index to query. + databricks_endpoint (Optional[str]): The URL of the Databricks Workspace containing + the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` + environment variable. If unspecified, the Databricks SDK is used to identify the + endpoint based on the current environment. + databricks_token (Optional[str]): The Databricks Workspace authentication token to use + when querying the Vector Search Index. 
Defaults to the value of the + ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is + used to identify the token based on the current environment. + columns (Optional[List[str]]): Extra column names to include in response, + in addition to the document id and text columns specified by + ``docs_id_column_name`` and ``text_column_name``. + filters_json (Optional[str]): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. + k (int): The number of documents to retrieve. + docs_id_column_name (str): The name of the column in the Databricks Vector Search Index + containing document IDs. + text_column_name (str): The name of the column in the Databricks Vector Search Index + containing document text to retrieve. + """ super().__init__(k=k) - if not databricks_token and not os.environ.get("DATABRICKS_TOKEN"): - raise ValueError( - "You must supply databricks_token or set environment variable DATABRICKS_TOKEN" - ) - if not databricks_endpoint and not os.environ.get("DATABRICKS_HOST"): - raise ValueError( - "You must supply databricks_endpoint or set environment variable DATABRICKS_HOST" - ) - if not databricks_index_name: - raise ValueError("You must supply vector index name") - if not columns: - raise ValueError( - "You must specify a list of column names to be included in the response" - ) - self.databricks_token = ( - databricks_token if databricks_token else os.environ["DATABRICKS_TOKEN"] - ) + self.databricks_token = databricks_token if databricks_token is not None else os.environ.get("DATABRICKS_TOKEN") self.databricks_endpoint = ( - databricks_endpoint - if databricks_endpoint - else os.environ["DATABRICKS_HOST"] + databricks_endpoint if databricks_endpoint is not None else os.environ.get("DATABRICKS_HOST") ) + if not _databricks_sdk_installed and (self.databricks_token, self.databricks_endpoint).count(None) > 0: + raise ValueError( + "To retrieve documents with Databricks Vector Search, you must install the" + " databricks-sdk Python library, supply the databricks_token and" + " databricks_endpoint parameters, or set the DATABRICKS_TOKEN and DATABRICKS_HOST" + " environment variables." + ) self.databricks_index_name = databricks_index_name - self.columns = columns + self.columns = list({docs_id_column_name, text_column_name, *(columns or [])}) self.filters_json = filters_json self.k = k self.docs_id_column_name = docs_id_column_name @@ -128,71 +141,80 @@ def _get_extra_columns(self, item: Dict[str, Any]) -> Dict[str, Any]: Returns: Dict[str, Any]: Search result column values, excluding the "text" and not "id" columns. 
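 
         Example (illustrative): given an item like ``{"id": 1, "text": "some text",
         "score": 0.75}`` with the default ``docs_id_column_name="id"`` and
         ``text_column_name="text"``, this method returns ``{"score": 0.75}``.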
""" - extra_columns = { - k: v - for k, v in item.items() - if k not in [self.docs_id_column_name, self.text_column_name] - } + extra_columns = {k: v for k, v in item.items() if k not in [self.docs_id_column_name, self.text_column_name]} if self.docs_id_column_name == "metadata": extra_columns = { **extra_columns, - **{ - "metadata": { - k: v - for k, v in json.loads(item["metadata"]).items() - if k != "document_id" - } - }, + **{"metadata": {k: v for k, v in json.loads(item["metadata"]).items() if k != "document_id"}}, } return extra_columns def forward( self, query: Union[str, List[float]], - query_type: str = "text", - filters_json: str = None, + query_type: str = "ANN", + filters_json: Optional[str] = None, ) -> dspy.Prediction: - """Search with Databricks Vector Search Client for self.k top results for query + """ + Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the + specified query. Args: - query (Union[str, List[float]]): query to search for. - query_type (str): 'vector' for Direct Vector Access Index and Delta Sync Index using self-managed vectors or 'text' for Delta Sync Index using model endpoint. + query (Union[str, List[float]]): The query text or numeric query vector for which to + retrieve relevant documents. + query_type (str): The type of search query to perform against the Databricks Vector + Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' + (hybrid search). + filters_json (Optional[str]): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. If specified, this + parameter overrides the `filters_json` parameter passed to the constructor. Returns: dspy.Prediction: An object containing the retrieved results. """ - headers = { - "Authorization": f"Bearer {self.databricks_token}", - "Content-Type": "application/json", - } - payload = { - "columns": self.columns, - "num_results": self.k, - } - if query_type == "vector": - if not isinstance(query, list): - raise ValueError("Query must be a list of floats for query_vector") - payload["query_vector"] = query - elif query_type == "text": - if not isinstance(query, str): - raise ValueError("Query must be a string for query_text") - payload["query_text"] = query - else: - raise ValueError("Invalid query type specified. Use 'vector' or 'text'.") - - payload["filters_json"] = filters_json if filters_json else self.filters_json + if query_type in ["vector", "text"]: + # Older versions of DSPy used a `query_type` argument to disambiguate between text + # and vector queries, rather than checking the type of the `query` argument. This + # differs from the Databricks Vector Search definition of `query_type`, which + # specifies the search algorithm to use (e.g. "ANN" or "HYBRID"). To maintain + # backwards compatibility with older versions of DSPy, we map the old `query_type` + # values to the Databricks Vector Search default query type of "ANN". 
+ query_type = "ANN" - response = requests.post( - f"{self.databricks_endpoint}/api/2.0/vector-search/indexes/{self.databricks_index_name}/query", - json=payload, - headers=headers, - ) - results = response.json() + if isinstance(query, str): + query_text = query + query_vector = None + elif isinstance(query, list): + query_vector = query + query_text = None + else: + raise ValueError("Query must be a string or a list of floats.") - # Check for errors from REST API call - if response.json().get("error_code", None) != None: - raise Exception( - f"ERROR: {response.json()['error_code']} -- {response.json()['message']}" + if _databricks_sdk_installed: + results = self._query_via_databricks_sdk( + index_name=self.databricks_index_name, + k=self.k, + columns=self.columns, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + databricks_token=self.databricks_token, + databricks_endpoint=self.databricks_endpoint, + filters_json=filters_json or self.filters_json, + ) + else: + results = self._query_via_requests( + index_name=self.databricks_index_name, + k=self.k, + columns=self.columns, + databricks_token=self.databricks_token, + databricks_endpoint=self.databricks_endpoint, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + filters_json=filters_json or self.filters_json, ) # Checking if defined columns are present in the index columns @@ -204,27 +226,128 @@ def forward( ) if self.text_column_name not in col_names: - raise Exception( - f"text_column_name: '{self.text_column_name}' is not in the index columns: \n {col_names}" - ) + raise Exception(f"text_column_name: '{self.text_column_name}' is not in the index columns: \n {col_names}") # Extracting the results items = [] - for idx, data_row in enumerate(results["result"]["data_array"]): + for _, data_row in enumerate(results["result"]["data_array"]): item = {} for col_name, val in zip(col_names, data_row): item[col_name] = val items += [item] # Sorting results by score in descending order - sorted_docs = sorted(items, key=lambda x: x["score"], reverse=True)[:self.k] + sorted_docs = sorted(items, key=lambda x: x["score"], reverse=True)[: self.k] # Returning the prediction return Prediction( docs=[doc[self.text_column_name] for doc in sorted_docs], - doc_ids=[ - self._extract_doc_ids(doc) - for doc in sorted_docs - ], + doc_ids=[self._extract_doc_ids(doc) for doc in sorted_docs], extra_columns=[self._get_extra_columns(item) for item in sorted_docs], ) + + @staticmethod + def _query_via_databricks_sdk( + index_name: str, + k: int, + columns: List[str], + query_type: str, + query_text: Optional[str], + query_vector: Optional[List[float]], + databricks_token: Optional[str], + databricks_endpoint: Optional[str], + filters_json: Optional[str], + ) -> Dict[str, Any]: + """ + Query a Databricks Vector Search Index via the Databricks SDK. + Assumes that the databricks-sdk Python library is installed. + + Args: + index_name (str): Name of the Databricks vector search index to query + k (int): Number of relevant documents to retrieve. + columns (List[str]): Column names to include in response. + query_text (Optional[str]): Text query for which to find relevant documents. Exactly + one of query_text or query_vector must be specified. + query_vector (Optional[List[float]]): Numeric query vector for which to find relevant + documents. Exactly one of query_text or query_vector must be specified. + filters_json (Optional[str]): JSON string representing additional query filters. 
+ databricks_token (str): Databricks authentication token. If not specified, + the token is resolved from the current environment. + databricks_endpoint (str): Databricks index endpoint url. If not specified, + the endpoint is resolved from the current environment. + Returns: + Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. + """ + from databricks.sdk import WorkspaceClient + + if (query_text, query_vector).count(None) != 1: + raise ValueError("Exactly one of query_text or query_vector must be specified.") + + databricks_client = WorkspaceClient(host=databricks_endpoint, token=databricks_token) + return databricks_client.vector_search_indexes.query_index( + index_name=index_name, + query_type=query_type, + query_text=query_text, + query_vector=query_vector, + columns=columns, + filters_json=filters_json, + num_results=k, + ).as_dict() + + @staticmethod + def _query_via_requests( + index_name: str, + k: int, + columns: List[str], + databricks_token: str, + databricks_endpoint: str, + query_type: str, + query_text: Optional[str], + query_vector: Optional[List[float]], + filters_json: Optional[str], + ) -> Dict[str, Any]: + """ + Query a Databricks Vector Search Index via the Python requests library. + + Args: + index_name (str): Name of the Databricks vector search index to query + k (int): Number of relevant documents to retrieve. + columns (List[str]): Column names to include in response. + databricks_token (str): Databricks authentication token. + databricks_endpoint (str): Databricks index endpoint url. + query_text (Optional[str]): Text query for which to find relevant documents. Exactly + one of query_text or query_vector must be specified. + query_vector (Optional[List[float]]): Numeric query vector for which to find relevant + documents. Exactly one of query_text or query_vector must be specified. + filters_json (Optional[str]): JSON string representing additional query filters. + + Returns: + Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. 
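+
+        Example (a sketch with placeholder values; the index name, token, and endpoint
+        below are illustrative assumptions, not real resources):
+
+            results = DatabricksRM._query_via_requests(
+                index_name="catalog.schema.my_index",
+                k=3,
+                columns=["id", "text"],
+                databricks_token="<access_token>",
+                databricks_endpoint="https://<workspace-host>",
+                query_type="ANN",
+                query_text="example query",
+                query_vector=None,
+                filters_json=None,
+            )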
+ """ + if (query_text, query_vector).count(None) != 1: + raise ValueError("Exactly one of query_text or query_vector must be specified.") + + headers = { + "Authorization": f"Bearer {databricks_token}", + "Content-Type": "application/json", + } + payload = { + "columns": columns, + "num_results": k, + "query_type": query_type, + } + if filters_json is not None: + payload["filters_json"] = filters_json + if query_text is not None: + payload["query_text"] = query_text + elif query_vector is not None: + payload["query_vector"] = query_vector + response = requests.post( + f"{databricks_endpoint}/api/2.0/vector-search/indexes/{index_name}/query", + json=payload, + headers=headers, + ) + results = response.json() + if "error_code" in results: + raise Exception(f"ERROR: {results['error_code']} -- {results['message']}") + return results From 7442f42217eaefa3f5b4f0148697130bc9eedf4d Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 30 Sep 2024 11:02:37 -0700 Subject: [PATCH 21/23] docs Signed-off-by: dbczumar --- dspy/retrieve/databricks_rm.py | 320 +++++++++------------------------ 1 file changed, 85 insertions(+), 235 deletions(-) diff --git a/dspy/retrieve/databricks_rm.py b/dspy/retrieve/databricks_rm.py index 153c4e926..7bb66ebcd 100644 --- a/dspy/retrieve/databricks_rm.py +++ b/dspy/retrieve/databricks_rm.py @@ -1,24 +1,29 @@ import json import os -from importlib.util import find_spec -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union import requests import dspy from dspy.primitives.prediction import Prediction -_databricks_sdk_installed = find_spec("databricks.sdk") is not None - class DatabricksRM(dspy.Retrieve): """ - A retriever module that uses a Databricks Mosaic AI Vector Search Index to return the top-k - embeddings for a given query. + A retrieval module that uses Databricks Vector Search Endpoint to return the top-k embeddings for a given query. + + Args: + databricks_index_name (str): Databricks vector search index to query + databricks_endpoint (str): Databricks index endpoint url + databricks_token (str): Databricks authentication token + columns (list[str]): Column names to include in response + filters_json (str, optional): JSON string for query filters + k (int, optional): Number of top embeddings to retrieve. Defaults to 3. + docs_id_column_name (str, optional): Column name for retrieved doc_ids to return. + text_column_name (str, optional): Column name for retrieved text to return. Examples: - Below is a code snippet that shows how to set up a Databricks Vector Search Index - and configure a DatabricksRM DSPy retriever module to query the index. 
+ Below is a code snippet that shows how to configure Databricks Vector Search endpoints: (example adapted from "Databricks: How to create and query a Vector Search Index: https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) @@ -26,95 +31,67 @@ class DatabricksRM(dspy.Retrieve): ```python from databricks.vector_search.client import VectorSearchClient - # Create a Databricks Vector Search Endpoint + #Creating Vector Search Client + client = VectorSearchClient() + client.create_endpoint( name="your_vector_search_endpoint_name", endpoint_type="STANDARD" ) - # Create a Databricks Direct Access Vector Search Index + #Creating Vector Search Index using Python SDK + #Example for Direct Vector Access Index + index = client.create_direct_access_index( - endpoint_name="your_vector_search_endpoint_name", + endpoint_name="your_databricks_host_url", index_name="your_index_name", primary_key="id", embedding_dimension=1024, embedding_vector_column="text_vector", schema={ - "id": "int", - "field2": "str", - "field3": "float", - "text_vector": "array" - } + "id": "int", + "field2": "str", + "field3": "float", + "text_vector": "array"} ) - # Create a DatabricksRM retriever module to query the Databricks Direct Access Vector - # Search Index - retriever = DatabricksRM( - databricks_index_name = "your_index_name", - docs_id_column_name="id", - text_column_name="field2", - k=3 - ) + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = DatabricksRM(databricks_index_name = "your_index_name", + databricks_endpoint = "your_databricks_host_url", databricks_token = "your_databricks_token", columns= ["id", "field2", "field3", "text_vector"], k=3) + dspy.settings.configure(lm=llm, rm=retriever_model) ``` - Below is a code snippet that shows how to query the Databricks Direct Access Vector - Search Index using the DatabricksRM retriever module: - + Below is a code snippet that shows how to query the Databricks Direct Vector Access Index using the forward() function. ```python - retrieved_results = DatabricksRM(query="Example query text")) + self.retrieve = DatabricksRM(query=[1, 2, 3], query_type = 'vector') ``` """ def __init__( self, - databricks_index_name: str, - databricks_endpoint: Optional[str] = None, - databricks_token: Optional[str] = None, - columns: Optional[List[str]] = None, - filters_json: Optional[str] = None, - k: int = 3, - docs_id_column_name: str = "id", - text_column_name: str = "text", + databricks_index_name=None, + databricks_endpoint=None, + databricks_token=None, + columns=None, + filters_json=None, + k=3, + docs_id_column_name="id", + text_column_name="text", ): - """ - Args: - databricks_index_name (str): The name of the Databricks Vector Search Index to query. - databricks_endpoint (Optional[str]): The URL of the Databricks Workspace containing - the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` - environment variable. If unspecified, the Databricks SDK is used to identify the - endpoint based on the current environment. - databricks_token (Optional[str]): The Databricks Workspace authentication token to use - when querying the Vector Search Index. Defaults to the value of the - ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is - used to identify the token based on the current environment. 
- columns (Optional[List[str]]): Extra column names to include in response, - in addition to the document id and text columns specified by - ``docs_id_column_name`` and ``text_column_name``. - filters_json (Optional[str]): A JSON string specifying additional query filters. - Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value - less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` - column value greater than or equal to 5 and less than 10. - k (int): The number of documents to retrieve. - docs_id_column_name (str): The name of the column in the Databricks Vector Search Index - containing document IDs. - text_column_name (str): The name of the column in the Databricks Vector Search Index - containing document text to retrieve. - """ super().__init__(k=k) - self.databricks_token = databricks_token if databricks_token is not None else os.environ.get("DATABRICKS_TOKEN") - self.databricks_endpoint = ( - databricks_endpoint if databricks_endpoint is not None else os.environ.get("DATABRICKS_HOST") - ) - if not _databricks_sdk_installed and (self.databricks_token, self.databricks_endpoint).count(None) > 0: - raise ValueError( - "To retrieve documents with Databricks Vector Search, you must install the" - " databricks-sdk Python library, supply the databricks_token and" - " databricks_endpoint parameters, or set the DATABRICKS_TOKEN and DATABRICKS_HOST" - " environment variables." - ) + if not databricks_token and not os.environ.get("DATABRICKS_TOKEN"): + raise ValueError("You must supply databricks_token or set environment variable DATABRICKS_TOKEN") + if not databricks_endpoint and not os.environ.get("DATABRICKS_HOST"): + raise ValueError("You must supply databricks_endpoint or set environment variable DATABRICKS_HOST") + if not databricks_index_name: + raise ValueError("You must supply vector index name") + if not columns: + raise ValueError("You must specify a list of column names to be included in the response") + self.databricks_token = databricks_token if databricks_token else os.environ["DATABRICKS_TOKEN"] + self.databricks_endpoint = databricks_endpoint if databricks_endpoint else os.environ["DATABRICKS_HOST"] self.databricks_index_name = databricks_index_name - self.columns = list({docs_id_column_name, text_column_name, *(columns or [])}) + self.columns = columns self.filters_json = filters_json self.k = k self.docs_id_column_name = docs_id_column_name @@ -152,70 +129,49 @@ def _get_extra_columns(self, item: Dict[str, Any]) -> Dict[str, Any]: def forward( self, query: Union[str, List[float]], - query_type: str = "ANN", - filters_json: Optional[str] = None, + query_type: str = "text", + filters_json: str = None, ) -> dspy.Prediction: - """ - Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the - specified query. + """Search with Databricks Vector Search Client for self.k top results for query Args: - query (Union[str, List[float]]): The query text or numeric query vector for which to - retrieve relevant documents. - query_type (str): The type of search query to perform against the Databricks Vector - Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' - (hybrid search). - filters_json (Optional[str]): A JSON string specifying additional query filters. 
- Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value - less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` - column value greater than or equal to 5 and less than 10. If specified, this - parameter overrides the `filters_json` parameter passed to the constructor. + query (Union[str, List[float]]): query to search for. + query_type (str): 'vector' for Direct Vector Access Index and Delta Sync Index using self-managed vectors or 'text' for Delta Sync Index using model endpoint. Returns: dspy.Prediction: An object containing the retrieved results. """ - if query_type in ["vector", "text"]: - # Older versions of DSPy used a `query_type` argument to disambiguate between text - # and vector queries, rather than checking the type of the `query` argument. This - # differs from the Databricks Vector Search definition of `query_type`, which - # specifies the search algorithm to use (e.g. "ANN" or "HYBRID"). To maintain - # backwards compatibility with older versions of DSPy, we map the old `query_type` - # values to the Databricks Vector Search default query type of "ANN". - query_type = "ANN" - - if isinstance(query, str): - query_text = query - query_vector = None - elif isinstance(query, list): - query_vector = query - query_text = None - else: - raise ValueError("Query must be a string or a list of floats.") - - if _databricks_sdk_installed: - results = self._query_via_databricks_sdk( - index_name=self.databricks_index_name, - k=self.k, - columns=self.columns, - query_type=query_type, - query_text=query_text, - query_vector=query_vector, - databricks_token=self.databricks_token, - databricks_endpoint=self.databricks_endpoint, - filters_json=filters_json or self.filters_json, - ) + headers = { + "Authorization": f"Bearer {self.databricks_token}", + "Content-Type": "application/json", + } + payload = { + "columns": self.columns, + "num_results": self.k, + } + if query_type == "vector": + if not isinstance(query, list): + raise ValueError("Query must be a list of floats for query_vector") + payload["query_vector"] = query + elif query_type == "text": + if not isinstance(query, str): + raise ValueError("Query must be a string for query_text") + payload["query_text"] = query else: - results = self._query_via_requests( - index_name=self.databricks_index_name, - k=self.k, - columns=self.columns, - databricks_token=self.databricks_token, - databricks_endpoint=self.databricks_endpoint, - query_type=query_type, - query_text=query_text, - query_vector=query_vector, - filters_json=filters_json or self.filters_json, - ) + raise ValueError("Invalid query type specified. 
Use 'vector' or 'text'.") + + payload["filters_json"] = filters_json if filters_json else self.filters_json + + response = requests.post( + f"{self.databricks_endpoint}/api/2.0/vector-search/indexes/{self.databricks_index_name}/query", + json=payload, + headers=headers, + ) + results = response.json() + + # Check for errors from REST API call + if response.json().get("error_code", None) != None: + raise Exception(f"ERROR: {response.json()['error_code']} -- {response.json()['message']}") # Checking if defined columns are present in the index columns col_names = [column["name"] for column in results["manifest"]["columns"]] @@ -230,7 +186,7 @@ def forward( # Extracting the results items = [] - for _, data_row in enumerate(results["result"]["data_array"]): + for idx, data_row in enumerate(results["result"]["data_array"]): item = {} for col_name, val in zip(col_names, data_row): item[col_name] = val @@ -245,109 +201,3 @@ def forward( doc_ids=[self._extract_doc_ids(doc) for doc in sorted_docs], extra_columns=[self._get_extra_columns(item) for item in sorted_docs], ) - - @staticmethod - def _query_via_databricks_sdk( - index_name: str, - k: int, - columns: List[str], - query_type: str, - query_text: Optional[str], - query_vector: Optional[List[float]], - databricks_token: Optional[str], - databricks_endpoint: Optional[str], - filters_json: Optional[str], - ) -> Dict[str, Any]: - """ - Query a Databricks Vector Search Index via the Databricks SDK. - Assumes that the databricks-sdk Python library is installed. - - Args: - index_name (str): Name of the Databricks vector search index to query - k (int): Number of relevant documents to retrieve. - columns (List[str]): Column names to include in response. - query_text (Optional[str]): Text query for which to find relevant documents. Exactly - one of query_text or query_vector must be specified. - query_vector (Optional[List[float]]): Numeric query vector for which to find relevant - documents. Exactly one of query_text or query_vector must be specified. - filters_json (Optional[str]): JSON string representing additional query filters. - databricks_token (str): Databricks authentication token. If not specified, - the token is resolved from the current environment. - databricks_endpoint (str): Databricks index endpoint url. If not specified, - the endpoint is resolved from the current environment. - Returns: - Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. - """ - from databricks.sdk import WorkspaceClient - - if (query_text, query_vector).count(None) != 1: - raise ValueError("Exactly one of query_text or query_vector must be specified.") - - databricks_client = WorkspaceClient(host=databricks_endpoint, token=databricks_token) - return databricks_client.vector_search_indexes.query_index( - index_name=index_name, - query_type=query_type, - query_text=query_text, - query_vector=query_vector, - columns=columns, - filters_json=filters_json, - num_results=k, - ).as_dict() - - @staticmethod - def _query_via_requests( - index_name: str, - k: int, - columns: List[str], - databricks_token: str, - databricks_endpoint: str, - query_type: str, - query_text: Optional[str], - query_vector: Optional[List[float]], - filters_json: Optional[str], - ) -> Dict[str, Any]: - """ - Query a Databricks Vector Search Index via the Python requests library. - - Args: - index_name (str): Name of the Databricks vector search index to query - k (int): Number of relevant documents to retrieve. 
- columns (List[str]): Column names to include in response. - databricks_token (str): Databricks authentication token. - databricks_endpoint (str): Databricks index endpoint url. - query_text (Optional[str]): Text query for which to find relevant documents. Exactly - one of query_text or query_vector must be specified. - query_vector (Optional[List[float]]): Numeric query vector for which to find relevant - documents. Exactly one of query_text or query_vector must be specified. - filters_json (Optional[str]): JSON string representing additional query filters. - - Returns: - Dict[str, Any]: Parsed JSON response from the Databricks Vector Search Index query. - """ - if (query_text, query_vector).count(None) != 1: - raise ValueError("Exactly one of query_text or query_vector must be specified.") - - headers = { - "Authorization": f"Bearer {databricks_token}", - "Content-Type": "application/json", - } - payload = { - "columns": columns, - "num_results": k, - "query_type": query_type, - } - if filters_json is not None: - payload["filters_json"] = filters_json - if query_text is not None: - payload["query_text"] = query_text - elif query_vector is not None: - payload["query_vector"] = query_vector - response = requests.post( - f"{databricks_endpoint}/api/2.0/vector-search/indexes/{index_name}/query", - json=payload, - headers=headers, - ) - results = response.json() - if "error_code" in results: - raise Exception(f"ERROR: {results['error_code']} -- {results['message']}") - return results From 926c750ef2a848fa4cc27d6504f671c7a7f3f84f Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 30 Sep 2024 11:06:34 -0700 Subject: [PATCH 22/23] langfuse fix Signed-off-by: dbczumar --- docs/docs/dspy-usecases.md | 222 ++++++++++++++++++------------------- 1 file changed, 110 insertions(+), 112 deletions(-) diff --git a/docs/docs/dspy-usecases.md b/docs/docs/dspy-usecases.md index 70f55aa2b..3ae7ec499 100644 --- a/docs/docs/dspy-usecases.md +++ b/docs/docs/dspy-usecases.md @@ -12,137 +12,135 @@ This list is ever expanding and highly incomplete (WIP)! We'll be adding a bunch 4. [Providers with DSPy support](#a-few-providers-integrations-and-related-blog-releases) 5. [Blogs & Videos on using DSPy](#a-few-blogs--videos-on-using-dspy) - ## A Few Company Use Cases -| **Name** | **Use Cases** | -|---|---| -| **[JetBlue](https://www.jetblue.com/)** | Multiple chatbot use cases. [Blog](https://www.databricks.com/blog/optimizing-databricks-llm-pipelines-dspy) | -| **[Replit](https://replit.com/)** | Synthesize diffs using code LLMs using a DSPy pipeline. [Blog](https://blog.replit.com/code-repair) | -| **[Databricks](https://www.databricks.com/)** | Research, products, and customer solutions around LM Judges, RAG, classification, and other applications. [Blog](https://www.databricks.com/blog/dspy-databricks) [Blog II](https://www.databricks.com/customers/ddi) | -| **[Sephora](https://www.sephora.com/)** | Undisclosed agent usecases; perspectives shared in [DAIS Session](https://www.youtube.com/watch?v=D2HurSldDkE). | -| **[Zoro UK](https://www.zoro.co.uk/)** | E-commerce applications around structured shopping. [Portkey Session](https://www.youtube.com/watch?v=_vGKSc1tekE) | -| **[VMware](https://www.vmware.com/)** | RAG and other prompt optimization applications. 
[Interview in The Register.](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) [Business Insider.](https://www.businessinsider.com/chaptgpt-large-language-model-ai-prompt-engineering-automated-optimizer-2024-3) | -| **[Haize Labs](https://www.haizelabs.com/)** | Automated red-teaming for LLMs. [Blog](https://blog.haizelabs.com/posts/dspy/) | -| **[Plastic Labs](https://www.plasticlabs.ai/)** | Different pipelines within Honcho. [Blog](https://blog.plasticlabs.ai/blog/User-State-is-State-of-the-Art) | -| **[PingCAP](https://pingcap.com/)** | Building a knowledge graph. [Article](https://www.pingcap.com/article/building-a-graphrag-from-wikipedia-page-using-dspy-openai-and-tidb-vector-database/) | -| **[Salomatic](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system)** | Enriching medical reports using DSPy. [Blog](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system) | -| **[Truelaw](https://www.youtube.com/watch?v=O0F3RAWZNfM)** | How Truelaw builds bespoke LLM pipelines for law firms using DSPy. [Podcast](https://www.youtube.com/watch?v=O0F3RAWZNfM) | -| **[Moody's](https://www.moodys.com/)** | Leveraging DSPy to optimize RAG systems, LLM-as-a-Judge, and agentic systems for financial workflows. | -| **[Normal Computing](https://www.normalcomputing.com/)** | Translate specs from chip companies from English to intermediate formal languages | -| **[Procure.FYI](https://www.procure.fyi/)** | Process messy, publicly available technology spending and pricing data via DSPy. | -| **[RadiantLogic](https://www.radiantlogic.com/)** | AI Data Assistant. DSPy is used for the agent that routes the query, the context extraction module, the text-to-sql conversion engine, and the table summarization module. | -| **[Hyperlint](https://hyperlint.com)** | Uses DSPy to generate technical documentation. DSPy helps to fetch relevant information and synthesize that into tutorials. | -| **[Starops](https://staropshq.com/) & [Saya](https://heysaya.ai/)** | Building research documents given a user's corpus. Generate prompts to create more articles from example articles. | -| **[Tessel AI](https://tesselai.com/)** | Enhancing human-machine interaction with data use cases. | -| **[Dicer.ai](https://dicer.ai/)** | Uses DSPy for marketing AI to get the most from their paid ads. | -| **[Howie](https://howie.ai)** | Using DSPy to automate meeting scheduling through email. | -| **[Isoform.ai](https://isoform.ai)** | Building custom integrations using DSPy. | -| **[Trampoline AI](https://trampoline.ai)** | Uses DSPy to power their data-augmentation and LM pipelines. | +| **Name** | **Use Cases** | +| --------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **[JetBlue](https://www.jetblue.com/)** | Multiple chatbot use cases. [Blog](https://www.databricks.com/blog/optimizing-databricks-llm-pipelines-dspy) | +| **[Replit](https://replit.com/)** | Synthesize diffs using code LLMs using a DSPy pipeline. 
[Blog](https://blog.replit.com/code-repair) | +| **[Databricks](https://www.databricks.com/)** | Research, products, and customer solutions around LM Judges, RAG, classification, and other applications. [Blog](https://www.databricks.com/blog/dspy-databricks) [Blog II](https://www.databricks.com/customers/ddi) | +| **[Sephora](https://www.sephora.com/)** | Undisclosed agent usecases; perspectives shared in [DAIS Session](https://www.youtube.com/watch?v=D2HurSldDkE). | +| **[Zoro UK](https://www.zoro.co.uk/)** | E-commerce applications around structured shopping. [Portkey Session](https://www.youtube.com/watch?v=_vGKSc1tekE) | +| **[VMware](https://www.vmware.com/)** | RAG and other prompt optimization applications. [Interview in The Register.](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) [Business Insider.](https://www.businessinsider.com/chaptgpt-large-language-model-ai-prompt-engineering-automated-optimizer-2024-3) | +| **[Haize Labs](https://www.haizelabs.com/)** | Automated red-teaming for LLMs. [Blog](https://blog.haizelabs.com/posts/dspy/) | +| **[Plastic Labs](https://www.plasticlabs.ai/)** | Different pipelines within Honcho. [Blog](https://blog.plasticlabs.ai/blog/User-State-is-State-of-the-Art) | +| **[PingCAP](https://pingcap.com/)** | Building a knowledge graph. [Article](https://www.pingcap.com/article/building-a-graphrag-from-wikipedia-page-using-dspy-openai-and-tidb-vector-database/) | +| **[Salomatic](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system)** | Enriching medical reports using DSPy. [Blog](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system) | +| **[Truelaw](https://www.youtube.com/watch?v=O0F3RAWZNfM)** | How Truelaw builds bespoke LLM pipelines for law firms using DSPy. [Podcast](https://www.youtube.com/watch?v=O0F3RAWZNfM) | +| **[Moody's](https://www.moodys.com/)** | Leveraging DSPy to optimize RAG systems, LLM-as-a-Judge, and agentic systems for financial workflows. | +| **[Normal Computing](https://www.normalcomputing.com/)** | Translate specs from chip companies from English to intermediate formal languages | +| **[Procure.FYI](https://www.procure.fyi/)** | Process messy, publicly available technology spending and pricing data via DSPy. | +| **[RadiantLogic](https://www.radiantlogic.com/)** | AI Data Assistant. DSPy is used for the agent that routes the query, the context extraction module, the text-to-sql conversion engine, and the table summarization module. | +| **[Hyperlint](https://hyperlint.com)** | Uses DSPy to generate technical documentation. DSPy helps to fetch relevant information and synthesize that into tutorials. | +| **[Starops](https://staropshq.com/) & [Saya](https://heysaya.ai/)** | Building research documents given a user's corpus. Generate prompts to create more articles from example articles. | +| **[Tessel AI](https://tesselai.com/)** | Enhancing human-machine interaction with data use cases. | +| **[Dicer.ai](https://dicer.ai/)** | Uses DSPy for marketing AI to get the most from their paid ads. | +| **[Howie](https://howie.ai)** | Using DSPy to automate meeting scheduling through email. | +| **[Isoform.ai](https://isoform.ai)** | Building custom integrations using DSPy. | +| **[Trampoline AI](https://trampoline.ai)** | Uses DSPy to power their data-augmentation and LM pipelines. | WIP. 
This list mainly includes companies that have public posts or have OKed being included for specific products so far. - ## A Few Papers Using DSPy -| **Name** | **Description** | -|---|---| -| **[STORM](https://arxiv.org/abs/2402.14207)** | Writing Wikipedia-like Articles From Scratch. | -| **[PATH](https://arxiv.org/abs/2406.11706)** | Prompts as Auto-Optimized Training Hyperparameters: Training Best-in-Class IR Models from Scratch with 10 Gold Labels | -| **[WangLab @ MEDIQA](https://arxiv.org/abs/2404.14544)** | UofT's winning system at MEDIQA, outperforms the next best system by 20 points | -| **[UMD's Suicide Detection System](https://arxiv.org/abs/2406.06608)** | Outperforms 20-hour expert human prompt engineering by 40% | -| **[IReRa](https://arxiv.org/abs/2401.12178)** | Infer-Retrieve-Rank: Extreme Classification with > 10,000 Labels | -| **[Unreasonably Effective Eccentric Prompts](https://arxiv.org/abs/2402.10949v2)** | General Prompt Optimization | -| **[Palimpzest](https://arxiv.org/abs/2405.14696)** | A Declarative System for Optimizing AI Workloads | -| **[AI Agents that Matter](https://arxiv.org/abs/2407.01502v1)** | Agent Efficiency Optimization | -| **[EDEN](https://arxiv.org/abs/2406.17982v1)** | Empathetic Dialogues for English Learning: Uses adaptive empathetic feedback to improve student grit | -| **[ECG-Chat](https://arxiv.org/pdf/2408.08849)** | Uses DSPy with GraphRAG for medical report generation | -| **[DSPy Assertions](https://arxiv.org/abs/2312.13382)** | Various applications of imposing hard and soft constraints on LM outputs | -| **[DSPy Guardrails](https://boxiyu.github.io/assets/pdf/DSPy_Guardrails.pdf)** | Reduce the attack success rate of CodeAttack, decreasing from 75% to 5% | -| **[Co-STORM](https://arxiv.org/pdf/2408.15232)** | Collaborative STORM: Generate Wikipedia-like articles through collaborative discourse among users and multiple LM agents | +| **Name** | **Description** | +| ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ | +| **[STORM](https://arxiv.org/abs/2402.14207)** | Writing Wikipedia-like Articles From Scratch. 
| +| **[PATH](https://arxiv.org/abs/2406.11706)** | Prompts as Auto-Optimized Training Hyperparameters: Training Best-in-Class IR Models from Scratch with 10 Gold Labels | +| **[WangLab @ MEDIQA](https://arxiv.org/abs/2404.14544)** | UofT's winning system at MEDIQA, outperforms the next best system by 20 points | +| **[UMD's Suicide Detection System](https://arxiv.org/abs/2406.06608)** | Outperforms 20-hour expert human prompt engineering by 40% | +| **[IReRa](https://arxiv.org/abs/2401.12178)** | Infer-Retrieve-Rank: Extreme Classification with > 10,000 Labels | +| **[Unreasonably Effective Eccentric Prompts](https://arxiv.org/abs/2402.10949v2)** | General Prompt Optimization | +| **[Palimpzest](https://arxiv.org/abs/2405.14696)** | A Declarative System for Optimizing AI Workloads | +| **[AI Agents that Matter](https://arxiv.org/abs/2407.01502v1)** | Agent Efficiency Optimization | +| **[EDEN](https://arxiv.org/abs/2406.17982v1)** | Empathetic Dialogues for English Learning: Uses adaptive empathetic feedback to improve student grit | +| **[ECG-Chat](https://arxiv.org/pdf/2408.08849)** | Uses DSPy with GraphRAG for medical report generation | +| **[DSPy Assertions](https://arxiv.org/abs/2312.13382)** | Various applications of imposing hard and soft constraints on LM outputs | +| **[DSPy Guardrails](https://boxiyu.github.io/assets/pdf/DSPy_Guardrails.pdf)** | Reduce the attack success rate of CodeAttack, decreasing from 75% to 5% | +| **[Co-STORM](https://arxiv.org/pdf/2408.15232)** | Collaborative STORM: Generate Wikipedia-like articles through collaborative discourse among users and multiple LM agents | ## A Few Repositories (or other OSS examples) using DSPy -| **Name** | **Description/Link** | -|---|---| -| **Stanford CS 224U Homework** | [Github](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | -| **STORM Report Generation (10,000 GitHub stars)** | [Github](https://github.com/stanford-oval/storm) | -| **DSPy Redteaming** | [Github](https://github.com/haizelabs/dspy-redteam) | -| **DSPy Theory of Mind** | [Github](https://github.com/plastic-labs/dspy-opentom) | -| **Indic cross-lingual Natural Language Inference** | [Github](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | -| **Optimizing LM for Text2SQL using DSPy** | [Github](https://github.com/jjovalle99/DSPy-Text2SQL) | -| **DSPy PII Masking Demo by Eric Ness** | [Colab](https://colab.research.google.com/drive/1KZR1sGTp_RLWUJPAiK1FKPKI-Qn9neUm?usp=sharing) | -| **DSPy on BIG-Bench Hard Example** | [Github](https://drchrislevy.github.io/posts/dspy/dspy.html) | -| **Building a chess playing agent using DSPy** | [Github](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | -| **Ittia Research Fact Checking** | [Github](https://github.com/ittia-research/check) | -| **Strategic Debate via Tree-of-Thought** | [Github](https://github.com/zbambergerNLP/strategic-debate-tot) | -| **Sanskrit to English Translation App**| [Github](https://github.com/ganarajpr/sanskrit-translator-dspy) | -| **DSPy for extracting features from PDFs on arXiv**| [Github](https://github.com/S1M0N38/dspy-arxiv) | -| **DSPygen: DSPy in Ruby on Rails**| [Github](https://github.com/seanchatmangpt/dspygen) | -| **DSPy Inspector**| [Github](https://github.com/Neoxelox/dspy-inspector) | -| **DSPy with FastAPI**| [Github](https://github.com/diicellman/dspy-rag-fastapi) | -| **DSPy for Indian Languages**| [Github](https://github.com/saifulhaq95/DSPy-Indic) | -| **Hurricane: Blog Posts 
with Generative Feedback Loops!**| [Github](https://github.com/weaviate-tutorials/Hurricane) | -| **RAG example using DSPy, Gradio, FastAPI, and Ollama**| [Github](https://github.com/diicellman/dspy-gradio-rag) | -| **Synthetic Data Generation**| [Github](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | -| **Self Discover**| [Github](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | +| **Name** | **Description/Link** | +| --------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| **Stanford CS 224U Homework** | [Github](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | +| **STORM Report Generation (10,000 GitHub stars)** | [Github](https://github.com/stanford-oval/storm) | +| **DSPy Redteaming** | [Github](https://github.com/haizelabs/dspy-redteam) | +| **DSPy Theory of Mind** | [Github](https://github.com/plastic-labs/dspy-opentom) | +| **Indic cross-lingual Natural Language Inference** | [Github](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | +| **Optimizing LM for Text2SQL using DSPy** | [Github](https://github.com/jjovalle99/DSPy-Text2SQL) | +| **DSPy PII Masking Demo by Eric Ness** | [Colab](https://colab.research.google.com/drive/1KZR1sGTp_RLWUJPAiK1FKPKI-Qn9neUm?usp=sharing) | +| **DSPy on BIG-Bench Hard Example** | [Github](https://drchrislevy.github.io/posts/dspy/dspy.html) | +| **Building a chess playing agent using DSPy** | [Github](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | +| **Ittia Research Fact Checking** | [Github](https://github.com/ittia-research/check) | +| **Strategic Debate via Tree-of-Thought** | [Github](https://github.com/zbambergerNLP/strategic-debate-tot) | +| **Sanskrit to English Translation App** | [Github](https://github.com/ganarajpr/sanskrit-translator-dspy) | +| **DSPy for extracting features from PDFs on arXiv** | [Github](https://github.com/S1M0N38/dspy-arxiv) | +| **DSPygen: DSPy in Ruby on Rails** | [Github](https://github.com/seanchatmangpt/dspygen) | +| **DSPy Inspector** | [Github](https://github.com/Neoxelox/dspy-inspector) | +| **DSPy with FastAPI** | [Github](https://github.com/diicellman/dspy-rag-fastapi) | +| **DSPy for Indian Languages** | [Github](https://github.com/saifulhaq95/DSPy-Indic) | +| **Hurricane: Blog Posts with Generative Feedback Loops!** | [Github](https://github.com/weaviate-tutorials/Hurricane) | +| **RAG example using DSPy, Gradio, FastAPI, and Ollama** | [Github](https://github.com/diicellman/dspy-gradio-rag) | +| **Synthetic Data Generation** | [Github](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | +| **Self Discover** | [Github](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | TODO: This list in particular is highly incomplete. There are a couple dozen other good ones. 
## A Few Providers, Integrations, and related Blog Releases -| **Name** | **Link** | -|---|---| -| **Databricks** | [Link](https://www.databricks.com/blog/dspy-databricks) | -| **Zenbase** | [Link](https://zenbase.ai/) | -| **LangWatch** | [Link](https://langwatch.ai/blog/introducing-dspy-visualizer) | -| **Gradient** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | -| **Snowflake** | [Link](https://medium.com/snowflake/dspy-snowflake-140d6d947d73) | -| **Langchain** | [Link](https://python.langchain.com/v0.2/docs/integrations/providers/dspy/) | -| **Weaviate** | [Link](https://weaviate.io/blog/dspy-optimizers) | -| **Qdrant** | [Link](https://qdrant.tech/documentation/frameworks/dspy/) | -| **Weights & Biases Weave** | [Link](https://weave-docs.wandb.ai/guides/integrations/dspy/) | -| **Milvus** | [Link](https://milvus.io/docs/integrate_with_dspy.md) | -| **Neo4j** | [Link](https://neo4j.com/labs/genai-ecosystem/dspy/) | -| **Lightning AI** | [Link](https://lightning.ai/lightning-ai/studios/dspy-programming-with-foundation-models) | -| **Haystack** | [Link](https://towardsdatascience.com/automating-prompt-engineering-with-dspy-and-haystack-926a637a3f43) | -| **Arize** | [Link](https://docs.arize.com/phoenix/tracing/integrations-tracing/dspy) | -| **LlamaIndex** | [Link](https://github.com/stanfordnlp/dspy/blob/main/examples/llamaindex/dspy_llamaindex_rag.ipynb) | -| **Langtrace** | [Link](https://docs.langtrace.ai/supported-integrations/llm-frameworks/dspy) | -| **Langfuse** | [Link]([https://docs.langtrace.ai/supported-integrations/llm-frameworks/dspy](https://langfuse.com/docs/integrations/dspy)) | +| **Name** | **Link** | +| -------------------------- | -------------------------------------------------------------------------------------------------------- | +| **Databricks** | [Link](https://www.databricks.com/blog/dspy-databricks) | +| **Zenbase** | [Link](https://zenbase.ai/) | +| **LangWatch** | [Link](https://langwatch.ai/blog/introducing-dspy-visualizer) | +| **Gradient** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | +| **Snowflake** | [Link](https://medium.com/snowflake/dspy-snowflake-140d6d947d73) | +| **Langchain** | [Link](https://python.langchain.com/v0.2/docs/integrations/providers/dspy/) | +| **Weaviate** | [Link](https://weaviate.io/blog/dspy-optimizers) | +| **Qdrant** | [Link](https://qdrant.tech/documentation/frameworks/dspy/) | +| **Weights & Biases Weave** | [Link](https://weave-docs.wandb.ai/guides/integrations/dspy/) | +| **Milvus** | [Link](https://milvus.io/docs/integrate_with_dspy.md) | +| **Neo4j** | [Link](https://neo4j.com/labs/genai-ecosystem/dspy/) | +| **Lightning AI** | [Link](https://lightning.ai/lightning-ai/studios/dspy-programming-with-foundation-models) | +| **Haystack** | [Link](https://towardsdatascience.com/automating-prompt-engineering-with-dspy-and-haystack-926a637a3f43) | +| **Arize** | [Link](https://docs.arize.com/phoenix/tracing/integrations-tracing/dspy) | +| **LlamaIndex** | [Link](https://github.com/stanfordnlp/dspy/blob/main/examples/llamaindex/dspy_llamaindex_rag.ipynb) | +| **Langtrace** | [Link](https://docs.langtrace.ai/supported-integrations/llm-frameworks/dspy) | +| **Langfuse** | [Link](https://langfuse.com/docs/integrations/dspy) | ## A Few Blogs & Videos on using DSPy -| **Name** | **Link** | -|---|---| -| **Blog Posts** | | -| **Why I bet on DSPy** | [Blog](https://blog.isaacbmiller.com/posts/dspy) | -| **Not Your Average Prompt 
Engineering** | [Blog](https://jina.ai/news/dspy-not-your-average-prompt-engineering/) | -| **Why I'm excited about DSPy** | [Blog](https://substack.stephen.so/p/why-im-excited-about-dspy) | -| **Achieving GPT-4 Performance at Lower Cost** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | -| **Prompt engineering is a task best left to AI models** | [Link](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) | +| **Name** | **Link** | +| ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Blog Posts** | | +| **Why I bet on DSPy** | [Blog](https://blog.isaacbmiller.com/posts/dspy) | +| **Not Your Average Prompt Engineering** | [Blog](https://jina.ai/news/dspy-not-your-average-prompt-engineering/) | +| **Why I'm excited about DSPy** | [Blog](https://substack.stephen.so/p/why-im-excited-about-dspy) | +| **Achieving GPT-4 Performance at Lower Cost** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | +| **Prompt engineering is a task best left to AI models** | [Link](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) | | **What makes DSPy a valuable framework for developing complex language model pipelines?** | [Link](https://medium.com/@sujathamudadla1213/what-makes-dspy-a-valuable-framework-for-developing-complex-language-model-pipelines-edfa5b4bcf9b) | -| **DSPy: A new framework to program your foundation models just by prompting** | [Link](https://www.linkedin.com/pulse/dspy-new-framework-program-your-foundation-models-just-prompting-lli4c/) | -| **Intro to DSPy: Goodbye Prompting, Hello Programming** | [Link](https://medium.com/towards-data-science/intro-to-dspy-goodbye-prompting-hello-programming-4ca1c6ce3eb9) | -| **DSPyGen: Revolutionizing AI** | [Link](https://www.linkedin.com/pulse/launch-alert-dspygen-20242252-revolutionizing-ai-sean-chatman--g9f1c/) | -| **Building an AI Assistant with DSPy** | [Link](https://www.linkedin.com/pulse/building-ai-assistant-dspy-valliappa-lakshmanan-vgnsc/) | -| **Videos** | | -| **DSPy Explained! 
(60K views)** | [Link](https://www.youtube.com/watch?v=41EfOY0Ldkc) | -| **DSPy Intro from Sephora (25K views)** | [Link](https://www.youtube.com/watch?v=D2HurSldDkE) | -| **Structured Outputs with DSPy** | [Link](https://www.youtube.com/watch?v=tVw3CwrN5-8) | -| **DSPy and ColBERT - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=CDung1LnLbY) | -| **SBTB23 DSPy** | [Link](https://www.youtube.com/watch?v=Dt3H2ninoeY) | -| **Optimization with DSPy and LangChain** | [Link](https://www.youtube.com/watch?v=4EXOmWeqXRc) | -| **Automated Prompt Engineering + Visualization** | [Link](https://www.youtube.com/watch?v=eAZ2LtJ6D5k) | -| **Transforming LM Calls into Pipelines** | [Link](https://www.youtube.com/watch?v=NoaDWKHdkHg) | -| **NeurIPS Hacker Cup: DSPy for Code Gen** | [Link](https://www.youtube.com/watch?v=gpe-rtJN8z8) | -| **MIPRO and DSPy - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=skMH3DOV_UQ) | -| **Getting Started with RAG in DSPy** | [Link](https://www.youtube.com/watch?v=CEuUG4Umfxs) | -| **Adding Depth to DSPy Programs** | [Link](https://www.youtube.com/watch?v=0c7Ksd6BG88) | -| **Programming Foundation Models with DSPy** | [Link](https://www.youtube.com/watch?v=Y94tw4eDHW0) | -| **DSPy End-to-End: SF Meetup** | [Link](https://www.youtube.com/watch?v=Y81DoFmt-2U) | -| **Monitoring & Tracing DSPy with Langtrace** | [Link](https://langtrace.ai/blog/announcing-dspy-support-in-langtrace) | -| **Teaching chat models to solve chess puzzles using DSPy + Finetuning** | [Link](https://raw.sh/posts/chess_puzzles) | +| **DSPy: A new framework to program your foundation models just by prompting** | [Link](https://www.linkedin.com/pulse/dspy-new-framework-program-your-foundation-models-just-prompting-lli4c/) | +| **Intro to DSPy: Goodbye Prompting, Hello Programming** | [Link](https://medium.com/towards-data-science/intro-to-dspy-goodbye-prompting-hello-programming-4ca1c6ce3eb9) | +| **DSPyGen: Revolutionizing AI** | [Link](https://www.linkedin.com/pulse/launch-alert-dspygen-20242252-revolutionizing-ai-sean-chatman--g9f1c/) | +| **Building an AI Assistant with DSPy** | [Link](https://www.linkedin.com/pulse/building-ai-assistant-dspy-valliappa-lakshmanan-vgnsc/) | +| **Videos** | | +| **DSPy Explained! 
(60K views)** | [Link](https://www.youtube.com/watch?v=41EfOY0Ldkc) | +| **DSPy Intro from Sephora (25K views)** | [Link](https://www.youtube.com/watch?v=D2HurSldDkE) | +| **Structured Outputs with DSPy** | [Link](https://www.youtube.com/watch?v=tVw3CwrN5-8) | +| **DSPy and ColBERT - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=CDung1LnLbY) | +| **SBTB23 DSPy** | [Link](https://www.youtube.com/watch?v=Dt3H2ninoeY) | +| **Optimization with DSPy and LangChain** | [Link](https://www.youtube.com/watch?v=4EXOmWeqXRc) | +| **Automated Prompt Engineering + Visualization** | [Link](https://www.youtube.com/watch?v=eAZ2LtJ6D5k) | +| **Transforming LM Calls into Pipelines** | [Link](https://www.youtube.com/watch?v=NoaDWKHdkHg) | +| **NeurIPS Hacker Cup: DSPy for Code Gen** | [Link](https://www.youtube.com/watch?v=gpe-rtJN8z8) | +| **MIPRO and DSPy - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=skMH3DOV_UQ) | +| **Getting Started with RAG in DSPy** | [Link](https://www.youtube.com/watch?v=CEuUG4Umfxs) | +| **Adding Depth to DSPy Programs** | [Link](https://www.youtube.com/watch?v=0c7Ksd6BG88) | +| **Programming Foundation Models with DSPy** | [Link](https://www.youtube.com/watch?v=Y94tw4eDHW0) | +| **DSPy End-to-End: SF Meetup** | [Link](https://www.youtube.com/watch?v=Y81DoFmt-2U) | +| **Monitoring & Tracing DSPy with Langtrace** | [Link](https://langtrace.ai/blog/announcing-dspy-support-in-langtrace) | +| **Teaching chat models to solve chess puzzles using DSPy + Finetuning** | [Link](https://raw.sh/posts/chess_puzzles) | TODO: This list in particular is highly incomplete. There are dozens of other good ones. To allow space, divide into opintionated blogs / podcasts / interviews vs. tutorials & talks. From ae86c9c3266cf22ccc98573e37364d03e5fbe466 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 30 Sep 2024 11:09:37 -0700 Subject: [PATCH 23/23] Reset use cases Signed-off-by: dbczumar --- docs/docs/dspy-usecases.md | 222 +++++++++++++++++++------------------ 1 file changed, 112 insertions(+), 110 deletions(-) diff --git a/docs/docs/dspy-usecases.md b/docs/docs/dspy-usecases.md index 3ae7ec499..4d982f889 100644 --- a/docs/docs/dspy-usecases.md +++ b/docs/docs/dspy-usecases.md @@ -12,135 +12,137 @@ This list is ever expanding and highly incomplete (WIP)! We'll be adding a bunch 4. [Providers with DSPy support](#a-few-providers-integrations-and-related-blog-releases) 5. [Blogs & Videos on using DSPy](#a-few-blogs--videos-on-using-dspy) + ## A Few Company Use Cases -| **Name** | **Use Cases** | -| --------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **[JetBlue](https://www.jetblue.com/)** | Multiple chatbot use cases. [Blog](https://www.databricks.com/blog/optimizing-databricks-llm-pipelines-dspy) | -| **[Replit](https://replit.com/)** | Synthesize diffs using code LLMs using a DSPy pipeline. [Blog](https://blog.replit.com/code-repair) | -| **[Databricks](https://www.databricks.com/)** | Research, products, and customer solutions around LM Judges, RAG, classification, and other applications. 
[Blog](https://www.databricks.com/blog/dspy-databricks) [Blog II](https://www.databricks.com/customers/ddi) | -| **[Sephora](https://www.sephora.com/)** | Undisclosed agent usecases; perspectives shared in [DAIS Session](https://www.youtube.com/watch?v=D2HurSldDkE). | -| **[Zoro UK](https://www.zoro.co.uk/)** | E-commerce applications around structured shopping. [Portkey Session](https://www.youtube.com/watch?v=_vGKSc1tekE) | -| **[VMware](https://www.vmware.com/)** | RAG and other prompt optimization applications. [Interview in The Register.](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) [Business Insider.](https://www.businessinsider.com/chaptgpt-large-language-model-ai-prompt-engineering-automated-optimizer-2024-3) | -| **[Haize Labs](https://www.haizelabs.com/)** | Automated red-teaming for LLMs. [Blog](https://blog.haizelabs.com/posts/dspy/) | -| **[Plastic Labs](https://www.plasticlabs.ai/)** | Different pipelines within Honcho. [Blog](https://blog.plasticlabs.ai/blog/User-State-is-State-of-the-Art) | -| **[PingCAP](https://pingcap.com/)** | Building a knowledge graph. [Article](https://www.pingcap.com/article/building-a-graphrag-from-wikipedia-page-using-dspy-openai-and-tidb-vector-database/) | -| **[Salomatic](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system)** | Enriching medical reports using DSPy. [Blog](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system) | -| **[Truelaw](https://www.youtube.com/watch?v=O0F3RAWZNfM)** | How Truelaw builds bespoke LLM pipelines for law firms using DSPy. [Podcast](https://www.youtube.com/watch?v=O0F3RAWZNfM) | -| **[Moody's](https://www.moodys.com/)** | Leveraging DSPy to optimize RAG systems, LLM-as-a-Judge, and agentic systems for financial workflows. | -| **[Normal Computing](https://www.normalcomputing.com/)** | Translate specs from chip companies from English to intermediate formal languages | -| **[Procure.FYI](https://www.procure.fyi/)** | Process messy, publicly available technology spending and pricing data via DSPy. | -| **[RadiantLogic](https://www.radiantlogic.com/)** | AI Data Assistant. DSPy is used for the agent that routes the query, the context extraction module, the text-to-sql conversion engine, and the table summarization module. | -| **[Hyperlint](https://hyperlint.com)** | Uses DSPy to generate technical documentation. DSPy helps to fetch relevant information and synthesize that into tutorials. | -| **[Starops](https://staropshq.com/) & [Saya](https://heysaya.ai/)** | Building research documents given a user's corpus. Generate prompts to create more articles from example articles. | -| **[Tessel AI](https://tesselai.com/)** | Enhancing human-machine interaction with data use cases. | -| **[Dicer.ai](https://dicer.ai/)** | Uses DSPy for marketing AI to get the most from their paid ads. | -| **[Howie](https://howie.ai)** | Using DSPy to automate meeting scheduling through email. | -| **[Isoform.ai](https://isoform.ai)** | Building custom integrations using DSPy. | -| **[Trampoline AI](https://trampoline.ai)** | Uses DSPy to power their data-augmentation and LM pipelines. | +| **Name** | **Use Cases** | +|---|---| +| **[JetBlue](https://www.jetblue.com/)** | Multiple chatbot use cases. [Blog](https://www.databricks.com/blog/optimizing-databricks-llm-pipelines-dspy) | +| **[Replit](https://replit.com/)** | Synthesize diffs using code LLMs using a DSPy pipeline. 
[Blog](https://blog.replit.com/code-repair) |
+| **[Databricks](https://www.databricks.com/)** | Research, products, and customer solutions around LM Judges, RAG, classification, and other applications. [Blog](https://www.databricks.com/blog/dspy-databricks) [Blog II](https://www.databricks.com/customers/ddi) |
+| **[Sephora](https://www.sephora.com/)** | Undisclosed agent use cases; perspectives shared in [DAIS Session](https://www.youtube.com/watch?v=D2HurSldDkE). |
+| **[Zoro UK](https://www.zoro.co.uk/)** | E-commerce applications around structured shopping. [Portkey Session](https://www.youtube.com/watch?v=_vGKSc1tekE) |
+| **[VMware](https://www.vmware.com/)** | RAG and other prompt optimization applications. [Interview in The Register.](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) [Business Insider.](https://www.businessinsider.com/chaptgpt-large-language-model-ai-prompt-engineering-automated-optimizer-2024-3) |
+| **[Haize Labs](https://www.haizelabs.com/)** | Automated red-teaming for LLMs. [Blog](https://blog.haizelabs.com/posts/dspy/) |
+| **[Plastic Labs](https://www.plasticlabs.ai/)** | Different pipelines within Honcho. [Blog](https://blog.plasticlabs.ai/blog/User-State-is-State-of-the-Art) |
+| **[PingCAP](https://pingcap.com/)** | Building a knowledge graph. [Article](https://www.pingcap.com/article/building-a-graphrag-from-wikipedia-page-using-dspy-openai-and-tidb-vector-database/) |
+| **[Salomatic](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system)** | Enriching medical reports using DSPy. [Blog](https://langtrace.ai/blog/case-study-how-salomatic-used-langtrace-to-build-a-reliable-medical-report-generation-system) |
+| **[Truelaw](https://www.youtube.com/watch?v=O0F3RAWZNfM)** | How Truelaw builds bespoke LLM pipelines for law firms using DSPy. [Podcast](https://www.youtube.com/watch?v=O0F3RAWZNfM) |
+| **[Moody's](https://www.moodys.com/)** | Leveraging DSPy to optimize RAG systems, LLM-as-a-Judge, and agentic systems for financial workflows. |
+| **[Normal Computing](https://www.normalcomputing.com/)** | Translate specs from chip companies from English to intermediate formal languages. |
+| **[Procure.FYI](https://www.procure.fyi/)** | Process messy, publicly available technology spending and pricing data via DSPy. |
+| **[RadiantLogic](https://www.radiantlogic.com/)** | AI Data Assistant. DSPy is used for the agent that routes the query, the context extraction module, the text-to-SQL conversion engine, and the table summarization module. |
+| **[Hyperlint](https://hyperlint.com)** | Uses DSPy to generate technical documentation. DSPy helps to fetch relevant information and synthesize that into tutorials. |
+| **[Starops](https://staropshq.com/) & [Saya](https://heysaya.ai/)** | Building research documents given a user's corpus. Generate prompts to create more articles from example articles. |
+| **[Tessel AI](https://tesselai.com/)** | Enhancing human-machine interaction with data use cases. |
+| **[Dicer.ai](https://dicer.ai/)** | Uses DSPy for marketing AI to get the most from their paid ads. |
+| **[Howie](https://howie.ai)** | Using DSPy to automate meeting scheduling through email. |
+| **[Isoform.ai](https://isoform.ai)** | Building custom integrations using DSPy. |
+| **[Trampoline AI](https://trampoline.ai)** | Uses DSPy to power their data-augmentation and LM pipelines. |

WIP.
This list mainly includes companies that have public posts or have OKed being included for specific products so far. + ## A Few Papers Using DSPy -| **Name** | **Description** | -| ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ | -| **[STORM](https://arxiv.org/abs/2402.14207)** | Writing Wikipedia-like Articles From Scratch. | -| **[PATH](https://arxiv.org/abs/2406.11706)** | Prompts as Auto-Optimized Training Hyperparameters: Training Best-in-Class IR Models from Scratch with 10 Gold Labels | -| **[WangLab @ MEDIQA](https://arxiv.org/abs/2404.14544)** | UofT's winning system at MEDIQA, outperforms the next best system by 20 points | -| **[UMD's Suicide Detection System](https://arxiv.org/abs/2406.06608)** | Outperforms 20-hour expert human prompt engineering by 40% | -| **[IReRa](https://arxiv.org/abs/2401.12178)** | Infer-Retrieve-Rank: Extreme Classification with > 10,000 Labels | -| **[Unreasonably Effective Eccentric Prompts](https://arxiv.org/abs/2402.10949v2)** | General Prompt Optimization | -| **[Palimpzest](https://arxiv.org/abs/2405.14696)** | A Declarative System for Optimizing AI Workloads | -| **[AI Agents that Matter](https://arxiv.org/abs/2407.01502v1)** | Agent Efficiency Optimization | -| **[EDEN](https://arxiv.org/abs/2406.17982v1)** | Empathetic Dialogues for English Learning: Uses adaptive empathetic feedback to improve student grit | -| **[ECG-Chat](https://arxiv.org/pdf/2408.08849)** | Uses DSPy with GraphRAG for medical report generation | -| **[DSPy Assertions](https://arxiv.org/abs/2312.13382)** | Various applications of imposing hard and soft constraints on LM outputs | -| **[DSPy Guardrails](https://boxiyu.github.io/assets/pdf/DSPy_Guardrails.pdf)** | Reduce the attack success rate of CodeAttack, decreasing from 75% to 5% | -| **[Co-STORM](https://arxiv.org/pdf/2408.15232)** | Collaborative STORM: Generate Wikipedia-like articles through collaborative discourse among users and multiple LM agents | +| **Name** | **Description** | +|---|---| +| **[STORM](https://arxiv.org/abs/2402.14207)** | Writing Wikipedia-like Articles From Scratch. 
| +| **[PATH](https://arxiv.org/abs/2406.11706)** | Prompts as Auto-Optimized Training Hyperparameters: Training Best-in-Class IR Models from Scratch with 10 Gold Labels | +| **[WangLab @ MEDIQA](https://arxiv.org/abs/2404.14544)** | UofT's winning system at MEDIQA, outperforms the next best system by 20 points | +| **[UMD's Suicide Detection System](https://arxiv.org/abs/2406.06608)** | Outperforms 20-hour expert human prompt engineering by 40% | +| **[IReRa](https://arxiv.org/abs/2401.12178)** | Infer-Retrieve-Rank: Extreme Classification with > 10,000 Labels | +| **[Unreasonably Effective Eccentric Prompts](https://arxiv.org/abs/2402.10949v2)** | General Prompt Optimization | +| **[Palimpzest](https://arxiv.org/abs/2405.14696)** | A Declarative System for Optimizing AI Workloads | +| **[AI Agents that Matter](https://arxiv.org/abs/2407.01502v1)** | Agent Efficiency Optimization | +| **[EDEN](https://arxiv.org/abs/2406.17982v1)** | Empathetic Dialogues for English Learning: Uses adaptive empathetic feedback to improve student grit | +| **[ECG-Chat](https://arxiv.org/pdf/2408.08849)** | Uses DSPy with GraphRAG for medical report generation | +| **[DSPy Assertions](https://arxiv.org/abs/2312.13382)** | Various applications of imposing hard and soft constraints on LM outputs | +| **[DSPy Guardrails](https://boxiyu.github.io/assets/pdf/DSPy_Guardrails.pdf)** | Reduce the attack success rate of CodeAttack, decreasing from 75% to 5% | +| **[Co-STORM](https://arxiv.org/pdf/2408.15232)** | Collaborative STORM: Generate Wikipedia-like articles through collaborative discourse among users and multiple LM agents | ## A Few Repositories (or other OSS examples) using DSPy -| **Name** | **Description/Link** | -| --------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | -| **Stanford CS 224U Homework** | [Github](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | -| **STORM Report Generation (10,000 GitHub stars)** | [Github](https://github.com/stanford-oval/storm) | -| **DSPy Redteaming** | [Github](https://github.com/haizelabs/dspy-redteam) | -| **DSPy Theory of Mind** | [Github](https://github.com/plastic-labs/dspy-opentom) | -| **Indic cross-lingual Natural Language Inference** | [Github](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | -| **Optimizing LM for Text2SQL using DSPy** | [Github](https://github.com/jjovalle99/DSPy-Text2SQL) | -| **DSPy PII Masking Demo by Eric Ness** | [Colab](https://colab.research.google.com/drive/1KZR1sGTp_RLWUJPAiK1FKPKI-Qn9neUm?usp=sharing) | -| **DSPy on BIG-Bench Hard Example** | [Github](https://drchrislevy.github.io/posts/dspy/dspy.html) | -| **Building a chess playing agent using DSPy** | [Github](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | -| **Ittia Research Fact Checking** | [Github](https://github.com/ittia-research/check) | -| **Strategic Debate via Tree-of-Thought** | [Github](https://github.com/zbambergerNLP/strategic-debate-tot) | -| **Sanskrit to English Translation App** | [Github](https://github.com/ganarajpr/sanskrit-translator-dspy) | -| **DSPy for extracting features from PDFs on arXiv** | [Github](https://github.com/S1M0N38/dspy-arxiv) | -| **DSPygen: DSPy in Ruby on Rails** | [Github](https://github.com/seanchatmangpt/dspygen) | -| **DSPy Inspector** | [Github](https://github.com/Neoxelox/dspy-inspector) | -| **DSPy with FastAPI** | 
[Github](https://github.com/diicellman/dspy-rag-fastapi) | -| **DSPy for Indian Languages** | [Github](https://github.com/saifulhaq95/DSPy-Indic) | -| **Hurricane: Blog Posts with Generative Feedback Loops!** | [Github](https://github.com/weaviate-tutorials/Hurricane) | -| **RAG example using DSPy, Gradio, FastAPI, and Ollama** | [Github](https://github.com/diicellman/dspy-gradio-rag) | -| **Synthetic Data Generation** | [Github](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | -| **Self Discover** | [Github](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | +| **Name** | **Description/Link** | +|---|---| +| **Stanford CS 224U Homework** | [Github](https://github.com/cgpotts/cs224u/blob/main/hw_openqa.ipynb) | +| **STORM Report Generation (10,000 GitHub stars)** | [Github](https://github.com/stanford-oval/storm) | +| **DSPy Redteaming** | [Github](https://github.com/haizelabs/dspy-redteam) | +| **DSPy Theory of Mind** | [Github](https://github.com/plastic-labs/dspy-opentom) | +| **Indic cross-lingual Natural Language Inference** | [Github](https://github.com/saifulhaq95/DSPy-Indic/blob/main/indicxlni.ipynb) | +| **Optimizing LM for Text2SQL using DSPy** | [Github](https://github.com/jjovalle99/DSPy-Text2SQL) | +| **DSPy PII Masking Demo by Eric Ness** | [Colab](https://colab.research.google.com/drive/1KZR1sGTp_RLWUJPAiK1FKPKI-Qn9neUm?usp=sharing) | +| **DSPy on BIG-Bench Hard Example** | [Github](https://drchrislevy.github.io/posts/dspy/dspy.html) | +| **Building a chess playing agent using DSPy** | [Github](https://medium.com/thoughts-on-machine-learning/building-a-chess-playing-agent-using-dspy-9b87c868f71e) | +| **Ittia Research Fact Checking** | [Github](https://github.com/ittia-research/check) | +| **Strategic Debate via Tree-of-Thought** | [Github](https://github.com/zbambergerNLP/strategic-debate-tot) | +| **Sanskrit to English Translation App**| [Github](https://github.com/ganarajpr/sanskrit-translator-dspy) | +| **DSPy for extracting features from PDFs on arXiv**| [Github](https://github.com/S1M0N38/dspy-arxiv) | +| **DSPygen: DSPy in Ruby on Rails**| [Github](https://github.com/seanchatmangpt/dspygen) | +| **DSPy Inspector**| [Github](https://github.com/Neoxelox/dspy-inspector) | +| **DSPy with FastAPI**| [Github](https://github.com/diicellman/dspy-rag-fastapi) | +| **DSPy for Indian Languages**| [Github](https://github.com/saifulhaq95/DSPy-Indic) | +| **Hurricane: Blog Posts with Generative Feedback Loops!**| [Github](https://github.com/weaviate-tutorials/Hurricane) | +| **RAG example using DSPy, Gradio, FastAPI, and Ollama**| [Github](https://github.com/diicellman/dspy-gradio-rag) | +| **Synthetic Data Generation**| [Github](https://colab.research.google.com/drive/1CweVOu0qhTC0yOfW5QkLDRIKuAuWJKEr?usp=sharing) | +| **Self Discover**| [Github](https://colab.research.google.com/drive/1GkAQKmw1XQgg5UNzzy8OncRe79V6pADB?usp=sharing) | TODO: This list in particular is highly incomplete. There are a couple dozen other good ones. 
## A Few Providers, Integrations, and related Blog Releases -| **Name** | **Link** | -| -------------------------- | -------------------------------------------------------------------------------------------------------- | -| **Databricks** | [Link](https://www.databricks.com/blog/dspy-databricks) | -| **Zenbase** | [Link](https://zenbase.ai/) | -| **LangWatch** | [Link](https://langwatch.ai/blog/introducing-dspy-visualizer) | -| **Gradient** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | -| **Snowflake** | [Link](https://medium.com/snowflake/dspy-snowflake-140d6d947d73) | -| **Langchain** | [Link](https://python.langchain.com/v0.2/docs/integrations/providers/dspy/) | -| **Weaviate** | [Link](https://weaviate.io/blog/dspy-optimizers) | -| **Qdrant** | [Link](https://qdrant.tech/documentation/frameworks/dspy/) | -| **Weights & Biases Weave** | [Link](https://weave-docs.wandb.ai/guides/integrations/dspy/) | -| **Milvus** | [Link](https://milvus.io/docs/integrate_with_dspy.md) | -| **Neo4j** | [Link](https://neo4j.com/labs/genai-ecosystem/dspy/) | -| **Lightning AI** | [Link](https://lightning.ai/lightning-ai/studios/dspy-programming-with-foundation-models) | -| **Haystack** | [Link](https://towardsdatascience.com/automating-prompt-engineering-with-dspy-and-haystack-926a637a3f43) | -| **Arize** | [Link](https://docs.arize.com/phoenix/tracing/integrations-tracing/dspy) | -| **LlamaIndex** | [Link](https://github.com/stanfordnlp/dspy/blob/main/examples/llamaindex/dspy_llamaindex_rag.ipynb) | -| **Langtrace** | [Link](https://docs.langtrace.ai/supported-integrations/llm-frameworks/dspy) | -| **Langfuse** | [Link](https://langfuse.com/docs/integrations/dspy) | +| **Name** | **Link** | +|---|---| +| **Databricks** | [Link](https://www.databricks.com/blog/dspy-databricks) | +| **Zenbase** | [Link](https://zenbase.ai/) | +| **LangWatch** | [Link](https://langwatch.ai/blog/introducing-dspy-visualizer) | +| **Gradient** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | +| **Snowflake** | [Link](https://medium.com/snowflake/dspy-snowflake-140d6d947d73) | +| **Langchain** | [Link](https://python.langchain.com/v0.2/docs/integrations/providers/dspy/) | +| **Weaviate** | [Link](https://weaviate.io/blog/dspy-optimizers) | +| **Qdrant** | [Link](https://qdrant.tech/documentation/frameworks/dspy/) | +| **Weights & Biases Weave** | [Link](https://weave-docs.wandb.ai/guides/integrations/dspy/) | +| **Milvus** | [Link](https://milvus.io/docs/integrate_with_dspy.md) | +| **Neo4j** | [Link](https://neo4j.com/labs/genai-ecosystem/dspy/) | +| **Lightning AI** | [Link](https://lightning.ai/lightning-ai/studios/dspy-programming-with-foundation-models) | +| **Haystack** | [Link](https://towardsdatascience.com/automating-prompt-engineering-with-dspy-and-haystack-926a637a3f43) | +| **Arize** | [Link](https://docs.arize.com/phoenix/tracing/integrations-tracing/dspy) | +| **LlamaIndex** | [Link](https://github.com/stanfordnlp/dspy/blob/main/examples/llamaindex/dspy_llamaindex_rag.ipynb) | +| **Langtrace** | [Link](https://docs.langtrace.ai/supported-integrations/llm-frameworks/dspy) | +| **Langfuse** | [Link](https://langfuse.com/docs/integrations/dspy) | ## A Few Blogs & Videos on using DSPy -| **Name** | **Link** | -| ----------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------ | -| **Blog Posts** | | -| **Why I bet on DSPy** | [Blog](https://blog.isaacbmiller.com/posts/dspy) | -| **Not Your Average Prompt Engineering** | [Blog](https://jina.ai/news/dspy-not-your-average-prompt-engineering/) | -| **Why I'm excited about DSPy** | [Blog](https://substack.stephen.so/p/why-im-excited-about-dspy) | -| **Achieving GPT-4 Performance at Lower Cost** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | -| **Prompt engineering is a task best left to AI models** | [Link](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) | +| **Name** | **Link** | +|---|---| +| **Blog Posts** | | +| **Why I bet on DSPy** | [Blog](https://blog.isaacbmiller.com/posts/dspy) | +| **Not Your Average Prompt Engineering** | [Blog](https://jina.ai/news/dspy-not-your-average-prompt-engineering/) | +| **Why I'm excited about DSPy** | [Blog](https://substack.stephen.so/p/why-im-excited-about-dspy) | +| **Achieving GPT-4 Performance at Lower Cost** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | +| **Prompt engineering is a task best left to AI models** | [Link](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) | | **What makes DSPy a valuable framework for developing complex language model pipelines?** | [Link](https://medium.com/@sujathamudadla1213/what-makes-dspy-a-valuable-framework-for-developing-complex-language-model-pipelines-edfa5b4bcf9b) | -| **DSPy: A new framework to program your foundation models just by prompting** | [Link](https://www.linkedin.com/pulse/dspy-new-framework-program-your-foundation-models-just-prompting-lli4c/) | -| **Intro to DSPy: Goodbye Prompting, Hello Programming** | [Link](https://medium.com/towards-data-science/intro-to-dspy-goodbye-prompting-hello-programming-4ca1c6ce3eb9) | -| **DSPyGen: Revolutionizing AI** | [Link](https://www.linkedin.com/pulse/launch-alert-dspygen-20242252-revolutionizing-ai-sean-chatman--g9f1c/) | -| **Building an AI Assistant with DSPy** | [Link](https://www.linkedin.com/pulse/building-ai-assistant-dspy-valliappa-lakshmanan-vgnsc/) | -| **Videos** | | -| **DSPy Explained! 
(60K views)** | [Link](https://www.youtube.com/watch?v=41EfOY0Ldkc) | -| **DSPy Intro from Sephora (25K views)** | [Link](https://www.youtube.com/watch?v=D2HurSldDkE) | -| **Structured Outputs with DSPy** | [Link](https://www.youtube.com/watch?v=tVw3CwrN5-8) | -| **DSPy and ColBERT - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=CDung1LnLbY) | -| **SBTB23 DSPy** | [Link](https://www.youtube.com/watch?v=Dt3H2ninoeY) | -| **Optimization with DSPy and LangChain** | [Link](https://www.youtube.com/watch?v=4EXOmWeqXRc) | -| **Automated Prompt Engineering + Visualization** | [Link](https://www.youtube.com/watch?v=eAZ2LtJ6D5k) | -| **Transforming LM Calls into Pipelines** | [Link](https://www.youtube.com/watch?v=NoaDWKHdkHg) | -| **NeurIPS Hacker Cup: DSPy for Code Gen** | [Link](https://www.youtube.com/watch?v=gpe-rtJN8z8) | -| **MIPRO and DSPy - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=skMH3DOV_UQ) | -| **Getting Started with RAG in DSPy** | [Link](https://www.youtube.com/watch?v=CEuUG4Umfxs) | -| **Adding Depth to DSPy Programs** | [Link](https://www.youtube.com/watch?v=0c7Ksd6BG88) | -| **Programming Foundation Models with DSPy** | [Link](https://www.youtube.com/watch?v=Y94tw4eDHW0) | -| **DSPy End-to-End: SF Meetup** | [Link](https://www.youtube.com/watch?v=Y81DoFmt-2U) | -| **Monitoring & Tracing DSPy with Langtrace** | [Link](https://langtrace.ai/blog/announcing-dspy-support-in-langtrace) | -| **Teaching chat models to solve chess puzzles using DSPy + Finetuning** | [Link](https://raw.sh/posts/chess_puzzles) | +| **DSPy: A new framework to program your foundation models just by prompting** | [Link](https://www.linkedin.com/pulse/dspy-new-framework-program-your-foundation-models-just-prompting-lli4c/) | +| **Intro to DSPy: Goodbye Prompting, Hello Programming** | [Link](https://medium.com/towards-data-science/intro-to-dspy-goodbye-prompting-hello-programming-4ca1c6ce3eb9) | +| **DSPyGen: Revolutionizing AI** | [Link](https://www.linkedin.com/pulse/launch-alert-dspygen-20242252-revolutionizing-ai-sean-chatman--g9f1c/) | +| **Building an AI Assistant with DSPy** | [Link](https://www.linkedin.com/pulse/building-ai-assistant-dspy-valliappa-lakshmanan-vgnsc/) | +| **Videos** | | +| **DSPy Explained! 
(60K views)** | [Link](https://www.youtube.com/watch?v=41EfOY0Ldkc) |
+| **DSPy Intro from Sephora (25K views)** | [Link](https://www.youtube.com/watch?v=D2HurSldDkE) |
+| **Structured Outputs with DSPy** | [Link](https://www.youtube.com/watch?v=tVw3CwrN5-8) |
+| **DSPy and ColBERT - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=CDung1LnLbY) |
+| **SBTB23 DSPy** | [Link](https://www.youtube.com/watch?v=Dt3H2ninoeY) |
+| **Optimization with DSPy and LangChain** | [Link](https://www.youtube.com/watch?v=4EXOmWeqXRc) |
+| **Automated Prompt Engineering + Visualization** | [Link](https://www.youtube.com/watch?v=eAZ2LtJ6D5k) |
+| **Transforming LM Calls into Pipelines** | [Link](https://www.youtube.com/watch?v=NoaDWKHdkHg) |
+| **NeurIPS Hacker Cup: DSPy for Code Gen** | [Link](https://www.youtube.com/watch?v=gpe-rtJN8z8) |
+| **MIPRO and DSPy - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=skMH3DOV_UQ) |
+| **Getting Started with RAG in DSPy** | [Link](https://www.youtube.com/watch?v=CEuUG4Umfxs) |
+| **Adding Depth to DSPy Programs** | [Link](https://www.youtube.com/watch?v=0c7Ksd6BG88) |
+| **Programming Foundation Models with DSPy** | [Link](https://www.youtube.com/watch?v=Y94tw4eDHW0) |
+| **DSPy End-to-End: SF Meetup** | [Link](https://www.youtube.com/watch?v=Y81DoFmt-2U) |
+| **Monitoring & Tracing DSPy with Langtrace** | [Link](https://langtrace.ai/blog/announcing-dspy-support-in-langtrace) |
+| **Teaching chat models to solve chess puzzles using DSPy + Finetuning** | [Link](https://raw.sh/posts/chess_puzzles) |

TODO: This list in particular is highly incomplete. There are dozens of other good ones. To save space, divide into opinionated blogs / podcasts / interviews vs. tutorials & talks.