From 08db447a77dee88bcd4c779a52b10968551c9e0c Mon Sep 17 00:00:00 2001 From: Aniruddha Mandal Date: Fri, 2 Feb 2024 01:55:18 +0530 Subject: [PATCH 01/39] deeplake retriever implementation --- dspy/retrieve/deeplake_rm.py | 96 ++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 dspy/retrieve/deeplake_rm.py diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py new file mode 100644 index 000000000..80c47d6d8 --- /dev/null +++ b/dspy/retrieve/deeplake_rm.py @@ -0,0 +1,96 @@ +""" +Retriever model for deeplake +""" + +from typing import Optional, List, Union +import openai +import dspy +from collections import defaultdict + +try: + import openai.error + + ERRORS = ( + openai.RateLimitError, + openai.error.ServiceUnavailableError, + openai.error.APIError, + ) +except Exception: + ERRORS = (openai.RateLimitError, openai.APIError) + +try: + from deeplake import VectorStore +except ImportError: + deeplake = None + +if deeplake is None: + raise ImportError( + "The deeplake library is required to use DeeplakeRM. Install it with `pip install dspy-ai[deeplake]`" + ) + + +class DeeplakeRM(dspy.RetrieverModel): + """ + A retriever module that uses deeplake to return the top passages for a given query. + + """ + + def __init__( + self, + deeplake_vectorstore_name: str, + deeplake_client: VectorStore, + k: int = 3, + ): + self._deeplake_vectorstore_name = deeplake_vectorstore_name + self._deeplake_client = deeplake_client + + super().__init__(k=k) + + def embedding_function(texts, model="text-embedding-ada-002"): + if isinstance(texts, str): + texts = [texts] + + texts = [t.replace("\n", " ") for t in texts] + return [ + data["embedding"] + for data in openai.Embedding.create(input=texts, model=model)["data"] + ] + + def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int] + ) -> dspy.Prediction: + """Search with DeepLake for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + + Returns: + dspy.Prediction: An object containing the retrieved passages. 
+ """ + queries = ( + [query_or_queries] + if isinstance(query_or_queries, str) + else query_or_queries + ) + queries = [q for q in queries if q] # Filter empty queries + + k = k if k is not None else self.k + # batch_results = self._deeplake_client( + # path=self._deeplake_vectorstore_name + # ).search(queries, embedding_function=self.embedding_function, k=k) + + passages = defaultdict(float) + for query in queries: + results = self._deeplake_client( + path=self._deeplake_vectorstore_name, + embedding_function=self.embedding_function + ).search(query, k=k) + + for score,text in zip(results.get('score',0.0),results.get('text',"")): + passages[text] += score + + sorted_passages = sorted( + passages.items(), key=lambda x: x[1], reverse=True)[:k] + + return dspy.Prediction(passages=[passage for passage, _ in sorted_passages]) \ No newline at end of file From 8af9de0e79acd1fa34da5f91d583ef3edd1d93b3 Mon Sep 17 00:00:00 2001 From: Aniruddha Mandal Date: Fri, 2 Feb 2024 02:08:57 +0530 Subject: [PATCH 02/39] Added comments for documentation --- dspy/retrieve/deeplake_rm.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py index 80c47d6d8..2ec913197 100644 --- a/dspy/retrieve/deeplake_rm.py +++ b/dspy/retrieve/deeplake_rm.py @@ -33,6 +33,28 @@ class DeeplakeRM(dspy.RetrieverModel): """ A retriever module that uses deeplake to return the top passages for a given query. + Assumes that a Deep Lake Vector Store has been created and populated with the following payload: + - text: The text of the passage + + Args: + deeplake_vectorstore_name (str): The name or path of the Deep Lake Vector Store. + deeplake_client (VectorStore): An instance of the Deep Lake client. + k (int, optional): The default number of top passages to retrieve. Defaults to 3. 
+ + Examples: + Below is a code snippet that shows how to use Deep Lake as the default retriver: + ```python + from deeplake import VectorStore + llm = dspy.OpenAI(model="gpt-3.5-turbo") + deeplake_client = deeplake.Client() + retriever_model = DeeplakeRM("my_vectorstore_name", deeplake_client=deeplake_client) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use Deep Lake in the forward() function of a module + ```python + self.retrieve = DeeplakeRM("my_vectorstore_name", deeplake_client=deeplake_client, k=num_passages) + ``` """ def __init__( @@ -59,6 +81,7 @@ def embedding_function(texts, model="text-embedding-ada-002"): def forward( self, query_or_queries: Union[str, List[str]], k: Optional[int] ) -> dspy.Prediction: + """Search with DeepLake for self.k top passages for query Args: @@ -76,11 +99,9 @@ def forward( queries = [q for q in queries if q] # Filter empty queries k = k if k is not None else self.k - # batch_results = self._deeplake_client( - # path=self._deeplake_vectorstore_name - # ).search(queries, embedding_function=self.embedding_function, k=k) passages = defaultdict(float) + #deeplake doesn't support batch querying, manually querying each query and storing them for query in queries: results = self._deeplake_client( path=self._deeplake_vectorstore_name, From f633f0e66379d29a9efff07ac75921b6a03bee25 Mon Sep 17 00:00:00 2001 From: Aniruddha Mandal Date: Fri, 2 Feb 2024 02:28:02 +0530 Subject: [PATCH 03/39] [bugfix] Shifted the import check for deeplake library inside class --- dspy/retrieve/deeplake_rm.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py index 2ec913197..9e4552dea 100644 --- a/dspy/retrieve/deeplake_rm.py +++ b/dspy/retrieve/deeplake_rm.py @@ -18,18 +18,19 @@ except Exception: ERRORS = (openai.RateLimitError, openai.APIError) -try: - from deeplake import VectorStore -except ImportError: - deeplake = None -if deeplake is None: - raise ImportError( - "The deeplake library is required to use DeeplakeRM. Install it with `pip install dspy-ai[deeplake]`" - ) class DeeplakeRM(dspy.RetrieverModel): + try: + from deeplake import VectorStore + except ImportError: + deeplake = None + + if deeplake is None: + raise ImportError( + "The deeplake library is required to use DeeplakeRM. Install it with `pip install dspy-ai[deeplake]`" + ) """ A retriever module that uses deeplake to return the top passages for a given query. 
From 0a90070d172a70770670d960f133e1f0e45872c2 Mon Sep 17 00:00:00 2001 From: Aniruddha Mandal Date: Fri, 2 Feb 2024 20:20:26 +0530 Subject: [PATCH 04/39] [bug fix] Fixed import error and applied dotdict --- dspy/retrieve/deeplake_rm.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py index 9e4552dea..0435a4bc6 100644 --- a/dspy/retrieve/deeplake_rm.py +++ b/dspy/retrieve/deeplake_rm.py @@ -6,31 +6,22 @@ import openai import dspy from collections import defaultdict +from dsp.utils import dotdict try: import openai.error ERRORS = ( - openai.RateLimitError, + openai.error.RateLimitError, openai.error.ServiceUnavailableError, openai.error.APIError, ) except Exception: - ERRORS = (openai.RateLimitError, openai.APIError) + ERRORS = (openai.error.RateLimitError, openai.error.APIError) - - -class DeeplakeRM(dspy.RetrieverModel): - try: - from deeplake import VectorStore - except ImportError: - deeplake = None - - if deeplake is None: - raise ImportError( - "The deeplake library is required to use DeeplakeRM. Install it with `pip install dspy-ai[deeplake]`" - ) +class DeeplakeRM(dspy.Retrieve): + """ A retriever module that uses deeplake to return the top passages for a given query. @@ -47,14 +38,14 @@ class DeeplakeRM(dspy.RetrieverModel): ```python from deeplake import VectorStore llm = dspy.OpenAI(model="gpt-3.5-turbo") - deeplake_client = deeplake.Client() - retriever_model = DeeplakeRM("my_vectorstore_name", deeplake_client=deeplake_client) + deeplake_client = VectorStore + retriever_model = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client) dspy.settings.configure(lm=llm, rm=retriever_model) ``` Below is a code snippet that shows how to use Deep Lake in the forward() function of a module ```python - self.retrieve = DeeplakeRM("my_vectorstore_name", deeplake_client=deeplake_client, k=num_passages) + self.retrieve = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client, k=num_passages) ``` """ @@ -64,12 +55,18 @@ def __init__( deeplake_client: VectorStore, k: int = 3, ): + try: + from deeplake import VectorStore + except ImportError: + raise ImportError( + "The 'deeplake' extra is required to use DeepLakeRM. 
Install it with `pip install dspy-ai[deeplake]`" + ) self._deeplake_vectorstore_name = deeplake_vectorstore_name self._deeplake_client = deeplake_client super().__init__(k=k) - def embedding_function(texts, model="text-embedding-ada-002"): + def embedding_function(self, texts, model="text-embedding-ada-002"): if isinstance(texts, str): texts = [texts] @@ -115,4 +112,4 @@ def forward( sorted_passages = sorted( passages.items(), key=lambda x: x[1], reverse=True)[:k] - return dspy.Prediction(passages=[passage for passage, _ in sorted_passages]) \ No newline at end of file + return [dotdict({"long_text": p}) for p, _ in sorted_passages] \ No newline at end of file From 7b58fe604c83d6bc117e4006b076a499d163fbdd Mon Sep 17 00:00:00 2001 From: Aniruddha Mandal Date: Sun, 4 Feb 2024 07:28:00 +0530 Subject: [PATCH 05/39] [bug fix] Removed the type hint for VectorStore --- dspy/retrieve/deeplake_rm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/retrieve/deeplake_rm.py b/dspy/retrieve/deeplake_rm.py index 0435a4bc6..37b008ee7 100644 --- a/dspy/retrieve/deeplake_rm.py +++ b/dspy/retrieve/deeplake_rm.py @@ -2,7 +2,7 @@ Retriever model for deeplake """ -from typing import Optional, List, Union +from typing import Optional, List, Union, Type import openai import dspy from collections import defaultdict @@ -52,7 +52,7 @@ class DeeplakeRM(dspy.Retrieve): def __init__( self, deeplake_vectorstore_name: str, - deeplake_client: VectorStore, + deeplake_client, k: int = 3, ): try: From b95e1457189f29663d0e8684233d827daed04d62 Mon Sep 17 00:00:00 2001 From: quangpham Date: Wed, 7 Feb 2024 17:21:14 +0700 Subject: [PATCH 06/39] Allow HFModel to use CPU --- dsp/modules/hf.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/dsp/modules/hf.py b/dsp/modules/hf.py index c87ce8261..87ff4e98b 100644 --- a/dsp/modules/hf.py +++ b/dsp/modules/hf.py @@ -9,6 +9,7 @@ from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on import functools + def openai_to_hf(**kwargs): hf_kwargs = {} for k, v in kwargs.items(): @@ -53,28 +54,35 @@ def __init__(self, model: str, checkpoint: Optional[str] = None, is_client: bool raise ModuleNotFoundError( "You need to install Hugging Face transformers library to use HF models." 
) from exc - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.device = torch.device( + "cuda" if torch.cuda.is_available() else "cpu") try: - architecture = AutoConfig.from_pretrained(model).__dict__["architectures"][0] - self.encoder_decoder_model = ("ConditionalGeneration" in architecture) or ("T5WithLMHeadModel" in architecture) - self.decoder_only_model = ("CausalLM" in architecture) or ("GPT2LMHeadModel" in architecture) + architecture = AutoConfig.from_pretrained( + model).__dict__["architectures"][0] + self.encoder_decoder_model = ("ConditionalGeneration" in architecture) or ( + "T5WithLMHeadModel" in architecture) + self.decoder_only_model = ("CausalLM" in architecture) or ( + "GPT2LMHeadModel" in architecture) assert self.encoder_decoder_model or self.decoder_only_model, f"Unknown HuggingFace model class: {model}" - self.tokenizer = AutoTokenizer.from_pretrained(model if checkpoint is None else checkpoint) + self.tokenizer = AutoTokenizer.from_pretrained( + model if checkpoint is None else checkpoint) self.rationale = True AutoModelClass = AutoModelForSeq2SeqLM if self.encoder_decoder_model else AutoModelForCausalLM if checkpoint: # with open(os.path.join(checkpoint, '..', 'compiler_config.json'), 'r') as f: # config = json.load(f) - self.rationale = False #config['rationale'] + self.rationale = False # config['rationale'] # if config['peft']: # peft_config = PeftConfig.from_pretrained(checkpoint) # self.model = AutoModelClass.from_pretrained(peft_config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map=hf_device_map) # self.model = PeftModel.from_pretrained(self.model, checkpoint) # else: - self.model = AutoModelClass.from_pretrained(checkpoint).to("cuda") + self.model = AutoModelClass.from_pretrained( + checkpoint).to(self.device) else: - self.model = AutoModelClass.from_pretrained(model).to("cuda") + self.model = AutoModelClass.from_pretrained( + model).to(self.device) self.drop_prompt_from_output = False except ValueError: self.model = AutoModelForCausalLM.from_pretrained( From 1c7f7cc281209a3ef52c272d59b36b26350fe61f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20G=C3=B6hl?= <142921175+philippgoehl@users.noreply.github.com> Date: Wed, 7 Feb 2024 12:42:02 +0100 Subject: [PATCH 07/39] Fixed Typo - Update skycamp2023.ipynb --- skycamp2023.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skycamp2023.ipynb b/skycamp2023.ipynb index e0bde4157..11d375ade 100644 --- a/skycamp2023.ipynb +++ b/skycamp2023.ipynb @@ -193,7 +193,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's compile this using our six `train` examples. We will us the very simple `BootstrapFewShot` in DSPy." + "Now let's compile this using our six `train` examples. We will use the very simple `BootstrapFewShot` in DSPy." 
] }, { From a36c5287adb8fdc1be48c4295ccba30e75b303e9 Mon Sep 17 00:00:00 2001 From: chris levy Date: Wed, 7 Feb 2024 07:43:48 -0400 Subject: [PATCH 08/39] fix(datasets): relax datasets version constraint --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2367eb398..d05bdfcc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "regex~=2023.10.3", "ujson~=5.8.0", "tqdm~=4.66.1", - "datasets~=2.14.6", + "datasets~=2.14.6,<3.0.0", "requests~=2.31.0", "optuna~=3.4.0", ] From 0734c6105f1011a52b83eaacc799cdbfaf06a073 Mon Sep 17 00:00:00 2001 From: Daniel Jimenez <13410082+dannyoo@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:30:59 -0500 Subject: [PATCH 09/39] Update react.py --- dspy/predict/react.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspy/predict/react.py b/dspy/predict/react.py index d79b9b52f..d4b16703b 100644 --- a/dspy/predict/react.py +++ b/dspy/predict/react.py @@ -64,7 +64,7 @@ def act(self, output, hop): if action_name == 'Finish': return action_val - output[f"Observation_{hop+1}"] = self.tools[action_name](action_val).passages + output[f"Observation_{hop+1}"] = self.tools[action_name](action_val) except Exception as e: output[f"Observation_{hop+1}"] = "Failed to parse action. Bad formatting or incorrect action name." From 9fd61e058b7517f95fc2ca22ff2f2b5c73afa794 Mon Sep 17 00:00:00 2001 From: Connor Shorten Date: Thu, 8 Feb 2024 22:35:53 -0500 Subject: [PATCH 10/39] Allow custom text key to WeaviateRM - resolve issue #359 --- dspy/retrieve/weaviate_rm.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/dspy/retrieve/weaviate_rm.py b/dspy/retrieve/weaviate_rm.py index 0a82e2f5a..7a8f3f7d7 100644 --- a/dspy/retrieve/weaviate_rm.py +++ b/dspy/retrieve/weaviate_rm.py @@ -31,7 +31,9 @@ class WeaviateRM(dspy.Retrieve): llm = dspy.OpenAI(model="gpt-3.5-turbo") weaviate_client = weaviate.Client("your-path-here") - retriever_model = WeaviateRM("my_collection_name", weaviate_client=weaviate_client) + retriever_model = WeaviateRM(weaviate_collection_name="my_collection_name", + weaviate_collection_text_key="content", + weaviate_client=weaviate_client) dspy.settings.configure(lm=llm, rm=retriever_model) ``` @@ -44,11 +46,12 @@ class WeaviateRM(dspy.Retrieve): def __init__(self, weaviate_collection_name: str, weaviate_client: weaviate.Client, - k: int = 3 + k: int = 3, + weaviate_collection_text_key: Optional[str] = "content" ): self._weaviate_collection_name = weaviate_collection_name self._weaviate_client = weaviate_client - + self._weaviate_collection_text_key = weaviate_collection_text_key super().__init__(k=k) def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: @@ -71,13 +74,13 @@ def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> passages = [] for query in queries: results = self._weaviate_client.query\ - .get(self._weaviate_collection_name, ["content"])\ + .get(self._weaviate_collection_name, [self._weaviate_collection_text_key])\ .with_hybrid(query=query)\ .with_limit(k)\ .do() results = results["data"]["Get"][self._weaviate_collection_name] - parsed_results = [result["content"] for result in results] + parsed_results = [result[self._weaviate_collection_text_key] for result in results] passages.extend(dotdict({"long_text": d}) for d in parsed_results) return passages From f8a1253dd234deb5160f19e7f35d6f8aa975e5f7 Mon Sep 17 00:00:00 2001 From: J S 
<49557684+svilupp@users.noreply.github.com> Date: Fri, 9 Feb 2024 10:17:37 +0000 Subject: [PATCH 11/39] fix typos --- dspy/predict/predict.py | 2 +- dspy/primitives/python_interpreter.py | 10 ++--- dspy/teleprompt/random_search.py | 2 +- dspy/teleprompt/signature_opt.py | 2 +- dspy/teleprompt/signature_opt_bayesian.py | 4 +- dspy/teleprompt/teleprompt_optuna.py | 2 +- examples/knn.ipynb | 2 +- .../longformqa/longformqa_assertions.ipynb | 38 +++++++++---------- examples/tweets/compiling_langchain.ipynb | 2 +- intro.ipynb | 4 +- 10 files changed, 34 insertions(+), 34 deletions(-) diff --git a/dspy/predict/predict.py b/dspy/predict/predict.py index 5285f09fa..68d8c20ce 100644 --- a/dspy/predict/predict.py +++ b/dspy/predict/predict.py @@ -124,6 +124,6 @@ def __repr__(self): # TODO: get some defaults during init from the context window? # # TODO: FIXME: Hmm, I guess expected behavior is that contexts can -# affect exeuction. Well, we need to determine whether context dominates, __init__ demoninates, or forward dominates. +# affect execution. Well, we need to determine whether context dominates, __init__ demoninates, or forward dominates. # Generally, unless overwritten, we'd see n=None, temperature=None. # That will eventually mean we have to learn them. diff --git a/dspy/primitives/python_interpreter.py b/dspy/primitives/python_interpreter.py index c47da27f0..f05ec0111 100644 --- a/dspy/primitives/python_interpreter.py +++ b/dspy/primitives/python_interpreter.py @@ -43,7 +43,7 @@ class PythonInterpreter(): r"""A customized python interpreter to control the execution of LLM-generated codes. The interpreter makes sure the code can only execute functions given in action space and import white list. It also supports - fuzzy variable matching to reveive uncertain input variable name. + fuzzy variable matching to receive uncertain input variable name. .. highlight:: none @@ -116,9 +116,9 @@ def execute(self, code: str, state: Optional[Dict[str, Any]] = None, code (str): Generated python code to be executed. state (Optional[Dict[str, Any]], optional): External variables that may be used in the generated code. (default: :obj:`None`) - fuzz_state (Optional[Dict[str, Any]], optional): External varibles - that do not have certain varible names. The interpreter will - use fuzzy matching to access these varibales. For example, if + fuzz_state (Optional[Dict[str, Any]], optional): External variables + that do not have certain variable names. The interpreter will + use fuzzy matching to access these variables. For example, if :obj:`fuzz_state` has a variable :obj:`image`, the generated code can use :obj:`input_image` to access it. (default: :obj:`None`) @@ -577,7 +577,7 @@ def execute( Args: interpreter (PythonInterpreter, optional): interpreter to be used during code execution. (default: :obj:`None`) - user_variable (Optional[Dict[str, Any]]): varibales that can be + user_variable (Optional[Dict[str, Any]]): variables that can be used in the code, which applying fuzzy matching, such as images or documents. 
(default: :obj:`None`) diff --git a/dspy/teleprompt/random_search.py b/dspy/teleprompt/random_search.py index 04399d412..8605cd427 100644 --- a/dspy/teleprompt/random_search.py +++ b/dspy/teleprompt/random_search.py @@ -40,7 +40,7 @@ def __init__(self, metric, teacher_settings={}, max_bootstrapped_demos=4, max_la self.num_candidate_sets = num_candidate_programs # self.max_num_traces = 1 + int(max_bootstrapped_demos / 2.0 * self.num_candidate_sets) - # Semi-hacky way to get the parent class's _boostrap function to stop early. + # Semi-hacky way to get the parent class's _bootstrap function to stop early. # self.max_bootstrapped_demos = self.max_num_traces self.max_labeled_demos = max_labeled_demos diff --git a/dspy/teleprompt/signature_opt.py b/dspy/teleprompt/signature_opt.py index d0c2f2d69..4c047b3da 100644 --- a/dspy/teleprompt/signature_opt.py +++ b/dspy/teleprompt/signature_opt.py @@ -20,7 +20,7 @@ * prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (ie. dspy.settings.configure(lm=task_model)). * metric: The task metric used for optimization. * breadth: The number of new prompts to generate at each iteration. Default=10. -* depth: The number of times we should ask our prompt model to genereate new prompts, with the history of the past prompts as input. Default=3. +* depth: The number of times we should ask our prompt model to generate new prompts, with the history of the past prompts as input. Default=3. * init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.4. * verbose: Tells the method whether or not to print intermediate steps. * track_stats: Tells the method whether or not to track statistics about the optimization process. diff --git a/dspy/teleprompt/signature_opt_bayesian.py b/dspy/teleprompt/signature_opt_bayesian.py index 790af70e6..b39e7c7f3 100644 --- a/dspy/teleprompt/signature_opt_bayesian.py +++ b/dspy/teleprompt/signature_opt_bayesian.py @@ -81,7 +81,7 @@ class ObservationSummarizer(dspy.Signature): class DatasetDescriptor(dspy.Signature): ("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ + """Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""") examples = dspy.InputField(desc="Sample data points from the dataset") @@ -90,7 +90,7 @@ class DatasetDescriptor(dspy.Signature): class DatasetDescriptorWithPriorObservations(dspy.Signature): ("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ + """Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. 
Don't be afraid to be creative""") examples = dspy.InputField(desc="Sample data points from the dataset") diff --git a/dspy/teleprompt/teleprompt_optuna.py b/dspy/teleprompt/teleprompt_optuna.py index 5528e5fb0..4847ce939 100644 --- a/dspy/teleprompt/teleprompt_optuna.py +++ b/dspy/teleprompt/teleprompt_optuna.py @@ -23,7 +23,7 @@ def __init__(self, metric, teacher_settings={}, max_bootstrapped_demos=4, max_la self.num_candidate_sets = num_candidate_programs # self.max_num_traces = 1 + int(max_bootstrapped_demos / 2.0 * self.num_candidate_sets) - # Semi-hacky way to get the parent class's _boostrap function to stop early. + # Semi-hacky way to get the parent class's _bootstrap function to stop early. # self.max_bootstrapped_demos = self.max_num_traces self.max_labeled_demos = max_labeled_demos diff --git a/examples/knn.ipynb b/examples/knn.ipynb index e54814232..51a50654c 100644 --- a/examples/knn.ipynb +++ b/examples/knn.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# DSPy KNN few-shot example \n", - "This noteboook shows how KNN few-shot can be implemented with DSPy using the **KNNFewShot** teleprompter. To illustrate, we use the HotPotQA dataset. Please see [intro.ipynb](../intro.ipynb) for other example use cases of DSPy.\n" + "This notebook shows how KNN few-shot can be implemented with DSPy using the **KNNFewShot** teleprompter. To illustrate, we use the HotPotQA dataset. Please see [intro.ipynb](../intro.ipynb) for other example use cases of DSPy.\n" ] }, { diff --git a/examples/longformqa/longformqa_assertions.ipynb b/examples/longformqa/longformqa_assertions.ipynb index 75403383e..7dab608df 100644 --- a/examples/longformqa/longformqa_assertions.ipynb +++ b/examples/longformqa/longformqa_assertions.ipynb @@ -553,7 +553,7 @@ "\n", "We can also leverage **DSPy**'s advanced compiling features to enhance our program's performance. \n", "\n", - "For this, we utilize the `BootstrapFewShotWithRandomSearch` teleprompter, which automatically incorporates few-shot demonstrations and conducts a random search over a candidate set to output the best compiled program. We evaluate this over the `answer_correctness` metric as our ultimate goal is indeed to generate correct answers to the `HotPotQA` questions from the paragraphs, aiming to optimize both instrinsic and extrinsic metrics as a result. \n", + "For this, we utilize the `BootstrapFewShotWithRandomSearch` teleprompter, which automatically incorporates few-shot demonstrations and conducts a random search over a candidate set to output the best compiled program. We evaluate this over the `answer_correctness` metric as our ultimate goal is indeed to generate correct answers to the `HotPotQA` questions from the paragraphs, aiming to optimize both intrinsic and extrinsic metrics as a result. \n", "\n", "Let's evaluate this on the LongFormQA program first:" ] @@ -592,24 +592,24 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note** This pipeline on the other hand sets both the teacher and student with `LongFormQAWithAssertions()` to ensure the teacher correctly instructs the student with the right bootstrapped examples and the student has the chance to self-correct with **Assertions** for any examples that are still deemed incorrect." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "longformqa = LongFormQA()\n", - "teleprompter = BootstrapFewShotWithRandomSearch(metric = answer_correctness, max_bootstrapped_demos=2, num_candidate_programs=6)\n", - "cited_longformqa_student_teacher = teleprompter.compile(student=assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), teacher = assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), trainset=trainset, valset=devset[:100])\n", - "evaluate(cited_longformqa_student_teacher)" - ] - } + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note** This pipeline on the other hand sets both the teacher and student with `LongFormQAWithAssertions()` to ensure the teacher correctly instructs the student with the right bootstrapped examples and the student has the chance to self-correct with **Assertions** for any examples that are still deemed incorrect." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "longformqa = LongFormQA()\n", + "teleprompter = BootstrapFewShotWithRandomSearch(metric = answer_correctness, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "cited_longformqa_student_teacher = teleprompter.compile(student=assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), teacher = assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), trainset=trainset, valset=devset[:100])\n", + "evaluate(cited_longformqa_student_teacher)" + ] + } ], "metadata": { "kernelspec": { diff --git a/examples/tweets/compiling_langchain.ipynb b/examples/tweets/compiling_langchain.ipynb index e847df323..63a8d5525 100644 --- a/examples/tweets/compiling_langchain.ipynb +++ b/examples/tweets/compiling_langchain.ipynb @@ -117,7 +117,7 @@ "- **Input:** A factual **question**, which may be fairly complex.\n", "- **Output:** An engaging **tweet** that correctly answers the question from the retrieved info.\n", "\n", - "Let's use LangChain's expression langugage (LCEL) to illustrate this. Any prompt here will do, we will optimize the final prompt with DSPy.\n", + "Let's use LangChain's expression language (LCEL) to illustrate this. Any prompt here will do, we will optimize the final prompt with DSPy.\n", "\n", "Considering that, let's just keep it to the barebones: **Given {context}, answer the question {question} as a tweet.**" ] diff --git a/intro.ipynb b/intro.ipynb index 60bc0ebaf..bbdbc194f 100644 --- a/intro.ipynb +++ b/intro.ipynb @@ -130,7 +130,7 @@ "source": [ "### 2] Task Examples\n", "\n", - "**DSPy** accomodates a wide variety of applications and tasks. **In this intro notebook, we will work on the example task of multi-hop question answering (QA).**\n", + "**DSPy** accommodates a wide variety of applications and tasks. **In this intro notebook, we will work on the example task of multi-hop question answering (QA).**\n", "\n", "Other notebooks and tutorials will present different tasks. Now, let us load a tiny sample from the HotPotQA multi-hop dataset." ] @@ -1010,7 +1010,7 @@ "source": [ "##### Evaluating the Retrieval\n", "\n", - "It may also be instructive to look at the accuracy of retrieval. There are multiple ways to do this. Often, we can just check whether the rertieved passages contain the answer.\n", + "It may also be instructive to look at the accuracy of retrieval. 
There are multiple ways to do this. Often, we can just check whether the retrieved passages contain the answer.\n", "\n", "That said, since our dev set includes the gold titles that should be retrieved, we can just use these here." ] From 2c11aa0929080d289677882c5cb84c4d1e2986ed Mon Sep 17 00:00:00 2001 From: Patricio Cerda Mardini Date: Sun, 11 Feb 2024 02:35:05 -0300 Subject: [PATCH 12/39] fix: manually iterate kwargs['n'] in anyscale --- dsp/modules/hf_client.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dsp/modules/hf_client.py b/dsp/modules/hf_client.py index 9c14a4b29..8ce1156d7 100644 --- a/dsp/modules/hf_client.py +++ b/dsp/modules/hf_client.py @@ -293,6 +293,8 @@ def _generate(self, prompt, use_chat_api=False, **kwargs): print(f"resp_json:{resp_json}") print(f"Failed to parse JSON response: {e}") raise Exception("Received invalid JSON response from server") + + class Anyscale(HFModel): def __init__(self, model, **kwargs): super().__init__(model=model, is_client=True) @@ -337,14 +339,16 @@ def _generate(self, prompt, use_chat_api=False, **kwargs): headers = {"Authorization": f"Bearer {self.token}"} try: - with self.session.post(url, headers=headers, json=body) as resp: - resp_json = resp.json() - if use_chat_api: - completions = [resp_json.get('choices', [])[0].get('message', {}).get('content', "")] - else: - completions = [resp_json.get('choices', [])[0].get('text', "")] - response = {"prompt": prompt, "choices": [{"text": c} for c in completions]} - return response + completions = [] + for i in range(kwargs.get('n', 1)): + with self.session.post(url, headers=headers, json=body) as resp: + resp_json = resp.json() + if use_chat_api: + completions.extend([resp_json.get('choices', [])[0].get('message', {}).get('content', "")]) + else: + completions.extend([resp_json.get('choices', [])[0].get('text', "")]) + response = {"prompt": prompt, "choices": [{"text": c} for c in completions]} + return response except Exception as e: print(f"Failed to parse JSON response: {e}") raise Exception("Received invalid JSON response from server") From 44329bbf5d425c0fb8702d10c7996cc190f82d88 Mon Sep 17 00:00:00 2001 From: Arnav Singhvi Date: Sat, 10 Feb 2024 22:45:17 -0800 Subject: [PATCH 13/39] documentation and QuizGen+TweetGen notebooks --- docs/assertions.md | 253 ++++++++ .../longformqa/longformqa_assertions.ipynb | 40 +- examples/quiz/quiz_assertions.ipynb | 495 ++++++++++++++++ examples/tweets/tweets_assertions.ipynb | 544 ++++++++++++++++++ 4 files changed, 1313 insertions(+), 19 deletions(-) create mode 100644 docs/assertions.md create mode 100644 examples/quiz/quiz_assertions.ipynb create mode 100644 examples/tweets/tweets_assertions.ipynb diff --git a/docs/assertions.md b/docs/assertions.md new file mode 100644 index 000000000..99da7de07 --- /dev/null +++ b/docs/assertions.md @@ -0,0 +1,253 @@ +# DSPy Assertions +## Introduction + +Language models (LMs) have transformed how we interact with machine learning, offering vast capabilities in natural language understanding and generation. However, ensuring these models adhere to domain-specific constraints remains a challenge. Despite the growth of techniques like fine-tuning or “prompt engineering”, these approaches are extremely tedious and rely on heavy, manual hand-waving to guide the LMs in adhering to specific constraints. Even DSPy's modularity of programming prompting pipelines lacks mechanisms to effectively and automatically enforce these constraints. 
+ +To address this, we introduce DSPy Assertions, a feature within the DSPy framework designed to automate the enforcement of computational constraints on LMs. DSPy Assertions empower developers to guide LMs towards desired outcomes with minimal manual intervention, enhancing the reliability, predictability, and correctness of LM outputs. + +### dspy.Assert and dspy.Suggest API + +We introduce two primary constructs within DSPy Assertions: + +- **`dspy.Assert`**: + - **Parameters**: + - `constraint (bool)`: Outcome of Python-defined boolean validation check. + - `msg (Optional[str])`: User-defined error message providing feedback or correction guidance. + - `backtrack (Optional[module])`: Specifies target module for retry attempts upon constraint failure. + - **Behavior**: Initiates retry upon failure, dynamically adjusting the pipeline's execution. If failures persist, it halts execution and raises a `dspy.AssertionError`. + +- **`dspy.Suggest`**: + - **Parameters**: Similar to `dspy.Assert`. + - **Behavior**: Encourages self-refinement through retries without enforcing hard stops. Logs failures after maximum backtracking attempts and continues execution. + +- **dspy.Assert vs. Python Assertions**: Unlike conventional Python `assert` statements that terminate the program upon failure, `dspy.Assert` conducts a sophisticated retry mechanism, allowing the pipeline to adjust. + +Specifically, when a constraint is not met: + +- Backtracking Mechanism: An under-the-hood backtracking is initiated, offering the model a chance to self-refine and proceed, which is done through +- Dynamic Signature Modification: internally modifying your DSPy program’s Signature by adding the following fields: + - Past Output: your model's past output that did not pass the validation_fn + - Instruction: your user-defined feedback message on what went wrong and what possibly to fix + +If the error continues past the `max_backtracking_attempts`, then `dspy.Assert` will halt the pipeline execution, altering you with an `dspy.AssertionError`. This ensures your program doesn't continue executing with “bad” LM behavior and immediately highlights sample failure outputs for user assessment. + +- **dspy.Suggest vs. dspy.Assert**: `dspy.Suggest` on the other hand offers a softer approach. It maintains the same retry backtracking as `dspy.Assert` but instead serves as a gentle nudger. If the model outputs cannot pass the model constraints after the `max_backtracking_attempts`, `dspy.Suggest` will log the persistent failure and continue execution of the program on the rest of the data. This ensures the LM pipeline works in a "best-effort" manner without halting execution. + +- **`dspy.Suggestions`** are best utilized as "helpers" during the evaluation phase, offering guidance and potential corrections without halting the pipeline. +- **`dspy.Assertions`** are recommended during the development stage as "checkers" to ensure the LM behaves as expected, providing a robust mechanism for identifying and addressing errors early in the development cycle. + + +## Use Case: Including Assertions in DSPy Programs + +We start with using an example of a multi-hop QA SimplifiedBaleen pipeline as defined in the intro walkthrough. 
+ +```python +class SimplifiedBaleen(dspy.Module): + def __init__(self, passages_per_hop=2, max_hops=2): + super().__init__() + + self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] + self.retrieve = dspy.Retrieve(k=passages_per_hop) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + self.max_hops = max_hops + + def forward(self, question): + context = [] + prev_queries = [question] + + for hop in range(self.max_hops): + query = self.generate_query[hop](context=context, question=question).query + prev_queries.append(query) + passages = self.retrieve(query).passages + context = deduplicate(context + passages) + + pred = self.generate_answer(context=context, question=question) + pred = dspy.Prediction(context=context, answer=pred.answer) + return pred + +baleen = SimplifiedBaleen() + +baleen(question = "Which award did Gary Zukav's first book receive?") +``` + +To include DSPy Assertions, we simply define our validation functions and declare our assertions following the respective model generation. + +For this use case, suppose we want to impose the following constraints: + 1. Length - each query should be less than 100 characters + 2. Uniqueness - each generated query should differ from previously-generated queries. + +We can define these validation checks as boolean functions: + +```python +#simplistic boolean check for query length +len(query) <= 100 + +#Python function for validating distinct queries +def validate_query_distinction_local(previous_queries, query): + """check if query is distinct from previous queries""" + if previous_queries == []: + return True + if dspy.evaluate.answer_exact_match_str(query, previous_queries, frac=0.8): + return False + return True +``` + +We can declare these validation checks through `dspy.Suggest` statements (as we want to test the program in a best-effort demonstration). We want to keep these after the query generation `query = self.generate_query[hop](context=context, question=question).query`. + +```python +dspy.Suggest( + len(query) <= 100, + "Query should be short and less than 100 characters", +) + +dspy.Suggest( + validate_query_distinction_local(prev_queries, query), + "Query should be distinct from: " + + "; ".join(f"{i+1}) {q}" for i, q in enumerate(prev_queries)), +) +``` + +It is recommended to define a program with assertions separately than your original program if you are doing comparative evaluation for the effect of assertions. If not, feel free to set Assertions away! 
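The same checks can also be declared as hard constraints. As a minimal sketch (reusing the `query` variable and the length check from the generation step above), the `dspy.Suggest` call simply becomes a `dspy.Assert`, which backtracks the same way but raises `dspy.AssertionError` once the retry budget is exhausted:

```python
# Hard-constraint variant of the length check shown above (a sketch,
# assuming `query` holds the output of the query-generation step).
dspy.Assert(
    len(query) <= 100,
    "Query should be short and less than 100 characters",
)
```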
+ +Let's take a look at how the SimplifiedBaleen program will look with Assertions included: + +```python +class SimplifiedBaleenAssertions(dspy.Module): + def __init__(self, passages_per_hop=2, max_hops=2): + super().__init__() + self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] + self.retrieve = dspy.Retrieve(k=passages_per_hop) + self.generate_answer = dspy.ChainOfThought(GenerateAnswer) + self.max_hops = max_hops + + def forward(self, question): + context = [] + prev_queries = [question] + + for hop in range(self.max_hops): + query = self.generate_query[hop](context=context, question=question).query + + dspy.Suggest( + len(query) <= 100, + "Query should be short and less than 100 characters", + ) + + dspy.Suggest( + validate_query_distinction_local(prev_queries, query), + "Query should be distinct from: " + + "; ".join(f"{i+1}) {q}" for i, q in enumerate(prev_queries)), + ) + + prev_queries.append(query) + passages = self.retrieve(query).passages + context = deduplicate(context + passages) + + if all_queries_distinct(prev_queries): + self.passed_suggestions += 1 + + pred = self.generate_answer(context=context, question=question) + pred = dspy.Prediction(context=context, answer=pred.answer) + return pred +``` + +Now calling programs with DSPy Assertions requires one last step, and that is transforming the program to wrap it with internal assertions backtracking and Retry logic. + +```python +from dspy.primitives.assertions import assert_transform_module, backtrack_handler + +baleen_with_assertions = assert_transform_module(SimplifiedBaleenAssertions().map_named_predictors(Retry), backtrack_handler) +``` + +Alternatively, you can also directly call `activate_assertions` on the program with `dspy.Assert/Suggest` statements: + +```python +baleen_with_assertions = SimplifiedBaleenAssertions().activate_assertions() +``` + +Now let's take a look at the internal LM backtracking by inspecting the history of the LM query generations. Here we see that when a query fails to pass the validation check of being less than 100 characters, its internal `GenerateSearchQuery` signature is dynamically modified during the backtracking+Retry process to include the past query and the corresponding user-defined instruction: `"Query should be short and less than 100 characters"`. + + +``` +Write a simple search query that will help answer a complex question. + +--- + +Follow the following format. + +Context: may contain relevant facts + +Question: ${question} + +Reasoning: Let's think step by step in order to ${produce the query}. We ... + +Query: ${query} + +--- + +Context: +[1] «Kerry Condon | Kerry Condon (born 4 January 1983) is [...]» +[2] «Corona Riccardo | Corona Riccardo (c. 1878October 15, 1917) was [...]» + +Question: Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ? + +Reasoning: Let's think step by step in order to find the answer to this question. First, we need to identify the actress who played Ophelia in a Royal Shakespeare Company production of "Hamlet." Then, we need to find out if this actress also acted in the short film "The Shore." + +Query: "actress who played Ophelia in Royal Shakespeare Company production of Hamlet" + "actress in short film The Shore" + + + +Write a simple search query that will help answer a complex question. + +--- + +Follow the following format. 
+ +Context: may contain relevant facts + +Question: ${question} + +Past Query: past output with errors + +Instructions: Some instructions you must satisfy + +Query: ${query} + +--- + +Context: +[1] «Kerry Condon | Kerry Condon (born 4 January 1983) is an Irish television and film actress, best known for her role as Octavia of the Julii in the HBO/BBC series "Rome," as Stacey Ehrmantraut in AMC's "Better Call Saul" and as the voice of F.R.I.D.A.Y. in various films in the Marvel Cinematic Universe. She is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet."» +[2] «Corona Riccardo | Corona Riccardo (c. 1878October 15, 1917) was an Italian born American actress who had a brief Broadway stage career before leaving to become a wife and mother. Born in Naples she came to acting in 1894 playing a Mexican girl in a play at the Empire Theatre. Wilson Barrett engaged her for a role in his play "The Sign of the Cross" which he took on tour of the United States. Riccardo played the role of Ancaria and later played Berenice in the same play. Robert B. Mantell in 1898 who struck by her beauty also cast her in two Shakespeare plays, "Romeo and Juliet" and "Othello". Author Lewis Strang writing in 1899 said Riccardo was the most promising actress in America at the time. Towards the end of 1898 Mantell chose her for another Shakespeare part, Ophelia im Hamlet. Afterwards she was due to join Augustin Daly's Theatre Company but Daly died in 1899. In 1899 she gained her biggest fame by playing Iras in the first stage production of Ben-Hur.» + +Question: Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ? + +Past Query: "actress who played Ophelia in Royal Shakespeare Company production of Hamlet" + "actress in short film The Shore" + +Instructions: Query should be short and less than 100 characters + +Query: "actress Ophelia RSC Hamlet" + "actress The Shore" + +``` + + +## Assertion-Driven Optimizations + +DSPy Assertions work with optimizations that DSPy offers, particularly with `BootstrapFewShotWithRandomSearch`, including the following settings: + +- Compilation with Assertions + This includes assertion-driven example bootstrapping and counterexample bootstrapping during compilation. The teacher model for bootstrapping few-shot demonstrations can make use of DSPy Assertions to offer robust bootstrapped examples for the student model to learn from during inference. +- Compilation + Inference with Assertions + -This includes assertion-driven optimizations in both compilation and inference. Now the teacher model offers assertion-driven examples but the student can further optimize with assertions of its own during inference time. 
+```python +teleprompter = BootstrapFewShotWithRandomSearch( + metric=validate_context_and_answer_and_hops, + max_bootstrapped_demos=max_bootstrapped_demos, + num_candidate_programs=6, +) + +#Compilation with Assertions +compiled_with_assertions_baleen = teleprompter.compile(student = baleen, teacher = baleen_with_assertions, trainset = trainset, valset = devset) + +#Compilation + Inference with Assertions +compiled_baleen_with_assertions = teleprompter.compile(student=baleen_with_assertions, teacher = baleen_with_assertions, trainset=trainset, valset=devset) + +``` diff --git a/examples/longformqa/longformqa_assertions.ipynb b/examples/longformqa/longformqa_assertions.ipynb index 75403383e..4130a9db7 100644 --- a/examples/longformqa/longformqa_assertions.ipynb +++ b/examples/longformqa/longformqa_assertions.ipynb @@ -6,7 +6,9 @@ "source": [ "\"DSPy7\n", "\n", - "## **DSPy Assertions**: Asserting Computational Constraints on Foundation Models" + "## **DSPy Assertions**: Asserting Computational Constraints on Foundation \n", + "\n", + "### **LongFormQA**: Generating long-form length responses to answer questions" ] }, { @@ -592,24 +594,24 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note** This pipeline on the other hand sets both the teacher and student with `LongFormQAWithAssertions()` to ensure the teacher correctly instructs the student with the right bootstrapped examples and the student has the chance to self-correct with **Assertions** for any examples that are still deemed incorrect." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "longformqa = LongFormQA()\n", - "teleprompter = BootstrapFewShotWithRandomSearch(metric = answer_correctness, max_bootstrapped_demos=2, num_candidate_programs=6)\n", - "cited_longformqa_student_teacher = teleprompter.compile(student=assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), teacher = assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), trainset=trainset, valset=devset[:100])\n", - "evaluate(cited_longformqa_student_teacher)" - ] - } + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note** This pipeline on the other hand sets both the teacher and student with `LongFormQAWithAssertions()` to ensure the teacher correctly instructs the student with the right bootstrapped examples and the student has the chance to self-correct with **Assertions** for any examples that are still deemed incorrect." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "longformqa = LongFormQA()\n", + "teleprompter = BootstrapFewShotWithRandomSearch(metric = answer_correctness, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "cited_longformqa_student_teacher = teleprompter.compile(student=assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), teacher = assert_transform_module(LongFormQAWithAssertions().map_named_predictors(Retry), backtrack_handler), trainset=trainset, valset=devset[:100])\n", + "evaluate(cited_longformqa_student_teacher)" + ] + } ], "metadata": { "kernelspec": { diff --git a/examples/quiz/quiz_assertions.ipynb b/examples/quiz/quiz_assertions.ipynb new file mode 100644 index 000000000..8cdbdbcc9 --- /dev/null +++ b/examples/quiz/quiz_assertions.ipynb @@ -0,0 +1,495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## **DSPy Assertions**: Asserting Computational Constraints on Foundation Models\n", + "\n", + "### **QuizGen**: Generating multiple choice quiz questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/quiz/quiz_assertions.ipynb)\n", + "\n", + "\n", + "This notebook highlights an example of [**DSPy Assertions**](../../docs/assertions.md), allowing for declaration of computational constraints within DSPy programs. \n", + "\n", + "\n", + "This notebook builds upon the foundational concepts of the **DSPy** framework. Prerequisites of following this notebook is having gone through the [DSPy tutorial](../../intro.ipynb), the [**DSPy Assertions documentation**](../../docs/assertions.md) and the introductory DSPy Assertions [tutorial on LongFormQA](../longformqa/longformqa_assertions.ipynb).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!git clone https://huggingface.co/arnavs11/DSPy_QuizGen_Cache\n", + "%cd DSPy_QuizGen_Cache/\n", + "!git checkout master\n", + "%cd ..\n", + "import os\n", + "repo_clone_path = '/content/DSPy_QuizGen_Cache'\n", + "\n", + "# Set up the cache for this notebook\n", + "os.environ[\"DSP_NOTEBOOK_CACHEDIR\"] = repo_clone_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "import os\n", + "import regex as re\n", + "import json\n", + "\n", + "try: # When on google Colab, let's clone the notebook so we download the cache.\n", + " import google.colab\n", + " repo_path = 'dspy'\n", + " \n", + " !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path\n", + "except:\n", + " repo_path = '.'\n", + "\n", + "if repo_path not in sys.path:\n", + " sys.path.append(repo_path)\n", + "\n", + "\n", + "import pkg_resources # Install the package if it's not installed\n", + "if not \"dspy-ai\" in {pkg.key for pkg in pkg_resources.working_set}:\n", + " !pip install -U pip\n", + " !pip install dspy-ai\n", + " !pip install openai~=0.28.1\n", + " !pip install -e $repo_path\n", + "\n", + "import dspy\n", + "from dspy.predict import Retry\n", + "from dspy.datasets import HotPotQA\n", + "from dspy.teleprompt import BootstrapFewShotWithRandomSearch\n", + "from dspy.evaluate.evaluate import Evaluate\n", + "from dspy.primitives.assertions import 
assert_transform_module, backtrack_handler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')\n", + "dspy.settings.configure(rm=colbertv2_wiki17_abstracts)\n", + "turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=500)\n", + "dspy.settings.configure(lm=turbo, trace=[], temperature=0.7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = HotPotQA(train_seed=1, train_size=300, eval_seed=2023, dev_size=300, test_size=0, keep_details=True)\n", + "trainset = [x.with_inputs('question', 'answer') for x in dataset.train]\n", + "devset = [x.with_inputs('question', 'answer') for x in dataset.dev]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3] QuizGen\n", + "\n", + "Let's introduce a new task: QuizGen. \n", + "\n", + "QuizGen takes HotPotQA data points and turns them into multiple choice quiz questions with the corresponding options. Each set of options for the question is produced in a JSON key-value pair format. For this case, we specify the generation of 4 choices." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this program, we aim to generate quiz choices that adhere to the following guidelines:\n", + "1. The generated choices are in a JSON format.\n", + "2. The generated choices include the correct answer.\n", + "3. The generated choices include plausible distractor options besides the correct answer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GenerateAnswerChoices(dspy.Signature):\n", + " \"\"\"Generate answer choices in JSON format that include the correct answer and plausible distractors for the specified question.\"\"\"\n", + " question = dspy.InputField()\n", + " correct_answer = dspy.InputField()\n", + " number_of_choices = dspy.InputField()\n", + " answer_choices = dspy.OutputField(desc='JSON key-value pairs')\n", + "\n", + "class QuizAnswerGenerator(dspy.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.generate_choices = dspy.ChainOfThought(GenerateAnswerChoices)\n", + "\n", + " def forward(self, question, answer):\n", + " choices = self.generate_choices(question=question, correct_answer=answer, number_of_choices=number_of_choices).answer_choices\n", + " return dspy.Prediction(choices = choices)\n", + "\n", + "number_of_choices = '4'\n", + "quiz_generator = QuizAnswerGenerator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4] Evaluation - Intrinsic and Extrinsic\n", + "\n", + "#### Intrinsic Metrics: passing internal computational constraints is the goal \n", + "\n", + "**Valid Formatting** - The outputted answer choices should be in JSON format which is verified after parsing the key-value pairs.\n", + "\n", + "**Correct Answer Inclusion** - This is a general check to ensure the generated quiz choices actually include the correct answer to the question.\n", + "\n", + "**Plausible Distractors** - This validation is to check that the generated choices include distractor answer options that are reasonable options as answers to the question. 
We define and call another **DSPy** program: ``Predict`` on ``AssessQuizChoices``, relying on the same LM to answer the question: `\"Are the distractors in the answer choices plausible and not easily identifiable as incorrect?\"`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def format_checker(choice_string):\n", + " try:\n", + " choices = json.loads(choice_string)\n", + " if isinstance(choices, dict) and all(isinstance(key, str) and isinstance(value, str) for key, value in choices.items()):\n", + " return True\n", + " except json.JSONDecodeError:\n", + " return False\n", + "\n", + " return False\n", + "\n", + "def is_correct_answer_included(correct_answer, generated_choices):\n", + " try:\n", + " choices_dict = json.loads(generated_choices)\n", + " return correct_answer in choices_dict.values()\n", + " except json.JSONDecodeError:\n", + " return False\n", + "\n", + "def is_plausibility_yes(assessment_answer):\n", + " \"\"\"Check if the first word of the assessment answer is 'yes'.\"\"\"\n", + " return assessment_answer.split()[0].lower() == 'yes'\n", + " \n", + "class AssessQuizChoices(dspy.Signature):\n", + " \"\"\"Assess the quality of quiz answer choices along specified dimensions.\"\"\"\n", + " \n", + " question = dspy.InputField()\n", + " answer_choices = dspy.InputField()\n", + " assessment_question = dspy.InputField()\n", + " assessment_answer = dspy.OutputField(desc=\"Yes or No\")\n", + " \n", + "def format_valid_metric(gold, pred, trace=None):\n", + " generated_choices = pred.choices\n", + " format_valid = format_checker(generated_choices)\n", + " score = format_valid\n", + " return score\n", + "\n", + "def is_correct_metric(gold, pred, trace=None):\n", + " correct_answer, generated_choices = gold.answer, pred.choices\n", + " correct_included = is_correct_answer_included(correct_answer, generated_choices)\n", + " score = correct_included\n", + " return score\n", + "\n", + "def plausibility_metric(gold, pred, trace=None):\n", + " question, generated_choices = gold.question, pred.choices\n", + " plausibility_question = \"Are the distractors in the answer choices plausible and not easily identifiable as incorrect?\"\n", + " plausibility_assessment = dspy.Predict(AssessQuizChoices)(question=question, answer_choices=generated_choices, assessment_question=plausibility_question)\n", + " plausibility_result = plausibility_assessment.assessment_answer.split()[0].lower() == 'yes'\n", + " score = plausibility_result\n", + " return score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extrinsic Metrics: Assess the overall quality and effectiveness of generated output on downstream task\n", + "\n", + "The extrinsic metric is defined as the overall quality of the generated quiz choices and is evaluated over a composite metric, accounting for these constraints.\n", + "\n", + "The composite metric maintains the core intrinsic metrics required for producing a valid set of quiz choices in validating valid formatting and correct answere icnlusion, and the overall composite metric returns an averaged score over the 3 intrinsic metrics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def overall_metric(gold, pred, trace=None):\n", + " question, correct_answer, generated_choices = gold.question, gold.answer, pred.choices\n", + " format_valid = format_checker(generated_choices)\n", + " correct_included = is_correct_answer_included(correct_answer, generated_choices)\n", + " plausibility_question = \"Are the distractors in the answer choices plausible and not easily identifiable as incorrect?\"\n", + " plausibility_assessment = dspy.Predict(AssessQuizChoices)(question=question, answer_choices=generated_choices, assessment_question=plausibility_question)\n", + " plausibility_result = plausibility_assessment.assessment_answer.split()[0].lower() == 'yes'\n", + " score = (format_valid + correct_included + plausibility_result) / 3.0 if correct_included and format_valid else 0\n", + " return score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We hence define the evaluation as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = [format_valid_metric, is_correct_metric, plausibility_metric, overall_metric]\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(quiz_generator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at an example quiz choice generation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example = devset[38]\n", + "quiz_choices = quiz_generator(question=example.question, answer = example.answer)\n", + "print(f'Generated Quiz Choices: ', quiz_choices.choices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset[38:39], num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(quiz_generator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that the generated quiz choices maintain valid JSON formatting and do include the correct answer `\"Exon\"`. However, when looking closely at the other answer choices, we see that the options are not real/common abbreviations of universities, leaving them as ineffective distractor choices. \n", + "\n", + "Let's take a look at how we can integrate DSPy Assertions and impose constraints to produce better answer choices." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5] Introducing Assertions: QuizAnswerGeneratorWithAssertions\n", + "Let's include assertions that simply reiterate our computational constraints within DSPy Assertion semantics. \n", + "\n", + "In the first **Assertion**, we check whether the generated quiz choices are in JSON format and, if not, assert: **\"The format of the answer choices should be in JSON format. Please revise accordingly.\"**\n", + "\n", + "We also check whether the set of quiz choices includes the correct answer and, if not, provide the feedback message: **\"The answer choices do not include the correct answer to the question. 
Please revise accordingly.\"**\n", + "\n", + "Lastly, we assess if the plausible distractor choices are indeed good distractor options and if not, assert: **\"The answer choices are not plausible distractors or are too easily identifiable as incorrect. Please revise to provide more challenging and plausible distractors.\"**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class QuizAnswerGeneratorWithAssertions(dspy.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.generate_choices = dspy.ChainOfThought(GenerateAnswerChoices)\n", + "\n", + " def forward(self, question, answer):\n", + " choice_string = self.generate_choices(question=question, correct_answer=answer, number_of_choices=number_of_choices).answer_choices\n", + " dspy.Suggest(format_checker(choice_string), \"The format of the answer choices should be in JSON format. Please revise accordingly.\", target_module=GenerateAnswerChoices)\n", + " dspy.Suggest(is_correct_answer_included(answer, choice_string), \"The answer choices do not include the correct answer to the question. Please revise accordingly.\", target_module=GenerateAnswerChoices)\n", + " plausibility_question = \"Are the distractors in the answer choices plausible and not easily identifiable as incorrect?\"\n", + " plausibility_assessment = dspy.Predict(AssessQuizChoices)(question=question, answer_choices=choice_string, assessment_question=plausibility_question)\n", + " dspy.Suggest(is_plausibility_yes(plausibility_assessment.assessment_answer), \"The answer choices are not plausible distractors or are too easily identifiable as incorrect. Please revise to provide more challenging and plausible distractors.\", target_module=GenerateAnswerChoices)\n", + " return dspy.Prediction(choices = choice_string)\n", + "\n", + "number_of_choices = '4'\n", + "quiz_generator_with_assertions = assert_transform_module(QuizAnswerGeneratorWithAssertions().map_named_predictors(Retry), backtrack_handler) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's evaluate the `QuizAnswerGeneratorWithAssertions` now over the devset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = [format_valid_metric, is_correct_metric, plausibility_metric, overall_metric]\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(quiz_generator_with_assertions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's take a look at how our generated set of quiz choices has improved with the addition of assertions." 
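As a side note, the backtracking budget used by `backtrack_handler` can be adjusted when wrapping the program. A small sketch, assuming `QuizAnswerGeneratorWithAssertions` is defined as in the cell above, that allows only a single retry per failed suggestion (mirroring the `functools.partial` pattern shown in the DSPy Assertions documentation later in this patch series):

```python
import functools

from dspy.predict import Retry
from dspy.primitives.assertions import assert_transform_module, backtrack_handler

# Same wrapping as in the cell above, but allowing only one backtracking attempt
# per failed Suggest instead of the default budget.
quiz_generator_single_retry = assert_transform_module(
    QuizAnswerGeneratorWithAssertions().map_named_predictors(Retry),
    functools.partial(backtrack_handler, max_backtracks=1),
)
```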
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example = devset[38]\n", + "quiz_choices = quiz_generator_with_assertions(question=example.question, answer = example.answer)\n", + "print(f'Generated Quiz Choices: ', quiz_choices.choices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset[38:39], num_threads=1, display_progress=True, display_table=30)\n", + " evaluate(quiz_generator_with_assertions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that the quiz choices follow all of our constraints!\n", + "\n", + "While maintaining the JSON formatting and including the correct answer as before, we now see that the answer choices are much more relevant to the question. Abbreviations like `\"Oxon\", \"Camb\", \"Lond\"` correspond to the widely-known universities of Oxford, Cambridge, and London, which is fairly discernible from the abbreviations even if one does not know this information beforehand. This makes these choices much more plausible distractors, forcing the test-taker to rely on specific context about the college involved in the project. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6] Compilation With Assertions\n", + "\n", + "We can leverage **DSPy**'s `BootstrapFewShotWithRandomSearch` optimizer to automatically generate few-shot demonstrations and conduct a random search over the candidates to output the best compiled program. We evaluate this over the `overall_metric` composite metric. \n", + "\n", + "We can first evaluate this on `QuizAnswerGenerator` to see how compilation performs without the inclusion of assertions. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_quiz_generator = teleprompter.compile(student = quiz_generator, teacher = quiz_generator, trainset=trainset, valset=devset[:100])\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_quiz_generator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we test the compilation on 2 settings with assertions:\n", + "\n", + "**Compilation with Assertions**: assertion-driven example bootstrapping and counterexample bootstrapping during compilation. Teacher has assertions while the student does not, as the student learns from the teacher's assertion-driven bootstrapped examples. \n", + "\n", + "**Compilation + Inference with Assertions**: assertion-driven optimizations for both the teacher and student to offer enhanced assertion-driven outputs during both compilation and inference."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_with_assertions_quiz_generator = teleprompter.compile(student=quiz_generator, teacher = quiz_generator_with_assertions, trainset=trainset, valset=devset[:100])\n", + "\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_with_assertions_quiz_generator)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_quiz_generator_with_assertions = teleprompter.compile(student=quiz_generator_with_assertions, teacher = quiz_generator_with_assertions, trainset=trainset, valset=devset[:100])\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_quiz_generator_with_assertions)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dspy_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/tweets/tweets_assertions.ipynb b/examples/tweets/tweets_assertions.ipynb new file mode 100644 index 000000000..91dffc47b --- /dev/null +++ b/examples/tweets/tweets_assertions.ipynb @@ -0,0 +1,544 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"DSPy7\n", + "\n", + "## **DSPy Assertions**: Asserting Computational Constraints on Foundation Models\n", + "\n", + "### **TweetGen**: Generating tweets to answer questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/tweets/tweets_assertions.ipynb)\n", + "\n", + "\n", + "This notebook highlights an example of [**DSPy Assertions**](../../docs/assertions.md), allowing for declaration of computational constraints within DSPy programs. \n", + "\n", + "\n", + "This notebook builds upon the foundational concepts of the **DSPy** framework. 
Prerequisites of following this notebook is having gone through the [DSPy tutorial](../../intro.ipynb), the [**DSPy Assertions documentation**](../../docs/assertions.md) and the introductory DSPy Assertions [tutorial on LongFormQA](../longformqa/longformqa_assertions.ipynb).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!git clone https://huggingface.co/arnavs11/DSPy_TweetGen_Cache\n", + "%cd DSPy_TweetGen_Cache/\n", + "!git checkout master\n", + "%cd ..\n", + "import os\n", + "repo_clone_path = '/content/DSPy_TweetGen_Cache'\n", + "\n", + "# Set up the cache for this notebook\n", + "os.environ[\"DSP_NOTEBOOK_CACHEDIR\"] = repo_clone_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "import os\n", + "import regex as re\n", + "import json\n", + "\n", + "try: # When on google Colab, let's clone the notebook so we download the cache.\n", + " import google.colab\n", + " repo_path = 'dspy'\n", + " \n", + " !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path\n", + "except:\n", + " repo_path = '.'\n", + "\n", + "if repo_path not in sys.path:\n", + " sys.path.append(repo_path)\n", + "\n", + "\n", + "import pkg_resources # Install the package if it's not installed\n", + "if not \"dspy-ai\" in {pkg.key for pkg in pkg_resources.working_set}:\n", + " !pip install -U pip\n", + " !pip install dspy-ai\n", + " !pip install openai~=0.28.1\n", + " !pip install -e $repo_path\n", + "\n", + "import dspy\n", + "from dspy.predict import Retry\n", + "from dspy.datasets import HotPotQA\n", + "from dspy.teleprompt import BootstrapFewShotWithRandomSearch\n", + "from dsp.utils import deduplicate\n", + "from dspy.evaluate.evaluate import Evaluate\n", + "from dspy.primitives.assertions import assert_transform_module, backtrack_handler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')\n", + "dspy.settings.configure(rm=colbertv2_wiki17_abstracts)\n", + "turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=500)\n", + "dspy.settings.configure(lm=turbo, trace=[], temperature=0.7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = HotPotQA(train_seed=1, train_size=300, eval_seed=2023, dev_size=300, test_size=0, keep_details=True)\n", + "trainset = [x.with_inputs('question', 'answer') for x in dataset.train]\n", + "devset = [x.with_inputs('question', 'answer') for x in dataset.dev]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3] TweetGen\n", + "\n", + "Let's introduce a new task: TweetGen. We extend the `Multi-Hop QA` program, but now aim to present the answer generation in the form of a tweet. \n", + "\n", + "The `Tweeter` module captures the iterative multi-hop generation process from `Multi-Hop QA` in query generation, passage retrieval, and context assembly. The `GenerateTweet` layer now utilizes the context alongside the question to generate an tweet that effectively answers the question." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this program, we aim to generate tweets that adhere to the following guidelines:\n", + "1. The tweet has no hashtags. \n", + "2. 
The tweet includes the correct answer\n", + "3. The tweet is within a character limit. \n", + "4. The tweet is engaging\n", + "5. The tweet is faithful" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GenerateSearchQuery(dspy.Signature):\n", + " \"\"\"Write a simple search query that will help answer a complex question.\"\"\"\n", + " context = dspy.InputField(desc=\"may contain relevant facts\")\n", + " question = dspy.InputField()\n", + " query = dspy.OutputField()\n", + "\n", + "class GenerateTweet(dspy.Signature):\n", + " \"\"\"Generate an engaging tweet that effectively answers a question staying faithful to the context, is less than 280 characters, and has no hashtags.\"\"\"\n", + " question = dspy.InputField()\n", + " context = dspy.InputField(desc=\"may contain relevant facts\")\n", + " tweet = dspy.OutputField()\n", + "\n", + "class Tweeter(dspy.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.generate_tweet = dspy.ChainOfThought(GenerateTweet)\n", + "\n", + " def forward(self, question, answer):\n", + " context = []\n", + " max_hops=2\n", + " passages_per_hop=3\n", + " generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)]\n", + " retrieve = dspy.Retrieve(k=passages_per_hop)\n", + " for hop in range(max_hops):\n", + " query = generate_query[hop](context=context, question=question).query\n", + " passages = retrieve(query).passages\n", + " context = deduplicate(context + passages)\n", + " generated_tweet = self.generate_tweet(question=question, context=context).tweet\n", + " return dspy.Prediction(generated_tweet=generated_tweet, context=context)\n", + " \n", + "tweeter = Tweeter()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4] Evaluation - Intrinsic and Extrinsic\n", + "\n", + "#### Intrinsic Metrics: passing internal computational constraints is the goal \n", + "\n", + "**No Hashtags** - This is a user-personalized constraint to test how well the model can follow a specific, yet simple guideline of not including any hashtags within the generated tweet.\n", + "\n", + "**Correct Answer Inclusion** - This is a general check to ensure the tweet indeed has the correct answer to the question.\n", + "\n", + "**Within Length** - This check follows Twitter platform guidelines of 280 character limits per tweet.\n", + "\n", + "**Engagement** - To verify the engagement quality of the tweet, we define and call another **DSPy** program: ``Predict`` on ``AssessTweet``, relying on the same LM to answer the question: `\"Does the assessed text make for a self-contained, engaging tweet? Say no if it is not engaging.\"`\n", + "\n", + "**Faithfulness** - To verify the faithfulness of the tweet to its referenced context, we similarly use `AssessTweet` as above but prompt it with the question: `\"Is the assessed text grounded in the context? 
Say no if it includes significant facts not in the context.\"`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def has_no_hashtags(text):\n", + " return len(re.findall(r\"#\\w+\", text)) == 0\n", + "\n", + "def is_within_length_limit(text, length_limit=280):\n", + " return len(text) <= length_limit\n", + "\n", + "def is_assessment_yes(assessment_answer):\n", + " \"\"\"Check if the first word of the assessment answer is 'yes'.\"\"\"\n", + " return assessment_answer.split()[0].lower() == 'yes'\n", + "\n", + "def has_correct_answer(text, answer):\n", + " return answer in text\n", + "\n", + "\n", + "class AssessTweet(dspy.Signature):\n", + " \"\"\"Assess the quality of a tweet along the specified dimension.\"\"\"\n", + "\n", + " context = dspy.InputField(desc='ignore if N/A')\n", + " assessed_text = dspy.InputField()\n", + " assessment_question = dspy.InputField()\n", + " assessment_answer = dspy.OutputField(desc=\"Yes or No\")\n", + "\n", + "def no_hashtags_metric(gold, pred, trace=None):\n", + " tweet = pred.generated_tweet\n", + " no_hashtags = has_no_hashtags(tweet)\n", + " score = no_hashtags\n", + " return score\n", + "\n", + "def is_correct_metric(gold, pred, trace=None):\n", + " answer, tweet = gold.answer, pred.generated_tweet\n", + " correct = has_correct_answer(tweet, answer)\n", + " score = correct\n", + " return score\n", + "\n", + "def within_length_metric(gold, pred, trace=None):\n", + " tweet = pred.generated_tweet\n", + " within_length_limit = is_within_length_limit(tweet, 280)\n", + " score = within_length_limit\n", + " return score\n", + "\n", + "def engaging_metric(gold, pred, trace=None):\n", + " tweet = pred.generated_tweet\n", + " engaging = \"Does the assessed text make for a self-contained, engaging tweet? Say no if it is not engaging.\"\n", + " engaging = dspy.Predict(AssessTweet)(context='N/A', assessed_text=tweet, assessment_question=engaging)\n", + " engaging = engaging.assessment_answer.split()[0].lower() == 'yes'\n", + " score = engaging\n", + " return score\n", + "\n", + "def faithful_metric(gold, pred, trace=None):\n", + " context, tweet = pred.context, pred.generated_tweet\n", + " faithful = \"Is the assessed text grounded in the context? Say no if it includes significant facts not in the context.\" \n", + " faithful = dspy.Predict(AssessTweet)(context=context, assessed_text=tweet, assessment_question=faithful)\n", + " faithful = faithful.assessment_answer.split()[0].lower() == 'yes'\n", + " score = faithful\n", + " return score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extrinsic Metrics: Assess the overall quality and effectiveness of generated output on downstream task\n", + "\n", + "The extrinsic metric is defined as the overall quality of the generated tweet in following the mentioned constraints, and this is evaluated over a composite metric.\n", + "\n", + "While maintaining the most relevant intrinsic metrics of forming a valid tweet in the correctness and within_length constraints, the overall composite metric returns an averaged score over the 5 intrinsic metrics." 
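To make the string-level constraints concrete, here is a minimal standalone sketch (no DSPy required; the tweet text is invented, loosely modeled on the 79 AD example discussed below) that applies the hashtag, length, and answer-inclusion checks to a candidate tweet. The engagement and faithfulness checks have no comparable closed-form test, which is why they are delegated to the `AssessTweet` judge above.

```python
import re

# An invented candidate tweet for illustration.
tweet = "Mount Vesuvius erupted in 79 AD, burying Pompeii under ash within a day."
answer = "79 AD"

def has_no_hashtags(text):
    # True when no '#word' token appears anywhere in the text.
    return len(re.findall(r"#\w+", text)) == 0

def is_within_length_limit(text, length_limit=280):
    return len(text) <= length_limit

def has_correct_answer(text, answer):
    return answer in text

print(has_no_hashtags(tweet), is_within_length_limit(tweet), has_correct_answer(tweet, answer))
# -> True True True
print(has_no_hashtags(tweet + " #MountVesuvius #History"))
# -> False: hashtags violate the first constraint
```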
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def overall_metric(gold, pred, trace=None):\n", + " answer, context, tweet = gold.answer, pred.context, pred.generated_tweet\n", + " no_hashtags = has_no_hashtags(tweet)\n", + " within_length_limit = is_within_length_limit(tweet, 280)\n", + " correct = has_correct_answer(tweet, answer)\n", + " engaging = \"Does the assessed text make for a self-contained, engaging tweet? Say no if it is not engaging.\"\n", + " faithful = \"Is the assessed text grounded in the context? Say no if it includes significant facts not in the context.\" \n", + " faithful = dspy.Predict(AssessTweet)(context=context, assessed_text=tweet, assessment_question=faithful)\n", + " engaging = dspy.Predict(AssessTweet)(context='N/A', assessed_text=tweet, assessment_question=engaging)\n", + " engaging, faithful = [m.assessment_answer.split()[0].lower() == 'yes' for m in [engaging, faithful]]\n", + " score = (correct + engaging + faithful + no_hashtags + within_length_limit) if correct and within_length_limit else 0\n", + " return score / 5.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We hence define the evaluation as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = [no_hashtags_metric, is_correct_metric, within_length_metric, engaging_metric, faithful_metric, overall_metric]\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(tweeter)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at an example tweet generation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example = devset[10]\n", + "tweet = tweeter(question=example.question, answer = example.answer)\n", + "print(f'Generated Tweet: ', tweet.generated_tweet)\n", + "tweet.context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset[10:11], num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(tweeter)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we see that the generated tweet is within the length of 280 characters at 120 characters. It does in fact include the correct answer `79 AD`.\n", + "\n", + "However, it fails to not include hashtags as we see `#MountVesuvius #History` at the end of the tweet. Additionally, the tweet has been determined to not be engaging, which makes sense from an eye-test as it simply states the answer and nothing more. Furthermore, it is determined to not be grounded to its retrieved context.\n", + "\n", + "Let's try to fix this and produce tweets using DSPy Assertions. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5] Introducing Assertions: TweeterWithAssertions\n", + "\n", + "To correct these various errors, let's include assertions that simply reiterate our computational constraints within DSPy Assertion semantics. 
\n", + "\n", + "In the first **Assertion**, we check for if the generated tweet has any hashtags through regex and if violated, assert: **\"Please revise the tweet to remove hashtag phrases following it.\"**\n", + "\n", + "Similarly, we check for the tweet length and if it is not within 280 characters, we send the feedback message: **\"Please ensure the tweet is within {280} characters.\"**\n", + "\n", + "We check for if the generated tweet has the answer and if not, we assert: **\"The tweet does not include the correct answer to the question. Please revise accordingly.\"**\n", + "\n", + "For the engagement and faithfulness checks, we make use of the setup from above, checking for if the respective assessment is determined as `Yes` or `No`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TweeterWithAssertions(dspy.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.generate_tweet = dspy.ChainOfThought(GenerateTweet)\n", + "\n", + " def forward(self, question, answer):\n", + " context = []\n", + " max_hops=2\n", + " passages_per_hop=3\n", + " generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)]\n", + " retrieve = dspy.Retrieve(k=passages_per_hop)\n", + " for hop in range(max_hops):\n", + " query = generate_query[hop](context=context, question=question).query\n", + " passages = retrieve(query).passages\n", + " context = deduplicate(context + passages)\n", + " generated_tweet = self.generate_tweet(question=question, context=context).tweet\n", + " dspy.Suggest(has_no_hashtags(generated_tweet), f\"Please revise the tweet to remove hashtag phrases following it.\", target_module=GenerateTweet)\n", + " dspy.Suggest(is_within_length_limit(generated_tweet, 280), f\"Please ensure the tweet is within {280} characters.\", target_module=GenerateTweet)\n", + " dspy.Suggest(has_correct_answer(generated_tweet, answer), \"The tweet does not include the correct answer to the question. Please revise accordingly.\", target_module=GenerateTweet)\n", + " engaging_question = \"Does the assessed text make for a self-contained, engaging tweet? Say no if it is not engaging.\"\n", + " engaging_assessment = dspy.Predict(AssessTweet)(context=context, assessed_text=generated_tweet, assessment_question=engaging_question)\n", + " dspy.Suggest(is_assessment_yes(engaging_assessment.assessment_answer), \"The text is not engaging enough. Please revise to make it more captivating.\", target_module=GenerateTweet)\n", + " faithful_question = \"Is the assessed text grounded in the context? Say no if it includes significant facts not in the context.\"\n", + " faithful_assessment = dspy.Predict(AssessTweet)(context='N/A', assessed_text=generated_tweet, assessment_question=faithful_question)\n", + " dspy.Suggest(is_assessment_yes(faithful_assessment.assessment_answer), \"The text contains unfaithful elements or significant facts not in the context. Please revise for accuracy.\", target_module=GenerateTweet)\n", + " return dspy.Prediction(generated_tweet=generated_tweet, context=context)\n", + "\n", + "tweeter_with_assertions = assert_transform_module(TweeterWithAssertions().map_named_predictors(Retry), backtrack_handler) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's evaluate the `TweeterWithAssertions` now over the devset." 
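Note that the DSPy Assertions documentation (updated later in this patch series) also describes a shorthand for this wrapping step. A one-line sketch, assuming `TweeterWithAssertions` is defined as in the cell above, that activates assertion handling with the default backtracking settings:

```python
# Equivalent activation with the default backtracking mechanism (max_backtracks=2),
# as an alternative to the assert_transform_module(...) wrapping above.
tweeter_with_assertions = TweeterWithAssertions().activate_assertions()
```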
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = [no_hashtags_metric, is_correct_metric, within_length_metric, engaging_metric, faithful_metric, overall_metric]\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(tweeter_with_assertions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's take a look at how our generated tweet has improved with the addition of assertions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example = devset[10]\n", + "tweet = tweeter_with_assertions(question=example.question, answer = example.answer)\n", + "print(f'Generated Tweet: ', tweet.generated_tweet)\n", + "tweet.context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset[10:11], num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(tweeter_with_assertions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that the tweet has improved significantly, following all of our set constraints! \n", + "\n", + "It no longer has hashtags, and is both engaging and faithful, while maintaining the inclusion of the correct answer within 280 characters. Exciting!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6] Compilation With Assertions\n", + "\n", + "We can leverage **DSPy**'s`BootstrapFewShotWithRandomSearch` optimizer, to automatically generate few-shot demonstrations and conduct a random search over the candidates to output the best compiled program. We evaluate this over the `overall_metric` composite metric. \n", + "\n", + "We can first evaluate this on `Tweeter` to see how compilation performs without the inclusion of assertions. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_tweeter = teleprompter.compile(student = tweeter, teacher = tweeter, trainset=trainset, valset=devset[:100])\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_tweeter)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we test the compilation on 2 settings with assertions:\n", + "\n", + "**Compilation with Assertions**: assertion-driven example bootstrapping and counterexample bootstrapping during compilation. Teacher has assertions while the student does not as the student learns from the teacher's assertion-driven bootstrapped examples. \n", + "\n", + "**Compilation + Inference with Assertions**: assertion-driven optimizations for both the teacher and student to offer enhanced assertion-driven outputs during both compilation and inference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_with_assertions_tweeter = teleprompter.compile(student=tweeter, teacher = tweeter_with_assertions, trainset=trainset, valset=devset[:100])\n", + "\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_with_assertions_tweeter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "compiled_tweeter_with_assertions = teleprompter.compile(student=tweeter_with_assertions, teacher = tweeter_with_assertions, trainset=trainset, valset=devset[:100])\n", + "\n", + "for metric in metrics:\n", + " evaluate = Evaluate(metric=metric, devset=devset, num_threads=1, display_progress=True, display_table=5)\n", + " evaluate(compiled_tweeter_with_assertions)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dspy_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 92a5addc1daf67e7ba83df2d0c3fa56cce315320 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Mon, 12 Feb 2024 08:53:27 +0000 Subject: [PATCH 14/39] add fix for max_bootstraps --- dspy/teleprompt/bootstrap.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspy/teleprompt/bootstrap.py b/dspy/teleprompt/bootstrap.py index fe1e9ca9e..b885bea0f 100644 --- a/dspy/teleprompt/bootstrap.py +++ b/dspy/teleprompt/bootstrap.py @@ -96,15 +96,15 @@ def _prepare_predictor_mappings(self): self.name2predictor = name2predictor self.predictor2name = predictor2name - def _bootstrap(self, *, max_bootsraps=None): - max_bootsraps = max_bootsraps or self.max_bootstrapped_demos + def _bootstrap(self, *, max_bootstraps=None): + max_bootstraps = max_bootstraps or self.max_bootstrapped_demos bootstrapped = {} self.name2traces = {name: [] for name in self.name2predictor} for round_idx in range(self.max_rounds): for example_idx, example in enumerate(tqdm.tqdm(self.trainset)): - if len(bootstrapped) >= max_bootsraps: + if len(bootstrapped) >= max_bootstraps: break if example_idx not in bootstrapped: From 41fbdb35a715efa4f53e78664bbaeefabd2d46cd Mon Sep 17 00:00:00 2001 From: quangpham Date: Wed, 14 Feb 2024 00:31:49 +0700 Subject: [PATCH 15/39] Revert the coding style to original --- dsp/modules/hf.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/dsp/modules/hf.py b/dsp/modules/hf.py index 87ff4e98b..c87ce8261 100644 --- a/dsp/modules/hf.py +++ b/dsp/modules/hf.py @@ -9,7 +9,6 @@ from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on import functools - def openai_to_hf(**kwargs): hf_kwargs = {} for k, v in kwargs.items(): @@ -54,35 +53,28 @@ def __init__(self, model: str, checkpoint: Optional[str] = None, is_client: bool raise ModuleNotFoundError( "You need to install 
Hugging Face transformers library to use HF models." ) from exc - self.device = torch.device( - "cuda" if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") try: - architecture = AutoConfig.from_pretrained( - model).__dict__["architectures"][0] - self.encoder_decoder_model = ("ConditionalGeneration" in architecture) or ( - "T5WithLMHeadModel" in architecture) - self.decoder_only_model = ("CausalLM" in architecture) or ( - "GPT2LMHeadModel" in architecture) + architecture = AutoConfig.from_pretrained(model).__dict__["architectures"][0] + self.encoder_decoder_model = ("ConditionalGeneration" in architecture) or ("T5WithLMHeadModel" in architecture) + self.decoder_only_model = ("CausalLM" in architecture) or ("GPT2LMHeadModel" in architecture) assert self.encoder_decoder_model or self.decoder_only_model, f"Unknown HuggingFace model class: {model}" - self.tokenizer = AutoTokenizer.from_pretrained( - model if checkpoint is None else checkpoint) + self.tokenizer = AutoTokenizer.from_pretrained(model if checkpoint is None else checkpoint) self.rationale = True AutoModelClass = AutoModelForSeq2SeqLM if self.encoder_decoder_model else AutoModelForCausalLM if checkpoint: # with open(os.path.join(checkpoint, '..', 'compiler_config.json'), 'r') as f: # config = json.load(f) - self.rationale = False # config['rationale'] + self.rationale = False #config['rationale'] # if config['peft']: # peft_config = PeftConfig.from_pretrained(checkpoint) # self.model = AutoModelClass.from_pretrained(peft_config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map=hf_device_map) # self.model = PeftModel.from_pretrained(self.model, checkpoint) # else: - self.model = AutoModelClass.from_pretrained( - checkpoint).to(self.device) + self.model = AutoModelClass.from_pretrained(checkpoint).to("cuda") else: - self.model = AutoModelClass.from_pretrained( - model).to(self.device) + self.model = AutoModelClass.from_pretrained(model).to("cuda") self.drop_prompt_from_output = False except ValueError: self.model = AutoModelForCausalLM.from_pretrained( From 87fca722c4ed4a04d872ac4abea241beb6d909c0 Mon Sep 17 00:00:00 2001 From: quangpham Date: Wed, 14 Feb 2024 00:33:28 +0700 Subject: [PATCH 16/39] Allow HFModel to use CPU (fix) --- dsp/modules/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/modules/hf.py b/dsp/modules/hf.py index c87ce8261..9b2cc8dc5 100644 --- a/dsp/modules/hf.py +++ b/dsp/modules/hf.py @@ -72,9 +72,9 @@ def __init__(self, model: str, checkpoint: Optional[str] = None, is_client: bool # self.model = AutoModelClass.from_pretrained(peft_config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map=hf_device_map) # self.model = PeftModel.from_pretrained(self.model, checkpoint) # else: - self.model = AutoModelClass.from_pretrained(checkpoint).to("cuda") + self.model = AutoModelClass.from_pretrained(checkpoint).to(self.device) else: - self.model = AutoModelClass.from_pretrained(model).to("cuda") + self.model = AutoModelClass.from_pretrained(model).to(self.device) self.drop_prompt_from_output = False except ValueError: self.model = AutoModelForCausalLM.from_pretrained( From be07293afeb91de3d1d824094fe31d11f6a4f644 Mon Sep 17 00:00:00 2001 From: Shangyint Date: Tue, 13 Feb 2024 23:54:27 -0800 Subject: [PATCH 17/39] Improve assertion document --- docs/assertions.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/assertions.md b/docs/assertions.md 
index 99da7de07..a1601227d 100644 --- a/docs/assertions.md +++ b/docs/assertions.md @@ -13,7 +13,7 @@ We introduce two primary constructs within DSPy Assertions: - **Parameters**: - `constraint (bool)`: Outcome of Python-defined boolean validation check. - `msg (Optional[str])`: User-defined error message providing feedback or correction guidance. - - `backtrack (Optional[module])`: Specifies target module for retry attempts upon constraint failure. + - `backtrack (Optional[module])`: Specifies target module for retry attempts upon constraint failure. The default backtracking module is the last module before the assertion. - **Behavior**: Initiates retry upon failure, dynamically adjusting the pipeline's execution. If failures persist, it halts execution and raises a `dspy.AssertionError`. - **`dspy.Suggest`**: @@ -33,8 +33,8 @@ If the error continues past the `max_backtracking_attempts`, then `dspy.Assert` - **dspy.Suggest vs. dspy.Assert**: `dspy.Suggest` on the other hand offers a softer approach. It maintains the same retry backtracking as `dspy.Assert` but instead serves as a gentle nudger. If the model outputs cannot pass the model constraints after the `max_backtracking_attempts`, `dspy.Suggest` will log the persistent failure and continue execution of the program on the rest of the data. This ensures the LM pipeline works in a "best-effort" manner without halting execution. -- **`dspy.Suggestions`** are best utilized as "helpers" during the evaluation phase, offering guidance and potential corrections without halting the pipeline. -- **`dspy.Assertions`** are recommended during the development stage as "checkers" to ensure the LM behaves as expected, providing a robust mechanism for identifying and addressing errors early in the development cycle. +- **`dspy.Suggest`** are best utilized as "helpers" during the evaluation phase, offering guidance and potential corrections without halting the pipeline. +- **`dspy.Assert`** are recommended during the development stage as "checkers" to ensure the LM behaves as expected, providing a robust mechanism for identifying and addressing errors early in the development cycle. 
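To make the two constructs above concrete, here is a small, hypothetical sketch (the signature and module names are invented, and a configured LM is assumed) of a program that states a soft constraint with `dspy.Suggest` and then activates assertion handling:

```python
import dspy

class OneWordAnswer(dspy.Signature):
    """Answer the question with a single word."""
    question = dspy.InputField()
    answer = dspy.OutputField(desc="a single word")

class OneWordQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.predict = dspy.ChainOfThought(OneWordAnswer)

    def forward(self, question):
        answer = self.predict(question=question).answer
        # Soft constraint: on failure, backtrack with this feedback message; once the
        # retry budget is exhausted, the failure is logged and execution continues.
        dspy.Suggest(
            len(answer.split()) == 1,
            "The answer should be exactly one word. Please revise.",
            target_module=OneWordAnswer,
        )
        return dspy.Prediction(answer=answer)

# Activate assertion handling with the default backtracking mechanism.
one_word_qa = OneWordQA().activate_assertions()
```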
## Use Case: Including Assertions in DSPy Programs @@ -155,10 +155,15 @@ Now calling programs with DSPy Assertions requires one last step, and that is tr ```python from dspy.primitives.assertions import assert_transform_module, backtrack_handler -baleen_with_assertions = assert_transform_module(SimplifiedBaleenAssertions().map_named_predictors(Retry), backtrack_handler) +baleen_with_assertions = assert_transform_module(SimplifiedBaleenAssertions(), backtrack_handler) + +# backtrack_handler is parameterized over a few settings for the backtracking mechanism +# To change the number of max retry attempts, you can do +baleen_with_assertions_retry_once = assert_transform_module(SimplifiedBaleenAssertions(), + functools.partial(backtrack_handler, max_backtracks=1)) ``` -Alternatively, you can also directly call `activate_assertions` on the program with `dspy.Assert/Suggest` statements: +Alternatively, you can also directly call `activate_assertions` on the program with `dspy.Assert/Suggest` statements using the default backtracking mechanism (`max_backtracks=2`): ```python baleen_with_assertions = SimplifiedBaleenAssertions().activate_assertions() @@ -234,7 +239,7 @@ Query: "actress Ophelia RSC Hamlet" + "actress The Shore" DSPy Assertions work with optimizations that DSPy offers, particularly with `BootstrapFewShotWithRandomSearch`, including the following settings: - Compilation with Assertions - This includes assertion-driven example bootstrapping and counterexample bootstrapping during compilation. The teacher model for bootstrapping few-shot demonstrations can make use of DSPy Assertions to offer robust bootstrapped examples for the student model to learn from during inference. + This includes assertion-driven example bootstrapping and counterexample bootstrapping during compilation. The teacher model for bootstrapping few-shot demonstrations can make use of DSPy Assertions to offer robust bootstrapped examples for the student model to learn from during inference. In this setting, the student model does not perform assertion aware optimizations (backtracking and retry) during inference. - Compilation + Inference with Assertions -This includes assertion-driven optimizations in both compilation and inference. Now the teacher model offers assertion-driven examples but the student can further optimize with assertions of its own during inference time. ```python From bef884e24073100ec183d00c3b291aaa244770bb Mon Sep 17 00:00:00 2001 From: Jamie Scharf Date: Wed, 14 Feb 2024 11:58:09 -0500 Subject: [PATCH 18/39] feat(hotfix): Fixing bug with bedrock parameter and parameters --- dsp/modules/aws_lm.py | 50 ++++++++++++++++++++++++++++++++---------- dsp/modules/bedrock.py | 12 +++++++++- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/dsp/modules/aws_lm.py b/dsp/modules/aws_lm.py index 05a733851..33185254b 100644 --- a/dsp/modules/aws_lm.py +++ b/dsp/modules/aws_lm.py @@ -28,6 +28,7 @@ def __init__( max_new_tokens: int, truncate_long_prompts: bool = False, input_output_ratio: int = 3, + batch_n: bool = False, ) -> None: """_summary_ @@ -40,6 +41,7 @@ def __init__( input_output_ratio (int, optional): The rough size of the number of input tokens to output tokens in the worst case. Defaults to 3. temperature (float, optional): _description_. Defaults to 0.0. truncate_long_prompts (bool, optional): If True, remove extremely long inputs to context. Defaults to False. + batch_n (bool, False): If False, call the LM N times rather than batching. Not all AWS models support the n parameter. 
""" super().__init__(model=model) # AWS doesn't have an equivalent of max_tokens so let's clarify @@ -48,9 +50,10 @@ def __init__( self._max_new_tokens: int = max_new_tokens self._model_name: str = model self._truncate_long_prompt_prompts: bool = truncate_long_prompts + self._batch_n: bool = batch_n import boto3 - + self.predictor = boto3.client(service_name, region_name=region_name) @abstractmethod @@ -72,7 +75,7 @@ def _sanitize_kwargs(self, query_kwargs: dict[str, Any]) -> dict[str, Any]: return query_kwargs @abstractmethod - def _call_model(self, body: str) -> str: + def _call_model(self, body: str) -> str | list[str]: """Call model, get generated input without the formatted prompt""" pass @@ -82,7 +85,20 @@ def _extract_input_parameters( ) -> dict[str, str | float | int]: pass - def basic_request(self, prompt, **kwargs) -> str: + def _simple_api_call(self, formatted_prompt: str, **kwargs) -> str | list[str]: + body = self._create_body(formatted_prompt, **kwargs) + json_body = json.dumps(body) + llm_out: str | list[str] = self._call_model(json_body) + if isinstance(llm_out, str): + llm_out = llm_out.replace(formatted_prompt, "") + else: + llm_out = [generated.replace(formatted_prompt, "") for generated in llm_out] + self.history.append( + {"prompt": formatted_prompt, "response": llm_out, "kwargs": body} + ) + return llm_out + + def basic_request(self, prompt, **kwargs) -> str | list[str]: """Query the endpoint.""" # Remove any texts that are too long @@ -92,16 +108,28 @@ def basic_request(self, prompt, **kwargs) -> str: formatted_prompt = self._format_prompt(truncated_prompt) else: formatted_prompt = self._format_prompt((prompt)) - body = self._create_body(formatted_prompt, **kwargs) - json_body: str = json.dumps(body) - - generated: str = self._call_model(json_body) - self.history.append( - {"prompt": formatted_prompt, "response": generated, "kwargs": body} - ) + llm_out: str | list[str] + if "n" in kwargs.keys(): + if self._batch_n: + llm_out = self._simple_api_call( + formatted_prompt=formatted_prompt, **kwargs + ) + else: + del kwargs["n"] + llm_out = [] + for _ in range(0, kwargs["n"]): + generated: str | list[str] = self._simple_api_call( + formatted_prompt=formatted_prompt, **kwargs + ) + if isinstance(generated, str): + llm_out.append(generated) + else: + raise TypeError("Error, list type was returned from LM call") + else: + llm_out = self._simple_api_call(formatted_prompt=formatted_prompt, **kwargs) - return generated.replace(formatted_prompt, "") + return llm_out def _estimate_tokens(self, text: str) -> int: return len(text) * CHARS2TOKENS diff --git a/dsp/modules/bedrock.py b/dsp/modules/bedrock.py index 8e737c16e..192a3016b 100644 --- a/dsp/modules/bedrock.py +++ b/dsp/modules/bedrock.py @@ -31,6 +31,11 @@ def __init__( input_output_ratio=input_output_ratio, max_new_tokens=max_new_tokens, ) + self._validate_model(model) + + def _validate_model(self, model: str) -> None: + if "claude" not in model.lower(): + raise NotImplementedError("Only claude models are supported as of now") def _create_body(self, prompt: str, **kwargs) -> dict[str, str | float]: base_args: dict[str, Any] = { @@ -41,7 +46,12 @@ def _create_body(self, prompt: str, **kwargs) -> dict[str, str | float]: query_args: dict[str, Any] = self._sanitize_kwargs(base_args) query_args["prompt"] = prompt # AWS Bedrock forbids these keys - + if "max_tokens" in query_args.keys(): + max_tokens: int = query_args["max_tokens"] + input_tokens: int = self._estimate_tokens(prompt) + max_tokens_to_sample: int = max_tokens - 
input_tokens + del query_args["max_tokens"] + query_args["max_tokens_to_sample"] = max_tokens_to_sample return query_args def _call_model(self, body: str) -> str: From 4fd70be99cfd5a187d894a2b473d1f25e6d9b8f7 Mon Sep 17 00:00:00 2001 From: Jamie Scharf Date: Wed, 14 Feb 2024 11:58:33 -0500 Subject: [PATCH 19/39] feat(hotfix): Fixing bug with bedrock n parameter and max_tokens parameters --- dsp/modules/aws_lm.py | 2 +- dsp/modules/bedrock.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dsp/modules/aws_lm.py b/dsp/modules/aws_lm.py index 33185254b..00906282a 100644 --- a/dsp/modules/aws_lm.py +++ b/dsp/modules/aws_lm.py @@ -28,7 +28,7 @@ def __init__( max_new_tokens: int, truncate_long_prompts: bool = False, input_output_ratio: int = 3, - batch_n: bool = False, + batch_n: bool = True, ) -> None: """_summary_ diff --git a/dsp/modules/bedrock.py b/dsp/modules/bedrock.py index 192a3016b..64277f65c 100644 --- a/dsp/modules/bedrock.py +++ b/dsp/modules/bedrock.py @@ -30,6 +30,7 @@ def __init__( truncate_long_prompts=False, input_output_ratio=input_output_ratio, max_new_tokens=max_new_tokens, + batch_n=True, # Bedrock does not support the `n` parameter ) self._validate_model(model) From ed5899929ee5a2b1040ff230e702e4a06e66f62d Mon Sep 17 00:00:00 2001 From: arnavsinghvi11 <54859892+arnavsinghvi11@users.noreply.github.com> Date: Thu, 15 Feb 2024 09:50:46 -0800 Subject: [PATCH 20/39] Update tweets_assertions.ipynb --- examples/tweets/tweets_assertions.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tweets/tweets_assertions.ipynb b/examples/tweets/tweets_assertions.ipynb index 91dffc47b..22c2db296 100644 --- a/examples/tweets/tweets_assertions.ipynb +++ b/examples/tweets/tweets_assertions.ipynb @@ -511,7 +511,7 @@ "metadata": {}, "outputs": [], "source": [ - "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6)\n", + "teleprompter = BootstrapFewShotWithRandomSearch(metric = overall_metric, max_bootstrapped_demos=2, num_candidate_programs=6, num_threads=1)\n", "compiled_tweeter_with_assertions = teleprompter.compile(student=tweeter_with_assertions, teacher = tweeter_with_assertions, trainset=trainset, valset=devset[:100])\n", "\n", "for metric in metrics:\n", From 9b213df36e4ab3776ac3568eabad7a00e7b61d29 Mon Sep 17 00:00:00 2001 From: Connor Shorten Date: Thu, 15 Feb 2024 17:05:24 -0500 Subject: [PATCH 21/39] Create Google LM --- dsp/modules/google.py | 105 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 dsp/modules/google.py diff --git a/dsp/modules/google.py b/dsp/modules/google.py new file mode 100644 index 000000000..93d6e39c2 --- /dev/null +++ b/dsp/modules/google.py @@ -0,0 +1,105 @@ +import math +from typing import Any, Optional +import backoff + +from dsp.modules.lm import LM + +try: + import google.generativeai as genai +except ImportError: + google_api_error = Exception + print("Not loading Google because it is not installed.") + +def backoff_hdlr(details): + """Handler from https://pypi.org/project/backoff/""" + print( + "Backing off {wait:0.1f} seconds after {tries} tries " + "calling function {target} with kwargs " + "{kwargs}".format(**details) + ) + + +def giveup_hdlr(details): + """wrapper function that decides when to give up on retry""" + if "rate limits" in details.message: + return False + return True + + +class Google(LM): + """Wrapper around Google's API. 
+ + Currently supported models include `gemini-pro-1.0`. + """ + + def __init__( + self, + model: str = "gemini-pro-1.0", + api_key: Optional[str] = None, + stop_sequences: list[str] = [], + **kwargs + ): + """ + Parameters + ---------- + model : str + Which pre-trained model from Google to use? + Choices are [`gemini-pro-1.0`] + api_key : str + The API key for Google. + It can be obtained from https://cloud.google.com/generative-ai-studio + **kwargs: dict + Additional arguments to pass to the API provider. + """ + super().__init__(model) + self.google = genai.configure(api_key="") + self.provider = "google" + self.kwargs = { + "model_name": model, + "temperature": 0.0, + "max_output_tokens": 2048, + "top_p": 1, + "top_k": 1, + **kwargs + } + + self.history: list[dict[str, Any]] = [] + + def basic_request(self, prompt: str, **kwargs): + raw_kwargs = kwargs + kwargs = { + **self.kwargs, + "prompt": prompt, + **kwargs, + } + response = self.co.generate(**kwargs) + + history = { + "prompt": prompt, + "response": response, + "kwargs": kwargs, + "raw_kwargs": raw_kwargs, + } + self.history.append(history) + + return response + + @backoff.on_exception( + backoff.expo, + (google_api_error), + max_time=1000, + on_backoff=backoff_hdlr, + giveup=giveup_hdlr, + ) + def request(self, prompt: str, **kwargs): + """Handles retrieval of completions from Google whilst handling API errors""" + return self.basic_request(prompt, **kwargs) + + def __call__( + self, + prompt: str, + only_completed: bool = True, + return_sorted: bool = False, + **kwargs + ): + return self.request(prompt, **kwargs) From 0c5386f0b5b59bbeacd0cbedb113e06682434b6b Mon Sep 17 00:00:00 2001 From: Daniel Kovalenko Date: Thu, 15 Feb 2024 23:23:26 +0000 Subject: [PATCH 22/39] Refactored ChromadbRM to use generic EmbeddingFunction --- dspy/retrieve/chromadb_rm.py | 39 +++++++++--------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/dspy/retrieve/chromadb_rm.py b/dspy/retrieve/chromadb_rm.py index f95d1993c..d6500c305 100644 --- a/dspy/retrieve/chromadb_rm.py +++ b/dspy/retrieve/chromadb_rm.py @@ -18,6 +18,11 @@ import chromadb from chromadb.config import Settings from chromadb.utils import embedding_functions + from chromadb.api.types import ( + Embeddable, + EmbeddingFunction + ) + import chromadb.utils.embedding_functions as ef except ImportError: chromadb = None @@ -65,29 +70,14 @@ def __init__( self, collection_name: str, persist_directory: str, - openai_embed_model: str = "text-embedding-ada-002", - openai_api_provider: Optional[str] = None, - openai_api_key: Optional[str] = None, - openai_api_type: Optional[str] = None, - openai_api_base: Optional[str] = None, - openai_api_version: Optional[str] = None, + embedding_function: Optional[ + EmbeddingFunction[Embeddable] + ] = ef.DefaultEmbeddingFunction(), k: int = 7, ): - self._openai_embed_model = openai_embed_model - self._init_chromadb(collection_name, persist_directory) - self.openai_ef = embedding_functions.OpenAIEmbeddingFunction( - api_key=openai_api_key, - api_base=openai_api_base, - api_type=openai_api_type, - api_version=openai_api_version, - model_name=openai_embed_model, - ) - self.api_version = openai_api_version - self.api_base = openai_api_base - self.model_name = openai_embed_model - self.openai_api_type = openai_api_type + self.ef = embedding_function super().__init__(k=k) @@ -130,16 +120,7 @@ def _get_embeddings(self, queries: List[str]) -> List[List[float]]: Returns: List[List[float]]: List of embeddings corresponding to each query. 
""" - - model_arg = {"engine": self.model_name, - "deployment_id": self.model_name, - "api_version": self.api_version, - "api_base": self.api_base, - } - embedding = self.openai_ef._client.create( - input=queries, model=self._openai_embed_model, **model_arg, api_provider=self.openai_api_type - ) - return [embedding.embedding for embedding in embedding.data] + return self.ef(queries) def forward( self, query_or_queries: Union[str, List[str]], k: Optional[int] = None From 7dab4bbdba14387bfe358ba73362468ea13f2b92 Mon Sep 17 00:00:00 2001 From: Daniel Kovalenko Date: Thu, 15 Feb 2024 23:34:38 +0000 Subject: [PATCH 23/39] Updated ChromadbRM documentation to use generic EmbeddingFunction --- dspy/retrieve/chromadb_rm.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dspy/retrieve/chromadb_rm.py b/dspy/retrieve/chromadb_rm.py index d6500c305..aa4398bb3 100644 --- a/dspy/retrieve/chromadb_rm.py +++ b/dspy/retrieve/chromadb_rm.py @@ -42,10 +42,8 @@ class ChromadbRM(dspy.Retrieve): Args: collection_name (str): chromadb collection name persist_directory (str): chromadb persist directory - openai_embed_model (str, optional): The OpenAI embedding model to use. Defaults to "text-embedding-ada-002". - openai_api_key (str, optional): The API key for OpenAI. Defaults to None. - openai_org (str, optional): The organization for OpenAI. Defaults to None. - k (int, optional): The number of top passages to retrieve. Defaults to 3. + embedding_function (Optional[EmbeddingFunction[Embeddable]]): Optional function to use to embed documents. Defaults to DefaultEmbeddingFunction. + k (int, optional): The number of top passages to retrieve. Defaults to 7. Returns: dspy.Prediction: An object containing the retrieved passages. From a9ef3e1d98dc8d7f3479c2e6ad25e3a7a32e1aa7 Mon Sep 17 00:00:00 2001 From: Daniel Kovalenko Date: Fri, 16 Feb 2024 01:13:48 +0000 Subject: [PATCH 24/39] Add documentation for ChromadbRM in DSPy retrievals documentation --- docs/retrieval_models_client.md | 65 ++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/docs/retrieval_models_client.md b/docs/retrieval_models_client.md index 4dfc5add7..2632961b8 100644 --- a/docs/retrieval_models_client.md +++ b/docs/retrieval_models_client.md @@ -8,6 +8,7 @@ This documentation provides an overview of the DSPy Retrieval Model Clients. | --- | --- | | ColBERTv2 | [ColBERTv2 Section](#ColBERTv2) | | AzureCognitiveSearch | [AzureCognitiveSearch Section](#AzureCognitiveSearch) | +| ChromadbRM | [ChromadbRM Section](#ChromadbRM) | ## ColBERTv2 @@ -91,4 +92,66 @@ class AzureCognitiveSearch: Refer to [ColBERTv2](#ColBERTv2) documentation. Keep in mind there is no `simplify` flag for AzureCognitiveSearch. -AzureCognitiveSearch supports sending queries and processing the received results, mapping content and scores to a correct format for the Azure Cognitive Search server. \ No newline at end of file +AzureCognitiveSearch supports sending queries and processing the received results, mapping content and scores to a correct format for the Azure Cognitive Search server. + +## ChromadbRM + +### Quickstart with OpenAI Embeddings + +ChromadbRM have the flexibility from a variety of embedding functions as outlined in the [chromadb embeddings documentation](https://docs.trychroma.com/embeddings). While different options are available, this example demonstrates how to utilize OpenAI embeddings specifically. 
+ +```python +from dspy.retrieve import ChromadbRM +import os +import openai +from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction + +embedding_function = OpenAIEmbeddingFunction( + api_key=os.environ.get('OPENAI_API_KEY'), + model_name="text-embedding-ada-002" +) + +retriever_model = ChromadbRM( + 'your_collection_name', + '/path/to/your/db', + embedding_function=embedding_function, + k=5 +) + +results = retriever_model("Explore the significance of quantum computing", k=5) + +for result in results: + print("Document:", result.long_text, "\n") +``` + +### Constructor + +Initialize an instance of the `ChromadbRM` class, with the option to use OpenAI's embeddings or any alternative supported by chromadb, as detailed in the official [chromadb embeddings documentation](https://docs.trychroma.com/embeddings). + +```python +ChromadbRM( + collection_name: str, + persist_directory: str, + embedding_function: Optional[EmbeddingFunction[Embeddable]] = OpenAIEmbeddingFunction(), + k: int = 7, +) +``` + +**Parameters:** +- `collection_name` (_str_): The name of the chromadb collection. +- `persist_directory` (_str_): Path to the directory where chromadb data is persisted. +- `embedding_function` (_Optional[EmbeddingFunction[Embeddable]]_, _optional_): The function used for embedding documents and queries. Defaults to `DefaultEmbeddingFunction()` if not specified. +- `k` (_int_, _optional_): The number of top passages to retrieve. Defaults to 7. + +### Methods + +#### `forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = None) -> dspy.Prediction` + +Search the chromadb collection for the top `k` passages matching the given query or queries, using embeddings generated via the specified `embedding_function`. + +**Parameters:** +- `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. +- `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. + +**Returns:** +- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with a `long_text` attribute. \ No newline at end of file From 9c11eba8e515aa1a904ce7abb22b16e6a81b89eb Mon Sep 17 00:00:00 2001 From: Connor Shorten Date: Thu, 15 Feb 2024 21:51:59 -0500 Subject: [PATCH 25/39] Oops, great catch from insop! --- dsp/modules/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/modules/google.py b/dsp/modules/google.py index 93d6e39c2..2e57f6a97 100644 --- a/dsp/modules/google.py +++ b/dsp/modules/google.py @@ -52,7 +52,7 @@ def __init__( Additional arguments to pass to the API provider. """ super().__init__(model) - self.google = genai.configure(api_key="") + self.google = genai.configure(api_key=self.api_key) self.provider = "google" self.kwargs = { "model_name": model, From b5b7e2f05180b9e2de72e6ab0645a4323b14267f Mon Sep 17 00:00:00 2001 From: Connor Shorten Date: Thu, 15 Feb 2024 21:52:33 -0500 Subject: [PATCH 26/39] Another fantastic catch! Thank you! 
--- dsp/modules/google.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dsp/modules/google.py b/dsp/modules/google.py index 2e57f6a97..94e8688ce 100644 --- a/dsp/modules/google.py +++ b/dsp/modules/google.py @@ -36,7 +36,6 @@ def __init__( self, model: str = "gemini-pro-1.0", api_key: Optional[str] = None, - stop_sequences: list[str] = [], **kwargs ): """ From 6c208cb4572020740ca6f4e9f918729698ac73b3 Mon Sep 17 00:00:00 2001 From: Connor Shorten Date: Thu, 15 Feb 2024 21:54:23 -0500 Subject: [PATCH 27/39] Great use of kwargs to set temperature value --- dsp/modules/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/modules/google.py b/dsp/modules/google.py index 94e8688ce..581c98414 100644 --- a/dsp/modules/google.py +++ b/dsp/modules/google.py @@ -55,7 +55,7 @@ def __init__( self.provider = "google" self.kwargs = { "model_name": model, - "temperature": 0.0, + "temperature": 0.0 if "temperature" not in kwargs else kwargs["temperature"], "max_output_tokens": 2048, "top_p": 1, "top_k": 1, From 48f5393786142f4e36ecf058227da31255d2899b Mon Sep 17 00:00:00 2001 From: Anush008 Date: Fri, 16 Feb 2024 16:09:43 +0530 Subject: [PATCH 28/39] chore: Bump qdrant extra deps --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ef7005e0e..5c1794f5a 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ install_requires=requirements, extras_require={ "pinecone": ["pinecone-client~=2.2.4"], - "qdrant": ["qdrant-client~=1.6.2", "fastembed~=0.1.0"], + "qdrant": ["qdrant-client~=1.7.3", "fastembed~=0.2.1"], "chromadb": ["chromadb~=0.4.14"], "marqo": ["marqo"], "weaviate": ["weaviate-client~=3.26.1"], From 553189992c3b94fce5274f2d22323d82c907006e Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Fri, 16 Feb 2024 16:08:37 -0800 Subject: [PATCH 29/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03d7f29c9..780406aaa 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ The DSPy documentation is divided into **tutorials** (step-by-step illustration - [DSPy webinar with MLOps Learners](https://www.youtube.com/watch?v=im7bCLW2aM4), a bit longer with Q&A. - Hands-on Overviews of DSPy by the community: [DSPy Explained! by Connor Shorten](https://www.youtube.com/watch?v=41EfOY0Ldkc), [DSPy explained by code_your_own_ai](https://www.youtube.com/watch?v=ycfnKPxBMck) - Interviews: [Weaviate Podcast in-person](https://www.youtube.com/watch?v=CDung1LnLbY), and you can find 6-7 other remote podcasts on YouTube from a few different perspectives/audiences. 
- +- **Tracing in DSPy** with Arize Phoenix: [Tutorial for tracing your prompts and the steps of your DSPy programs](https://colab.research.google.com/github/Arize-ai/phoenix/blob/main/tutorials/tracing/dspy_tracing_tutorial.ipynb) ### B) Guides From 8ea1e93dd5393b05172a7466922461c71c4de3f1 Mon Sep 17 00:00:00 2001 From: Anush Date: Sat, 17 Feb 2024 10:45:37 +0530 Subject: [PATCH 30/39] chore: remove pinned versions setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5c1794f5a..4d3b45ba8 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ install_requires=requirements, extras_require={ "pinecone": ["pinecone-client~=2.2.4"], - "qdrant": ["qdrant-client~=1.7.3", "fastembed~=0.2.1"], + "qdrant": ["qdrant-client", "fastembed"], "chromadb": ["chromadb~=0.4.14"], "marqo": ["marqo"], "weaviate": ["weaviate-client~=3.26.1"], From 630bf9712494fc29055bc19f474d386ee0fbd9f4 Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Fri, 16 Feb 2024 21:20:29 -0800 Subject: [PATCH 31/39] Adding back typos in signatures for signature_opt_bayesian for consistency, before merging PR --- dspy/teleprompt/signature_opt_bayesian.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/teleprompt/signature_opt_bayesian.py b/dspy/teleprompt/signature_opt_bayesian.py index ad311050c..e91b60faa 100644 --- a/dspy/teleprompt/signature_opt_bayesian.py +++ b/dspy/teleprompt/signature_opt_bayesian.py @@ -81,7 +81,7 @@ class ObservationSummarizer(dspy.Signature): class DatasetDescriptor(dspy.Signature): ("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ - """Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. """ + """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""") examples = dspy.InputField(desc="Sample data points from the dataset") @@ -90,7 +90,7 @@ class DatasetDescriptor(dspy.Signature): class DatasetDescriptorWithPriorObservations(dspy.Signature): ("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ - """Some areas you may consider in your observations: topics, content, syntax, conciseness, etc. """ + """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""") examples = dspy.InputField(desc="Sample data points from the dataset") From 180c0a126f5b2886d3bcbd9aa067b9907071b775 Mon Sep 17 00:00:00 2001 From: Christopher Akiki Date: Sun, 18 Feb 2024 08:46:43 +0100 Subject: [PATCH 32/39] [TYPO] Update tweet_metric.py autoamtic -> automatic --- examples/tweets/tweet_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tweets/tweet_metric.py b/examples/tweets/tweet_metric.py index dc4696fd0..c4f11709c 100644 --- a/examples/tweets/tweet_metric.py +++ b/examples/tweets/tweet_metric.py @@ -10,7 +10,7 @@ valset, devset = devset[:50], devset[50:] -# Define the signature for autoamtic assessments. 
+# Define the signature for automatic assessments. class Assess(dspy.Signature): """Assess the quality of a tweet along the specified dimension.""" From 677f5071be37ca206ad411c8232918fb668eb6f1 Mon Sep 17 00:00:00 2001 From: Christopher Akiki Date: Sun, 18 Feb 2024 08:47:10 +0100 Subject: [PATCH 33/39] [TYPO] Update tweet.py autoamtic -> automatic --- testing/tasks/tweet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/tasks/tweet.py b/testing/tasks/tweet.py index ef3277ed0..aeeddc6a7 100644 --- a/testing/tasks/tweet.py +++ b/testing/tasks/tweet.py @@ -36,7 +36,7 @@ def forward (self,question) : context += self.retrieve(query).passages return dspy.Prediction(context=context, answer=self.generate_answer(context = context , question = question).answer) -# Define the signature for autoamtic assessments. +# Define the signature for automatic assessments. class Assess(dspy.Signature): """Assess the quality of a tweet along the specified dimension.""" @@ -107,4 +107,4 @@ def get_program(self): def get_metric(self): return self.metric - \ No newline at end of file + From 06a1ecd2159e7764584d1fa92522226bc744520b Mon Sep 17 00:00:00 2001 From: Christopher Akiki Date: Sun, 18 Feb 2024 08:47:35 +0100 Subject: [PATCH 34/39] [TYPO] Update tweet_metric.py autoamtic -> automatic --- testing/tasks/tweet_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/tasks/tweet_metric.py b/testing/tasks/tweet_metric.py index 2e237d0c2..ea51d7167 100644 --- a/testing/tasks/tweet_metric.py +++ b/testing/tasks/tweet_metric.py @@ -39,7 +39,7 @@ def forward (self,question) : context += self.retrieve(query).passages return dspy.Prediction(context=context, answer=self.generate_answer(context = context , question = question).answer) -# Define the signature for autoamtic assessments. +# Define the signature for automatic assessments. class Assess(dspy.Signature): """Assess the quality of a tweet along the specified dimension.""" @@ -145,4 +145,4 @@ def get_program(self): def get_metric(self): return self.metric - \ No newline at end of file + From b9516c45dbcdedbed6b71dab7cd4a6c1024e0dcf Mon Sep 17 00:00:00 2001 From: Michael Ryan Date: Sun, 18 Feb 2024 14:00:59 -0800 Subject: [PATCH 35/39] Added saving/loading to Predict and Chain_Of_Thought After the optimizers change the instruction we need to be able to save/load the optimized instructions. --- dspy/predict/chain_of_thought.py | 21 +++++++++++++++++++++ dspy/predict/predict.py | 20 ++++++++++++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/dspy/predict/chain_of_thought.py b/dspy/predict/chain_of_thought.py index bfd62e5ef..fceb3b651 100644 --- a/dspy/predict/chain_of_thought.py +++ b/dspy/predict/chain_of_thought.py @@ -66,6 +66,27 @@ def forward(self, **kwargs): return super().forward(signature=signature, **kwargs) + def dump_state(self): + state = super().dump_state() + + # Cache the signature instructions and the last field's name. + state["extended_signature_instructions"] = self.extended_signature.instructions + state["extended_signature_prefix"] = self.extended_signature.fields[-1].name + + return state + + def load_state(self, state): + super().load_state(state) + + # Reconstruct the signature. 
+ if "extended_signature_instructions" in state: + instructions = state["extended_signature_instructions"] + self.extended_signature.instructions = instructions + + if "extended_signature_prefix" in state: + prefix = state["extended_signature_prefix"] + self.extended_signature.fields[-1] = self.extended_signature.fields[-1]._replace(name=prefix) + """ TODO: In principle, we can update the field's prefix during forward too to fill any thing based on the input args. diff --git a/dspy/predict/predict.py b/dspy/predict/predict.py index 68d8c20ce..c68ac1a5a 100644 --- a/dspy/predict/predict.py +++ b/dspy/predict/predict.py @@ -47,14 +47,26 @@ def reset(self): def dump_state(self): state_keys = ["lm", "traces", "train", "demos"] - return {k: getattr(self, k) for k in state_keys} + state = {k: getattr(self, k) for k in state_keys} + + # Cache the signature instructions and the last field's name. + state["signature_instructions"] = self.signature.instructions + state["signature_prefix"] = self.signature.fields[-1].name + + return state def load_state(self, state): for name, value in state.items(): setattr(self, name, value) - - import dspy - self.demos = [dspy.Example(**x) for x in self.demos] + + # Reconstruct the signature. + if "signature_instructions" in state: + instructions = state["signature_instructions"] + self.signature.instructions = instructions + + if "signature_prefix" in state: + prefix = state["signature_prefix"] + self.signature.fields[-1] = self.signature.fields[-1]._replace(name=prefix) def __call__(self, **kwargs): return self.forward(**kwargs) From 5e77ea0e5d1c0d5aeced6abb3c11996d412e0f7e Mon Sep 17 00:00:00 2001 From: klopsahlong Date: Sun, 18 Feb 2024 14:24:04 -0800 Subject: [PATCH 36/39] switching trial.suggest_int --> trial.suggest_categorical for demos and instruction selection --- dspy/teleprompt/signature_opt_bayesian.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/teleprompt/signature_opt_bayesian.py b/dspy/teleprompt/signature_opt_bayesian.py index e91b60faa..68d7aacf0 100644 --- a/dspy/teleprompt/signature_opt_bayesian.py +++ b/dspy/teleprompt/signature_opt_bayesian.py @@ -303,8 +303,8 @@ def objective(trial): p_demo_candidates = demo_candidates[id(p_old)] # Suggest the index of the instruction candidate to use in our trial - instruction_idx = trial.suggest_int(f"{id(p_old)}_predictor_instruction",low=0, high=len(p_instruction_candidates)-1) - demos_idx = trial.suggest_int(f"{id(p_old)}_predictor_demos",low=0, high=len(p_demo_candidates)-1) + instruction_idx = trial.suggest_categorical(f"{id(p_old)}_predictor_instruction",range(len(p_instruction_candidates))) + demos_idx = trial.suggest_categorical(f"{id(p_old)}_predictor_demos",range(len(p_demo_candidates))) trial_logs[trial_num][f"{id(p_old)}_predictor_instruction"] = instruction_idx trial_logs[trial_num][f"{id(p_old)}_predictor_demos"] = demos_idx From f26ef164112f29a696f1a186bbfb10573f67e94e Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Sun, 18 Feb 2024 18:38:40 -0600 Subject: [PATCH 37/39] Update dspy/predict/react.py Co-authored-by: Stephen Witkowski --- dspy/predict/react.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dspy/predict/react.py b/dspy/predict/react.py index d4b16703b..7dc3d1bd9 100644 --- a/dspy/predict/react.py +++ b/dspy/predict/react.py @@ -64,7 +64,12 @@ def act(self, output, hop): if action_name == 'Finish': return action_val - output[f"Observation_{hop+1}"] = self.tools[action_name](action_val) + try: + 
output[f"Observation_{hop+1}"] = self.tools[action_name](action_val).passages + except AttributeError: + # Handle the case where 'passages' attribute is missing + # TODO: This is a hacky way to handle this. Need to fix this. + output[f"Observation_{hop+1}"] = self.tools[action_name](action_val) except Exception as e: output[f"Observation_{hop+1}"] = "Failed to parse action. Bad formatting or incorrect action name." From 78973942cbc7fbbb88bd5b0978a1ddca9c7d8154 Mon Sep 17 00:00:00 2001 From: Arun Patro Date: Mon, 19 Feb 2024 22:45:42 -0500 Subject: [PATCH 38/39] Fix typo in intro.ipynb --- intro.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intro.ipynb b/intro.ipynb index bbdbc194f..1dc2cf6d9 100644 --- a/intro.ipynb +++ b/intro.ipynb @@ -287,7 +287,7 @@ "A signature consists of three simple elements:\n", "\n", "- A minimal description of the sub-task the LM is supposed to solve.\n", - "- A description of one or more input fields (e.g., input question) that will we will give to the LM.\n", + "- A description of one or more input fields (e.g., input question) that we will give to the LM.\n", "- A description of one or more output fields (e.g., the question's answer) that we will expect from the LM.\n", "\n", "Let's define a simple signature for basic question answering." From 571d4bb8642fc1dcb06539d574ac0fd1b23d8706 Mon Sep 17 00:00:00 2001 From: arnavsinghvi11 <54859892+arnavsinghvi11@users.noreply.github.com> Date: Mon, 19 Feb 2024 22:02:20 -0800 Subject: [PATCH 39/39] Update README.md with DSPy Assertions examples linking --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 780406aaa..59dca08ac 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ The DSPy documentation is divided into **tutorials** (step-by-step illustration | Beginner | [**Compiling for Tricky Tasks**](examples/nli/scone/scone.ipynb) | N/A | Teaches LMs to reason about logical statements and negation. Uses GPT-4 to bootstrap few-shot CoT demonstations for GPT-3.5. Establishes a state-of-the-art result on [ScoNe](https://arxiv.org/abs/2305.19426). Contributed by [Chris Potts](https://twitter.com/ChrisGPotts/status/1740033519446057077). | | Beginner | [**Local Models & Custom Datasets**](skycamp2023.ipynb) | [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/skycamp2023.ipynb) | Illustrates two different things together: how to use local models (Llama-2-13B in particular) and how to use your own data examples for training and development. | Intermediate | [**The DSPy Paper**](https://arxiv.org/abs/2310.03714) | N/A | Sections 3, 5, 6, and 7 of the DSPy paper can be consumed as a tutorial. They include explained code snippets, results, and discussions of the abstractions and API. +| Intermediate | [**DSPy Assertions**](https://arxiv.org/abs/2312.13382) | [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/longformqa/longformqa_assertions.ipynb) | Introduces example of applying DSPy Assertions while generating long-form responses to questions with citations. Presents comparative evaluation in both zero-shot and compiled settings. | Intermediate | [**Finetuning for Complex Programs**](https://twitter.com/lateinteraction/status/1712135660797317577) | [](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/qa/hotpot/multihop_finetune.ipynb) | Teaches a local T5 model (770M) to do exceptionally well on HotPotQA. Uses only 200 labeled answers. 
Uses no hand-written prompts, no calls to OpenAI, and no labels for retrieval or reasoning. | Advanced | [**Information Extraction**](https://twitter.com/KarelDoostrlnck/status/1724991014207930696) | [](https://colab.research.google.com/drive/1CpsOiLiLYKeGrhmq579_FmtGsD5uZ3Qe) | Tackles extracting information from long articles (biomedical research papers). Combines in-context learning and retrieval to set SOTA on BioDEX. Contributed by [Karel D’Oosterlinck](https://twitter.com/KarelDoostrlnck/status/1724991014207930696). | @@ -124,7 +125,10 @@ You can find other examples tweeted by [@lateinteraction](https://twitter.com/la **Some other examples (not exhaustive, feel free to add more via PR):** -- [Generating long answers with citations via DSPy Assertions, by Arnav Singhvi](https://github.com/stanfordnlp/dspy/blob/main/examples/longformqa/longformqa_assertions.ipynb) +- Applying DSPy Assertions + - [Long-form Answer Generation with Citations, by Arnav Singhvi](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/longformqa/longformqa_assertions.ipynb) + - [Generating Answer Choices for Quiz Questions, by Arnav Singhvi](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/quiz/quiz_assertions.ipynb) + - [Generating Tweets for QA, by Arnav Singhvi](https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/examples/tweets/tweets_assertions.ipynb) - [Compiling LCEL runnables from LangChain in DSPy](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/compiling_langchain.ipynb) - [AI feedback, or writing LM-based metrics in DSPy](https://github.com/stanfordnlp/dspy/blob/main/examples/tweets/tweet_metric.py) - [DSPy Optimizers Benchmark on a bunch of different tasks, by Michael Ryan](https://github.com/stanfordnlp/dspy/tree/main/testing/tasks)
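
For readers who want a concrete feel for the pattern behind the Assertions examples linked above, here is a minimal, illustrative sketch. It is not taken from those notebooks: the signature, the module name, and the 280-character constraint are made up for illustration, and the `assert_transform_module` wrapping follows the usage shown in DSPy's assertion examples.

```python
import dspy
from dspy.primitives.assertions import assert_transform_module, backtrack_handler

class QuestionToTweet(dspy.Signature):
    """Answer the question in the form of an engaging tweet."""
    question = dspy.InputField()
    tweet = dspy.OutputField()

class TweetWriter(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(QuestionToTweet)

    def forward(self, question):
        pred = self.generate(question=question)
        # A soft constraint: if it fails, DSPy backtracks and retries the generation,
        # passing the message back to the LM as feedback.
        dspy.Suggest(len(pred.tweet) <= 280, "Keep the tweet within 280 characters.")
        return pred

# Wrapping the program activates backtracking for dspy.Suggest / dspy.Assert.
tweet_writer = assert_transform_module(TweetWriter(), backtrack_handler)
```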