diff --git a/.github/workflows/plan_and_execute_ci_dev_workflow.yml b/.github/workflows/plan_and_execute_ci_dev_workflow.yml new file mode 100644 index 000000000..9f17d8934 --- /dev/null +++ b/.github/workflows/plan_and_execute_ci_dev_workflow.yml @@ -0,0 +1,44 @@ +name: plan_and_execute_ci_dev_workflow + +on: + workflow_call: + workflow_dispatch: + inputs: + env_name: + type: string + description: "Execution Environment" + required: true + default: "dev" + use_case_base_path: + type: string + description: "The flow usecase to execute" + required: true + default: "plan_and_execute" + deployment_type: + type: string + description: "Determine type of deployment - aml, aks, docker, webapp" + required: true + push: + branches: + - main + - development + paths: + - 'plan_and_execute/**' + - '.github/**' + - 'llmops/**' + + +#===================================== +# Execute platform_ci_dev_workflow workflow for experiment, evaluation and deployment of flows +#===================================== +jobs: + execute-platform-flow-ci: + uses: ./.github/workflows/platform_ci_dev_workflow.yml + with: + env_name: ${{ inputs.env_name || 'dev'}} + use_case_base_path: ${{ inputs.use_case_base_path || 'plan_and_execute' }} + deployment_type: ${{ inputs.deployment_type|| 'aml' }} + secrets: + azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} + registry_details: ${{ secrets.DOCKER_IMAGE_REGISTRY }} + env_vars: ${{ secrets.ENV_VARS }} diff --git a/.github/workflows/plan_and_execute_pr_dev_workflow.yml b/.github/workflows/plan_and_execute_pr_dev_workflow.yml new file mode 100644 index 000000000..b73dda79b --- /dev/null +++ b/.github/workflows/plan_and_execute_pr_dev_workflow.yml @@ -0,0 +1,36 @@ +name: plan_and_execute_pr_dev_workflow + +on: + workflow_call: + inputs: + env_name: + type: string + description: "Execution Environment" + required: true + default: "dev" + use_case_base_path: + type: string + description: "The flow usecase to execute" + required: true + default: 
"plan_and_execute" + pull_request: + branches: + - main + - development + paths: + - 'plan_and_execute/**' + - '.github/**' + - 'llmops/**' + +#===================================== +# Execute platform_pr_dev_workflow workflow for experiment, evaluation and deployment of flows +#===================================== +jobs: + execute-platform-pr-workflow: + uses: ./.github/workflows/platform_pr_dev_workflow.yml + with: + env_name: ${{ inputs.env_name || 'pr'}} + use_case_base_path: ${{ inputs.use_case_base_path || 'plan_and_execute' }} + secrets: + azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} + env_vars: ${{ secrets.ENV_VARS }} diff --git a/plan_and_execute/.azure-pipelines/plan_and_execute_ci_dev_pipeline.yml b/plan_and_execute/.azure-pipelines/plan_and_execute_ci_dev_pipeline.yml new file mode 100644 index 000000000..6489030d0 --- /dev/null +++ b/plan_and_execute/.azure-pipelines/plan_and_execute_ci_dev_pipeline.yml @@ -0,0 +1,44 @@ +pr: none +trigger: + branches: + include: + - main + - development + paths: + include: + - .azure-pipelines/* + - llmops/* + - plan_and_execute/* + +pool: + vmImage: ubuntu-latest + + +variables: +- group: llmops_platform_dev_vg + +parameters: + - name: env_name + displayName: "Execution Environment" + default: "dev" + - name: use_case_base_path + displayName: "flow to execute" + default: "plan_and_execute" + - name: deployment_type + displayName: "Determine type of deployment - aml, aks, docker, webapp" + default: "aml" + +#===================================== +# Execute platform_ci_dev_pipeline pipeline for experiment, evaluation and deployment of flows +#===================================== +stages: + - template: ../../.azure-pipelines/platform_ci_dev_pipeline.yml + parameters: + RESOURCE_GROUP_NAME: $(rg_name) # Expected in llmops_platform_dev_vg + WORKSPACE_NAME: $(ws_name) # Expected in llmops_platform_dev_vg + KEY_VAULT_NAME: $(kv_name) # Expected in llmops_platform_dev_vg + exec_environment: ${{ parameters.env_name }} 
+ use_case_base_path: ${{ parameters.use_case_base_path }} + deployment_type: ${{ lower(parameters.deployment_type) }} + registry_details: '$(DOCKER_IMAGE_REGISTRY)' + env_vars: $(env_vars) \ No newline at end of file diff --git a/plan_and_execute/.azure-pipelines/plan_and_execute_pr_dev_pipeline.yml b/plan_and_execute/.azure-pipelines/plan_and_execute_pr_dev_pipeline.yml new file mode 100644 index 000000000..051d1dab6 --- /dev/null +++ b/plan_and_execute/.azure-pipelines/plan_and_execute_pr_dev_pipeline.yml @@ -0,0 +1,37 @@ +trigger: none +pr: + branches: + include: + - main + - development + paths: + include: + - .azure-pipelines/* + - llmops/* + - plan_and_execute/* + +pool: + vmImage: ubuntu-latest + +variables: +- group: llmops_platform_dev_vg + +parameters: + - name: env_name + displayName: "Execution Environment" + default: "pr" + - name: use_case_base_path + displayName: "Base path of flow to execute" + default: "plan_and_execute" + +#===================================== +# Execute platform_pr_dev_pipeline pipeline for experiment, evaluation and deployment of flows +#===================================== +stages: + - template: ../../.azure-pipelines/platform_pr_dev_pipeline.yml + parameters: + RESOURCE_GROUP_NAME: $(rg_name) + WORKSPACE_NAME: $(ws_name) + exec_environment: ${{ parameters.env_name }} + use_case_base_path: ${{ parameters.use_case_base_path }} + env_vars: $(env_vars) \ No newline at end of file diff --git a/plan_and_execute/README.md b/plan_and_execute/README.md new file mode 100644 index 000000000..6ee12a3fa --- /dev/null +++ b/plan_and_execute/README.md @@ -0,0 +1,43 @@ +# Plan and Execute with LLM Agents + +This is an example implementation of an agentic flow, capable of planning the steps needed to execute a user's request, then efficiently executing the plan through external function calling, and assembling a final response. 
+ +It implements the core ideas from these two papers: +- [ReWOO: Decoupling Reasoning from Observations for Efficient Augmented Language Models](https://arxiv.org/abs/2305.18323) +- [An LLM Compiler for Parallel Function Calling](https://arxiv.org/abs/2312.04511) + +The idea is to optimize the traditional loop of reasoning and acting for planning and executing tasks with LLM-based agents, usually implemented by the [ReAct pattern](https://arxiv.org/abs/2210.03629), where the planning and acting steps are interleaved in a sequential manner. + +By decoupling the planning from the acting, we make several potential optimizations possible: +- by having a separate LLM agent concerned with the planning only, we open up the possibility of fine-tuning a specialized model, which could lead to more efficiency and reduced costs, depending on the scenario. +- by having a separate component for orchestrating external tools calling for the execution of the plan steps, we can optimize for latency by executing functions in parallel, when they are not dependent from each other. + +This implementation also uses components of the [Microsoft's AutoGen framework](https://github.com/microsoft/autogen), to facilitate the interaction with LLMs in all modules and execute external functions, as explained in the Architecture Overview below. + +## Architecture Overview +Plan and Execute - Architecture Overview + +The main components of this implementation are depicted in the architecture diagram above. Planner, Executor, and Solver are implemented as Prompt flow Python nodes. Tools are implemented as standard Python functions. + +### Planner +The Planner is implemented as an [AutoGen AssistantAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/assistant_agent). Its system message with few shot examples is implemented as a Prompt flow prompt. Planner is aware of the available tools capabilities and how to use them. 
It takes as input a user's request and is instructed to generate a step-by-step plan to solve it. The plan is specified to be generated as a valid JSON object, with a list of descriptions for each plan step, and a list of functions to be called to solve each step. Dependencies between those functions are specified as variable assignments using a specific notation. + +### Executor +The Executor is implemented as a combination of custom Python code and an [AutoGen UserProxyAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/user_proxy_agent/). It takes the generated plan as input. The custom Python code takes care of fetching function calls from the plan, solving function dependencies, dispatching functions for execution, and collecting results. The AutoGen UserProxyAgent facilitates the actual execution of Python functions, including parallel execution, as it already has these functionalities implemented. The output of Executor is a list with the results from all plan steps. + +### Tools +Tools are implemented as standard Python functions, but strongly typed. In this way, they can seamlessly be registered within Autogen AssistantAgent and UserProxyAgent, without the need of maintaining a separate function definitions dictionary. + +### Solver +The Solver is also implemented as an AutoGen AssistantAgent. Its system message is implemented as a Prompt flow prompt. It takes as input the user's request and the plan steps results and is instructed to use the information from the plan step results to answer the user's request. + +## Prerequisites +You will need the following: + +- a Bing Web Search API key. You create one in your Azure subscription following the instructions [here](https://aka.ms/bingapisignup). +- a `gpt-35-turbo` and a `gpt-4o` model deployment on your Azure Open AI service. Both should be under the same service (same base URL). +- a Prompt flow custom connection. Please see below. 
+ +Create a Prompt flow connection of type Custom and name it `plan_execute_agent_connection`. To do so, go to your Azure Machine Learning workspace portal, click `Prompt flow` -> `Connections` -> `Create` -> `Custom`. Fill in the key-value pairs according to the figure below: + +Custom Connection Information diff --git a/plan_and_execute/configs/deployment_config.json b/plan_and_execute/configs/deployment_config.json new file mode 100644 index 000000000..f3c113636 --- /dev/null +++ b/plan_and_execute/configs/deployment_config.json @@ -0,0 +1,57 @@ +{ + "azure_managed_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An online endpoint serving a flow for plan_and_execute flow", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100", + "DEPLOYMENT_VM_SIZE": "Standard_F4s_v2", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ], + "kubernetes_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "ENDPOINT_NAME": "", + "ENDPOINT_DESC": "An kubernetes endpoint serving a flow for plan_and_execute", + "DEPLOYMENT_DESC": "prompt flow deployment", + "PRIOR_DEPLOYMENT_NAME": "", + "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "", + "CURRENT_DEPLOYMENT_NAME": "", + "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100, + "COMPUTE_NAME": "", + "DEPLOYMENT_VM_SIZE": "", + "DEPLOYMENT_CONDA_PATH": "", + "DEPLOYMENT_INSTANCE_COUNT": 1, + "CPU_ALLOCATION": "", + "MEMORY_ALLOCATION": "", + "ENVIRONMENT_VARIABLES": { + "example-name": "example-value" + } + } + ], + "webapp_endpoint":[ + { + "ENV_NAME": "dev", + "TEST_FILE_PATH": "sample-request.json", + "CONNECTION_NAMES": [""], + "REGISTRY_NAME": "", + "REGISTRY_RG_NAME": "", + "APP_PLAN_NAME": "", + "WEB_APP_NAME": "", + "WEB_APP_RG_NAME": "", 
+ "WEB_APP_SKU": "B3", + "USER_MANAGED_ID": "" + + } + ] +} \ No newline at end of file diff --git a/plan_and_execute/data/plan_and_execute_data.jsonl b/plan_and_execute/data/plan_and_execute_data.jsonl new file mode 100644 index 000000000..60ee0c52f --- /dev/null +++ b/plan_and_execute/data/plan_and_execute_data.jsonl @@ -0,0 +1,8 @@ +{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?"} +{"question": "What is the change rate of the U.S. inflation between 2022 and 2023? Was there an increase or decrease in the inflation?"} +{"question": "What is the percentage breakdown of the number of native speakers of the top 3 languages in the world?"} +{"question": "What was the percentage change of the Tokyo population from 2010 to 2020? Did it increase or decrease?"} +{"question": "Which Nobel Prize category has awarded the most prizes to women, and who was the latest female recipient?"} +{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches."} +{"question": "Who invented the first programmable computer, and what is the inventor's name and place of birth?"} +{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?"} \ No newline at end of file diff --git a/plan_and_execute/data/plan_and_execute_eval_data.jsonl b/plan_and_execute/data/plan_and_execute_eval_data.jsonl new file mode 100644 index 000000000..237b9322e --- /dev/null +++ b/plan_and_execute/data/plan_and_execute_eval_data.jsonl @@ -0,0 +1,8 @@ +{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?", "answer": "The total box office performance of 'Inception' and 'Interstellar' together was $1,509,329,092."} +{"question": "What is the change rate of the U.S. inflation between 2022 and 2023? 
Was there an increase or decrease in the inflation?", "answer": "The change rate of the U.S. inflation between 2022 and 2023 was approximately -48.75%. There was a decrease in the inflation rate."} +{"question": "What is the percentage breakdown of the number of native speakers of the top 3 languages in the world?", "answer": "The percentage breakdown of the number of native speakers of the top 3 languages in the world is as follows:\n\n- Chinese (Mandarin): approximately 52.06%\n- Spanish: approximately 26.89%\n- English: approximately 21.05%"} +{"question": "What was the percentage change of the Tokyo population from 2010 to 2020? Did it increase or decrease?", "answer": "The population of Tokyo increased by approximately 7.80% from 2010 to 2020."} +{"question": "Which Nobel Prize category has awarded the most prizes to women, and who was the latest female recipient?", "answer": "The Nobel Prize category that has awarded the most prizes to women is the Nobel Peace Prize, and the latest female recipient is Narges Mohammadi in 2023."} +{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches.", "answer": "Lionel Messi, Cristiano Ronaldo, and Neymar have scored a total of 296 international goals combined."} +{"question": "Who invented the first programmable computer, and what is the inventor's name and place of birth?", "answer": "Charles Babbage invented the first programmable computer. 
He was born at 44 Crosby Row, Walworth Road, London, England."} +{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?", "answer": "The GDP per capita of the wealthiest country, Monaco, is approximately three orders of magnitude higher than that of the poorest country, Burundi."} \ No newline at end of file diff --git a/plan_and_execute/data/plan_and_execute_pr_data.jsonl b/plan_and_execute/data/plan_and_execute_pr_data.jsonl new file mode 100644 index 000000000..11ad032fe --- /dev/null +++ b/plan_and_execute/data/plan_and_execute_pr_data.jsonl @@ -0,0 +1,3 @@ +{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?"} +{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches."} +{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?"} \ No newline at end of file diff --git a/plan_and_execute/environment/Dockerfile b/plan_and_execute/environment/Dockerfile new file mode 100644 index 000000000..a57a78f41 --- /dev/null +++ b/plan_and_execute/environment/Dockerfile @@ -0,0 +1,34 @@ +# syntax=docker/dockerfile:1 +FROM docker.io/continuumio/miniconda3:latest + +WORKDIR / + +COPY ./flow/requirements.txt /flow/requirements.txt + +# gcc is for build psutil in MacOS +RUN apt-get update && apt-get install -y runit gcc + +# create conda environment +RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \ + conda run -n promptflow-serve \ + pip install -r /flow/requirements.txt && \ + conda run -n promptflow-serve pip install keyrings.alt && \ + conda run -n promptflow-serve pip install gunicorn==20.1.0 && \ + conda run -n promptflow-serve pip cache purge && \ + conda clean -a -y + +COPY ./flow /flow + + +EXPOSE 8080 + +COPY ./connections/* /connections/ + +# reset runsvdir +RUN rm -rf /var/runit 
+COPY ./runit /var/runit +# grant permission +RUN chmod -R +x /var/runit + +COPY ./start.sh / +CMD ["bash", "./start.sh"] diff --git a/plan_and_execute/experiment.dev.yaml b/plan_and_execute/experiment.dev.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/plan_and_execute/experiment.pr.yaml b/plan_and_execute/experiment.pr.yaml new file mode 100644 index 000000000..41564cfec --- /dev/null +++ b/plan_and_execute/experiment.pr.yaml @@ -0,0 +1,10 @@ +name: plan_and_execute + +datasets: +- name: plan_and_execute_pr_data + source: data/plan_and_execute_pr_data.jsonl + description: "This dataset is for pr validation only." + mappings: + question: "${data.question}" + +evaluators: \ No newline at end of file diff --git a/plan_and_execute/experiment.yaml b/plan_and_execute/experiment.yaml new file mode 100644 index 000000000..f0e4e01a9 --- /dev/null +++ b/plan_and_execute/experiment.yaml @@ -0,0 +1,41 @@ +name: plan_and_execute +flow: flows/standard + +connections: +- name: plan_execute_agent_connection + connection_type: CustomConnection + secrets: + aoai_api_key: ${aoai_api_key} + bing_api_key: ${bing_api_key} + configs: + aoai_base_url: ${aoai_base_url} + aoai_api_version: 2023-07-01-preview + bing_endpoint: https://api.bing.microsoft.com/v7.0/search + aoai_model_gpt4: gpt-4o + aoai_model_gpt35: gpt-35-turbo-16k + +datasets: +- name: plan_and_execute_data + source: data/plan_and_execute_data.jsonl + description: "This dataset is for prompt experiments." + mappings: + question: "${data.question}" + +evaluators: +- name: plan_and_execute_evaluation_flow + flow: flows/evaluation + datasets: + - name: plan_and_execute_eval_data + reference: plan_and_execute_data + source: data/plan_and_execute_eval_data.jsonl + description: "This dataset is for evaluating flows." 
+ mappings: + groundtruth: "${data.answer}" + prediction: "${run.outputs.answer}" + json_schema_path: plan_and_execute_json_schema.json + json_string: "${run.outputs.plan}" + plan_steps_count: "${run.outputs.number_of_steps}" + steps: "${run.outputs.steps}" + question: "${data.question}" + answer: "${run.outputs.answer}" + ground_truth: "${data.answer}" diff --git a/plan_and_execute/figs/architecture.svg b/plan_and_execute/figs/architecture.svg new file mode 100644 index 000000000..0e184f521 --- /dev/null +++ b/plan_and_execute/figs/architecture.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/plan_and_execute/figs/connection.svg b/plan_and_execute/figs/connection.svg new file mode 100644 index 000000000..494a3eb36 --- /dev/null +++ b/plan_and_execute/figs/connection.svg @@ -0,0 +1 @@ +<your AOAI API key><your Bing Web Search API key><your GPT-4 deployment name><your GPT-35 deployment name><your AOAI base URL>2023-12-01-preview<Bing Web Search API endpoint> \ No newline at end of file diff --git a/plan_and_execute/flows/evaluation/connection_utils.py b/plan_and_execute/flows/evaluation/connection_utils.py new file mode 100644 index 000000000..7bea784d8 --- /dev/null +++ b/plan_and_execute/flows/evaluation/connection_utils.py @@ -0,0 +1,41 @@ +"""A helper class to provide custom connection information in promptflow.""" +from promptflow.connections import CustomStrongTypeConnection +from promptflow.contracts.types import Secret + + +class CustomConnection(CustomStrongTypeConnection): + """Define the custom connection keys and values. + + :param aoai_api_key: The api key for Azure Open AI. + :type aoai_api_key: Secret + :param bing_api_key: The api key for the Bing Search. + :type bing_api_key: Secret + :param aoai_model_gpt4: The deployment name for the GPT-4 model. + :type aoai_model_gpt4: String + :param aoai_model_gpt35: The deployment name for the GPT-3.5 model. 
+ :type aoai_model_gpt35: String + :param aoai_base_url: The base url for the Azure Open AI. + :type aoai_base_url: String + :param aoai_api_version: The api version for the Azure Open AI. + :type aoai_api_version: String + :param bing_endpoint: The endpoint for the Bing Search. + :type bing_endpoint: String + """ + + aoai_api_key: Secret + bing_api_key: Secret + aoai_model_gpt4: str + aoai_model_gpt35: str + aoai_base_url: str + aoai_api_version: str + bing_endpoint: str + + +class ConnectionInfo(object): + """Singleton class to store connection information.""" + + def __new__(cls): + """Store connection information.""" + if not hasattr(cls, "instance"): + cls.instance = super(ConnectionInfo, cls).__new__(cls) + return cls.instance diff --git a/plan_and_execute/flows/evaluation/executor_evaluator.py b/plan_and_execute/flows/evaluation/executor_evaluator.py new file mode 100644 index 000000000..dfa10abdb --- /dev/null +++ b/plan_and_execute/flows/evaluation/executor_evaluator.py @@ -0,0 +1,28 @@ +"""Executor evaluation node for the plan_and_execute evaluation flow.""" +from promptflow.core import tool +import re + + +@tool +def executor_evaluator_tool(plan_steps_count: str, steps: str) -> str: + """ + Validate whether the generated number of steps is equal to the expected + number of steps according to the execution plan. + + :param plan_steps_count: The number of plan steps expected in the result. + :param steps: The generated steps to validate. + :return: The list 'missing_steps' listing any missing step IDs. 
+ """ + step_pattern = re.compile(r"#E(\d+)\s*=\s*.*?(?=#E\d+\s*=|$)", re.DOTALL) + found_steps = step_pattern.findall(steps) + + # Convert found steps to a set of integers + found_steps = set(map(int, found_steps)) + + # Expected steps based on plan_steps_count + expected_steps = set(range(1, int(plan_steps_count) + 1)) + + # Determine missing steps + missing_steps = expected_steps - found_steps + + return {"missing_steps": list(missing_steps)} diff --git a/plan_and_execute/flows/evaluation/flow.dag.yaml b/plan_and_execute/flows/evaluation/flow.dag.yaml new file mode 100644 index 000000000..d6bdc9419 --- /dev/null +++ b/plan_and_execute/flows/evaluation/flow.dag.yaml @@ -0,0 +1,106 @@ +inputs: + json_schema_path: + type: string + default: plan_and_execute_json_schema.json + json_string: + type: string + default: '{"Plan":["Search the Web for the U.S. inflation rate in 2022.","Search + the Web for the U.S. inflation rate in 2023.","Use Math to calculate the change + rate of the U.S. inflation between 2022 and 2023.","Use LLM to determine if + there was an increase or decrease in the inflation based on the change rate."],"Functions":[{"id":"#E1","function":{"arguments":"{\"query\": + \"U.S. inflation rate in 2022\"}","name":"web_tool"},"type":"function"},{"id":"#E2","function":{"arguments":"{\"query\": + \"U.S. inflation rate in 2023\"}","name":"web_tool"},"type":"function"},{"id":"#E3","function":{"arguments":"{\"problem_description\": + \"Calculate the change rate of the U.S. inflation between 2022 and 2023\", \"context\": + \"#E1, #E2\"}","name":"math_tool"},"type":"function"},{"id":"#E4","function":{"arguments":"{\"request\": + \"Determine if there was an increase or decrease in the U.S. inflation between + 2022 and 2023\", \"context\": \"#E3\"}","name":"llm_tool"},"type":"function"}]}' + plan_steps_count: + type: string + default: "4" + steps: + type: string + default: >- + #E1 = The U.S. inflation rate in 2022 is not provided in the given + context. 
#E2 = The U.S. inflation rate in 2023 was 3.4%. + + #E3 = The change rate of the U.S. inflation between 2022 (assuming an inflation + rate of 6.5%) and 2023 (with an inflation rate of 3.4%) is approximately -47.69%. + + + + This indicates a decrease of about 47.69% in the inflation rate from 2022 to + 2023. #E4 = There was a decrease in the U.S. inflation between 2022 and 2023. + question: + type: string + default: What is the change rate of the U.S. inflation between 2022 and 2023? + Was there an increase or decrease in the inflation? + answer: + type: string + default: The change rate of U.S. inflation between 2022 and 2023 is approximately + -47.69%, indicating a decrease in the inflation rate. + ground_truth: + type: string + default: The change rate of the U.S. inflation between 2022 and 2023 was approximately + -48.75%. There was a decrease in the inflation rate. +outputs: + json_evaluator_result: + type: string + reference: ${json_evaluator.output} + executor_evaluator_result: + type: string + reference: ${executor_evaluator.output} + similarity_evaluator_result: + type: string + reference: ${similarity_evaluator.output} + groundedness_evaluator_result: + type: string + reference: ${groundedness_evaluator.output} + relevance_evaluator_result: + type: string + reference: ${relevance_evaluator.output} +nodes: +- name: json_evaluator + type: python + source: + type: code + path: json_evaluator.py + inputs: + json_schema_path: ${inputs.json_schema_path} + json_string: ${inputs.json_string} +- name: executor_evaluator + type: python + source: + type: code + path: executor_evaluator.py + inputs: + plan_steps_count: ${inputs.plan_steps_count} + steps: ${inputs.steps} +- name: similarity_evaluator + type: python + source: + type: code + path: similarity_evaluator.py + inputs: + connection: plan_execute_agent_connection + question: ${inputs.question} + answer: ${inputs.answer} + ground_truth: ${inputs.ground_truth} +- name: groundedness_evaluator + type: python + 
source: + type: code + path: groundedness_evaluator.py + inputs: + connection: plan_execute_agent_connection + steps: ${inputs.steps} + answer: ${inputs.answer} +- name: relevance_evaluator + type: python + source: + type: code + path: relevance_evaluator.py + inputs: + connection: plan_execute_agent_connection + question: ${inputs.question} + answer: ${inputs.answer} + steps: ${inputs.steps} diff --git a/plan_and_execute/flows/evaluation/groundedness_evaluator.py b/plan_and_execute/flows/evaluation/groundedness_evaluator.py new file mode 100644 index 000000000..8cb9313e0 --- /dev/null +++ b/plan_and_execute/flows/evaluation/groundedness_evaluator.py @@ -0,0 +1,30 @@ +"""Groundedness evaluation node for the plan_and_execute evaluation flow.""" +from promptflow.core import tool +from promptflow.core import AzureOpenAIModelConfiguration +from connection_utils import CustomConnection +from promptflow.evals.evaluators import GroundednessEvaluator + + +@tool +def groundedness_evaluator_tool( + connection: CustomConnection, steps: str, answer: str +) -> dict: + """ + Evaluate the groundedness between answer and steps. + + :param connection: The connection object. + :param steps: The context to ground the answer to. + :param answer: The answer to evaluate against the context. + :return: A dictionary with 'gpt_groundedness' + indicating the grounding score. 
+ """ + model_config = AzureOpenAIModelConfiguration( + azure_endpoint=connection.configs["aoai_base_url"], + api_key=connection.secrets["aoai_api_key"], + api_version=connection.configs["aoai_api_version"], + azure_deployment=connection.configs["aoai_model_gpt4"], + ) + + groundedness_evaluator = GroundednessEvaluator(model_config) + + return groundedness_evaluator(answer=answer, context=steps) diff --git a/plan_and_execute/flows/evaluation/json_evaluator.py b/plan_and_execute/flows/evaluation/json_evaluator.py new file mode 100644 index 000000000..bd7bf730e --- /dev/null +++ b/plan_and_execute/flows/evaluation/json_evaluator.py @@ -0,0 +1,67 @@ +"""JSON evaluation node for the plan_and_execute evaluation flow.""" +from promptflow.core import tool +import json +from jsonschema import validate, ValidationError, SchemaError + + +def _load_json(json_string): + """ + Try to load a JSON string into a JSON object. + + :param json_string: The JSON string to load. + :return: The loaded JSON object if successful, + raises a ValueError otherwise. + """ + try: + json_object = json.loads(json_string) + return json_object + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON string: {e}") + + +def _validate_json(json_object, json_schema): + """ + Validate a JSON object against the specified schema. + + :param json_object: The JSON object to validate. + :param json_schema: The JSON schema to validate against. + :return: True if the JSON object is valid, + raises a ValidationError otherwise. + """ + try: + validate(instance=json_object, schema=json_schema) + return True + except ValidationError as e: + raise ValidationError(f"JSON validation error: {e}") + except SchemaError as e: + raise SchemaError(f"Invalid schema: {e}") + + +@tool +def json_evaluator_tool(json_schema_path: str, json_string: str) -> dict: + """ + Evaluate whether a JSON string can be loaded + and validated against the schema. 
+ + :param json_schema_path: File path to the JSON schema to validate against. + :param json_string: The JSON string to evaluate. + :return: A dictionary with 'valid_json' and 'valid_schema' + indicating the validity. + """ + with open(json_schema_path, "r") as schema_file: + json_schema = json.load(schema_file) + + result = {"valid_json": 0, "valid_schema": 0} + + try: + json_object = _load_json(json_string) + result["valid_json"] = 1 + try: + _validate_json(json_object, json_schema) + result["valid_schema"] = 1 + except (ValidationError, SchemaError): + pass + except ValueError: + pass + + return result diff --git a/plan_and_execute/flows/evaluation/plan_and_execute_json_schema.json b/plan_and_execute/flows/evaluation/plan_and_execute_json_schema.json new file mode 100644 index 000000000..2425bf53d --- /dev/null +++ b/plan_and_execute/flows/evaluation/plan_and_execute_json_schema.json @@ -0,0 +1,46 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "Plan": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "Functions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^#E[1-9][0-9]*$" + }, + "function": { + "type": "object", + "properties": { + "arguments": { + "type": "string", + "pattern": "^{\".*\"}$" + }, + "name": { + "type": "string" + } + }, + "required": ["arguments", "name"] + }, + "type": { + "type": "string", + "enum": ["function"] + } + }, + "required": ["id", "function", "type"] + }, + "minItems": 1 + } + }, + "required": ["Plan", "Functions"] + } + \ No newline at end of file diff --git a/plan_and_execute/flows/evaluation/relevance_evaluator.py b/plan_and_execute/flows/evaluation/relevance_evaluator.py new file mode 100644 index 000000000..040d5c9d2 --- /dev/null +++ b/plan_and_execute/flows/evaluation/relevance_evaluator.py @@ -0,0 +1,30 @@ +"""Relevance evaluation node for the plan_and_execute 
evaluation flow.""" +from promptflow.core import tool +from promptflow.core import AzureOpenAIModelConfiguration +from connection_utils import CustomConnection +from promptflow.evals.evaluators import RelevanceEvaluator + + +@tool +def relevance_evaluator_tool( + connection: CustomConnection, question: str, answer: str, steps: str +) -> dict: + """ + Evaluate the relevance of the answer to the question, given the steps. + + :param connection: The connection object. + :param question: The question to evaluate. + :param answer: The answer to evaluate. + :param steps: The context to evaluate against. + :return: A dictionary with 'gpt_relevance' indicating the relevance score. + """ + model_config = AzureOpenAIModelConfiguration( + azure_endpoint=connection.configs["aoai_base_url"], + api_key=connection.secrets["aoai_api_key"], + api_version=connection.configs["aoai_api_version"], + azure_deployment=connection.configs["aoai_model_gpt4"], + ) + + relevance_evaluator = RelevanceEvaluator(model_config) + + return relevance_evaluator(question=question, answer=answer, context=steps) diff --git a/plan_and_execute/flows/evaluation/requirements.txt b/plan_and_execute/flows/evaluation/requirements.txt new file mode 100644 index 000000000..34a541bf5 --- /dev/null +++ b/plan_and_execute/flows/evaluation/requirements.txt @@ -0,0 +1,6 @@ +pyautogen==0.2.27 +bs4==0.0.2 +wikipedia==1.4.0 +numexpr==2.10.0 +jsonschema==4.22.0 +promptflow-evals \ No newline at end of file diff --git a/plan_and_execute/flows/evaluation/similarity_evaluator.py b/plan_and_execute/flows/evaluation/similarity_evaluator.py new file mode 100644 index 000000000..645a5aea2 --- /dev/null +++ b/plan_and_execute/flows/evaluation/similarity_evaluator.py @@ -0,0 +1,33 @@ +"""Similarity evaluation node for the plan_and_execute evaluation flow.""" +from promptflow.core import tool +from promptflow.core import AzureOpenAIModelConfiguration +from connection_utils import CustomConnection +from promptflow.evals.evaluators 
import SimilarityEvaluator + + +@tool +def similarity_evaluator_tool( + connection: CustomConnection, question: str, answer: str, ground_truth: str +) -> dict: + """ + Evaluate the similarity between answer and ground_truth. + + :param connection: The connection object. + :param question: The question to evaluate. + :param answer: The answer to evaluate. + :param ground_truth: The ground truth answer to evaluate against. + :return: A dictionary with 'gpt_similarity' + indicating the similarity score. + """ + model_config = AzureOpenAIModelConfiguration( + azure_endpoint=connection.configs["aoai_base_url"], + api_key=connection.secrets["aoai_api_key"], + api_version=connection.configs["aoai_api_version"], + azure_deployment=connection.configs["aoai_model_gpt4"], + ) + + similarity_evaluator = SimilarityEvaluator(model_config) + + return similarity_evaluator( + question=question, answer=answer, ground_truth=ground_truth + ) diff --git a/plan_and_execute/flows/standard/connection_utils.py b/plan_and_execute/flows/standard/connection_utils.py new file mode 100644 index 000000000..7bea784d8 --- /dev/null +++ b/plan_and_execute/flows/standard/connection_utils.py @@ -0,0 +1,41 @@ +"""A helper class to provide custom connection information in promptflow.""" +from promptflow.connections import CustomStrongTypeConnection +from promptflow.contracts.types import Secret + + +class CustomConnection(CustomStrongTypeConnection): + """Define the custom connection keys and values. + + :param aoai_api_key: The api key for Azure Open AI. + :type aoai_api_key: Secret + :param bing_api_key: The api key for the Bing Search. + :type bing_api_key: Secret + :param aoai_model_gpt4: The deployment name for the GPT-4 model. + :type aoai_model_gpt4: String + :param aoai_model_gpt35: The deployment name for the GPT-3.5 model. + :type aoai_model_gpt35: String + :param aoai_base_url: The base url for the Azure Open AI. 
+ :type aoai_base_url: String + :param aoai_api_version: The api version for the Azure Open AI. + :type aoai_api_version: String + :param bing_endpoint: The endpoint for the Bing Search. + :type bing_endpoint: String + """ + + aoai_api_key: Secret + bing_api_key: Secret + aoai_model_gpt4: str + aoai_model_gpt35: str + aoai_base_url: str + aoai_api_version: str + bing_endpoint: str + + +class ConnectionInfo(object): + """Singleton class to store connection information.""" + + def __new__(cls): + """Store connection information.""" + if not hasattr(cls, "instance"): + cls.instance = super(ConnectionInfo, cls).__new__(cls) + return cls.instance diff --git a/plan_and_execute/flows/standard/docker/dockerfile b/plan_and_execute/flows/standard/docker/dockerfile new file mode 100644 index 000000000..942c8c97e --- /dev/null +++ b/plan_and_execute/flows/standard/docker/dockerfile @@ -0,0 +1,3 @@ +FROM mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest +COPY ./requirements.txt . +RUN pip install -r requirements.txt \ No newline at end of file diff --git a/plan_and_execute/flows/standard/executor.py b/plan_and_execute/flows/standard/executor.py new file mode 100644 index 000000000..b1c10c60d --- /dev/null +++ b/plan_and_execute/flows/standard/executor.py @@ -0,0 +1,288 @@ +"""Executor node of the plan_and_execute flow.""" + +import concurrent.futures +import json +from promptflow.core import tool +from autogen import UserProxyAgent, AssistantAgent +from connection_utils import CustomConnection, ConnectionInfo +from tools import register_tools + + +def prepare_connection_info(connection): + """Prepare the connection info for the agents.""" + return { + "aoai_model_gpt35": connection.configs["aoai_model_gpt35"], + "aoai_model_gpt4": connection.configs["aoai_model_gpt4"], + "aoai_api_key": connection.secrets["aoai_api_key"], + "aoai_base_url": connection.configs["aoai_base_url"], + "aoai_api_version": connection.configs["aoai_api_version"], + "bing_api_key": 
connection.secrets["bing_api_key"], + "bing_endpoint": connection.configs["bing_endpoint"], + } + + +def prepare_executor(connection_info): + """Prepare the executor agent.""" + config_list_gpt35 = [ + { + "model": connection_info["aoai_model_gpt35"], + "api_key": connection_info["aoai_api_key"], + "base_url": connection_info["aoai_base_url"], + "api_type": "azure", + "api_version": connection_info["aoai_api_version"], + } + ] + executor = UserProxyAgent( + name="EXECUTOR", + description=( + "An agent that acts as a proxy for the user and executes the " + "suggested function calls from PLANNER." + ), + code_execution_config=False, + llm_config={ + "config_list": config_list_gpt35, + "timeout": 60, + "cache_seed": None, + }, + human_input_mode="NEVER", + ) + return executor, config_list_gpt35 + + +def llm_tool(request, context, config_list_gpt35): + """Define the LLM agent.""" + llm_assistant = AssistantAgent( + name="LLM_ASSISTANT", + description=( + "An agent expert in answering requests by analyzing and " + "extracting information from the given context." + ), + system_message=( + "Given a request and optionally some context with potentially " + "relevant information to answer it, analyze the context and " + "extract the information needed to answer the request. Then, " + "create a sentence that answers the request. You must strictly " + "limit your response to only what was asked in the request." 
+ ), + code_execution_config=False, + llm_config={ + "config_list": config_list_gpt35, + "timeout": 60, + "temperature": 0.3, + "cache_seed": None, + }, + ) + + llm_assistant.clear_history() + + message = f""" + Request: + {request} + + Context: + {context} + """ + try: + reply = llm_assistant.generate_reply( + messages=[{"content": message, "role": "user"}] + ) + return reply + except Exception as e: + return f"Error: {str(e)}" + + +def substitute_dependency( + id, original_argument_value, dependency_value, config_list_gpt35 +): + """Substitute dependencies in the execution plan.""" + instruction = ( + "Extract the entity name or fact from the dependency value in a way " + "that makes sense to use it to substitute the variable #E in the " + "original argument value. Do not include any other text in your " + "response, other than the entity name or fact extracted." + ) + + context = f""" + original argument value: + {original_argument_value} + + dependency value: + {dependency_value} + + extracted fact or entity: + + """ + + return llm_tool(instruction, context, config_list_gpt35) + + +def has_unresolved_dependencies(item, resolved_ids, plan_ids): + """Check for unresolved dependencies in a plan step.""" + try: + args = json.loads(item["function"]["arguments"]) + except json.JSONDecodeError: + return False + + for arg in args.values(): + if isinstance(arg, str) and any( + ref_id + for ref_id in plan_ids + if ref_id not in resolved_ids and ref_id in arg + ): + return True + return False + + +def submit_task(item_id, item, thread_executor, executor_agent, futures): + """Submit a task for execution.""" + arguments = item["function"]["arguments"] + future = thread_executor.submit( + executor_agent.execute_function, + {"name": item["function"]["name"], "arguments": arguments}, + ) + futures[item_id] = future + + +def process_done_future( + future, + futures, + results, + resolved_ids, + plan_ids, + thread_executor, + executor_agent, + config_list_gpt35, +): + """Process 
a completed future and trigger the submission of ready tasks.""" + item_id = next((id for id, f in futures.items() if f == future), None) + if item_id: + _, result = future.result() + results[item_id] = result + resolved_ids.add(item_id) + del futures[item_id] + submit_ready_tasks( + plan_ids, + resolved_ids, + futures, + results, + thread_executor, + executor_agent, + config_list_gpt35, + ) + + +def submit_ready_tasks( + plan_ids, + resolved_ids, + futures, + results, + thread_executor, + executor_agent, + config_list_gpt35, +): + """Submit plan tasks that have all dependencies resolved + and are ready to be executed.""" + for next_item_id, next_item in plan_ids.items(): + if ( + next_item_id not in resolved_ids + and next_item_id not in futures + and not has_unresolved_dependencies( + next_item, resolved_ids, plan_ids + ) + ): + update_and_submit_task( + next_item_id, + next_item, + thread_executor, + executor_agent, + futures, + results, + config_list_gpt35, + ) + + +def update_and_submit_task( + item_id, item, thread_executor, executor_agent, + futures, results, config_list_gpt35 +): + """Update the arguments of a task with dependency results + and submit it for execution.""" + updated_arguments = json.loads(item["function"]["arguments"]) + for arg_key, arg_value in updated_arguments.items(): + if isinstance(arg_value, str): + for res_id, res in results.items(): + if arg_key == "context": + arg_value = arg_value.replace(res_id, res["content"]) + else: + arg_value = arg_value.replace( + res_id, + substitute_dependency( + res_id, arg_value, res["content"], + config_list_gpt35 + ), + ) + updated_arguments[arg_key] = arg_value + future = thread_executor.submit( + executor_agent.execute_function, + {"name": item["function"]["name"], + "arguments": json.dumps(updated_arguments)}, + ) + futures[item_id] = future + + +def execute_plan_parallel(plan, executor_agent, config_list_gpt35): + """Execute the plan in parallel.""" + plan_ids = {item["id"]: item for item in 
plan} + results = {} + resolved_ids = set() + futures = {} + + with concurrent.futures.ThreadPoolExecutor() as thread_executor: + for item_id, item in plan_ids.items(): + if not has_unresolved_dependencies(item, resolved_ids, plan_ids): + submit_task( + item_id, item, thread_executor, + executor_agent, futures + ) + + while futures: + done, _ = concurrent.futures.wait( + futures.values(), + return_when=concurrent.futures.FIRST_COMPLETED + ) + for future in done: + process_done_future( + future, + futures, + results, + resolved_ids, + plan_ids, + thread_executor, + executor_agent, + config_list_gpt35, + ) + + result_str = "\n".join( + [f"{key} = {value['content']}" for key, value in results.items()] + ) + return result_str + + +@tool +def worker_tool(connection: CustomConnection, plan: str) -> str: + """Execute the plan generated by the planner node.""" + connection_info = prepare_connection_info(connection) + ConnectionInfo().connection_info = connection_info + + executor, config_list_gpt35 = prepare_executor(connection_info) + register_tools(executor) + + plan = json.loads(plan) + executor_reply = execute_plan_parallel( + plan["Functions"], executor, config_list_gpt35 + ) + number_of_steps = len(plan["Plan"]) + + return { + "executor_reply": executor_reply, "number_of_steps": number_of_steps + } diff --git a/plan_and_execute/flows/standard/flow.dag.yaml b/plan_and_execute/flows/standard/flow.dag.yaml new file mode 100644 index 000000000..09d8a9c65 --- /dev/null +++ b/plan_and_execute/flows/standard/flow.dag.yaml @@ -0,0 +1,58 @@ +inputs: + question: + type: string + default: What was the total box office performance of 'Inception' and 'Interstellar' + together? 
+outputs: + plan: + type: string + reference: ${planner.output} + steps: + type: string + reference: ${executor.output.executor_reply} + answer: + type: string + reference: ${solver.output} + number_of_steps: + type: string + reference: ${executor.output.number_of_steps} +nodes: +- name: planner_system_prompt + type: prompt + source: + type: code + path: planner_system_prompt.jinja2 + inputs: {} +- name: planner + type: python + source: + type: code + path: planner.py + inputs: + connection: plan_execute_agent_connection + system_message: ${planner_system_prompt.output} + question: ${inputs.question} +- name: executor + type: python + source: + type: code + path: executor.py + inputs: + connection: plan_execute_agent_connection + plan: ${planner.output} +- name: solver_system_prompt + type: prompt + source: + type: code + path: solver_system_prompt.jinja2 + inputs: {} +- name: solver + type: python + source: + type: code + path: solver.py + inputs: + connection: plan_execute_agent_connection + system_message: ${solver_system_prompt.output} + question: ${inputs.question} + results: ${executor.output} diff --git a/plan_and_execute/flows/standard/planner.py b/plan_and_execute/flows/standard/planner.py new file mode 100644 index 000000000..b680b6465 --- /dev/null +++ b/plan_and_execute/flows/standard/planner.py @@ -0,0 +1,47 @@ +"""Planner node for the plan_and_execute flow.""" +from promptflow.core import tool +from autogen import AssistantAgent +from connection_utils import CustomConnection +from tools import register_tools + + +@tool +def planner_tool( + connection: CustomConnection, system_message: str, question: str +) -> str: + """Generate a step-by-step execution plan to solve the user's request.""" + config_list_gpt4 = [ + { + "model": connection.configs["aoai_model_gpt4"], + "api_key": connection.secrets["aoai_api_key"], + "base_url": connection.configs["aoai_base_url"], + "api_type": "azure", + "api_version": connection.configs["aoai_api_version"], + } + ] + 
+ planner = AssistantAgent( + name="PLANNER", + description=""" + An agent expert in creating a step-by-step execution plan + to solve the user's request. + """, + system_message=system_message, + code_execution_config=False, + llm_config={ + "config_list": config_list_gpt4, + "temperature": 0, + "timeout": 120, + "cache_seed": None, + }, + ) + + register_tools(planner) + + planner_reply = planner.generate_reply( + messages=[{"content": question, "role": "user"}] + ) + planner_reply = planner_reply.replace( + "```json", "").replace("```", "").strip() + + return planner_reply diff --git a/plan_and_execute/flows/standard/planner_system_prompt.jinja2 b/plan_and_execute/flows/standard/planner_system_prompt.jinja2 new file mode 100644 index 000000000..5e9347adc --- /dev/null +++ b/plan_and_execute/flows/standard/planner_system_prompt.jinja2 @@ -0,0 +1,119 @@ +For the given question, you make a plan that can solve the problem step by step. For each plan step, +indicate which external function should be used to retrieve evidence, together with the function arguments. +For each function call, you must use '#En' as the id for the function call, where n is the plan step number. +When you need to reference the output of a function call as an argument or part of an argument to another function call, +you must reference the corresponding '#En' in the argument, as a comma separated string, such as: "#E1, #E2, ...". +You must follow the JSON schema provided in the examples below. + +Examples: + +Question: Were Pavel Urysohn and Leonid Levin known for the same type of work? 
+{"Plan": ["Search Wikipedia for Pavel Urysohn.",
+          "Search Wikipedia for Leonid Levin.",
+          "Use LLM to compare the two and determine if they were known for the same type of work."],
+ "Functions": [
+    {"id": "#E1",
+     "function": {
+          "arguments": "{\"query\": \"Pavel Urysohn\"}",
+          "name": "wikipedia_tool"},
+     "type": "function"},
+    {"id": "#E2",
+     "function": {
+          "arguments": "{\"query\": \"Leonid Levin\"}",
+          "name": "wikipedia_tool"},
+     "type": "function"},
+    {"id": "#E3",
+     "function": {
+          "arguments": "{\"request\": \"Were Pavel Urysohn and Leonid Levin known for the same type of work\", \"context\": \"#E1, #E2\"}",
+          "name": "llm_tool"},
+     "type": "function"}]}
+
+Question: What is the hometown of the 2024 australian open winner?
+{"Plan": ["Search the Web for the name of the 2024 Australian Open winner.",
+          "Search Wikipedia for more information about the 2024 Australian Open winner.",
+          "Use LLM to find the hometown of the 2024 Australian Open winner."],
+ "Functions": [
+    {"id": "#E1",
+     "function": {
+          "arguments": "{\"query\": \"2024 Australian Open winner\"}",
+          "name": "web_tool"},
+     "type": "function"},
+    {"id": "#E2",
+     "function": {
+          "arguments": "{\"query\": \"#E1\"}",
+          "name": "wikipedia_tool"},
+     "type": "function"},
+    {"id": "#E3",
+     "function": {
+          "arguments": "{\"request\": \"Find the hometown of the 2024 Australian Open Winner\", \"context\": \"#E1, #E2\"}",
+          "name": "llm_tool"},
+     "type": "function"}]}
+
+Question: What is the combined age of the latest 2 former United States presidents when they left office? 
+{"Plan": ["Search the Web for the age of the most recent former President of the United States when they left office.", + "Search the Web for the age of the second most recent former President of the United States when they left office.", + "Use Math to add the ages of the two former Presidents when they left office."], + "Functions": [ + {"id": "#E1", + "function": { + "arguments": "{\"query\": \"Age of most recent former President of United States when left office\"}", + "name": "web_tool"}, + "type": "function"}, + {"id": "#E2", + "function": { + "arguments": "{\"query\": \"Age of second most recent former President of United States when left office\"}", + "name": "web_tool"}, + "type": "function"}, + {"id": "#E3", + "function": { + "arguments": "{\"problem_description\": \"Add the ages of the two former Presidents when they left office\", \"context\": \"#E1, #E2\"}", + "name": "math_tool"}, + "type": "function"}]} + +Question: What is the sum of the GDPs of the top 3 countries with the highest GDPs? 
+{"Plan": ["Search the Web for the GDPs of the top 3 countries with the highest GDPs.",
+          "Use LLM to extract the GDP of the first country with the highest GDP.",
+          "Use LLM to extract the GDP of the second country with the highest GDP.",
+          "Use LLM to extract the GDP of the third country with the highest GDP.",
+          "Use Math to add the GDPs of the top 3 countries with the highest GDPs."],
+"Functions": [
+{"id": "#E1",
+"function": {
+     "arguments": "{\"query\": \"GDPs of the top 3 countries with the highest GDPs\"}",
+     "name": "web_tool"},
+"type": "function"},
+{"id": "#E2",
+"function": {
+     "arguments": "{\"request\": \"GDP of the first country with the highest GDP\", \"context\": \"#E1\"}",
+     "name": "llm_tool"},
+"type": "function"},
+{"id": "#E3",
+"function": {
+     "arguments": "{\"request\": \"GDP of the second country with the highest GDP\", \"context\": \"#E1\"}",
+     "name": "llm_tool"},
+"type": "function"},
+{"id": "#E4",
+"function": {
+     "arguments": "{\"request\": \"GDP of the third country with the highest GDP\", \"context\": \"#E1\"}",
+     "name": "llm_tool"},
+"type": "function"},
+{"id": "#E5",
+"function": {
+     "arguments": "{\"problem_description\": \"Add the GDPs of the top 3 countries\", \"context\": \"#E2, #E3, #E4\"}",
+     "name": "math_tool"},
+"type": "function"}]}
+
+You should describe your plans with rich details. Each plan step should correspond to only one function call.
+Make sure you don't include redundant or irrelevant plan steps and make the plan as efficient as possible.
+When using web_tool, make your query as specific as possible to get the most relevant information.
+When using wikipedia_tool, make sure your query specifies a single person name, place, entity or concept only, or a single '#En'.
+
+Do not respond with actual function calls. Follow the response format in the examples above instead.
+Do not use any prefix for the function names.
+You must respond with a valid JSON string only. 
Do not include any other text in your response.
+Remember that you must follow the JSON schema provided in the examples above.
+Very important: the value for the "arguments" key has to be a string that represents a JSON object.
+
+Begin!
+
+Question: diff --git a/plan_and_execute/flows/standard/requirements.txt b/plan_and_execute/flows/standard/requirements.txt new file mode 100644 index 000000000..99e17887c --- /dev/null +++ b/plan_and_execute/flows/standard/requirements.txt @@ -0,0 +1,5 @@ +pyautogen==0.2.27 +bs4==0.0.2 +wikipedia==1.4.0 +numexpr==2.10.0 +jsonschema==4.22.0 \ No newline at end of file diff --git a/plan_and_execute/flows/standard/solver.py b/plan_and_execute/flows/standard/solver.py new file mode 100644 index 000000000..34a777309 --- /dev/null +++ b/plan_and_execute/flows/standard/solver.py @@ -0,0 +1,44 @@ +"""Solver node for the plan_and_execute flow."""
+from promptflow.core import tool
+from autogen import AssistantAgent
+from connection_utils import CustomConnection
+
+
+@tool
+def solver_tool(
+    connection: CustomConnection, system_message: str,
+    question: str, results: str
+) -> str:
+    """Create a final response to the user's request."""
+    config_list_gpt4 = [
+        {
+            "model": connection.configs["aoai_model_gpt4"],
+            "api_key": connection.secrets["aoai_api_key"],
+            "base_url": connection.configs["aoai_base_url"],
+            "api_type": "azure",
+            "api_version": connection.configs["aoai_api_version"],
+        }
+    ]
+
+    solver = AssistantAgent(
+        name="SOLVER",
+        description="""
+        An agent expert in creating a final response to the user's request. 
+        """,
+        system_message=system_message,
+        code_execution_config=False,
+        llm_config={"config_list": config_list_gpt4,
+                    "timeout": 60, "cache_seed": None},
+    )
+
+    solver_message = f"""
+    Question:
+    {question}
+
+    Step results:
+    {results}
+    """
+
+    return solver.generate_reply(
+        messages=[{"content": solver_message, "role": "user"}]
+    ) diff --git a/plan_and_execute/flows/standard/solver_system_prompt.jinja2 b/plan_and_execute/flows/standard/solver_system_prompt.jinja2 new file mode 100644 index 000000000..e033f15c8 --- /dev/null +++ b/plan_and_execute/flows/standard/solver_system_prompt.jinja2 @@ -0,0 +1,6 @@ +For the given question and plan steps results, you synthesize a final response to the user.
+You should analyze the steps results to find the necessary information to synthesize the final response.
+Do not include any explanation regarding the plan steps results in the final response.
+If there is not enough information in the plan steps results to synthesize the final response, you should try to construct a response using your own knowledge and indicate that in the response.
+You must limit your response to only what was asked in the question, without writing any further explanations.
+ diff --git a/plan_and_execute/flows/standard/tools.py b/plan_and_execute/flows/standard/tools.py new file mode 100644 index 000000000..289c82a22 --- /dev/null +++ b/plan_and_execute/flows/standard/tools.py @@ -0,0 +1,327 @@ +"""Tools definitions for AutoGen."""
+from autogen import AssistantAgent, UserProxyAgent
+from autogen.agentchat import register_function
+from connection_utils import ConnectionInfo
+from typing_extensions import Annotated, Optional
+
+tool_descriptions = {
+    "web_tool": {
+        "function": (
+            "Worker that searches results from the internet. \
+            Useful when you need to find short and succinct "
+            "answers about a specific topic." 
+ ), + "query": "The search query string.", + "number_of_results": "The number of search results to return.", + }, + "wikipedia_tool": { + "function": ( + "Worker that search for page contents from Wikipedia. \ + Useful when you need to get holistic " + "knowledge about people, places, companies, historical events, \ + or other subjects. You use it when you " + "already have identified the entity name, usually after \ + searching for the entity name using web_tool." + ), + "query": "The single person name, entity, or concept to be searched.", + "number_of_results": "The number of search results to return.", + }, + "llm_tool": { + "function": ( + "An agent expert in solving problems by analyzing and \ + extracting information from the given " + "context. It should never be used to do calculations." + ), + "request": "The request to be answered.", + "context": "Context with the relevant information to \ + answer the request.", + }, + "math_tool": { + "function": ( + "A tool that can solve math problems by computing \ + arithmetic expressions. It must be used " + "whenever you need to do calculations or solve math problems. " + "You can use it to solve simple or complex math problems." 
+ ), + "problem_description": "The problem to be solved.", + "context": "Context with the relevant information \ + to solve the problem.", + }, +} + + +def register_tools(agent): + """Register tools for the agent.""" + for tool in tool_descriptions.keys(): + register_function( + globals()[tool], + caller=agent, + executor=agent, + description=tool_descriptions[tool]["function"], + ) + + +def llm_tool( + request: Annotated[str, tool_descriptions["llm_tool"]["request"]], + context: Optional[Annotated[ + str, tool_descriptions["llm_tool"]["context"] + ]] = None, +) -> str: + """Use an LLM to analyze and extract information from the \ + given context to answer the request.""" + connection_info = ConnectionInfo().connection_info + + try: + llm_assistant = AssistantAgent( + name="LLM_ASSISTANT", + description=( + "An agent expert in answering requests by analyzing and \ + extracting information from the given context." + ), + system_message=( + "Given a request and optionally some context with \ + potentially relevant information to answer it, " + "analyze the context and extract the information \ + needed to answer the request. " + "Then, create a sentence that answers the request. " + "You must strictly limit your response to only \ + what was asked in the request." 
+ ), + code_execution_config=False, + llm_config={ + "config_list": [ + { + "model": connection_info["aoai_model_gpt35"], + "api_key": connection_info["aoai_api_key"], + "base_url": connection_info["aoai_base_url"], + "api_type": "azure", + "api_version": connection_info["aoai_api_version"], + } + ], + "timeout": 60, + "temperature": 0.3, + "cache_seed": None, + }, + ) + except Exception as e: + print("LLM_ASSISTANT error:", e) + return "" + + llm_assistant.clear_history() + + message = f""" + Request: + {request} + + Context: + {context} + """ + try: + reply = llm_assistant.generate_reply( + messages=[{"content": message, "role": "user"}] + ) + return reply + except Exception as e: + return f"Error: {str(e)}" + + +def web_tool( + query: Annotated[str, tool_descriptions["web_tool"]["query"]], + number_of_results: Optional[ + Annotated[int, tool_descriptions["web_tool"]["number_of_results"]] + ] = 3, +) -> list: + """Search results from the internet.""" + import requests + from bs4 import BeautifulSoup + + connection_info = ConnectionInfo().connection_info + + headers = {"Ocp-Apim-Subscription-Key": connection_info["bing_api_key"]} + params = { + "q": query, + "count": number_of_results, + "offset": 0, + "mkt": "en-US", + "safesearch": "Strict", + "textDecorations": False, + "textFormat": "HTML", + } + response = requests.get( + connection_info["bing_endpoint"], headers=headers, params=params + ) + response.raise_for_status() + results = response.json() + + search_results = [] + for i in range(len(results["webPages"]["value"])): + title = results["webPages"]["value"][i]["name"] + url = results["webPages"]["value"][i]["url"] + snippet = results["webPages"]["value"][i]["snippet"] + + try: + response = requests.get(url) + if response.status_code == 200: + soup = BeautifulSoup(response.content, "html.parser") + text = soup.get_text(separator=" ", strip=True) + text = text[:5000] + else: + text = f"Failed to fetch content, \ + status code: {response.status_code}" + 
except Exception as e: + text = f"Error fetching the page: {str(e)}" + + search_results.append( + {"title": title, "url": url, "snippet": snippet, "content": text} + ) + + return llm_tool(query, search_results) + + +def wikipedia_tool( + query: Annotated[str, tool_descriptions["wikipedia_tool"]["query"]], + number_of_results: Optional[ + Annotated[ + int, tool_descriptions["wikipedia_tool"]["number_of_results"] + ] + ] = 3, +) -> list: + """Search for page contents from Wikipedia.""" + import wikipedia + + wikipedia.set_lang("en") + results = wikipedia.search(query, results=number_of_results) + + search_results = [] + + for title in results: + try: + page = wikipedia.page(title) + search_results.append( + {"title": page.title, "url": page.url, + "content": page.content[:5000]} + ) + except wikipedia.exceptions.DisambiguationError: + continue + except wikipedia.exceptions.PageError: + continue + except Exception as e: + search_results.append(f"Error fetching the page: {str(e)}") + + return search_results + + +def math_tool( + problem_description: Annotated[ + str, tool_descriptions["math_tool"]["problem_description"] + ], + context: Optional[ + Annotated[str, tool_descriptions["math_tool"]["context"]] + ] = None, +) -> str: + """Solve math problems by computing arithmetic expressions.""" + connection_info = ConnectionInfo().connection_info + + def is_termination_msg(content): + have_content = content.get("content", None) is not None + if have_content and "TERMINATE" in content["content"]: + return True + return False + + math_assistant = AssistantAgent( + name="MATH_ASSISTANT", + description="An agent expert in solving math \ + problems and math expressions.", + system_message=( + "Given a math problem and optionally some context with \ + relevant information to solve the problem, " + "translate the math problem into an expression that can \ + be executed using Python's numexpr library. 
" + "Then, use the available tool (evaluate_math_expression) \ + to solve the expression and return the result. " + "Reply 'TERMINATE' in the end when everything is done." + ), + code_execution_config=False, + is_termination_msg=is_termination_msg, + llm_config={ + "config_list": [ + { + "model": connection_info["aoai_model_gpt4"], + "api_key": connection_info["aoai_api_key"], + "base_url": connection_info["aoai_base_url"], + "api_type": "azure", + "api_version": connection_info["aoai_api_version"], + } + ], + "timeout": 60, + "cache_seed": None, + }, + ) + + math_executor = UserProxyAgent( + name="TOOL_EXECUTOR", + description=( + "An agent that acts as a proxy for the user and executes " + "the suggested function calls from MATH_ASSISTANT." + ), + code_execution_config=False, + is_termination_msg=is_termination_msg, + human_input_mode="NEVER", + ) + + tool_descriptions = { + "evaluate_math_expression": { + "function": "Function to evaluate math expressions \ + using Python's numexpr library.", + "expression": "The expression to be evaluated. \ + It should be a valid numerical expression.", + } + } + + @math_executor.register_for_execution() + @math_assistant.register_for_llm( + description=tool_descriptions["evaluate_math_expression"]["function"] + ) + def evaluate_math_expression( + expression: Annotated[ + str, tool_descriptions["evaluate_math_expression"]["expression"] + ] + ) -> str: + import math + import numexpr + import re + + try: + local_dict = {"pi": math.pi, "e": math.e} + output = str( + numexpr.evaluate( + expression.strip(), + global_dict={}, # restrict access to globals + local_dict=local_dict, # add common mathematical functions + ) + ) + except Exception as e: + raise ValueError( + f'Failed to evaluate "{expression}". Raised error: {repr(e)}. ' + "Please try again with a valid numerical expression." 
+ ) + + return re.sub(r"^\[|\]$", "", output) + + message = f""" + Problem: + {problem_description} + + Context: + {context} + """ + math_assistant.clear_history() + math_executor.clear_history() + + math_executor.initiate_chat( + message=message, recipient=math_assistant, + silent=True, clear_history=True + ) + result = math_executor.last_message()["content"] \ + .split("TERMINATE")[0].strip() + return result diff --git a/plan_and_execute/sample-request.json b/plan_and_execute/sample-request.json new file mode 100644 index 000000000..0fd145c3a --- /dev/null +++ b/plan_and_execute/sample-request.json @@ -0,0 +1 @@ +{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?"} \ No newline at end of file diff --git a/plan_and_execute/tests/test_delete_this_file.py b/plan_and_execute/tests/test_delete_this_file.py new file mode 100644 index 000000000..7fe8bbded --- /dev/null +++ b/plan_and_execute/tests/test_delete_this_file.py @@ -0,0 +1,6 @@ +def test_print(): + try: + print("Hello") is None + except Exception: + print("Test print function failed.") + assert False