onboarding plan_and_execute flow #196

Open · wants to merge 2 commits into base: development
44 changes: 44 additions & 0 deletions .github/workflows/plan_and_execute_ci_dev_workflow.yml
@@ -0,0 +1,44 @@
name: plan_and_execute_ci_dev_workflow

on:
  workflow_call:
  workflow_dispatch:
    inputs:
      env_name:
        type: string
        description: "Execution Environment"
        required: true
        default: "dev"
      use_case_base_path:
        type: string
        description: "The flow use case to execute"
        required: true
        default: "plan_and_execute"
      deployment_type:
        type: string
        description: "Determine type of deployment - aml, aks, docker, webapp"
        required: true
  push:
    branches:
      - main
      - development
    paths:
      - 'plan_and_execute/**'
      - '.github/**'
      - 'llmops/**'

#=====================================
# Execute platform_ci_dev_workflow workflow for experiment, evaluation and deployment of flows
#=====================================
jobs:
  execute-platform-flow-ci:
    uses: ./.github/workflows/platform_ci_dev_workflow.yml
    with:
      env_name: ${{ inputs.env_name || 'dev' }}
      use_case_base_path: ${{ inputs.use_case_base_path || 'plan_and_execute' }}
      deployment_type: ${{ inputs.deployment_type || 'aml' }}
    secrets:
      azure_credentials: ${{ secrets.AZURE_CREDENTIALS }}
      registry_details: ${{ secrets.DOCKER_IMAGE_REGISTRY }}
      env_vars: ${{ secrets.ENV_VARS }}
36 changes: 36 additions & 0 deletions .github/workflows/plan_and_execute_pr_dev_workflow.yml
@@ -0,0 +1,36 @@
name: plan_and_execute_pr_dev_workflow

on:
  workflow_call:
    inputs:
      env_name:
        type: string
        description: "Execution Environment"
        required: true
        default: "dev"
      use_case_base_path:
        type: string
        description: "The flow use case to execute"
        required: true
        default: "plan_and_execute"
  pull_request:
    branches:
      - main
      - development
    paths:
      - 'plan_and_execute/**'
      - '.github/**'
      - 'llmops/**'

#=====================================
# Execute platform_pr_dev_workflow workflow for experiment, evaluation and deployment of flows
#=====================================
jobs:
  execute-platform-pr-workflow:
    uses: ./.github/workflows/platform_pr_dev_workflow.yml
    with:
      env_name: ${{ inputs.env_name || 'pr' }}
      use_case_base_path: ${{ inputs.use_case_base_path || 'plan_and_execute' }}
    secrets:
      azure_credentials: ${{ secrets.AZURE_CREDENTIALS }}
      env_vars: ${{ secrets.ENV_VARS }}
@@ -0,0 +1,44 @@
pr: none
trigger:
  branches:
    include:
      - main
      - development
  paths:
    include:
      - .azure-pipelines/*
      - llmops/*
      - plan_and_execute/*

pool:
  vmImage: ubuntu-latest

variables:
  - group: llmops_platform_dev_vg

parameters:
  - name: env_name
    displayName: "Execution Environment"
    default: "dev"
  - name: use_case_base_path
    displayName: "Flow to execute"
    default: "plan_and_execute"
  - name: deployment_type
    displayName: "Determine type of deployment - aml, aks, docker, webapp"
    default: "aml"

#=====================================
# Execute platform_ci_dev_pipeline pipeline for experiment, evaluation and deployment of flows
#=====================================
stages:
  - template: ../../.azure-pipelines/platform_ci_dev_pipeline.yml
    parameters:
      RESOURCE_GROUP_NAME: $(rg_name)  # Expected in llmops_platform_dev_vg
      WORKSPACE_NAME: $(ws_name)       # Expected in llmops_platform_dev_vg
      KEY_VAULT_NAME: $(kv_name)       # Expected in llmops_platform_dev_vg
      exec_environment: ${{ parameters.env_name }}
      use_case_base_path: ${{ parameters.use_case_base_path }}
      deployment_type: ${{ lower(parameters.deployment_type) }}
      registry_details: '$(DOCKER_IMAGE_REGISTRY)'
      env_vars: $(env_vars)
@@ -0,0 +1,37 @@
trigger: none
pr:
  branches:
    include:
      - main
      - development
  paths:
    include:
      - .azure-pipelines/*
      - llmops/*
      - plan_and_execute/*

pool:
  vmImage: ubuntu-latest

variables:
  - group: llmops_platform_dev_vg

parameters:
  - name: env_name
    displayName: "Execution Environment"
    default: "pr"
  - name: use_case_base_path
    displayName: "Base path of flow to execute"
    default: "plan_and_execute"

#=====================================
# Execute platform_pr_dev_pipeline pipeline for experiment, evaluation and deployment of flows
#=====================================
stages:
  - template: ../../.azure-pipelines/platform_pr_dev_pipeline.yml
    parameters:
      RESOURCE_GROUP_NAME: $(rg_name)
      WORKSPACE_NAME: $(ws_name)
      exec_environment: ${{ parameters.env_name }}
      use_case_base_path: ${{ parameters.use_case_base_path }}
      env_vars: $(env_vars)
43 changes: 43 additions & 0 deletions plan_and_execute/README.md
@@ -0,0 +1,43 @@
# Plan and Execute with LLM Agents

This is an example implementation of an agentic flow that plans the steps needed to fulfill a user's request, efficiently executes the plan through external function calling, and assembles a final response.

It implements the core ideas from these two papers:
- [ReWOO: Decoupling Reasoning from Observations for Efficient Augmented Language Models](https://arxiv.org/abs/2305.18323)
- [An LLM Compiler for Parallel Function Calling](https://arxiv.org/abs/2312.04511)

The idea is to optimize the traditional loop of reasoning and acting used for planning and executing tasks with LLM-based agents, usually implemented with the [ReAct pattern](https://arxiv.org/abs/2210.03629), in which planning and acting steps are interleaved sequentially.

Decoupling planning from acting makes several optimizations possible:
- A separate LLM agent concerned only with planning opens up the possibility of fine-tuning a specialized model, which can improve efficiency and reduce cost, depending on the scenario.
- A separate component that orchestrates the external tool calls needed to execute the plan can reduce latency by running functions in parallel whenever they do not depend on each other.

This implementation also uses components of [Microsoft's AutoGen framework](https://github.com/microsoft/autogen) to facilitate the interaction with LLMs in all modules and to execute external functions, as explained in the Architecture Overview below.

## Architecture Overview
<img src="figs/architecture.svg" alt="Plan and Execute - Architecture Overview" width="2000"/>

The main components of this implementation are depicted in the architecture diagram above. Planner, Executor, and Solver are implemented as Prompt flow Python nodes. Tools are implemented as standard Python functions.

### Planner
The Planner is implemented as an [AutoGen AssistantAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/assistant_agent). Its system message, including few-shot examples, is implemented as a Prompt flow prompt. The Planner knows the capabilities of the available tools and how to use them. Given a user's request, it is instructed to generate a step-by-step plan to solve it. The plan must be a valid JSON object containing a description for each plan step and the list of functions to be called to solve each step; dependencies between those functions are expressed as variable assignments using a specific notation.
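To make the format concrete, a plan for the first dataset question might look roughly like the sketch below. The field names, tool names, and the `#R<n>` dependency notation are illustrative assumptions only; the actual schema and notation are defined in the Planner's prompt.

```json
{
  "steps": [
    {
      "description": "Find the worldwide box office gross of 'Inception'.",
      "function": "web_search(query='Inception worldwide box office gross') -> #R1"
    },
    {
      "description": "Find the worldwide box office gross of 'Interstellar'.",
      "function": "web_search(query='Interstellar worldwide box office gross') -> #R2"
    },
    {
      "description": "Add the two grosses to obtain the combined total.",
      "function": "calculator(expression='#R1 + #R2') -> #R3"
    }
  ]
}
```

Because the first two steps do not depend on each other, the Executor can run them in parallel; the third step waits for both results.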

### Executor
The Executor is implemented as a combination of custom Python code and an [AutoGen UserProxyAgent](https://microsoft.github.io/autogen/docs/reference/agentchat/user_proxy_agent/). It takes the generated plan as input. The custom Python code fetches the function calls from the plan, resolves the dependencies between them, dispatches the functions for execution, and collects the results. The AutoGen UserProxyAgent handles the actual execution of the Python functions, including parallel execution, since it already implements these capabilities. The Executor's output is a list with the results of all plan steps.
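For intuition, a minimal scheduling sketch is shown below. This is not the repository's Executor code (which delegates execution to the AutoGen UserProxyAgent); it only illustrates dependency resolution and parallel dispatch, using a plain thread pool and an assumed step schema (`function`, `args`, `output_var`).

```python
import re
from concurrent.futures import ThreadPoolExecutor

def resolve(value, results):
    """Substitute '#R<n>' placeholders in string arguments with results of earlier steps."""
    if isinstance(value, str):
        return re.sub(r"#R\d+", lambda m: str(results[m.group(0)]), value)
    return value

def run_plan(plan_steps, tools):
    """plan_steps: list of {'function', 'args', 'output_var'} dicts; tools: name -> callable."""
    results, pending = {}, list(plan_steps)
    while pending:
        # A step is ready once every '#R<n>' placeholder it references has been produced.
        ready = [s for s in pending
                 if all(ref in results for ref in re.findall(r"#R\d+", str(s["args"])))]
        if not ready:
            raise ValueError("Plan contains unresolvable or circular dependencies")
        # Independent steps are dispatched in parallel.
        with ThreadPoolExecutor() as pool:
            futures = {s["output_var"]: pool.submit(
                           tools[s["function"]],
                           **{k: resolve(v, results) for k, v in s["args"].items()})
                       for s in ready}
            for var, future in futures.items():
                results[var] = future.result()
        pending = [s for s in pending if s not in ready]
    return [results[s["output_var"]] for s in plan_steps]
```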

### Tools
Tools are implemented as standard, strongly typed Python functions. Because of their type annotations, they can be registered seamlessly with the AutoGen AssistantAgent and UserProxyAgent, without the need to maintain a separate function-definitions dictionary.
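A rough sketch of what this can look like with AutoGen's function-registration decorators is shown below; the agent setup, tool name, and function body are illustrative assumptions, not the repository's actual code.

```python
from typing import Annotated
import autogen

# Illustrative LLM config; the real flow reads these values from the custom connection.
llm_config = {"config_list": [{"model": "gpt-4o", "api_key": "<aoai-key>", "base_url": "<aoai-endpoint>"}]}

planner = autogen.AssistantAgent("planner", llm_config=llm_config)
executor = autogen.UserProxyAgent("executor", human_input_mode="NEVER", code_execution_config=False)

# The type annotations double as the tool schema, so no separate
# function-definitions dictionary has to be maintained.
@executor.register_for_execution()
@planner.register_for_llm(description="Search the web with Bing and return the top results as text.")
def web_search(query: Annotated[str, "The search query"]) -> str:
    # Placeholder body; the real tool would call the Bing Web Search API
    # using the key stored in plan_execute_agent_connection.
    return f"results for: {query}"
```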

### Solver
The Solver is also implemented as an AutoGen AssistantAgent, and its system message is likewise a Prompt flow prompt. It takes the user's request and the plan step results as input and is instructed to use the information from those results to answer the request.

## Prerequisites
You will need the following:

- A Bing Web Search API key. You can create one in your Azure subscription by following the instructions [here](https://aka.ms/bingapisignup).
- A `gpt-35-turbo` and a `gpt-4o` model deployment on your Azure OpenAI service. Both should be under the same service (same base URL).
- A Prompt flow custom connection, described below.

Create a Prompt flow connection of type Custom and name it `plan_execute_agent_connection`. To do so, go to your Azure Machine Learning workspace portal, click `Prompt flow` -> `Connections` -> `Create` -> `Custom`. Fill in the key-value pairs according to the figure below:

<img src="figs/connection.svg" alt="Custom Connection Information" width="500"/>
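If the figure does not render, the key names below, taken from the connection fields referenced in `experiment.yaml`, show what the connection is expected to contain. The helper function is only a sketch of how a flow node could read them, not the repository's actual code.

```python
from promptflow.connections import CustomConnection

def get_agent_settings(conn: CustomConnection) -> dict:
    """Collect the plan_execute_agent_connection fields this flow relies on."""
    return {
        "aoai_api_key": conn.secrets["aoai_api_key"],          # secret: Azure OpenAI key
        "bing_api_key": conn.secrets["bing_api_key"],          # secret: Bing Web Search key
        "aoai_base_url": conn.configs["aoai_base_url"],        # Azure OpenAI endpoint base URL
        "aoai_api_version": conn.configs["aoai_api_version"],  # e.g. 2023-07-01-preview
        "bing_endpoint": conn.configs["bing_endpoint"],        # https://api.bing.microsoft.com/v7.0/search
        "aoai_model_gpt4": conn.configs["aoai_model_gpt4"],    # e.g. gpt-4o
        "aoai_model_gpt35": conn.configs["aoai_model_gpt35"],  # e.g. gpt-35-turbo-16k
    }
```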
57 changes: 57 additions & 0 deletions plan_and_execute/configs/deployment_config.json
@@ -0,0 +1,57 @@
{
  "azure_managed_endpoint": [
    {
      "ENV_NAME": "dev",
      "TEST_FILE_PATH": "sample-request.json",
      "ENDPOINT_NAME": "",
      "ENDPOINT_DESC": "An online endpoint serving the plan_and_execute flow",
      "DEPLOYMENT_DESC": "prompt flow deployment",
      "PRIOR_DEPLOYMENT_NAME": "",
      "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
      "CURRENT_DEPLOYMENT_NAME": "",
      "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100",
      "DEPLOYMENT_VM_SIZE": "Standard_F4s_v2",
      "DEPLOYMENT_INSTANCE_COUNT": 1,
      "ENVIRONMENT_VARIABLES": {
        "example-name": "example-value"
      }
    }
  ],
  "kubernetes_endpoint": [
    {
      "ENV_NAME": "dev",
      "TEST_FILE_PATH": "sample-request.json",
      "ENDPOINT_NAME": "",
      "ENDPOINT_DESC": "A Kubernetes endpoint serving the plan_and_execute flow",
      "DEPLOYMENT_DESC": "prompt flow deployment",
      "PRIOR_DEPLOYMENT_NAME": "",
      "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
      "CURRENT_DEPLOYMENT_NAME": "",
      "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100,
      "COMPUTE_NAME": "",
      "DEPLOYMENT_VM_SIZE": "",
      "DEPLOYMENT_CONDA_PATH": "",
      "DEPLOYMENT_INSTANCE_COUNT": 1,
      "CPU_ALLOCATION": "",
      "MEMORY_ALLOCATION": "",
      "ENVIRONMENT_VARIABLES": {
        "example-name": "example-value"
      }
    }
  ],
  "webapp_endpoint": [
    {
      "ENV_NAME": "dev",
      "TEST_FILE_PATH": "sample-request.json",
      "CONNECTION_NAMES": [""],
      "REGISTRY_NAME": "",
      "REGISTRY_RG_NAME": "",
      "APP_PLAN_NAME": "",
      "WEB_APP_NAME": "",
      "WEB_APP_RG_NAME": "",
      "WEB_APP_SKU": "B3",
      "USER_MANAGED_ID": ""
    }
  ]
}
8 changes: 8 additions & 0 deletions plan_and_execute/data/plan_and_execute_data.jsonl
@@ -0,0 +1,8 @@
{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?"}
{"question": "What is the change rate of the U.S. inflation between 2022 and 2023? Was there an increase or decrease in the inflation?"}
{"question": "What is the percentage breakdown of the number of native speakers of the top 3 languages in the world?"}
{"question": "What was the percentage change of the Tokyo population from 2010 to 2020? Did it increase or decrease?"}
{"question": "Which Nobel Prize category has awarded the most prizes to women, and who was the latest female recipient?"}
{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches."}
{"question": "Who invented the first programmable computer, and what is the inventor's name and place of birth?"}
{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?"}
8 changes: 8 additions & 0 deletions plan_and_execute/data/plan_and_execute_eval_data.jsonl
@@ -0,0 +1,8 @@
{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?", "answer": "The total box office performance of 'Inception' and 'Interstellar' together was $1,509,329,092."}
{"question": "What is the change rate of the U.S. inflation between 2022 and 2023? Was there an increase or decrease in the inflation?", "answer": "The change rate of the U.S. inflation between 2022 and 2023 was approximately -48.75%. There was a decrease in the inflation rate."}
{"question": "What is the percentage breakdown of the number of native speakers of the top 3 languages in the world?", "answer": "The percentage breakdown of the number of native speakers of the top 3 languages in the world is as follows:\n\n- Chinese (Mandarin): approximately 52.06%\n- Spanish: approximately 26.89%\n- English: approximately 21.05%"}
{"question": "What was the percentage change of the Tokyo population from 2010 to 2020? Did it increase or decrease?", "answer": "The population of Tokyo increased by approximately 7.80% from 2010 to 2020."}
{"question": "Which Nobel Prize category has awarded the most prizes to women, and who was the latest female recipient?", "answer": "The Nobel Prize category that has awarded the most prizes to women is the Nobel Peace Prize, and the latest female recipient is Narges Mohammadi in 2023."}
{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches.", "answer": "Lionel Messi, Cristiano Ronaldo, and Neymar have scored a total of 296 international goals combined."}
{"question": "Who invented the first programmable computer, and what is the inventor's name and place of birth?", "answer": "Charles Babbage invented the first programmable computer. He was born at 44 Crosby Row, Walworth Road, London, England."}
{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?", "answer": "The GDP per capita of the wealthiest country, Monaco, is approximately three orders of magnitude higher than that of the poorest country, Burundi."}
3 changes: 3 additions & 0 deletions plan_and_execute/data/plan_and_execute_pr_data.jsonl
@@ -0,0 +1,3 @@
{"question": "What was the total box office performance of 'Inception' and 'Interstellar' together?"}
{"question": "Calculate the total number of goals scored by Lionel Messi, Cristiano Ronaldo, and Neymar in international matches."}
{"question": "How does the GDP per capita of the wealthiest country compare to that of the poorest country, in order of magnitude?"}
34 changes: 34 additions & 0 deletions plan_and_execute/environment/Dockerfile
@@ -0,0 +1,34 @@
# syntax=docker/dockerfile:1
FROM docker.io/continuumio/miniconda3:latest

WORKDIR /

COPY ./flow/requirements.txt /flow/requirements.txt

# gcc is needed to build psutil on macOS
RUN apt-get update && apt-get install -y runit gcc

# create conda environment
RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \
    conda run -n promptflow-serve \
    pip install -r /flow/requirements.txt && \
    conda run -n promptflow-serve pip install keyrings.alt && \
    conda run -n promptflow-serve pip install gunicorn==20.1.0 && \
    conda run -n promptflow-serve pip cache purge && \
    conda clean -a -y

COPY ./flow /flow


EXPOSE 8080

COPY ./connections/* /connections/

# reset runsvdir
RUN rm -rf /var/runit
COPY ./runit /var/runit
# grant permission
RUN chmod -R +x /var/runit

COPY ./start.sh /
CMD ["bash", "./start.sh"]
Empty file.
10 changes: 10 additions & 0 deletions plan_and_execute/experiment.pr.yaml
@@ -0,0 +1,10 @@
name: plan_and_execute

datasets:
- name: plan_and_execute_pr_data
  source: data/plan_and_execute_pr_data.jsonl
  description: "This dataset is for pr validation only."
  mappings:
    question: "${data.question}"

evaluators:
41 changes: 41 additions & 0 deletions plan_and_execute/experiment.yaml
@@ -0,0 +1,41 @@
name: plan_and_execute
flow: flows/standard

connections:
- name: plan_execute_agent_connection
  connection_type: CustomConnection
  secrets:
    aoai_api_key: ${aoai_api_key}
    bing_api_key: ${bing_api_key}
  configs:
    aoai_base_url: ${aoai_base_url}
    aoai_api_version: 2023-07-01-preview
    bing_endpoint: https://api.bing.microsoft.com/v7.0/search
    aoai_model_gpt4: gpt-4o
    aoai_model_gpt35: gpt-35-turbo-16k

datasets:
- name: plan_and_execute_data
  source: data/plan_and_execute_data.jsonl
  description: "This dataset is for prompt experiments."
  mappings:
    question: "${data.question}"

evaluators:
- name: plan_and_execute_evaluation_flow
  flow: flows/evaluation
  datasets:
  - name: plan_and_execute_eval_data
    reference: plan_and_execute_data
    source: data/plan_and_execute_eval_data.jsonl
    description: "This dataset is for evaluating flows."
    mappings:
      groundtruth: "${data.answer}"
      prediction: "${run.outputs.answer}"
      json_schema_path: plan_and_execute_json_schema.json
      json_string: "${run.outputs.plan}"
      plan_steps_count: "${run.outputs.number_of_steps}"
      steps: "${run.outputs.steps}"
      question: "${data.question}"
      answer: "${run.outputs.answer}"
      ground_truth: "${data.answer}"
1 change: 1 addition & 0 deletions plan_and_execute/figs/architecture.svg
1 change: 1 addition & 0 deletions plan_and_execute/figs/connection.svg