Brandon/eng 290 make tool inputs actual objects and not strings (#1868)

* Improving tool calling to pass dictionaries instead of strings
* Fix issues with parsing none/null
* remove prints and unnecessary comments
* Fix crew_test issues with function calling
* improve prompting
* add back in support for add_image
* add tests for tool validation
* revert back to figure out why tests are timing out
* Update cassette
* trying to find what is timing out
* add back in guardrails
* add back in manager delegation tests
* Trying to fix tests
* Force test to pass
* Trying to fix tests
* add in more role tests
* add back old tool validation
* updating tests
* vcr
* Fix tests
* improve function llm logic
* vcr 2
* drop llm
* Failing test
* add more tests back in
* Revert tool validation
crewAIInc · Jan 10, 2025 · b8d07fe · b8d07fe
1 parent be8e33d
commit b8d07fe
Show file tree

Hide file tree

Showing 14 changed files with 856 additions and 3,980 deletions.
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
@@ -86,7 +86,7 @@ class Agent(BaseAgent):
     llm: Union[str, InstanceOf[LLM], Any] = Field(
         description="Language model that will run the agent.", default=None
     )
-    function_calling_llm: Optional[Any] = Field(
+    function_calling_llm: Optional[Union[str, InstanceOf[LLM], Any]] = Field(
         description="Language model that will run the agent.", default=None
     )
     system_template: Optional[str] = Field(
@@ -142,7 +142,8 @@ def post_init_setup(self):
         self.agent_ops_agent_name = self.role
 
         self.llm = create_llm(self.llm)
-        self.function_calling_llm = create_llm(self.function_calling_llm)
+        if self.function_calling_llm and not isinstance(self.function_calling_llm, LLM):
+            self.function_calling_llm = create_llm(self.function_calling_llm)
 
         if not self.agent_executor:
             self._setup_agent_executor()

diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
@@ -145,8 +145,6 @@ def _invoke_loop(self):
                 if self._is_context_length_exceeded(e):
                     self._handle_context_length()
                     continue
-                else:
-                    raise e
 
         self._show_logs(formatted_answer)
         return formatted_answer
@@ -316,7 +314,7 @@ def _execute_tool_and_check_finality(self, agent_action: AgentAction) -> ToolRes
             agent=self.agent,
             action=agent_action,
         )
-        tool_calling = tool_usage.parse(agent_action.text)
+        tool_calling = tool_usage.parse_tool_calling(agent_action.text)
 
         if isinstance(tool_calling, ToolUsageErrorException):
             tool_result = tool_calling.message

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
@@ -47,6 +47,7 @@
     aggregate_raw_outputs_from_task_outputs,
     aggregate_raw_outputs_from_tasks,
 )
+from crewai.utilities.llm_utils import create_llm
 from crewai.utilities.planning_handler import CrewPlanner
 from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
 from crewai.utilities.training_handler import CrewTrainingHandler
@@ -149,7 +150,7 @@ class Crew(BaseModel):
     manager_agent: Optional[BaseAgent] = Field(
         description="Custom agent that will be used as manager.", default=None
     )
-    function_calling_llm: Optional[Any] = Field(
+    function_calling_llm: Optional[Union[str, InstanceOf[LLM], Any]] = Field(
         description="Language model that will run the agent.", default=None
     )
     config: Optional[Union[Json, Dict[str, Any]]] = Field(default=None)
@@ -245,15 +246,9 @@ def set_private_attrs(self) -> "Crew":
         if self.output_log_file:
             self._file_handler = FileHandler(self.output_log_file)
         self._rpm_controller = RPMController(max_rpm=self.max_rpm, logger=self._logger)
-        if self.function_calling_llm:
-            if isinstance(self.function_calling_llm, str):
-                self.function_calling_llm = LLM(model=self.function_calling_llm)
-            elif not isinstance(self.function_calling_llm, LLM):
-                self.function_calling_llm = LLM(
-                    model=getattr(self.function_calling_llm, "model_name", None)
-                    or getattr(self.function_calling_llm, "deployment_name", None)
-                    or str(self.function_calling_llm)
-                )
+        if self.function_calling_llm and not isinstance(self.function_calling_llm, LLM):
+            self.function_calling_llm = create_llm(self.function_calling_llm)
+
         self._telemetry = Telemetry()
         self._telemetry.set_tracer()
         return self

diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py
@@ -1,9 +1,13 @@
 import ast
 import datetime
+import json
+import re
 import time
 from difflib import SequenceMatcher
 from textwrap import dedent
-from typing import Any, List, Union
+from typing import Any, Dict, List, Union
+
+from json_repair import repair_json
 
 import crewai.utilities.events as events
 from crewai.agents.tools_handler import ToolsHandler
@@ -19,7 +23,15 @@
     import agentops  # type: ignore
 except ImportError:
     agentops = None
-OPENAI_BIGGER_MODELS = ["gpt-4", "gpt-4o", "o1-preview", "o1-mini", "o1", "o3", "o3-mini"]
+OPENAI_BIGGER_MODELS = [
+    "gpt-4",
+    "gpt-4o",
+    "o1-preview",
+    "o1-mini",
+    "o1",
+    "o3",
+    "o3-mini",
+]
 
 
 class ToolUsageErrorException(Exception):
@@ -80,7 +92,7 @@ def __init__(
             self._max_parsing_attempts = 2
             self._remember_format_after_usages = 4
 
-    def parse(self, tool_string: str):
+    def parse_tool_calling(self, tool_string: str):
         """Parse the tool string and return the tool calling."""
         return self._tool_calling(tool_string)
 
@@ -94,7 +106,6 @@ def use(
             self.task.increment_tools_errors()
             return error
 
-        # BUG? The code below seems to be unreachable
         try:
             tool = self._select_tool(calling.tool_name)
         except Exception as e:
@@ -116,7 +127,7 @@ def use(
                     self._printer.print(content=f"\n\n{error}\n", color="red")
                 return error
 
-        return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}"  # type: ignore # BUG?: "_use" of "ToolUsage" does not return a value (it only ever returns None)
+        return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}"
 
     def _use(
         self,
@@ -349,28 +360,28 @@ def _original_tool_calling(self, tool_string: str, raise_error: bool = False):
         tool_name = self.action.tool
         tool = self._select_tool(tool_name)
         try:
-            tool_input = self._validate_tool_input(self.action.tool_input)
-            arguments = ast.literal_eval(tool_input)
+            arguments = self._validate_tool_input(self.action.tool_input)
+
         except Exception:
             if raise_error:
                 raise
             else:
-                return ToolUsageErrorException(  # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling")
+                return ToolUsageErrorException(
                     f'{self._i18n.errors("tool_arguments_error")}'
                 )
 
         if not isinstance(arguments, dict):
             if raise_error:
                 raise
             else:
-                return ToolUsageErrorException(  # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling")
+                return ToolUsageErrorException(
                     f'{self._i18n.errors("tool_arguments_error")}'
                 )
 
         return ToolCalling(
             tool_name=tool.name,
             arguments=arguments,
-            log=tool_string,  # type: ignore
+            log=tool_string,
         )
 
     def _tool_calling(
@@ -396,57 +407,28 @@ def _tool_calling(
                 )
             return self._tool_calling(tool_string)
 
-    def _validate_tool_input(self, tool_input: str) -> str:
+    def _validate_tool_input(self, tool_input: str) -> Dict[str, Any]:
         try:
-            ast.literal_eval(tool_input)
-            return tool_input
-        except Exception:
-            # Clean and ensure the string is properly enclosed in braces
-            tool_input = tool_input.strip()
-            if not tool_input.startswith("{"):
-                tool_input = "{" + tool_input
-            if not tool_input.endswith("}"):
-                tool_input += "}"
-
-            # Manually split the input into key-value pairs
-            entries = tool_input.strip("{} ").split(",")
-            formatted_entries = []
-
-            for entry in entries:
-                if ":" not in entry:
-                    continue  # Skip malformed entries
-                key, value = entry.split(":", 1)
-
-                # Remove extraneous white spaces and quotes, replace single quotes
-                key = key.strip().strip('"').replace("'", '"')
-                value = value.strip()
-
-                # Handle replacement of single quotes at the start and end of the value string
-                if value.startswith("'") and value.endswith("'"):
-                    value = value[1:-1]  # Remove single quotes
-                    value = (
-                        '"' + value.replace('"', '\\"') + '"'
-                    )  # Re-encapsulate with double quotes
-                elif value.isdigit():  # Check if value is a digit, hence integer
-                    value = value
-                elif value.lower() in [
-                    "true",
-                    "false",
-                ]:  # Check for boolean and null values
-                    value = value.lower().capitalize()
-                elif value.lower() == "null":
-                    value = "None"
-                else:
-                    # Assume the value is a string and needs quotes
-                    value = '"' + value.replace('"', '\\"') + '"'
+            # Replace Python literals with JSON equivalents
+            replacements = {
+                r"'": '"',
+                r"None": "null",
+                r"True": "true",
+                r"False": "false",
+            }
+            for pattern, replacement in replacements.items():
+                tool_input = re.sub(pattern, replacement, tool_input)
 
-                # Rebuild the entry with proper quoting
-                formatted_entry = f'"{key}": {value}'
-                formatted_entries.append(formatted_entry)
+            arguments = json.loads(tool_input)
+        except json.JSONDecodeError:
+            # Attempt to repair JSON string
+            repaired_input = repair_json(tool_input)
+            try:
+                arguments = json.loads(repaired_input)
+            except json.JSONDecodeError as e:
+                raise Exception(f"Invalid tool input JSON: {e}")
 
-            # Reconstruct the JSON string
-            new_json_string = "{" + ", ".join(formatted_entries) + "}"
-            return new_json_string
+        return arguments
 
     def on_tool_error(self, tool: Any, tool_calling: ToolCalling, e: Exception) -> None:
         event_data = self._prepare_event_data(tool, tool_calling)

diff --git a/src/crewai/translations/en.json b/src/crewai/translations/en.json
@@ -9,11 +9,11 @@
     "task": "\nCurrent Task: {input}\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:",
     "memory": "\n\n# Useful context: \n{memory}",
     "role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}",
-    "tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nUse the following format:\n\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple python dictionary, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n\nOnce all necessary information is gathered:\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n",
-    "no_tools": "\nTo give my best complete final answer to the task use the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!",
-    "format": "I MUST either use a tool (use one at time) OR give my best final answer not both at the same time. To Use the following format:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action, dictionary enclosed in curly braces\nObservation: the result of the action\n... (this Thought/Action/Action Input/Result can repeat N times)\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n",
-    "final_answer_format": "If you don't need to use any more tools, you must give your best complete final answer, make sure it satisfies the expected criteria, use the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer: my best complete final answer to the task.\n\n",
-    "format_without_tools": "\nSorry, I didn't use the right format. I MUST either use a tool (among the available ones), OR give my best final answer.\nI just remembered the expected format I must follow:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Result can repeat N times)\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n",
+    "tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
+    "no_tools": "\nTo give my best complete final answer to the task respond using the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!",
+    "format": "I MUST either use a tool (use one at time) OR give my best final answer not both at the same time. When responding, I must use the following format:\n\n```\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action, dictionary enclosed in curly braces\nObservation: the result of the action\n```\nThis Thought/Action/Action Input/Result can repeat N times. Once I know the final answer, I must return the following format:\n\n```\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n```",
+    "final_answer_format": "If you don't need to use any more tools, you must give your best complete final answer, make sure it satisfies the expected criteria, use the EXACT format below:\n\n```\nThought: I now can give a great answer\nFinal Answer: my best complete final answer to the task.\n\n```",
+    "format_without_tools": "\nSorry, I didn't use the right format. I MUST either use a tool (among the available ones), OR give my best final answer.\nHere is the expected format I must follow:\n\n```\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n```\n This Thought/Action/Action Input/Result process can repeat N times. Once I know the final answer, I must return the following format:\n\n```\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n```",
     "task_with_context": "{task}\n\nThis is the context you're working with:\n{context}",
     "expected_output": "\nThis is the expect criteria for your final answer: {expected_output}\nyou MUST return the actual complete content as the final answer, not a summary.",
     "human_feedback": "You got human feedback on your work, re-evaluate it and give a new Final Answer when ready.\n {human_feedback}",