Fix tests

aorwall · Aug 18, 2024 · d70085d
1 parent 4b1380d
commit d70085d
Show file tree

Hide file tree

Showing 24 changed files with 12,248 additions and 602 deletions.
diff --git a/moatless/benchmark/evaluation.py b/moatless/benchmark/evaluation.py
@@ -46,7 +46,6 @@ def __init__(
         evaluations_dir: str,
         evaluation_name: str,
         transitions: TransitionRules,
-        workspace: Workspace | None = None,
         report_mode: str | None = None,
         max_cost: float = 0.5,
         max_transitions: int = 25,
@@ -74,7 +73,6 @@ def __init__(
         self.reward_threshold = reward_threshold
 
         self.transitions = transitions
-        self.workspace = workspace
 
         litellm.drop_params = True
 
@@ -346,13 +344,13 @@ def _run_evaluation_threads(self, instances: list[dict]):
         logger.info(
             f"Processing {len(instances)} instances with {len(repo_groups)} repos with {self.num_workers} workers"
         )
+        logger.info(self.transitions)
 
         with concurrent.futures.ProcessPoolExecutor(
             max_workers=self.num_workers
         ) as executor:
             futures = []
             for repo, group in repo_groups.items():
-                logger.info(json.dumps(group, indent=2))
                 futures.append(executor.submit(self._process_repo_group, repo, group))
 
             pbar = tqdm(concurrent.futures.as_completed(futures), total=len(futures))
@@ -366,7 +364,7 @@ def _run_evaluation_threads(self, instances: list[dict]):
                         continue
                 except Exception:
                     error += 1
-                    logger.exception("Error in processing repo group")
+                    logger.exception(f"Error in processing repo group.")
                     continue
 
                 results.extend(group_results)

diff --git a/moatless/benchmark/plan/classify.py b/moatless/benchmark/plan/classify.py
diff --git a/moatless/benchmark/report_v2.py b/moatless/benchmark/report_v2.py
@@ -532,7 +532,7 @@ def to_dataframe(report_mode: str, results: list[BenchmarkResult]) -> pd.DataFra
 
     def flatten_dict(d, parent_key="", sep="_"):
         items = []
-        general_keys = ["instance_id", "duration", "total_cost", "resolved_by", "status",
+        general_keys = ["instance_id", "duration", "total_cost", "prompt_tokens", "completion_tokens", "resolved_by", "status",
                         "transitions", "all_transitions", "alternative_solutions", "resolved",
                         "expected_spans", "expected_files", "error"]
 
@@ -562,7 +562,7 @@ def flatten_dict(d, parent_key="", sep="_"):
 
     # Reorder columns
     column_order = [
-        "instance_id", "duration", "total_cost", "promt_tokens", "completion_tokens", "resolved_by", "status", "resolved",
+        "instance_id", "duration", "total_cost", "prompt_tokens", "completion_tokens", "resolved_by", "status", "resolved",
         "transitions", "all_transitions", "expected_spans", "expected_files", "alternative_solutions",
         "expected_spans_details", "error"
     ]

diff --git a/moatless/benchmark/run_evaluation.py b/moatless/benchmark/run_evaluation.py
@@ -59,7 +59,7 @@ def parse_args():
     return parser.parse_args()
 
 search_model = "openrouter/anthropic/claude-3.5-sonnet"
-plan_model = "claude-3-5-sonnet-20240620" # "openrouter/anthropic/claude-3.5-sonnet"
+plan_model = "azure/gpt-4o" # "claude-3-5-sonnet-20240620" # "openrouter/anthropic/claude-3.5-sonnet"
 edit_model = "azure/gpt-4o"
 
 DEFAULT_STATE_PARAMS = {
@@ -86,7 +86,7 @@ def parse_args():
         "finish_on_review": True,
     },
     ExpandContext: {
-        "expand_to_max_tokens": 8000
+        "expand_to_max_tokens": 4000
     },
     ClarifyCodeChange: {
         "model": "azure/gpt-4o",

diff --git a/moatless/benchmark/state/expand.py b/moatless/benchmark/state/expand.py
diff --git a/moatless/edit/expand.py b/moatless/edit/expand.py
@@ -30,6 +30,11 @@ class ExpandContext(State):
         description="Whether to expand with related spans.",
     )
 
+    expand_other: bool = Field(
+        False,
+        description="Whether to expand with related spans.",
+    )
+
     def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOutcome:
         self.file_context.expand_context_with_init_spans()
 
@@ -56,20 +61,25 @@ def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOu
         original_tokens = self.file_context.context_size()
 
         for file_path, span_id, rank in flattened_results:
+            if span_id not in span_ids:
+                continue
+
             # TODO: Check the sum of the tokens in the context and the tokens in the span
             if self.file_context.context_size() > self.expand_to_max_tokens:
                 break
 
+
             added_spans += 1
             self.file_context.add_span_to_context(file_path, span_id)
 
-        # Add possibly relevant spans from the same file
-        for file_path, span_id, rank in flattened_results:
-            if self.file_context.context_size() > self.expand_to_max_tokens:
-                break
+        if self.expand_other:
+            # Add possibly relevant spans from the same file
+            for file_path, span_id, rank in flattened_results:
+                if self.file_context.context_size() > self.expand_to_max_tokens:
+                    break
 
-            added_spans += 1
-            self.file_context.add_span_to_context(file_path, span_id)
+                added_spans += 1
+                self.file_context.add_span_to_context(file_path, span_id)
 
         logger.debug(f"Expanded context with {added_spans} spans. Original tokens: {original_tokens}, Expanded tokens: {self.file_context.context_size()}")
 

diff --git a/moatless/edit/plan.py b/moatless/edit/plan.py
@@ -94,7 +94,7 @@ class PlanToCode(AgenticState):
     )
 
     write_code_suggestions: bool = Field(
-        True,
+        False,
         description="Whether to instruct the LLM to write out the actual code in the instructions.",
     )
 

diff --git a/moatless/index/code_index.py b/moatless/index/code_index.py
@@ -163,7 +163,7 @@ def from_index_name(
         if os.getenv("INDEX_STORE_URL"):
             index_store_url = os.getenv("INDEX_STORE_URL")
         else:
-            index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240522-voyage-code-2"
+            index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240814-voyage-code-2/"
 
         store_url = os.path.join(index_store_url, f"{index_name}.zip")
         logger.info(f"Downloading existing index {index_name} from {store_url}.")

diff --git a/moatless/transitions.py b/moatless/transitions.py
@@ -183,18 +183,14 @@ def identify_directly_transition(
 
 
 def search_and_code_transitions(
-    max_tokens_in_edit_prompt: Optional[int] = 500,
     global_params: Optional[dict] = None,
     state_params: Optional[dict] = None,
 ) -> TransitionRules:
     state_params = state_params or {}
-    if max_tokens_in_edit_prompt is not None:
-        state_params.setdefault(
-            PlanToCode, {"max_tokens_in_edit_prompt": max_tokens_in_edit_prompt}
-        )
     return TransitionRules(
         global_params=global_params,
         state_params=state_params,
+        initial_state=SearchCode,
         transition_rules=[
             TransitionRule(source=Pending, dest=SearchCode, trigger="init"),
             TransitionRule(source=SearchCode, dest=IdentifyCode, trigger="did_search"),