Skip to content

Commit

Permalink
Fix tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
aorwall committed Aug 18, 2024
1 parent 4b1380d commit d70085d
Show file tree
Hide file tree
Showing 24 changed files with 12,248 additions and 602 deletions.
6 changes: 2 additions & 4 deletions moatless/benchmark/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def __init__(
evaluations_dir: str,
evaluation_name: str,
transitions: TransitionRules,
workspace: Workspace | None = None,
report_mode: str | None = None,
max_cost: float = 0.5,
max_transitions: int = 25,
Expand Down Expand Up @@ -74,7 +73,6 @@ def __init__(
self.reward_threshold = reward_threshold

self.transitions = transitions
self.workspace = workspace

litellm.drop_params = True

Expand Down Expand Up @@ -346,13 +344,13 @@ def _run_evaluation_threads(self, instances: list[dict]):
logger.info(
f"Processing {len(instances)} instances with {len(repo_groups)} repos with {self.num_workers} workers"
)
logger.info(self.transitions)

with concurrent.futures.ProcessPoolExecutor(
max_workers=self.num_workers
) as executor:
futures = []
for repo, group in repo_groups.items():
logger.info(json.dumps(group, indent=2))
futures.append(executor.submit(self._process_repo_group, repo, group))

pbar = tqdm(concurrent.futures.as_completed(futures), total=len(futures))
Expand All @@ -366,7 +364,7 @@ def _run_evaluation_threads(self, instances: list[dict]):
continue
except Exception:
error += 1
logger.exception("Error in processing repo group")
logger.exception(f"Error in processing repo group.")
continue

results.extend(group_results)
Expand Down
Empty file.
4 changes: 2 additions & 2 deletions moatless/benchmark/report_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ def to_dataframe(report_mode: str, results: list[BenchmarkResult]) -> pd.DataFra

def flatten_dict(d, parent_key="", sep="_"):
items = []
general_keys = ["instance_id", "duration", "total_cost", "resolved_by", "status",
general_keys = ["instance_id", "duration", "total_cost", "prompt_tokens", "completion_tokens", "resolved_by", "status",
"transitions", "all_transitions", "alternative_solutions", "resolved",
"expected_spans", "expected_files", "error"]

Expand Down Expand Up @@ -562,7 +562,7 @@ def flatten_dict(d, parent_key="", sep="_"):

# Reorder columns
column_order = [
"instance_id", "duration", "total_cost", "promt_tokens", "completion_tokens", "resolved_by", "status", "resolved",
"instance_id", "duration", "total_cost", "prompt_tokens", "completion_tokens", "resolved_by", "status", "resolved",
"transitions", "all_transitions", "expected_spans", "expected_files", "alternative_solutions",
"expected_spans_details", "error"
]
Expand Down
4 changes: 2 additions & 2 deletions moatless/benchmark/run_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def parse_args():
return parser.parse_args()

search_model = "openrouter/anthropic/claude-3.5-sonnet"
plan_model = "claude-3-5-sonnet-20240620" # "openrouter/anthropic/claude-3.5-sonnet"
plan_model = "azure/gpt-4o" # "claude-3-5-sonnet-20240620" # "openrouter/anthropic/claude-3.5-sonnet"
edit_model = "azure/gpt-4o"

DEFAULT_STATE_PARAMS = {
Expand All @@ -86,7 +86,7 @@ def parse_args():
"finish_on_review": True,
},
ExpandContext: {
"expand_to_max_tokens": 8000
"expand_to_max_tokens": 4000
},
ClarifyCodeChange: {
"model": "azure/gpt-4o",
Expand Down
128 changes: 0 additions & 128 deletions moatless/benchmark/state/expand.py

This file was deleted.

22 changes: 16 additions & 6 deletions moatless/edit/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ class ExpandContext(State):
description="Whether to expand with related spans.",
)

expand_other: bool = Field(
False,
description="Whether to expand with related spans.",
)

def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOutcome:
self.file_context.expand_context_with_init_spans()

Expand All @@ -56,20 +61,25 @@ def execute(self, mocked_action_request: ActionRequest | None = None) -> StateOu
original_tokens = self.file_context.context_size()

for file_path, span_id, rank in flattened_results:
if span_id not in span_ids:
continue

# TODO: Check the sum of the tokens in the context and the tokens in the span
if self.file_context.context_size() > self.expand_to_max_tokens:
break


added_spans += 1
self.file_context.add_span_to_context(file_path, span_id)

# Add possibly relevant spans from the same file
for file_path, span_id, rank in flattened_results:
if self.file_context.context_size() > self.expand_to_max_tokens:
break
if self.expand_other:
# Add possibly relevant spans from the same file
for file_path, span_id, rank in flattened_results:
if self.file_context.context_size() > self.expand_to_max_tokens:
break

added_spans += 1
self.file_context.add_span_to_context(file_path, span_id)
added_spans += 1
self.file_context.add_span_to_context(file_path, span_id)

logger.debug(f"Expanded context with {added_spans} spans. Original tokens: {original_tokens}, Expanded tokens: {self.file_context.context_size()}")

Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class PlanToCode(AgenticState):
)

write_code_suggestions: bool = Field(
True,
False,
description="Whether to instruct the LLM to write out the actual code in the instructions.",
)

Expand Down
2 changes: 1 addition & 1 deletion moatless/index/code_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def from_index_name(
if os.getenv("INDEX_STORE_URL"):
index_store_url = os.getenv("INDEX_STORE_URL")
else:
index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240522-voyage-code-2"
index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240814-voyage-code-2/"

store_url = os.path.join(index_store_url, f"{index_name}.zip")
logger.info(f"Downloading existing index {index_name} from {store_url}.")
Expand Down
6 changes: 1 addition & 5 deletions moatless/transitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,18 +183,14 @@ def identify_directly_transition(


def search_and_code_transitions(
max_tokens_in_edit_prompt: Optional[int] = 500,
global_params: Optional[dict] = None,
state_params: Optional[dict] = None,
) -> TransitionRules:
state_params = state_params or {}
if max_tokens_in_edit_prompt is not None:
state_params.setdefault(
PlanToCode, {"max_tokens_in_edit_prompt": max_tokens_in_edit_prompt}
)
return TransitionRules(
global_params=global_params,
state_params=state_params,
initial_state=SearchCode,
transition_rules=[
TransitionRule(source=Pending, dest=SearchCode, trigger="init"),
TransitionRule(source=SearchCode, dest=IdentifyCode, trigger="did_search"),
Expand Down
Loading

0 comments on commit d70085d

Please sign in to comment.