Skip to content

Commit

Permalink
Fix types and add indexing script
Browse files Browse the repository at this point in the history
  • Loading branch information
aorwall committed Aug 14, 2024
1 parent 37a8dbe commit 439fbbd
Show file tree
Hide file tree
Showing 32 changed files with 260 additions and 29 deletions.
2 changes: 1 addition & 1 deletion moatless/benchmark/report_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from moatless.benchmark.utils import get_missing_files
from moatless.file_context import FileContext
from moatless.trajectory import Trajectory
from moatless.types import ActionTransaction, Usage, Content
from moatless.schema import ActionTransaction, Usage, Content
from moatless.state import AgenticState

logger = logging.getLogger(__name__)
Expand Down
179 changes: 179 additions & 0 deletions moatless/benchmark/swebench/index_instances.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import logging

from moatless import FileRepository
from moatless.benchmark.swebench import load_instances, setup_swebench_repo
import json

from moatless.benchmark.utils import calculate_estimated_context_window
from moatless.index.settings import IndexSettings, CommentStrategy
from moatless.index.code_index import CodeIndex
from dotenv import load_dotenv
from moatless.benchmark.swebench import get_repo_dir_name
import os


# Configure logging for the script: INFO level, with timestamp, logger name and level
# in front of every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Root directory for persisted indexes; get_persist_dir() joins it with the name
# returned by get_repo_dir_name() for an instance.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
index_store_dir = "/home/albert/.moatless/index_stores/20240814-voyage-code-2"

# JSONL report file; run_indexing() reads it at start-up to find instances that
# were already reported by an earlier run.
evaluation_report = "report.jsonl"


def create_instance_list():
    """Load the SWE-bench_Verified test split and return its instances, oldest first.

    Returns:
        list: The instance records, sorted ascending by their ``"created_at"`` value.
    """
    instance_by_id = load_instances("princeton-nlp/SWE-bench_Verified", split="test")

    logger.info(
        f"Number of instances: {len(instance_by_id)} from {len(instance_by_id)} SWE-bench_Lite and SWE-bench_Verified")

    # Materialise the records first, then order them in place by creation time.
    ordered = list(instance_by_id.values())
    ordered.sort(key=lambda record: record["created_at"])

    logger.info(f"Number of instances: {len(ordered)}")
    return ordered

#with open("index_eval.csv", "w") as f:
# f.write("instance_id,vectors,indexed_tokens,all_matching_context_window,any_matching_context_window\n")


# Keyed by instance["repo"]; holds the last instance handled for that repo so
# run_indexing() can start the next one from that instance's persisted index.
previous_instances = {}



# Settings passed to CodeIndex when run_indexing() builds an index from scratch
# (i.e. when no earlier instance of the same repo is available to load from).
# NOTE(review): embed_model and dimensions presumably have to agree with what the
# embedding provider returns - confirm before changing either.
index_settings = IndexSettings(
embed_model="voyage-code-2",
dimensions=1536,
language="python",
min_chunk_size=200,
chunk_size=750,
hard_token_limit=3000,
max_chunks=200,
comment_strategy=CommentStrategy.ASSOCIATE
)

# Executed at import time, after index_settings has been constructed.
# NOTE(review): the relative path presumably resolves against the current working
# directory, and the file presumably supplies the embedding API credentials - confirm.
load_dotenv('../.env')


def get_persist_dir(instance):
    """Return the index directory for *instance* below ``index_store_dir``."""
    dir_name = get_repo_dir_name(instance["instance_id"])
    return os.path.join(index_store_dir, dir_name)


def ingest(code_index, instance):
    """Run ingestion on *code_index* and persist the result for *instance*.

    Returns:
        tuple: ``(vectors, indexed_tokens)`` as reported by ``run_ingestion``.
    """
    ingestion_result = code_index.run_ingestion(num_workers=1)
    vectors, indexed_tokens = ingestion_result
    logger.info(f"Indexed {vectors} vectors and {indexed_tokens} tokens.")

    # Persist only after ingestion finished, into the directory derived from the instance.
    persist_dir = get_persist_dir(instance)
    code_index.persist(persist_dir=persist_dir)
    logger.info(f"Index persisted to {persist_dir}")

    return vectors, indexed_tokens

def evaluate(code_index, instance):
    """Measure how much retrieved context is needed to reach the expected changes.

    Runs a vector search for the instance's problem statement, lets
    ``calculate_estimated_context_window`` annotate the expected changes with
    the search results, then logs a summary and one line per expected change.

    Args:
        code_index: Index object exposing ``_vector_search(query, top_k=...)``.
        instance: Instance record; its ``"problem_statement"`` is the search query.

    Returns:
        tuple: ``(expected_changes, all_matching_context_window,
        any_matching_context_window)``. The two windows are the largest and the
        smallest non-``None`` ``"context_window"`` among the expected changes,
        or ``None`` when no expected change was found.
    """
    results = code_index._vector_search(instance["problem_statement"], top_k=1000)
    expected_changes, sum_tokens = calculate_estimated_context_window(instance, results)

    expected_matches = [context for context in expected_changes if context["context_window"] is not None]
    windows = [context["context_window"] for context in expected_matches]

    # Computed once; ``default=None`` covers the "nothing found" case, so the
    # partial-match branch no longer needs to recompute the minimum.
    all_matching_context_window = max(windows, default=None)
    any_matching_context_window = min(windows, default=None)

    if not expected_matches:
        logger.info(f"No expected changes found in context window of {sum_tokens} tokens")
    elif len(expected_matches) == len(expected_changes):
        logger.info(
            f"Found all expected changes within a context window of {all_matching_context_window} tokens, first match at context window {any_matching_context_window}")
    else:
        # Only some expected changes were found: the reported "all" window is
        # the largest window among the ones that did match.
        logger.info(
            f"Found {len(expected_matches)} expected changes within a context window {all_matching_context_window} tokens, first match at context window {any_matching_context_window} max context window {sum_tokens} tokens")

    for change in expected_changes:
        if change["context_window"] is None:
            logger.info(
                f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) not found, closest match: {change.get('closest_match_lines')}")
        else:
            logger.info(
                f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) found at context window {change['context_window']} tokens. Distance: {change['distance']}. Position: {change['position']}")

    return expected_changes, all_matching_context_window, any_matching_context_window


def write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window,
                 any_matching_context_window):
    """Append the evaluation result of one instance to the JSONL report and the CSV summary.

    The JSONL record goes to ``evaluation_report`` - the same file
    ``run_indexing`` reads when resuming - so the writer and the reader cannot
    drift apart if that path is changed.

    Args:
        instance: Instance record; only ``"instance_id"`` is read.
        expected_changes: JSON-serialisable list describing the expected changes.
        vectors: Number of vectors reported by ingestion.
        indexed_tokens: Number of tokens reported by ingestion.
        all_matching_context_window: Largest matching context window, or ``None``.
        any_matching_context_window: Smallest matching context window, or ``None``.
    """
    # Serialise before opening the file so a serialisation error cannot leave
    # a half-written line behind.
    record = json.dumps({
        "instance_id": instance["instance_id"],
        "vectors": vectors,
        "indexed_tokens": indexed_tokens,
        "all_matching_context_window": all_matching_context_window,
        "any_matching_context_window": any_matching_context_window,
        "expected_changes": expected_changes,
    })
    with open(evaluation_report, "a") as f:
        f.write(record + "\n")

    # The CSV is a flat summary of the same run; ``None`` windows are written as "None".
    with open("index_eval.csv", "a") as f:
        f.write(f"{instance['instance_id']},{vectors},{indexed_tokens},{all_matching_context_window},{any_matching_context_window}\n")

def run_indexing():
    """Index and evaluate the SWE-bench_Verified test instances, oldest first.

    Instances already listed in ``evaluation_report`` are skipped. The last
    handled instance of each repo is remembered in ``previous_instances`` so
    the next instance of that repo starts from that persisted index instead
    of an empty one.
    """
    instance_by_id = load_instances("princeton-nlp/SWE-bench_Verified", split="test")

    # Remove already-reported instances BEFORE building the work list.
    # Previously the list was built first, so the removal had no effect and
    # every run re-indexed and re-reported all instances.
    if os.path.exists(evaluation_report):
        with open(evaluation_report, "r") as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines in the report
                report = json.loads(line)
                previous_instance = instance_by_id.pop(report["instance_id"], None)
                if previous_instance:
                    # Later report lines win, so this ends up as the most
                    # recently reported instance of each repo.
                    previous_instances[previous_instance["repo"]] = previous_instance

    instances = sorted(instance_by_id.values(), key=lambda x: x["created_at"])

    logger.info(f"Number of instances: {len(instances)}")

    for i, instance in enumerate(instances):
        logger.info(f"Processing instance {i + 1}/{len(instances)}: {instance['instance_id']} {instance['created_at']}")

        repo_dir = setup_swebench_repo(instance)
        repo = FileRepository(repo_dir)
        persist_dir = get_persist_dir(instance)

        # Informational only: an existing directory is not loaded; the index is
        # rebuilt below and persisted to the same location.
        if os.path.exists(persist_dir):
            logger.info(f"Index exists on {persist_dir}")
        else:
            logger.info(f"No index found at {persist_dir}")

        previous_instance = previous_instances.get(instance["repo"])
        if previous_instance:
            logger.info(f"Loading previous index from {get_persist_dir(previous_instance)}")
            code_index = CodeIndex.from_persist_dir(get_persist_dir(previous_instance), file_repo=repo)
        else:
            code_index = CodeIndex(settings=index_settings, file_repo=repo)

        vectors, indexed_tokens = ingest(code_index, instance)
        expected_changes, all_matching_context_window, any_matching_context_window = evaluate(code_index, instance)
        write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window,
                     any_matching_context_window)

        previous_instances[instance["repo"]] = instance


# Start the indexing job only when this file is executed as a script, so that
# importing the module (e.g. to reuse evaluate() or ingest()) does not trigger it.
if __name__ == "__main__":
    run_indexing()
2 changes: 1 addition & 1 deletion moatless/benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from moatless.codeblocks.module import Module
from moatless.index.types import SearchCodeHit, CodeSnippet
from moatless.repository import FileRepository
from moatless.types import FileWithSpans
from moatless.schema import FileWithSpans

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from moatless.edit.prompt import CLARIFY_CHANGE_SYSTEM_PROMPT
from moatless.repository import CodeFile
from moatless.state import ActionResponse, AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
FileWithSpans,
Message,
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic import BaseModel, Field, PrivateAttr

from moatless.state import AgenticState, Finished
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
AssistantMessage,
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
SELECT_SPAN_SYSTEM_PROMPT,
)
from moatless.state import AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
AssistantMessage,
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/plan_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
SELECT_LINES_SYSTEM_PROMPT,
)
from moatless.state import AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
AssistantMessage,
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
CODER_FINAL_SYSTEM_PROMPT,
)
from moatless.state import AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
Message,
Expand Down
2 changes: 1 addition & 1 deletion moatless/file_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
SpanType,
)
from moatless.repository import CodeFile, FileRepository, UpdateResult
from moatless.types import FileWithSpans
from moatless.schema import FileWithSpans

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion moatless/find/decide.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from moatless.find import SearchCode
from moatless.state import AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
Message,
Expand Down
2 changes: 1 addition & 1 deletion moatless/find/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from moatless.file_context import RankedFileSpan
from moatless.state import AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
FileWithSpans,
Expand Down
2 changes: 1 addition & 1 deletion moatless/find/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from moatless.file_context import RankedFileSpan
from moatless.index.types import SearchCodeHit
from moatless.state import ActionResponse, AgenticState
from moatless.types import (
from moatless.schema import (
ActionRequest,
AssistantMessage,
Message,
Expand Down
2 changes: 1 addition & 1 deletion moatless/index/code_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
SearchCodeResponse,
)
from moatless.repository import FileRepository
from moatless.types import FileWithSpans
from moatless.schema import FileWithSpans
from moatless.utils.tokenizer import count_tokens

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion moatless/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
)
from moatless.trajectory import Trajectory
from moatless.transition_rules import TransitionRule, TransitionRules
from moatless.types import (
from moatless.schema import (
ActionRequest,
AssistantMessage,
Content,
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion moatless/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from moatless.file_context import FileContext
from moatless.repository import FileRepository
from moatless.types import (
from moatless.schema import (
ActionRequest,
ActionResponse,
ActionTransaction,
Expand Down
2 changes: 1 addition & 1 deletion moatless/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from moatless.workspace import Workspace
from moatless.transition_rules import TransitionRules
from moatless.state import AgenticState, get_state_class
from moatless.types import ActionRequest, ActionTransaction, ActionResponse, Usage, Content
from moatless.schema import ActionRequest, ActionTransaction, ActionResponse, Usage, Content

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion moatless/verify/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pylint.testutils import MinimalTestReporter

from moatless.repository import CodeFile
from moatless.types import VerificationError
from moatless.schema import VerificationError
from moatless.verify.verify import Verifier

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion moatless/verify/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import subprocess

from moatless.repository import CodeFile
from moatless.types import VerificationError
from moatless.schema import VerificationError
from moatless.verify.verify import Verifier

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion moatless/verify/verify.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod

from moatless.repository import CodeFile
from moatless.types import VerificationError
from moatless.schema import VerificationError


class Verifier(ABC):
Expand Down
2 changes: 1 addition & 1 deletion moatless/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from moatless.index import IndexSettings
from moatless.index.code_index import CodeIndex
from moatless.repository import CodeFile, FileRepository, GitRepository
from moatless.types import FileWithSpans, VerificationError
from moatless.schema import FileWithSpans, VerificationError
from moatless.verify.lint import PylintVerifier
from moatless.verify.maven import MavenVerifier

Expand Down
2 changes: 1 addition & 1 deletion tests/edit/test_clarify.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from unittest.mock import Mock, patch
from moatless.edit.clarify import ClarifyCodeChange, LineNumberClarification
from moatless.types import ActionResponse, FileWithSpans
from moatless.schema import ActionResponse, FileWithSpans
from moatless.workspace import Workspace
from moatless.file_context import FileContext
from moatless.repository import CodeFile
Expand Down
2 changes: 1 addition & 1 deletion tests/edit/test_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unittest.mock import Mock, patch
from moatless.edit.edit import EditCode
from moatless.repository.file import UpdateResult
from moatless.types import ActionResponse, Content
from moatless.schema import ActionResponse, Content
from moatless.workspace import Workspace
from moatless.file_context import FileContext
from moatless.repository import CodeFile
Expand Down
2 changes: 1 addition & 1 deletion tests/edit/test_plan.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from unittest.mock import Mock, patch
from moatless.edit.plan import PlanToCode, ApplyChange
from moatless.types import ActionResponse, ActionTransaction
from moatless.schema import ActionResponse, ActionTransaction
from moatless.workspace import Workspace
from moatless.file_context import FileContext

Expand Down
2 changes: 1 addition & 1 deletion tests/find/test_decide.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from moatless.find.decide import DecideRelevance, Decision
from moatless.find.identify import Identify, IdentifyCode
from moatless.types import ActionResponse, ActionTransaction
from moatless.schema import ActionResponse, ActionTransaction
from moatless.workspace import Workspace
from moatless.file_context import FileContext
from unittest.mock import Mock, MagicMock, patch
Expand Down
2 changes: 1 addition & 1 deletion tests/find/test_identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from moatless.find.identify import IdentifyCode, Identify, is_test_pattern
from moatless.file_context import RankedFileSpan
from moatless.repository.file import CodeFile
from moatless.types import FileWithSpans, ActionResponse
from moatless.schema import FileWithSpans, ActionResponse
from moatless.workspace import Workspace
from unittest.mock import Mock, MagicMock

Expand Down
Loading

0 comments on commit 439fbbd

Please sign in to comment.