Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sweep: refactor edit_sweep_comment in on_ticket.py to not use nonlocals. put all of these in the function call, and add each argument accordingly #3672

Open
wwzeng1 opened this issue May 3, 2024 · 1 comment · May be fixed by #4026
Labels
sweep Assigns Sweep to an issue or pull request.

Comments

@wwzeng1
Copy link
Contributor

wwzeng1 commented May 3, 2024

Branch

No response

@wwzeng1 wwzeng1 added the sweep Assigns Sweep to an issue or pull request. label May 3, 2024
@wwzeng1 wwzeng1 changed the title Sweep: refactor edit_sweep_comment in on_ticket.py to not use nonlocals. put all of these in the function call, and add each argument accordingly. our goal is to move edit_sweep_comment to a different file Sweep: refactor edit_sweep_comment in on_ticket.py to not use nonlocals. put all of these in the function call, and add each argument accordingly May 3, 2024
Copy link
Contributor

sweep-nightly bot commented May 3, 2024

🚀 Here's the PR! #4026

💎 Sweep Pro: You have unlimited Sweep issues

Actions

  • ↻ Restart Sweep

Step 1: 🔎 Searching

(Click to expand) Here are the code search results. I'm now analyzing these search results to write the PR.

"""
on_ticket is the main function that is called when a new issue is created.
It is only called by the webhook handler in sweepai/api.py.
"""
import copy
import os
import traceback
from time import time
from github import BadCredentialsException
from github.PullRequest import PullRequest as GithubPullRequest
from loguru import logger
from sweepai.chat.api import posthog_trace
from sweepai.agents.image_description_bot import ImageDescriptionBot
from sweepai.config.client import (
RESET_FILE,
REVERT_CHANGED_FILES_TITLE,
SweepConfig,
)
from sweepai.config.server import (
ENV,
GITHUB_LABEL_NAME,
IS_SELF_HOSTED,
MONGODB_URI,
)
from sweepai.core.entities import (
MockPR,
NoFilesException,
SweepPullRequest,
render_fcrs,
)
from sweepai.core.pr_reader import PRReader
from sweepai.core.pull_request_bot import PRSummaryBot
from sweepai.core.sweep_bot import get_files_to_change
from sweepai.handlers.on_failing_github_actions import on_failing_github_actions
from sweepai.handlers.create_pr import (
handle_file_change_requests,
)
from sweepai.utils.concurrency_utils import fire_and_forget_wrapper
from sweepai.utils.image_utils import get_image_contents_from_urls, get_image_urls_from_issue
from sweepai.utils.issue_validator import validate_issue
from sweepai.utils.prompt_constructor import get_issue_request
from sweepai.utils.ticket_rendering_utils import add_emoji, center, process_summary, remove_emoji, get_payment_messages, get_comment_header, send_email_to_user, raise_on_no_file_change_requests, handle_empty_repository, delete_old_prs
from sweepai.utils.validate_license import validate_license
from sweepai.utils.buttons import Button, ButtonList
from sweepai.utils.chat_logger import ChatLogger
from sentry_sdk import set_user
from sweepai.utils.event_logger import posthog
from sweepai.utils.github_utils import (
CURRENT_USERNAME,
ClonedRepo,
commit_multi_file_changes,
convert_pr_draft_field,
create_branch,
get_github_client,
refresh_token,
sanitize_string_for_github,
validate_and_sanitize_multi_file_changes,
)
from sweepai.utils.slack_utils import add_slack_context
from sweepai.utils.str_utils import (
BOT_SUFFIX,
FASTER_MODEL_MESSAGE,
blockquote,
bold,
bot_suffix,
create_collapsible,
discord_suffix,
get_hash,
strip_sweep,
to_branch_name
)
from sweepai.utils.ticket_utils import (
fetch_relevant_files,
)
@posthog_trace
def on_ticket(
    username: str,
    title: str,
    summary: str,
    issue_number: int,
    issue_url: str,  # purely for logging purposes
    repo_full_name: str,
    repo_description: str,
    installation_id: int,
    comment_id: int = None,
    edited: bool = False,
    tracking_id: str | None = None,
):
    """Handle a Sweep ticket end to end: search, plan, commit and open a PR.

    The function keeps a single progress comment on the issue up to date
    (through the nested ``edit_sweep_comment`` helper) while it:

    1. validates the issue text,
    2. streams relevant snippets from the cloned repository,
    3. plans file change requests,
    4. applies and commits the changes on a new ``sweep/...`` branch,
    5. opens a pull request, labels it and notifies the user.

    Args:
        username: Login of the user that triggered the run.
        title: Issue title; Sweep mode flags are stripped via ``strip_sweep``.
        summary: Issue body.
        issue_number: Number of the issue inside the repository.
        issue_url: Issue URL, only used for logging/analytics.
        repo_full_name: ``"<organization>/<repository>"``.
        repo_description: Repository description (defaulted when empty).
        installation_id: GitHub App installation id used to obtain tokens.
        comment_id: Id of the triggering comment, if any.
        edited: Whether the run was triggered by an edit.
        tracking_id: Correlation id; generated with ``get_hash`` when missing.

    Returns:
        A dict with a ``"success"`` key on the early-exit paths (closed issue,
        empty repository, deprecated fast model, invalid issue). The success
        path visible here ends without an explicit return value.

    Raises:
        Exception: Any failure in the search, planning or validation stage is
            reported on the issue comment, captured in posthog and re-raised.
    """
    set_user({"username": username})
    if not os.environ.get("CLI"):
        assert validate_license(), "License key is invalid or expired. Please contact us at [email protected] to upgrade to an enterprise license."
    with logger.contextualize(
        tracking_id=tracking_id,
    ):
        if tracking_id is None:
            tracking_id = get_hash()
        on_ticket_start_time = time()
        logger.info(f"Starting on_ticket with title {title} and summary {summary}")
        (
            title,
            slow_mode,
            do_map,
            subissues_mode,
            sandbox_mode,
            fast_mode,
            lint_mode,
        ) = strip_sweep(title)
        summary, repo_name, user_token, g, repo, current_issue, assignee, overrided_branch_name = process_summary(summary, issue_number, repo_full_name, installation_id)
        # Chat logging is only available when a MongoDB URI is configured.
        chat_logger: ChatLogger = (
            ChatLogger(
                {
                    "repo_name": repo_name,
                    "title": title,
                    "summary": summary,
                    "issue_number": issue_number,
                    "issue_url": issue_url,
                    "username": (
                        username if not username.startswith("sweep") else assignee
                    ),
                    "repo_full_name": repo_full_name,
                    "repo_description": repo_description,
                    "installation_id": installation_id,
                    "type": "ticket",
                    "mode": ENV,
                    "comment_id": comment_id,
                    "edited": edited,
                    "tracking_id": tracking_id,
                },
                active=True,
            )
            if MONGODB_URI
            else None
        )
        modify_files_dict_history: list[dict[str, dict[str, str]]] = []
        if chat_logger and not IS_SELF_HOSTED:
            is_paying_user = chat_logger.is_paying_user()
            use_faster_model = chat_logger.use_faster_model()
        else:
            is_paying_user = True
            use_faster_model = False
        if use_faster_model:
            raise Exception(FASTER_MODEL_MESSAGE)
        if fast_mode:
            use_faster_model = True
        if not comment_id and not edited and chat_logger and not sandbox_mode:
            fire_and_forget_wrapper(chat_logger.add_successful_ticket)(
                gpt3=use_faster_model
            )
        organization, repo_name = repo_full_name.split("/")
        # Shared analytics payload attached to every posthog event below.
        metadata = {
            "issue_url": issue_url,
            "repo_full_name": repo_full_name,
            "organization": organization,
            "repo_name": repo_name,
            "repo_description": repo_description,
            "username": username,
            "comment_id": comment_id,
            "title": title,
            "installation_id": installation_id,
            "function": "on_ticket",
            "edited": edited,
            "model": "gpt-3.5" if use_faster_model else "gpt-4",
            "tier": "pro" if is_paying_user else "free",
            "mode": ENV,
            "slow_mode": slow_mode,
            "do_map": do_map,
            "subissues_mode": subissues_mode,
            "sandbox_mode": sandbox_mode,
            "fast_mode": fast_mode,
            "is_self_hosted": IS_SELF_HOSTED,
            "tracking_id": tracking_id,
        }
        fire_and_forget_wrapper(posthog.capture)(
            username, "started", properties=metadata
        )
        try:
            if current_issue.state == "closed":
                fire_and_forget_wrapper(posthog.capture)(
                    username,
                    "issue_closed",
                    properties={
                        **metadata,
                        "duration": round(time() - on_ticket_start_time),
                    },
                )
                return {"success": False, "reason": "Issue is closed"}
            fire_and_forget_wrapper(add_emoji)(current_issue, comment_id)
            fire_and_forget_wrapper(remove_emoji)(
                current_issue, comment_id, content_to_delete="rocket"
            )
            fire_and_forget_wrapper(remove_emoji)(
                current_issue, comment_id, content_to_delete="confused"
            )
            fire_and_forget_wrapper(current_issue.edit)(body=summary)
            replies_text = ""
            summary = summary if summary else ""
            fire_and_forget_wrapper(delete_old_prs)(repo, issue_number)
            progress_headers = [
                None,
                "Step 1: 🔎 Searching",
                "Step 2: ⌨️ Coding",
                "Step 3: 🔄️ Validating",
            ]
            issue_comment = None
            payment_message, payment_message_start = get_payment_messages(
                chat_logger
            )
            config_pr_url = None
            cloned_repo: ClonedRepo = ClonedRepo(
                repo_full_name,
                installation_id=installation_id,
                token=user_token,
                repo=repo,
                branch=overrided_branch_name,
            )
            # check that repo's directory is non-empty
            if not os.listdir(cloned_repo.repo_dir):
                handle_empty_repository(comment_id, current_issue, progress_headers, issue_comment)
                return {"success": False}
            indexing_message = (
                "I'm searching for relevant snippets in your repository. If this is your first"
                " time using Sweep, I'm indexing your repository, which will take a few minutes."
            )
            first_comment = (
                f"{get_comment_header(0, progress_headers, payment_message_start)}\n## "
                f"{progress_headers[1]}\n{indexing_message}{bot_suffix}{discord_suffix}"
            )
            # Find Sweep's previous comment
            comments = []
            for comment in current_issue.get_comments():
                comments.append(comment)
                if comment.user.login == CURRENT_USERNAME:
                    issue_comment = comment
                    break
            if issue_comment is None:
                issue_comment = current_issue.create_comment(first_comment)
            else:
                fire_and_forget_wrapper(issue_comment.edit)(first_comment)
            # Wrap edit so that every later update carries the bot suffix.
            old_edit = issue_comment.edit
            issue_comment.edit = lambda msg: old_edit(msg + BOT_SUFFIX)
            past_messages = {}
            current_index = 0
            initial_sandbox_response = -1
            initial_sandbox_response_file = None

            def edit_sweep_comment(
                message: str,
                index: int,
                pr_message="",
                done=False,
                step_complete=True,
                add_bonus_message=True,
            ):
                """Re-render Sweep's progress comment on the issue.

                ``index`` selects the progress step being updated; ``-1`` marks
                the run as errored and replaces the history with ``message``.
                On expired credentials the GitHub client is refreshed and the
                comment is located (or created) again before retrying.
                """
                nonlocal current_index, user_token, g, repo, issue_comment, initial_sandbox_response, initial_sandbox_response_file
                message = sanitize_string_for_github(message)
                if pr_message:
                    pr_message = sanitize_string_for_github(pr_message)
                errored = index == -1
                if index >= 0:
                    past_messages[index] = message
                    current_index = index
                agg_message = ""
                # Include progress history
                for i in range(
                    current_index + 2
                ):  # go to next header (for Working on it... text)
                    if i >= len(progress_headers):
                        continue  # skip None header
                    if not step_complete and i >= current_index + 1:
                        continue
                    if i == 0 and index != 0:
                        continue
                    header = progress_headers[i]
                    header = "## " + (header if header is not None else "") + "\n"
                    msg = header + (past_messages.get(i) or "Working on it...")
                    agg_message += "\n" + msg
                suffix = bot_suffix + discord_suffix
                if errored:
                    agg_message = (
                        "## ❌ Unable to Complete PR"
                        + "\n"
                        + message
                        + (
                            "\n\n"
                            " **[Report a bug](https://community.sweep.dev/)**."
                            if add_bonus_message
                            else ""
                        )
                    )
                    suffix = bot_suffix  # don't include discord suffix for error messages
                # Update the issue comment
                msg = f"""{get_comment_header(
                    current_index,
                    progress_headers,
                    payment_message_start,
                    errored=errored,
                    pr_message=pr_message,
                    done=done,
                    config_pr_url=config_pr_url
                )}\n{agg_message}{suffix}"""
                try:
                    issue_comment.edit(msg)
                except BadCredentialsException:
                    logger.error(
                        f"Bad credentials, refreshing token (tracking ID: `{tracking_id}`)"
                    )
                    user_token, g = get_github_client(installation_id)
                    repo = g.get_repo(repo_full_name)
                    issue_comment = None
                    for comment in comments:
                        if comment.user.login == CURRENT_USERNAME:
                            issue_comment = comment
                    current_issue = repo.get_issue(number=issue_number)
                    if issue_comment is None:
                        issue_comment = current_issue.create_comment(msg)
                    else:
                        issue_comment = [
                            comment
                            for comment in current_issue.get_comments()
                            if comment.user.login == CURRENT_USERNAME
                        ][0]
                        issue_comment.edit(msg)

            if use_faster_model:
                edit_sweep_comment(
                    FASTER_MODEL_MESSAGE, -1, add_bonus_message=False
                )
                posthog.capture(
                    username,
                    "ran_out_of_tickets",
                    properties={
                        **metadata,
                        "duration": round(time() - on_ticket_start_time),
                    },
                )
                fire_and_forget_wrapper(add_emoji)(
                    current_issue, comment_id, reaction_content="confused"
                )
                # remove_emoji requires the issue as its first argument.
                fire_and_forget_wrapper(remove_emoji)(
                    current_issue, comment_id, content_to_delete="eyes"
                )
                return {
                    "success": False,
                    "error_message": "We deprecated supporting GPT 3.5.",
                }
            internal_message_summary = summary
            internal_message_summary += add_slack_context(internal_message_summary)
            error_message = validate_issue(title + internal_message_summary)
            if error_message:
                logger.warning(f"Validation error: {error_message}")
                edit_sweep_comment(
                    (
                        f"\n\n{bold(error_message)}"
                    ),
                    -1,
                )
                fire_and_forget_wrapper(add_emoji)(
                    current_issue, comment_id, reaction_content="confused"
                )
                fire_and_forget_wrapper(remove_emoji)(
                    current_issue, comment_id, content_to_delete="eyes"
                )
                posthog.capture(
                    username,
                    "invalid_issue",
                    properties={
                        **metadata,
                        "duration": round(time() - on_ticket_start_time),
                    },
                )
                return {"success": True}
            edit_sweep_comment(
                "I've just finished validating the issue. I'm now going to start searching for relevant files.",
                0
            )
            prs_extracted = PRReader.extract_prs(repo, summary)
            if prs_extracted:
                internal_message_summary += "\n\n" + prs_extracted
                edit_sweep_comment(
                    create_collapsible(
                        "I found that you mentioned the following Pull Requests that might be important:",
                        blockquote(
                            prs_extracted,
                        ),
                    ),
                    1,
                )
            try:
                # search/context manager
                logger.info("Searching for relevant snippets...")
                # fetch images from body of issue
                image_urls = get_image_urls_from_issue(issue_number, repo_full_name, installation_id)
                image_contents = get_image_contents_from_urls(image_urls)
                if image_contents:  # doing it here to avoid editing the original issue
                    internal_message_summary += ImageDescriptionBot().describe_images(text=title + internal_message_summary, images=image_contents)
                _user_token, g = get_github_client(installation_id)
                user_token, g, repo = refresh_token(repo_full_name, installation_id)
                cloned_repo.token = user_token
                repo = g.get_repo(repo_full_name)
                newline = "\n"
                for message, repo_context_manager in fetch_relevant_files.stream(
                    cloned_repo,
                    title,
                    internal_message_summary,
                    replies_text,
                    username,
                    metadata,
                    on_ticket_start_time,
                    tracking_id,
                    is_paying_user,
                    issue_url,
                    chat_logger,
                    images=image_contents
                ):
                    if repo_context_manager.current_top_snippets + repo_context_manager.read_only_snippets:
                        edit_sweep_comment(
                            create_collapsible(
                                "(Click to expand) " + message,
                                "\n".join(
                                    [
                                        f"https://github.com/{organization}/{repo_name}/blob/{repo.get_commits()[0].sha}/{snippet.file_path}#L{max(snippet.start, 1)}-L{max(min(snippet.end, snippet.content.count(newline) - 1), 1)}\n"
                                        for snippet in list(dict.fromkeys(repo_context_manager.current_top_snippets + repo_context_manager.read_only_snippets))
                                    ]
                                ),
                            )
                            + (
                                create_collapsible(
                                    "I also found that you mentioned the following Pull Requests that may be helpful:",
                                    blockquote(prs_extracted),
                                )
                                if prs_extracted
                                else ""
                            ),
                            1,
                            step_complete=False
                        )
                    else:
                        edit_sweep_comment(
                            message,
                            1,
                            step_complete=False
                        )
                # Final render of the search step, using the last streamed result.
                edit_sweep_comment(
                    create_collapsible(
                        "(Click to expand) " + message,
                        "\n".join(
                            [
                                f"https://github.com/{organization}/{repo_name}/blob/{repo.get_commits()[0].sha}/{snippet.file_path}#L{max(snippet.start, 1)}-L{max(min(snippet.end, snippet.content.count(newline) - 1), 1)}\n"
                                for snippet in list(dict.fromkeys(repo_context_manager.current_top_snippets + repo_context_manager.read_only_snippets))
                            ]
                        ),
                    )
                    + (
                        create_collapsible(
                            "I also found that you mentioned the following Pull Requests that may be helpful:",
                            blockquote(prs_extracted),
                        )
                        if prs_extracted
                        else ""
                    ),
                    1,
                )
                # # Search agent
                # search_agent_results = search(
                #     internal_message_summary,
                #     cloned_repo,
                #     repo_context_manager.current_top_snippets + repo_context_manager.read_only_snippets,
                # )
                # breakpoint()
                cloned_repo = repo_context_manager.cloned_repo
                user_token, g, repo = refresh_token(repo_full_name, installation_id)
            except Exception as e:
                edit_sweep_comment(
                    (
                        "It looks like an issue has occurred around fetching the files."
                        f" The exception was {str(e)}. If this error persists"
                        f" contact [email protected].\n\n> @{username}, editing this issue description to include more details will automatically make me relaunch. Please join our [community forum](https://community.sweep.dev/) for support (tracking_id={tracking_id})"
                    ),
                    -1,
                )
                raise e
            # Fetch git commit history
            if not repo_description:
                repo_description = "No description provided."
            internal_message_summary += replies_text
            issue_request = get_issue_request(title, internal_message_summary)
            try:
                newline = "\n"
                logger.info("Fetching files to modify/create...")
                for renames_dict, user_facing_message, file_change_requests in get_files_to_change.stream(
                    relevant_snippets=repo_context_manager.current_top_snippets,
                    read_only_snippets=repo_context_manager.read_only_snippets,
                    problem_statement=f"{title}\n\n{internal_message_summary}",
                    repo_name=repo_full_name,
                    cloned_repo=cloned_repo,
                    images=image_contents,
                    chat_logger=chat_logger
                ):
                    planning_markdown = render_fcrs(file_change_requests)
                    edit_sweep_comment(user_facing_message + planning_markdown, 2, step_complete=False)
                edit_sweep_comment(user_facing_message + planning_markdown, 2)
                raise_on_no_file_change_requests(title, summary, edit_sweep_comment, file_change_requests, renames_dict)
            except Exception as e:
                logger.exception(e)
                # title and summary are defined elsewhere
                edit_sweep_comment(
                    (
                        "I'm sorry, but it looks like an error has occurred due to"
                        + f" a planning failure. The error message is {str(e).rstrip('.')}. Feel free to add more details to the issue description"
                        + " so Sweep can better address it. Alternatively, reach out to Kevin or William for help at"
                        + " https://community.sweep.dev/."
                    ),
                    -1,
                )
                raise e
            # VALIDATION (modify)
            try:
                edit_sweep_comment(
                    "I'm currently validating your changes using parsers and linters to check for mistakes like syntax errors or undefined variables. If I see any of these errors, I will automatically fix them.",
                    3,
                )
                pull_request: SweepPullRequest = SweepPullRequest(
                    title="Sweep: " + title,
                    branch_name="sweep/" + to_branch_name(title),
                    content="",
                )
                logger.info("Making PR...")
                pull_request.branch_name = create_branch(
                    cloned_repo.repo, pull_request.branch_name, base_branch=overrided_branch_name
                )
                modify_files_dict, changed_file, file_change_requests = handle_file_change_requests(
                    file_change_requests=file_change_requests,
                    request=issue_request,
                    cloned_repo=cloned_repo,
                    username=username,
                    installation_id=installation_id,
                    renames_dict=renames_dict
                )
                pull_request_bot = PRSummaryBot()
                commit_message = pull_request_bot.get_commit_message(modify_files_dict, renames_dict=renames_dict, chat_logger=chat_logger)[:50]
                modify_files_dict_history.append(copy.deepcopy(modify_files_dict))
                new_file_contents_to_commit = {file_path: file_data["contents"] for file_path, file_data in modify_files_dict.items()}
                previous_file_contents_to_commit = copy.deepcopy(new_file_contents_to_commit)
                new_file_contents_to_commit, files_removed = validate_and_sanitize_multi_file_changes(cloned_repo.repo, new_file_contents_to_commit, file_change_requests)
                if files_removed and username:
                    posthog.capture(
                        username,
                        "polluted_commits_error",
                        properties={
                            "old_keys": ",".join(previous_file_contents_to_commit.keys()),
                            "new_keys": ",".join(new_file_contents_to_commit.keys())
                        },
                    )
                commit = commit_multi_file_changes(cloned_repo, new_file_contents_to_commit, commit_message, pull_request.branch_name, renames_dict=renames_dict)
                edit_sweep_comment(
                    f"Your changes have been successfully made to the branch [`{pull_request.branch_name}`](https://github.com/{repo_full_name}/tree/{pull_request.branch_name}). I have validated these changes using a syntax checker and a linter.",
                    3,
                )
            except Exception as e:
                logger.exception(e)
                edit_sweep_comment(
                    (
                        "I'm sorry, but it looks like an error has occurred due to"
                        + f" a code validation failure. The error message is {str(e)}. Here were the changes I had planned:\n\n{planning_markdown}\n\n"
                        + "Feel free to add more details to the issue description"
                        + " so Sweep can better address it. Alternatively, reach out to Kevin or William for help at"
                        + " https://community.sweep.dev/."
                    ),
                    -1,
                )
                raise e
            else:
                try:
                    # Both helpers take the issue first; the reaction name is a keyword.
                    fire_and_forget_wrapper(remove_emoji)(
                        current_issue, comment_id, content_to_delete="eyes"
                    )
                    fire_and_forget_wrapper(add_emoji)(
                        current_issue, comment_id, reaction_content="rocket"
                    )
                except Exception as e:
                    logger.error(e)
            # set all fcrs without a corresponding change to be failed
            for file_change_request in file_change_requests:
                if file_change_request.status != "succeeded":
                    file_change_request.status = "failed"
                # also update all commit hashes associated with the fcr
                file_change_request.commit_hash_url = commit.html_url if commit else None
            if not file_change_requests:
                raise NoFilesException()
            # collect all files that have been changed
            changed_files = list(modify_files_dict) if modify_files_dict else []
            # Refresh token
            try:
                current_issue = repo.get_issue(number=issue_number)
            except BadCredentialsException:
                user_token, g, repo = refresh_token(repo_full_name, installation_id)
                cloned_repo.token = user_token
                # Re-fetch the issue with the refreshed client so later calls
                # (e.g. create_reaction) do not reuse the stale credentials.
                current_issue = repo.get_issue(number=issue_number)
            pr_changes = MockPR(
                file_count=len(modify_files_dict),
                title=pull_request.title,
                body="",  # overrided later
                pr_head=pull_request.branch_name,
                base=cloned_repo.repo.get_branch(
                    SweepConfig.get_branch(cloned_repo.repo)
                ).commit,
                head=cloned_repo.repo.get_branch(pull_request.branch_name).commit,
            )
            pr_changes = PRSummaryBot.get_pull_request_summary(
                title + "\n" + internal_message_summary,
                issue_number,
                repo,
                overrided_branch_name,
                pull_request,
                pr_changes
            )
            change_location = f" [`{pr_changes.pr_head}`](https://github.com/{repo_full_name}/commits/{pr_changes.pr_head}).\n\n"
            review_message = (
                "Here are my self-reviews of my changes at" + change_location
            )
            fire_and_forget_wrapper(remove_emoji)(
                current_issue, comment_id, content_to_delete="eyes"
            )
            # create draft pr, then convert to regular pr later
            pr: GithubPullRequest = repo.create_pull(
                title=pr_changes.title,
                body=pr_changes.body,
                head=pr_changes.pr_head,
                base=overrided_branch_name or SweepConfig.get_branch(repo),
                draft=False,
            )
            try:
                pr.add_to_assignees(username)
            except Exception as e:
                logger.warning(
                    f"Failed to add assignee {username}: {e}, probably a bot."
                )
            if len(changed_files) > 1:
                revert_buttons = [
                    Button(label=f"{RESET_FILE} {changed_file}")
                    for changed_file in set(changed_files)
                ]
                revert_buttons_list = ButtonList(
                    buttons=revert_buttons, title=REVERT_CHANGED_FILES_TITLE
                )
                if revert_buttons:
                    pr.create_issue_comment(
                        revert_buttons_list.serialize() + BOT_SUFFIX
                    )
            # add comments before labelling
            pr.add_to_labels(GITHUB_LABEL_NAME)
            current_issue.create_reaction("rocket")
            heres_pr_message = f'<h1 align="center">🚀 Here\'s the PR! <a href="{pr.html_url}">#{pr.number}</a></h1>'
            progress_message = ''
            edit_sweep_comment(
                review_message + "\n\nSuccess! 🚀",
                4,
                pr_message=(
                    f"{center(heres_pr_message)}\n{center(progress_message)}\n{center(payment_message_start)}"
                ),
                done=True,
            )
            on_failing_github_actions(
                f"{title}\n{internal_message_summary}\n{replies_text}",
                repo,
                username,
                pr,
                user_token,
                installation_id,
                chat_logger=chat_logger
            )
            send_email_to_user(title, issue_number, username, repo_full_name, tracking_id, repo_name, g, file_change_requests, pr_changes, pr)
            # break from main for loop
            convert_pr_draft_field(pr, is_draft=False, installation_id=installation_id)
        except Exception as e:
            posthog.capture(
                username,
                "failed",
                properties={
                    **metadata,
                    "error": str(e),
                    "trace": traceback.format_exc(),
                    "duration": round(time() - on_ticket_start_time),
                },
            )
            raise e
        posthog.capture(
            username,
            "success",
            properties={**metadata, "duration": round(time() - on_ticket_start_time)},
        )

"""
Helper utilities used by on_ticket: issue/PR reactions, progress-comment
headers, issue summary parsing, GitHub Actions log retrieval and the
notification email sent once a pull request has been created.
"""
import difflib
import io
import os
import re
import zipfile
import markdown
import requests
from github import Repository, IncompletableObject
from github.PullRequest import PullRequest
from github.Issue import Issue
from loguru import logger
from tqdm import tqdm
import hashlib
from sweepai.agents.pr_description_bot import PRDescriptionBot
from sweepai.config.client import (
RESTART_SWEEP_BUTTON,
SweepConfig,
)
from sweepai.core.entities import (
SandboxResponse,
)
from sweepai.dataclasses.codereview import CodeReview, CodeReviewIssue
from sweepai.handlers.create_pr import (
safe_delete_sweep_branch,
)
from sweepai.handlers.on_check_suite import clean_gh_logs, remove_ansi_tags
from sweepai.utils.buttons import create_action_buttons
from sweepai.utils.chat_logger import ChatLogger
from sweepai.utils.concurrency_utils import fire_and_forget_wrapper
from sweepai.utils.github_utils import (
CURRENT_USERNAME,
get_github_client,
get_token,
)
from sweepai.utils.str_utils import (
BOT_SUFFIX,
blockquote,
bot_suffix,
clean_logs,
create_collapsible,
discord_suffix,
format_sandbox_success,
sep,
stars_suffix,
)
from sweepai.utils.user_settings import UserSettings
# HTML snippet for the animated Sweep logo; embedded in comment headers and in the email template.
sweeping_gif = """<a href="https://github.com/sweepai/sweep"><img class="swing" src="https://raw.githubusercontent.com/sweepai/sweep/main/.assets/sweeping.gif" width="100" style="width:50px; margin-bottom:10px" alt="Sweeping"></a>"""
# YAML text with relaxed lint rules (line-length and indentation disabled).
# NOTE(review): looks like a yamllint configuration; not referenced in the visible code — confirm usage.
custom_config = """
extends: relaxed
rules:
line-length: disable
indentation: disable
"""
# Markdown footer appended to generated PR descriptions (see rewrite_pr_description).
INSTRUCTIONS_FOR_REVIEW = """\
### 💡 To get Sweep to edit this pull request, you can:
* Comment below, and Sweep can edit the entire PR
* Comment on a file, Sweep will only modify the commented file
* Edit the original issue to get Sweep to recreate the PR from scratch"""
# HTML body of the "pull request complete" email; filled in with str.format by send_email_to_user.
email_template = """Hey {name},
<br/><br/>
🚀 I just finished creating a pull request for your issue ({repo_full_name}#{issue_number}) at <a href="{pr_url}">{repo_full_name}#{pr_number}</a>!
<br/><br/>
<h2>Summary</h2>
<blockquote>
{summary}
</blockquote>
<h2>Files Changed</h2>
<ul>
{files_changed}
</ul>
{sweeping_gif}
<br/>
Cheers,
<br/>
Sweep
<br/>"""
# Prompt template with a {github_action_log} placeholder for failing GitHub Actions logs.
FAILING_GITHUB_ACTION_PROMPT = """\
The following Github Actions failed on a previous attempt at fixing this issue.
Propose a fix to the failing github actions. You must edit the source code, not the github action itself.
{github_action_log}
"""
# Markdown heading text for Sweep's PR review output.
SWEEP_PR_REVIEW_HEADER = "# Sweep: PR Review"
def center(text: str) -> str:
    """Wrap ``text`` in a ``<div>`` that is horizontally centered."""
    return "<div align='center'>{}</div>".format(text)
# React to an issue (or one of its comments) with an emoji, :eyes: by default
def add_emoji(issue: Issue, comment_id: int = None, reaction_content="eyes"):
    """Create a reaction on ``issue`` or, when ``comment_id`` is truthy, on that comment.

    Args:
        issue: Issue to react to (or whose comment is looked up).
        comment_id: Id of the comment to react to instead of the issue itself.
        reaction_content: Reaction name passed to ``create_reaction``.
    """
    target = issue
    if comment_id:
        target = issue.get_comment(comment_id)
    target.create_reaction(reaction_content)
# React to a pull request (or one of its comments) with an emoji, :eyes: by default
def add_emoji_to_pr(pr: PullRequest, comment_id: int = None, reaction_content="eyes"):
    """Create a reaction on ``pr`` or, when ``comment_id`` is truthy, on that comment.

    Args:
        pr: Pull request to react to (or whose comment is looked up).
        comment_id: Id of the comment to react to instead of the PR itself.
        reaction_content: Reaction name passed to ``create_reaction``.
    """
    target = pr
    if comment_id:
        target = pr.get_comment(comment_id)
    target.create_reaction(reaction_content)
# Remove the reactions of the given kind that the bot account left on an issue or comment
def remove_emoji(issue: Issue, comment_id: int = None, content_to_delete="eyes"):
    """Delete every ``content_to_delete`` reaction authored by ``CURRENT_USERNAME``.

    Args:
        issue: Issue whose reactions (or whose comment's reactions) are inspected.
        comment_id: When truthy, operate on this comment instead of the issue.
        content_to_delete: Reaction name to remove (``"eyes"`` by default).
    """
    target = issue.get_comment(comment_id) if comment_id else issue
    for reaction in target.get_reactions():
        if reaction.content != content_to_delete:
            continue
        if reaction.user.login != CURRENT_USERNAME:
            continue
        target.delete_reaction(reaction.id)
def create_error_logs(
    commit_url_display: str,
    sandbox_response: SandboxResponse,
    status: str = "✓",
):
    """Render the sandbox outputs as nested collapsible HTML sections.

    Args:
        commit_url_display: Text identifying the commit, shown in the outer title.
        sandbox_response: Sandbox result providing ``outputs`` and ``success``.
        status: Marker appended to the outer title.

    Returns:
        An HTML string, or ``""`` when ``sandbox_response`` is falsy.
    """
    if not sandbox_response:
        return ""
    outputs = sandbox_response.outputs
    total = len(outputs)
    sections = []
    for position, output in enumerate(outputs):
        # Only the last output is passed a truthy third argument.
        sections.append(
            create_collapsible(
                f"<code>{output}</code> {position + 1}/{total} {format_sandbox_success(sandbox_response.success)}",
                f"<pre>{clean_logs(output)}</pre>",
                position == total - 1,
            )
        )
    body = blockquote("\n\n".join(sections))
    return "<br/>" + create_collapsible(
        f"Sandbox logs for {commit_url_display} {status}",
        body,
        opened=True,
    )
# takes in a list of workflow runs and returns the concatenated logs of their failing steps
def get_failing_gha_logs(runs, installation_id) -> str:
    """Download and concatenate the logs of the failed steps of ``runs``.

    For every run the jobs endpoint is queried to find failed steps of failed
    jobs, then the run's log archive (a zip file) is downloaded and the
    matching ``<job>/<step number>_<step name>.txt`` entries are cleaned with
    ``clean_gh_logs`` and appended.

    Args:
        runs: Iterable of workflow runs exposing ``jobs_url`` and ``logs_url``.
        installation_id: Installation id used to obtain the API token.

    Returns:
        The collected logs with ANSI tags removed. If the jobs request for a
        run fails, collection stops and the logs gathered so far are returned.
    """
    token = get_token(installation_id)
    # Same headers for every request; built once instead of per call.
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    all_logs = ""
    for run in runs:
        jobs_response = requests.get(run.jobs_url, headers=headers)
        if jobs_response.status_code != 200:
            logger.error(
                "Failed to get jobs for failing github actions, possible a credentials issue"
            )
            # Strip ANSI tags here too so both exits return the same format.
            return remove_ansi_tags(all_logs)
        jobs_payload = jobs_response.json()  # parse the body only once
        failed_jobs_name_list = []
        for job in jobs_payload["jobs"]:
            if job["conclusion"] != "failure":
                continue
            # add failed steps
            for step in job["steps"]:
                if step["conclusion"] == "failure":
                    parsed_name = step['name'].replace('/', '')
                    failed_jobs_name_list.append(
                        f"{job['name']}/{step['number']}_{parsed_name}"
                    )
        # make sure jobs in valid
        if jobs_payload["total_count"] == 0:
            logger.warning(f"no jobs for this run: {run}, continuing...")
            continue
        logs_response = requests.get(
            run.logs_url,
            headers=headers,
            allow_redirects=True,
        )
        # Check if the request was successful
        if logs_response.status_code != 200:
            logger.error(
                "Failed to get logs for failing github actions, likely a credentials issue"
            )
            continue
        # The context manager closes the archive once it has been read.
        with zipfile.ZipFile(io.BytesIO(logs_response.content), "r") as zip_file:
            zip_file_names = set(zip_file.namelist())
            for file in failed_jobs_name_list:
                entry_name = f"{file}.txt"
                if entry_name in zip_file_names:
                    logs = zip_file.read(entry_name).decode("utf-8")
                    all_logs += clean_gh_logs(logs) + "\n"
    return remove_ansi_tags(all_logs)
def delete_old_prs(repo: Repository, issue_number: int):
    """Delete the branch of the most recent open Sweep PR that fixes the issue.

    Only the first page of open pull requests targeting the configured base
    branch (newest first) is inspected. The first PR authored by
    ``CURRENT_USERNAME`` whose body contains ``"Fixes #<issue_number>.\\n"`` is
    passed to ``safe_delete_sweep_branch`` and the search stops.

    Args:
        repo: Repository to search for pull requests.
        issue_number: Issue number referenced in the PR body.
    """
    logger.info("Deleting old PRs...")
    prs = repo.get_pulls(
        state="open",
        sort="created",
        direction="desc",
        base=SweepConfig.get_branch(repo),
    )
    fixes_marker = f"Fixes #{issue_number}.\n"
    for pr in tqdm(prs.get_page(0)):
        # # Check if this issue is mentioned in the PR, and pr is owned by bot
        # # This is done in create_pr, (pr_description = ...)
        # A PR without a description has body None; treat it as empty text.
        if pr.user.login == CURRENT_USERNAME and fixes_marker in (pr.body or ""):
            safe_delete_sweep_branch(pr, repo)
            break
def get_comment_header(
    index: int,
    progress_headers: list[None | str],
    payment_message_start: str,
    errored: bool = False,
    pr_message: str = "",
    done: bool = False,
    config_pr_url: str | None = None,
):
    """Build the header (logo, progress bar, action buttons) of Sweep's comment.

    Args:
        index: Current progress step; negative values are treated as 0 and
            ``4`` selects the final "PR created" header.
        progress_headers: Step headers; their count scales the progress bar.
        payment_message_start: Payment/plan message shown under the bar.
        errored: Render the bar with an "Errored" title and omit the
            stars/payment/config lines.
        pr_message: Header content used when ``index == 4``.
        done: Advance the progress bar by one extra step.
        config_pr_url: Optional URL of a config pull request to link to.

    Returns:
        The header as an HTML/markdown string.
    """
    config_pr_message = (
        "\n"
        + f"<div align='center'>Install Sweep Configs: <a href='{config_pr_url}'>Pull Request</a></div>"
        if config_pr_url is not None
        else ""
    )
    actions_message = create_action_buttons(
        [
            RESTART_SWEEP_BUTTON,
        ]
    )
    index = max(index, 0)
    if index == 4:
        return pr_message + config_pr_message + f"\n\n{actions_message}"
    total = len(progress_headers)
    if done:
        index += 1
    # Convert the step to a percentage capped at 100; an empty header list
    # yields 0% instead of a ZeroDivisionError.
    percent = min(100, int(index * (100 / total))) if total else 0
    if errored:
        pbar = f"\n\n<img src='https://progress-bar.dev/{percent}/?&title=Errored&width=600' alt='{percent}%' />"
        return (
            f"{center(sweeping_gif)}<br/>{center(pbar)}\n\n" + f"\n\n{actions_message}"
        )
    pbar = f"\n\n<img src='https://progress-bar.dev/{percent}/?&title=Progress&width=600' alt='{percent}%' />"
    # The stars suffix is always shown: the percentage can never be -1 here.
    return (
        f"{center(sweeping_gif)}"
        + f"<br/>{center(pbar)}"
        + "\n"
        + stars_suffix
        + "\n"
        + center(payment_message_start)
        + config_pr_message
        + f"\n\n{actions_message}"
    )
def process_summary(summary, issue_number, repo_full_name, installation_id):
    """Clean the issue body and resolve the GitHub objects needed by on_ticket.

    Checklist sections and empty "Details" placeholders are removed from the
    summary and double newlines are collapsed. A ``Branch: <name>`` directive
    (preceded by whitespace) selects an override branch; GitHub tree URLs are
    reduced to the part after ``tree/``.

    Args:
        summary: Raw issue body (``None`` is treated as empty).
        issue_number: Number of the issue to fetch.
        repo_full_name: ``"<organization>/<repository>"``.
        installation_id: Installation id used to create the GitHub client.

    Returns:
        Tuple ``(summary, repo_name, user_token, g, repo, current_issue,
        assignee, overrided_branch_name)``. ``repo_name`` is the full repo
        name; ``assignee`` falls back to the issue author.
    """
    summary = summary or ""
    # Raw strings keep regex escapes such as \s and \[ from being read as
    # (invalid) Python string escapes.
    summary = re.sub(
        r"<details (open)?>(\r)?\n<summary>Checklist</summary>.*",
        "",
        summary,
        flags=re.DOTALL,
    ).strip()
    summary = re.sub(
        r"---\s+Checklist:(\r)?\n(\r)?\n- \[[ X]\].*",
        "",
        summary,
        flags=re.DOTALL,
    ).strip()
    summary = re.sub("### Details\n\n_No response_", "", summary, flags=re.DOTALL)
    summary = re.sub("\n\n", "\n", summary, flags=re.DOTALL)
    repo_name = repo_full_name
    user_token, g = get_github_client(installation_id)
    repo = g.get_repo(repo_full_name)
    current_issue: Issue = repo.get_issue(number=issue_number)
    assignee = current_issue.assignee.login if current_issue.assignee else None
    if assignee is None:
        assignee = current_issue.user.login
    # [Bb] accepts either capitalisation; the previous class [B|b] also
    # matched a literal "|".
    branch_match = re.search(r"(\s[Bb]ranch:) *(?P<branch_name>.+?)(\s|$)", summary)
    overrided_branch_name = None
    if branch_match:
        overrided_branch_name = branch_match.group("branch_name").strip().strip("`\"'")
        # TODO: this code might be finicky, might have missed edge cases
        if overrided_branch_name.startswith("https://github.com/"):
            overrided_branch_name = overrided_branch_name.split("?")[0].split("tree/")[
                -1
            ]
        # Result is discarded by the original code as well.
        # NOTE(review): presumably called to validate the branch — confirm.
        SweepConfig.get_branch(repo, overrided_branch_name)
    return (
        summary,
        repo_name,
        user_token,
        g,
        repo,
        current_issue,
        assignee,
        overrided_branch_name,
    )
def raise_on_no_file_change_requests(
    title, summary, edit_sweep_comment, file_change_requests, renames_dict
):
    """Abort the run when planning produced neither file changes nor renames.

    Args:
        title: Issue title.
        summary: Issue body.
        edit_sweep_comment: Callback ``(message, index)`` used to report the
            failure on the issue comment (called with index ``-1``).
        file_change_requests: Planned file change requests.
        renames_dict: Planned renames.

    Raises:
        Exception: When both ``file_change_requests`` and ``renames_dict`` are
            empty, after the comment has been updated.
    """
    if file_change_requests or renames_dict:
        return
    apology = (
        "Sorry, I could not find any files to modify, can you please"
        " provide more details?"
    )
    # Very short issues get an extra hint about the minimum length.
    if len(title + summary) < 60:
        apology += (
            " Please make sure that the title and"
            " summary of the issue are at least 60 characters."
        )
    edit_sweep_comment(apology, -1)
    raise Exception(
        "Sorry, we failed to make the file changes. Please report this and we will fix it."
    )
def rewrite_pr_description(
    issue_number, repo, overrided_branch_name, pull_request, pr_changes
):
    """Replace the PR body with a description generated from the branch diff.

    The diff between ``pull_request.branch_name`` and ``overrided_branch_name``
    is passed to ``PRDescriptionBot().describe_diffs`` together with the pull
    request title. When that returns a non-empty description, ``pr_changes.body``
    is overwritten with it followed by a ``Fixes #<issue_number>.`` line and the
    review instructions; otherwise ``pr_changes`` is left as is.

    Returns:
        The same ``pr_changes`` object that was passed in.
    """
    branch_diff = get_branch_diff_text(
        repo=repo,
        branch=pull_request.branch_name,
        base_branch=overrided_branch_name,
    )
    # TODO: update the title as well
    generated_body = PRDescriptionBot().describe_diffs(branch_diff, pull_request.title)
    if not generated_body:
        return pr_changes

    pr_changes.body = (
        f"{generated_body}\n\nFixes #{issue_number}.\n\n---\n\n"
        f"{INSTRUCTIONS_FOR_REVIEW}{BOT_SUFFIX}"
    )
    return pr_changes
def send_email_to_user(
    title,
    issue_number,
    username,
    repo_full_name,
    tracking_id,
    repo_name,
    g,
    file_change_requests,
    pr_changes,
    pr,
):
    """Send the "pull request complete" email for a finished ticket.

    The email lists every created or modified file with the number of added
    and removed lines, computed from a unified diff of the old and new content
    of each file change request. The recipient is addressed by the first word
    of their GitHub display name, or of their login when no name is set.
    """
    user_settings = UserSettings.from_username(username=username)
    github_user = g.get_user(username)
    first_name = (github_user.name or github_user.login).split(" ")[0]

    changed_file_items = []
    for fcr in file_change_requests:
        if fcr.change_type not in ("create", "modify"):
            continue
        added = 0
        removed = 0
        diff_lines = difflib.unified_diff(
            (fcr.old_content or "").splitlines(),
            (fcr.new_content or "").splitlines(),
            lineterm="",
        )
        for diff_line in diff_lines:
            # "+++" / "---" are the file header lines of the diff, not content.
            if diff_line.startswith("+") and not diff_line.startswith("+++"):
                added += 1
            elif diff_line.startswith("-") and not diff_line.startswith("---"):
                removed += 1
        changed_file_items.append(f"<code>{fcr.filename}</code> (+{added}/-{removed})")

    email_html = email_template.format(
        name=first_name,
        pr_url=pr.html_url,
        issue_number=issue_number,
        repo_full_name=repo_full_name,
        pr_number=pr.number,
        summary=markdown.markdown(pr_changes.body),
        files_changed="\n".join(f"<li>{item}</li>" for item in changed_file_items),
        sweeping_gif=sweeping_gif,
    )
    user_settings.send_email(
        subject=f"Sweep Pull Request Complete for {repo_name}#{issue_number} {title}",
        html=email_html,
    )
def handle_empty_repository(comment_id, current_issue, progress_headers, issue_comment):
    """Tell the user that empty repositories are not supported.

    The notice is written into ``issue_comment`` when one exists, otherwise a
    new comment is created on ``current_issue``. Afterwards a "confused"
    reaction is requested via ``add_emoji`` and removal of the "eyes" reaction
    via ``remove_emoji``, both through ``fire_and_forget_wrapper``.
    """
    notice = (
        "Sweep is currently not supported on empty repositories. Please add some"
        f" code to your repository and try again.\n{sep}##"
        f" {progress_headers[1]}\n{bot_suffix}{discord_suffix}"
    ) + BOT_SUFFIX

    if issue_comment is not None:
        issue_comment.edit(notice)
    else:
        current_issue.create_comment(notice)

    add_reaction = fire_and_forget_wrapper(add_emoji)
    add_reaction(current_issue, comment_id, reaction_content="confused")
    remove_reaction = fire_and_forget_wrapper(remove_emoji)
    remove_reaction(content_to_delete="eyes")
def get_branch_diff_text(repo, branch, base_branch=None):
    """Return the diff between two branches as one text blob.

    Args:
        repo: Repository object providing ``compare(base, head)``.
        branch: Head branch of the comparison.
        base_branch: Base branch; when falsy, ``SweepConfig.get_branch(repo)``
            is used instead.

    Returns:
        For every added, renamed, modified or removed file (in that order),
        the file name on one line followed by its patch, all joined by
        newlines. Files with any other status are logged and left out.
    """
    status_rank = {"added": 0, "renamed": 1, "modified": 2, "removed": 3}
    comparison = repo.compare(base_branch or SweepConfig.get_branch(repo), branch)
    # Unknown statuses sort last; sorted() is stable, so files keep their
    # relative order within each status.
    ordered_files = sorted(
        comparison.files, key=lambda changed: status_rank.get(changed.status, 4)
    )

    sections = []
    for changed in ordered_files:
        if changed.status in status_rank:
            sections.append(f"{changed.filename}\n{changed.patch}")
        else:
            logger.info(f"File status {changed.status} not recognized")
    return "\n".join(sections)
def get_payment_messages(chat_logger: ChatLogger):
    """Build the plan / remaining-quota message shown to the user.

    Without a ``chat_logger`` the user is treated as a paying user with an
    unlimited quota. Otherwise the tier flags and ticket counts are read from
    ``chat_logger``.

    Returns:
        A ``(payment_message, payment_message_start)`` tuple; both entries
        hold the same text.
    """
    if chat_logger:
        is_paying_user = chat_logger.is_paying_user()
        is_consumer_tier = chat_logger.is_consumer_tier()
        use_faster_model = chat_logger.use_faster_model()
    else:
        is_paying_user, is_consumer_tier, use_faster_model = True, False, False

    # Monthly allowance per tier; the paying tier wins over the consumer tier.
    if is_paying_user:
        tickets_allocated = 500
    elif is_consumer_tier:
        tickets_allocated = 15
    else:
        tickets_allocated = 5

    if chat_logger:
        purchased_ticket_count = chat_logger.get_ticket_count(purchased=True)
        ticket_count = (
            max(tickets_allocated - chat_logger.get_ticket_count(), 0)
            + purchased_ticket_count
        )
        if use_faster_model:
            daily_ticket_count = 0
        else:
            daily_ticket_count = 3 - chat_logger.get_ticket_count(use_date=True)
    else:
        ticket_count = 999
        daily_ticket_count = 999

    single_payment_link = "https://buy.stripe.com/00g3fh7qF85q0AE14d"
    pro_payment_link = "https://buy.stripe.com/00g5npeT71H2gzCfZ8"

    # The daily quota is only mentioned for users on neither paid tier.
    if is_paying_user or is_consumer_tier:
        daily_message = ""
    else:
        daily_message = f" and {daily_ticket_count} for the day"

    if is_paying_user:
        user_type = "💎 <b>Sweep Pro</b>"
        gpt_tickets_left_message = "unlimited Sweep issues"
        purchase_message = ""
    else:
        user_type = "⚡ <b>Sweep Basic Tier</b>"
        gpt_tickets_left_message = f"{ticket_count} Sweep issues left for the month"
        purchase_message = f"<br/><br/> For more Sweep issues, visit <a href={single_payment_link}>our payment portal</a>. For a one week free trial, try <a href={pro_payment_link}>Sweep Pro</a> (unlimited GPT-4 tickets)."

    payment_message = (
        f"{user_type}: You have {gpt_tickets_left_message}{daily_message}"
        + purchase_message
    )
    return payment_message, payment_message
def parse_issues_from_code_review(issue_string: str):
    """Extract ``CodeReviewIssue`` objects from ``<issue>...</issue>`` blocks.

    Every ``<issue>`` block must contain ``<issue_description>``,
    ``<file_name>`` and ``<line_number>`` tags; a block missing any of them is
    skipped. Tag contents are stripped of surrounding whitespace.

    Returns:
        A list of the distinct issues found (collected through a set, so the
        order is not defined).
    """
    required_fields = ("issue_description", "file_name", "line_number")
    parsed_issues = set()
    for issue_match in re.finditer(
        r"<issue>(?P<issue>.*?)<\/issue>", issue_string, re.DOTALL
    ):
        issue_body = issue_match.group("issue")
        fields = {}
        for field in required_fields:
            field_match = re.search(
                rf"<{field}>(?P<{field}>.*?)<\/{field}>", issue_body, re.DOTALL
            )
            if field_match is None:
                # Incomplete issue: give up on this block.
                break
            fields[field] = field_match.group(field).strip()
        else:
            # Only reached when no field was missing.
            parsed_issues.add(CodeReviewIssue(**fields))
    return list(parsed_issues)
# converts the list of issues inside a code_review into markdown text to display in a github comment
def render_code_review_issues(
    username: str,
    pr: PullRequest,
    code_review: CodeReview,
    issue_type: str = "",
    sorted_issues: list[CodeReviewIssue] = [],  # changes how issues are rendered
):
    """Render review issues as ``<li>`` entries for a GitHub comment.

    Args:
        username: Not used by this function.
        pr: Pull request; provides the changed files and the PR URL.
        code_review: Review whose ``issues`` (or ``potential_issues`` when
            ``issue_type == "potential"``) are rendered.
        issue_type: ``"potential"`` selects the potential issues.
        sorted_issues: When non-empty, these issues are rendered instead of
            the ones in ``code_review`` and each entry is prefixed with its
            file name.

    Returns:
        The concatenated entries. Each entry holds the description, a link to
        the line in the file blob and a "View Diff" link. Issues whose file is
        not part of the pull request are left out.
    """
    blob_urls = {changed.filename: changed.blob_url for changed in pr.get_files()}
    # The diff anchor of a file is built from the SHA-256 of its path.
    diff_urls = {}
    for changed_name in blob_urls:
        path_digest = hashlib.sha256(changed_name.encode("utf-8")).hexdigest()
        diff_urls[changed_name] = f"{pr.html_url}/files#diff-{path_digest}"

    if sorted_issues:
        issues_to_render = sorted_issues
    elif issue_type == "potential":
        issues_to_render = code_review.potential_issues
    else:
        issues_to_render = code_review.issues

    rendered_entries = []
    for issue in issues_to_render:
        if issue.file_name not in blob_urls:
            continue
        issue_blob_url = f"{blob_urls[issue.file_name]}#L{issue.line_number}"
        issue_diff_url = f"{diff_urls[issue.file_name]}R{issue.line_number}"
        location_prefix = f"In `{issue.file_name}`: " if sorted_issues else ""
        rendered_entries.append(
            f"<li>{location_prefix}{issue.issue_description}</li>\n\n{issue_blob_url}\n[View Diff]({issue_diff_url})"
        )
    return "".join(rendered_entries)
def escape_html(text: str) -> str:
    """Replace ``<`` and ``>`` with their HTML entities; nothing else is escaped."""
    return text.replace("<", "&lt;").replace(">", "&gt;")


def format_code_sections(text: str) -> str:
    """Turn backtick-delimited spans into ``<code>`` elements with escaped content.

    Makes sure code spans render properly in GitHub comment markdown that is
    embedded in HTML.

    Args:
        text: Text that may contain `` `code` `` spans.

    Returns:
        ``text`` unchanged when it contains an odd number of backticks.
        Otherwise the text with every backtick pair replaced by
        ``<code>...</code>``; ``<`` and ``>`` between a ``<code>`` tag and the
        next ``</code>`` tag are HTML-escaped. A ``<code>`` tag with no closing
        tag after it is passed through untouched (this used to raise
        ``ValueError``).
    """
    segments = text.split("`")
    # n backticks produce n + 1 segments, so an even segment count means the
    # backticks are unbalanced and the text cannot be paired up reliably.
    if len(segments) % 2 == 0:
        return text

    # Odd-indexed segments are the ones that sat between a backtick pair.
    formatted_text = "".join(
        f"<code>{segment}</code>" if position % 2 else segment
        for position, segment in enumerate(segments)
    )

    # Escape HTML characters within <code> tags
    parts = formatted_text.split("<code>")
    for i in range(1, len(parts)):
        code_content, closing_tag, rest = parts[i].partition("</code>")
        if not closing_tag:
            # Literal "<code>" in the input without a matching "</code>".
            continue
        parts[i] = escape_html(code_content) + closing_tag + rest
    return "<code>".join(parts)
def create_review_comments_for_code_issues(
    pr: PullRequest,
    code_issues: list[CodeReviewIssue]
):
    """Post one review comment on the pull request for every issue.

    Each comment is attached to the commit whose SHA equals ``pr.head.sha``
    (``None`` is passed when no such commit is listed), at the issue's file
    path (normalised with ``os.path.normpath``) and line number.
    """
    head_sha = pr.head.sha
    head_commit = next(
        (candidate for candidate in list(pr.get_commits()) if candidate.sha == head_sha),
        None,
    )
    for issue in code_issues:
        target_line = int(issue.line_number)
        pr.create_review_comment(
            body=issue.issue_description,
            commit=head_commit,
            path=os.path.normpath(issue.file_name),
            line=target_line,
        )
# turns code_review_by_file into markdown string
def render_pr_review_by_file(
    username: str,
    pr: PullRequest,
    code_review_by_file: dict[str, CodeReview],
    formatted_comment_threads: dict[str, str],
    pull_request_summary: str = "",
    dropped_files: list[str] = [],
    unsuitable_files: list[tuple[str, Exception]] = [],
    pr_authors: str = "",
) -> str:
    """Render the per-file review results as the body of the review comment.

    Besides building the text, this posts a review comment on the pull request
    for every confirmed issue via ``create_review_comments_for_code_issues``.

    Args:
        username: Forwarded to ``render_code_review_issues``.
        pr: Pull request being reviewed.
        code_review_by_file: Review result per file name.
        formatted_comment_threads: Existing comment threads; only its
            truthiness is used, to pick the wording of the "no issues" text.
        pull_request_summary: Optional summary, rendered after the issues.
        dropped_files: Files skipped by the file filter (listed in the footer).
        unsuitable_files: ``(file, exception)`` pairs for files that could not
            be reviewed (listed in the footer).
        pr_authors: Author logins joined by ``", "``.

    Returns:
        Header, issue summary, potential issues, PR summary and footer,
        concatenated in that order.
    """
    header = f"{SWEEP_PR_REVIEW_HEADER}\n"
    if pr_authors:
        author_label = "Authors" if ", " in pr_authors else "Author"
        header += f"{author_label}: {pr_authors}\n"

    # pull request summary goes to the bottom
    summary_section = ""
    if pull_request_summary:
        summary_section = f"\n<h3>Summary</h3>\n{pull_request_summary}\n<hr>\n"

    # Confirmed issues become review comments on the pull request itself.
    confirmed_issues = []
    possible_issues = []
    for code_review in code_review_by_file.values():
        confirmed_issues.extend(code_review.issues)
        possible_issues.extend(code_review.potential_issues)
    create_review_comments_for_code_issues(pr, confirmed_issues)

    # Potential issues are rendered inline, one collapsible block per file.
    per_file_blocks = []
    for file_name, code_review in code_review_by_file.items():
        if not code_review.potential_issues:
            continue
        rendered_issues = render_code_review_issues(
            username, pr, code_review, issue_type="potential"
        )
        per_file_blocks.append(
            f"<details>\n<summary>{file_name}</summary>\n<ul>{format_code_sections(rendered_issues)}</ul></details>"
        )
    potential_issues_section = "".join(per_file_blocks)
    if potential_issues_section:
        potential_issues_section = f"<details><summary><h3>Potential Issues</h3></summary><p><strong>Sweep is unsure if these are issues, but they might be worth checking out.</strong></p>\n\n{potential_issues_section}</details><hr>"

    # Footer: files that were not reviewed, and why.
    footer = ""
    dropped_count = len(dropped_files)
    if dropped_count == 1:
        footer += f"<p>{dropped_files[0]} was not reviewed because our filter identified it as typically a non-human-readable (auto-generated) or less important file (e.g., dist files, package.json, images). If this is an error, please let us know.</p>"
    elif dropped_count > 1:
        dropped_files_string = "".join(f"<li>{file}</li>" for file in dropped_files)
        footer += f"<p>The following files were not reviewed because our filter identified them as typically non-human-readable (auto-generated) or less important files (e.g., dist files, package.json, images). If this is an error, please let us know.</p><ul>{dropped_files_string}</ul>"
    unsuitable_count = len(unsuitable_files)
    if unsuitable_count == 1:
        footer += f"<p>The following file {unsuitable_files[0][0]} were not reviewed as they were deemed unsuitable for the following reason: {str(unsuitable_files[0][1])}. If this is an error please let us know.</p>"
    elif unsuitable_count > 1:
        unsuitable_files_string = "".join(
            f"<li>{file}: {str(exception)}</li>" for file, exception in unsuitable_files
        )
        footer += f"<p>The following files were not reviewed as they were deemed unsuitable for a variety of reasons. If this is an error please let us know.</p><ul>{unsuitable_files_string}</ul>"

    # NOTE(review): "new issues" is used when there are NO existing comment
    # threads, which reads as if the condition might be inverted - confirm intent.
    new_qualifier = "" if formatted_comment_threads else "new "
    confirmed_count = len(confirmed_issues)
    if confirmed_count == 0 and len(possible_issues) == 0:
        issues_section = (
            f"The Pull Request looks good! Sweep did not find any {new_qualifier}issues."
        )
    elif confirmed_count == 0:
        issues_section = f"The Pull Request looks good! Sweep did not find any {new_qualifier}issues but found some potential issues that you may want to take a look at."
    else:
        issue_noun = "issue" if confirmed_count == 1 else "issues"
        issues_section = f"\n\nSweep found `{confirmed_count}` new {issue_noun}.\n\n"
        issues_section += "Sweep has left comments on the pull request for you to review. \nYou may respond to any comment Sweep made your feedback will be taken into consideration if you run the review again. If Sweep made a mistake, you can resolve the comment or let Sweep know by responding to the comment."

    return header + issues_section + potential_issues_section + summary_section + footer
# handles the creation or update of the Sweep comment letting the user know that Sweep is reviewing a pr
# returns the comment_id
def create_update_review_pr_comment(
    username: str,
    pr: PullRequest,
    formatted_comment_threads: dict[str, str],
    code_review_by_file: dict[str, CodeReview] | None = None,
    pull_request_summary: str = "",
    dropped_files: list[str] = [],
    unsuitable_files: list[tuple[str, Exception]] = [],
    error_message: str = "",  # passing in an error message takes priority over everything else
) -> int:
    """Create or update the Sweep review comment on a pull request.

    The existing comment is recognised by its body starting with
    ``SWEEP_PR_REVIEW_HEADER``. When none exists, a "currently reviewing"
    placeholder is created first. The comment is then overwritten with the
    error message when one is given, or otherwise with the rendered review
    when ``code_review_by_file`` is non-empty.

    Args:
        username: Forwarded to ``render_pr_review_by_file``.
        pr: Pull request being reviewed.
        formatted_comment_threads: Forwarded to ``render_pr_review_by_file``.
        code_review_by_file: Review result per file name, if available yet.
        pull_request_summary: Forwarded to ``render_pr_review_by_file``.
        dropped_files: Forwarded to ``render_pr_review_by_file``.
        unsuitable_files: Forwarded to ``render_pr_review_by_file``.
        error_message: When non-empty, the comment only reports this error.

    Returns:
        The id of the Sweep review comment.
    """
    # comments that appear in the github ui in the conversation tab are considered issue comments
    pr_comments = list(pr.get_issue_comments())
    # make sure we don't already have a comment created
    sweep_comment = next(
        (
            comment
            for comment in pr_comments
            if comment.body.startswith(SWEEP_PR_REVIEW_HEADER)
        ),
        None,
    )

    commits = list(pr.get_commits())
    # A dict is used as an insertion-ordered set: the author list is rendered
    # into the comment, and joining a plain set made its order vary between
    # runs. The pull request opener now always comes first, followed by commit
    # authors in commit order.
    author_logins: dict[str, None] = {}
    try:
        author_logins[f"{pr.user.login}"] = None
    except Exception as e:
        logger.error(f"Failed to retrieve {pr.user}: {str(e)}")
    for commit in commits:
        author = commit.author
        try:
            if author:
                author_logins[f"{author.login}"] = None
        except IncompletableObject as e:
            logger.error(f"Failed to retrieve author {author} for commit {commit.sha}: {str(e)}")
    pr_authors = ", ".join(author_logins)

    # comment has not yet been created
    if not sweep_comment:
        comment_content = (
            f"{SWEEP_PR_REVIEW_HEADER}\nSweep is currently reviewing your pr..."
        )
        if pr_authors:
            comment_content = f"{SWEEP_PR_REVIEW_HEADER}\nAuthors of pull request: {pr_authors}\n\nSweep is currently reviewing your pr..."
        sweep_comment = pr.create_issue_comment(comment_content)

    # an error message takes priority over any review content
    if error_message:
        sweep_comment.edit(
            f"{SWEEP_PR_REVIEW_HEADER}\nSweep was unable to review your pull request due to the following reasons:\n\n{error_message}"
        )
        return sweep_comment.id  # early return

    # update body of sweep_comment
    if code_review_by_file:
        rendered_pr_review = render_pr_review_by_file(
            username,
            pr,
            code_review_by_file,
            formatted_comment_threads,
            pull_request_summary=pull_request_summary,
            dropped_files=dropped_files,
            unsuitable_files=unsuitable_files,
            pr_authors=pr_authors,
        )
        sweep_comment.edit(rendered_pr_review)
    return sweep_comment.id

sweep/sweepai/api.py

Lines 1 to 875 in 6dc1689

from __future__ import annotations
import ctypes
import os
import threading
import time
from typing import Optional
from fastapi import (
Body,
Depends,
FastAPI,
Header,
HTTPException,
Path,
Request,
)
from fastapi.responses import HTMLResponse
from fastapi.security import HTTPBearer
from fastapi.templating import Jinja2Templates
from github.Commit import Commit
from github import GithubException
from sweepai.config.client import (
RESTART_SWEEP_BUTTON,
REVERT_CHANGED_FILES_TITLE,
RULES_TITLE,
SweepConfig,
get_gha_enabled,
)
from sweepai.config.server import (
BLACKLISTED_USERS,
DISABLED_REPOS,
ENV,
GHA_AUTOFIX_ENABLED,
GITHUB_BOT_USERNAME,
GITHUB_LABEL_COLOR,
GITHUB_LABEL_DESCRIPTION,
GITHUB_LABEL_NAME,
IS_SELF_HOSTED,
SENTRY_URL,
)
from sweepai.chat.api import app as chat_app
from sweepai.core.entities import PRChangeRequest
from sweepai.global_threads import global_threads
from sweepai.handlers.review_pr import review_pr
from sweepai.handlers.create_pr import ( # type: ignore
create_gha_pr,
)
from sweepai.handlers.on_button_click import handle_button_click
from sweepai.handlers.on_check_suite import ( # type: ignore
clean_gh_logs,
download_logs,
)
from sweepai.handlers.on_comment import on_comment
from sweepai.handlers.on_jira_ticket import handle_jira_ticket
from sweepai.handlers.on_ticket import on_ticket
from sweepai.utils.buttons import (
check_button_activated,
check_button_title_match,
)
from sweepai.utils.chat_logger import ChatLogger
from sweepai.utils.event_logger import logger, posthog
from sweepai.utils.github_utils import CURRENT_USERNAME, get_github_client
from sweepai.utils.hash import verify_signature
from sweepai.utils.progress import TicketProgress
from sweepai.utils.safe_pqueue import SafePriorityQueue
from sweepai.utils.str_utils import BOT_SUFFIX, get_hash
from sweepai.utils.validate_license import validate_license
from sweepai.web.events import (
CheckRunCompleted,
CommentCreatedRequest,
IssueCommentRequest,
IssueRequest,
PREdited,
PRLabeledRequest,
PRRequest,
)
from sweepai.web.health import health_check
import sentry_sdk
from sentry_sdk import set_user
# Release identifier built from the local time at module import
# (two-digit year.month.day.hour); reported to Sentry and to the index page.
version = time.strftime("%y.%m.%d.%H")
# Sentry is only initialised when a DSN is configured.
if SENTRY_URL:
    sentry_sdk.init(
        dsn=SENTRY_URL,
        traces_sample_rate=1.0,
        profiles_sample_rate=1.0,
        release=version
    )
app = FastAPI()
# The chat application is served under the /chat prefix.
app.mount("/chat", chat_app)
# Per-"repo-pr" queues of pending comment tasks (see call_on_comment).
events = {}
# Latest worker thread per "repo-issue" key (see call_on_ticket).
on_ticket_events = {}
# Latest worker thread per "repo-pr" key (see call_review_pr).
review_pr_events = {}
security = HTTPBearer()
templates = Jinja2Templates(directory="sweepai/web")
# NOTE(review): the result of bind() is discarded. If `logger` is loguru's
# logger, bind() returns a new logger instead of modifying this one, so this
# line may have no effect - confirm.
logger.bind(application="webhook")
def run_on_ticket(*args, **kwargs):
    """Run ``on_ticket`` inside a logging context tagged with a fresh tracking id.

    ``kwargs`` must contain ``username``. All keyword arguments are added to
    the logging context and forwarded to ``on_ticket`` together with the
    generated ``tracking_id``.

    Returns:
        Whatever ``on_ticket`` returns.
    """
    tracking_id = get_hash()
    log_name = "ticket_" + kwargs["username"]
    with logger.contextualize(**kwargs, name=log_name, tracking_id=tracking_id):
        return on_ticket(*args, **kwargs, tracking_id=tracking_id)
def run_on_comment(*args, **kwargs):
    """Run ``on_comment`` inside a logging context tagged with a fresh tracking id.

    ``kwargs`` must contain ``username``. All keyword arguments are added to
    the logging context and forwarded to ``on_comment`` together with the
    generated ``tracking_id``. The result of ``on_comment`` is not returned.
    """
    tracking_id = get_hash()
    log_name = "comment_" + kwargs["username"]
    with logger.contextualize(**kwargs, name=log_name, tracking_id=tracking_id):
        on_comment(*args, **kwargs, tracking_id=tracking_id)
def run_review_pr(*args, **kwargs):
    """Run ``review_pr`` inside a logging context tagged with a fresh tracking id.

    ``kwargs`` must contain ``username``. All keyword arguments are added to
    the logging context and forwarded to ``review_pr`` together with the
    generated ``tracking_id``. The result of ``review_pr`` is not returned.
    """
    tracking_id = get_hash()
    log_name = "review_" + kwargs["username"]
    with logger.contextualize(**kwargs, name=log_name, tracking_id=tracking_id):
        review_pr(*args, **kwargs, tracking_id=tracking_id)
def run_on_button_click(*args, **kwargs):
    """Run ``handle_button_click`` on a new thread and register it in ``global_threads``."""
    worker = threading.Thread(target=handle_button_click, args=args, kwargs=kwargs)
    worker.start()
    global_threads.append(worker)
def terminate_thread(thread):
    """Terminate a python threading.Thread.

    Asks the interpreter to raise ``SystemExit`` asynchronously inside the
    given thread. Does nothing when the thread is not alive. Failures are
    logged, never raised to the caller.

    Args:
        thread: The ``threading.Thread`` to stop.
    """
    try:
        if not thread.is_alive():
            return
        # Wrap the id explicitly: a bare Python int would be converted by
        # ctypes as a C int, while the C API expects an unsigned long.
        thread_id = ctypes.c_ulong(thread.ident)
        affected = ctypes.pythonapi.PyThreadState_SetAsyncExc(
            thread_id, ctypes.py_object(SystemExit)
        )
        if affected == 0:
            raise ValueError("Invalid thread ID")
        if affected != 1:
            # More than one thread state was modified: clear the pending
            # exception again by passing NULL (None) with the same wrapped id.
            ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, None)
            raise SystemError("PyThreadState_SetAsyncExc failed")
    except Exception as e:
        logger.exception(f"Failed to terminate thread: {e}")
# def delayed_kill(thread: threading.Thread, delay: int = 60 * 60):
# time.sleep(delay)
# terminate_thread(thread)
def call_on_ticket(*args, **kwargs):
    """Start ``run_on_ticket`` on a new thread, cancelling an earlier run for the same issue.

    ``kwargs`` must contain ``repo_full_name`` and ``issue_number``; together
    they form the key under which the worker thread is remembered in
    ``on_ticket_events``.
    """
    # Full name, issue number as key
    key = f"{kwargs['repo_full_name']}-{kwargs['issue_number']}"
    # Check if a previous process exists for the same key, cancel it
    previous_thread = on_ticket_events.get(key)
    if previous_thread:
        logger.info(f"Found previous thread for key {key} and cancelling it")
        terminate_thread(previous_thread)
    # Use multithreading
    worker = threading.Thread(target=run_on_ticket, args=args, kwargs=kwargs)
    on_ticket_events[key] = worker
    worker.start()
    global_threads.append(worker)
def call_on_comment(
    *args, **kwargs
):  # TODO: if its a GHA delete all previous GHA and append to the end
    """Queue a comment task for its pull request and make sure a worker drains the queue.

    ``kwargs`` must contain ``repo_full_name``, ``pr_number`` and
    ``comment_type``. Tasks for the same pull request share one queue in
    ``events``; a worker thread named after the queue key is started only when
    no live thread with that name exists.
    """
    repo_full_name = kwargs["repo_full_name"]
    pr_id = kwargs["pr_number"]
    key = f"{repo_full_name}-{pr_id}"  # Full name, comment number as key
    logger.info(f"Received comment type: {kwargs['comment_type']}")

    def drain_queue():
        # Process tasks until the queue of this pull request is empty.
        pending = events[key]
        while not pending.empty():
            task_args, task_kwargs = pending.get()
            run_on_comment(*task_args, **task_kwargs)

    if key not in events:
        events[key] = SafePriorityQueue()
    events[key].put(0, (args, kwargs))

    # If a thread isn't running, start one
    worker_alive = any(
        candidate.name == key and candidate.is_alive()
        for candidate in threading.enumerate()
    )
    if not worker_alive:
        worker = threading.Thread(target=drain_queue, name=key)
        worker.start()
        global_threads.append(worker)
# add a review by sweep on the pr
def call_review_pr(*args, **kwargs):
    """Start ``run_review_pr`` on a new thread, cancelling an earlier review of the same PR.

    ``kwargs`` must contain ``repository`` (with ``full_name``) and ``pr``
    (with ``number``); together they form the key under which the worker
    thread is remembered in ``review_pr_events``.
    """
    # Repository full name and pull request number as key
    key = f"{kwargs['repository'].full_name}-{kwargs['pr'].number}"
    # Check if a previous process exists for the same key, cancel it
    previous_thread = review_pr_events.get(key)
    if previous_thread:
        logger.info(f"Found previous thread for key {key} and cancelling it")
        terminate_thread(previous_thread)
    # Use multithreading
    worker = threading.Thread(target=run_review_pr, args=args, kwargs=kwargs)
    review_pr_events[key] = worker
    worker.start()
    global_threads.append(worker)
@app.get("/health")
def redirect_to_health():
    # GET /health: respond with whatever health_check() reports.
    health_status = health_check()
    return health_status
@app.get("/", response_class=HTMLResponse)
def home(request: Request):
    # GET /: render index.html with the server version and the license state.
    # The license counts as expired whenever validate_license() raises; the
    # error is logged as a warning and the page is still rendered.
    license_expired = True
    try:
        validate_license()
    except Exception as e:
        logger.warning(e)
    else:
        license_expired = False
    page_context = {
        "version": version,
        "request": request,
        "license_expired": license_expired,
    }
    return templates.TemplateResponse(name="index.html", context=page_context)
@app.get("/ticket_progress/{tracking_id}")
def progress(tracking_id: str = Path(...)):
    # GET /ticket_progress/{tracking_id}: load the stored progress for that
    # tracking id and return it as a dict.
    return TicketProgress.load(tracking_id).dict()
def handle_github_webhook(event_payload):
    """Unpack a ``{"request": ..., "event": ...}`` payload and pass it to ``handle_event``."""
    request_dict = event_payload.get("request")
    event_name = event_payload.get("event")
    handle_event(request_dict, event_name)
def handle_request(request_dict, event=None):
    """So it can be exported to the listen endpoint.

    Dispatches the webhook payload through ``handle_github_webhook`` inside
    the "main" logging context. Exceptions raised while handling are logged
    and swallowed.

    Returns:
        ``{"success": True}``, also when handling raised an exception.
    """
    with logger.contextualize(tracking_id="main", env=ENV):
        action = request_dict.get("action")
        payload = {"request": request_dict, "event": event}
        try:
            handle_github_webhook(payload)
        except Exception as e:
            logger.exception(str(e))
        logger.info(f"Done handling {event}, {action}")
        return {"success": True}
# @app.post("/")
async def validate_signature(
    request: Request,
    x_hub_signature: Optional[str] = Header(None, alias="X-Hub-Signature-256")
):
    """Reject requests whose body does not match the ``X-Hub-Signature-256`` header.

    Raises:
        HTTPException: With status 403 when ``verify_signature`` does not
            accept the raw request body and the signature header.
    """
    payload_body = await request.body()
    signature_ok = verify_signature(
        payload_body=payload_body, signature_header=x_hub_signature
    )
    if signature_ok:
        return
    raise HTTPException(status_code=403, detail="Request signatures didn't match!")
@app.post("/", dependencies=[Depends(validate_signature)])
def webhook(
    request_dict: dict = Body(...),
    x_github_event: Optional[str] = Header(None, alias="X-GitHub-Event"),
):
    """Handle a webhook request from GitHub"""
    # The signature was already checked by the validate_signature dependency;
    # log the event/action pair and hand the payload to handle_request.
    with logger.contextualize(tracking_id="main", env=ENV):
        logger.info(f"Received event: {x_github_event}, {request_dict.get('action')}")
        return handle_request(request_dict, event=x_github_event)
@app.post("/jira")
def jira_webhook(
    request_dict: dict = Body(...),
) -> None:
    # POST /jira: process the Jira payload on a background thread by calling
    # handle_jira_ticket(event=request_dict); the request returns right away.
    worker = threading.Thread(
        target=handle_jira_ticket, args=(), kwargs={"event": request_dict}
    )
    worker.start()
# Set up cronjob for this
@app.get("/update_sweep_prs_v2")
def update_sweep_prs_v2(repo_full_name: str, installation_id: int):
    # GET /update_sweep_prs_v2: for the five most recently updated open PRs
    # matching head="sweep", merge the PR's base branch into its Sweep
    # branch. A failure on one PR is logged and does not stop the others.

    # Get a Github client
    _, github_client = get_github_client(installation_id)

    # Get the repository
    repo = github_client.get_repo(repo_full_name)
    config = SweepConfig.get_config(repo)

    # NOTE: branch_ttl is read (default 7, minimum 1) but not used by the
    # rest of this function.
    try:
        branch_ttl = int(config.get("branch_ttl", 7))
    except Exception:
        branch_ttl = 7
    branch_ttl = max(branch_ttl, 1)

    # Get all open pull requests created by Sweep
    open_sweep_prs = repo.get_pulls(
        state="open", head="sweep", sort="updated", direction="desc"
    )[:5]
    one_day_in_seconds = 60 * 60 * 24

    # For each pull request, attempt to merge the changes from the default branch into the pull request branch
    try:
        for pr in open_sweep_prs:
            try:
                # make sure it's a sweep ticket
                feature_branch = pr.head.ref
                if not feature_branch.startswith(("sweep/", "sweep_")):
                    continue
                if "Resolve merge conflicts" in pr.title:
                    continue
                # "[Sweep Rules]" PRs that are not cleanly mergeable after a
                # day are closed instead of updated.
                is_stale_rules_pr = (
                    pr.mergeable_state != "clean"
                    and (time.time() - pr.created_at.timestamp()) > one_day_in_seconds
                    and pr.title.startswith("[Sweep Rules]")
                )
                if is_stale_rules_pr:
                    pr.edit(state="closed")
                    continue

                repo.merge(
                    feature_branch,
                    pr.base.ref,
                    f"Merge main into {feature_branch}",
                )

                # Check if the merged PR is the config PR
                if pr.title == "Configure Sweep" and pr.merged:
                    # Create a new PR to add "gha_enabled: True" to sweep.yaml
                    create_gha_pr(github_client, repo)
            except Exception as e:
                logger.warning(
                    f"Failed to merge changes from default branch into PR #{pr.number}: {e}"
                )
    except Exception:
        logger.warning("Failed to update sweep PRs")
def should_handle_comment(request: CommentCreatedRequest | IssueCommentRequest):
    """Decide whether a comment should be handled.

    Returns:
        ``True`` only when the comment body starts with "sweep:" (in any
        letter case), the commenter is of type "User", is not in
        ``BLACKLISTED_USERS``, and the body does not contain ``BOT_SUFFIX``.
    """
    comment = request.comment.body
    # we will handle all comments (with or without label) that start with "sweep:"
    if not comment.lower().startswith("sweep:"):
        return False
    # ensure it's a user comment
    if request.comment.user.type != "User":
        return False
    # ensure it's not a blacklisted user
    if request.comment.user.login in BLACKLISTED_USERS:
        return False
    # we don't handle bot comments
    return BOT_SUFFIX not in comment
def handle_event(request_dict, event):
action = request_dict.get("action")
username = request_dict.get("sender", {}).get("login")
if username:
set_user({"username": username})
if repo_full_name := request_dict.get("repository", {}).get("full_name"):
if repo_full_name in DISABLED_REPOS:
logger.warning(f"Repo {repo_full_name} is disabled")
return {"success": False, "error_message": "Repo is disabled"}
with logger.contextualize(tracking_id="main", env=ENV):
match event, action:
case "check_run", "completed":
request = CheckRunCompleted(**request_dict)
_, g = get_github_client(request.installation.id)
repo = g.get_repo(request.repository.full_name)
pull_requests = request.check_run.pull_requests
if pull_requests:
logger.info(pull_requests[0].number)
pr = repo.get_pull(pull_requests[0].number)
if (time.time() - pr.created_at.timestamp()) > 60 * 60 and (
pr.title.startswith("[Sweep Rules]")
or pr.title.startswith("[Sweep GHA Fix]")
):
after_sha = pr.head.sha
commit = repo.get_commit(after_sha)
check_suites = commit.get_check_suites()
for check_suite in check_suites:
if check_suite.conclusion == "failure":
pr.edit(state="closed")
break
if (
not (time.time() - pr.created_at.timestamp()) > 60 * 15
and request.check_run.conclusion == "failure"
and pr.state == "open"
and get_gha_enabled(repo)
and len(
[
comment
for comment in pr.get_issue_comments()
if "Fixing PR" in comment.body
]
)
< 2
and GHA_AUTOFIX_ENABLED
):
# check if the base branch is passing
commits = repo.get_commits(sha=pr.base.ref)
latest_commit: Commit = commits[0]
if all(
status != "failure"
for status in [
status.state for status in latest_commit.get_statuses()
]
): # base branch is passing
logs = download_logs(
request.repository.full_name,
request.check_run.run_id,
request.installation.id,
)
logs, user_message = clean_gh_logs(logs)
attributor = request.sender.login
if attributor.endswith("[bot]"):
attributor = commit.author.login
if attributor.endswith("[bot]"):
attributor = pr.assignee.login
if attributor.endswith("[bot]"):
return {
"success": False,
"error_message": "The PR was created by a bot, so I won't attempt to fix it.",
}
chat_logger = ChatLogger(
data={
"username": attributor,
"title": "[Sweep GHA Fix] Fix the failing GitHub Actions",
}
)
if chat_logger.use_faster_model() and not IS_SELF_HOSTED:
return {
"success": False,
"error_message": "Disabled for free users",
}
# stack_pr(
# request=f"[Sweep GHA Fix] The GitHub Actions run failed on {request.check_run.head_sha[:7]} ({repo.default_branch}) with the following error logs:\n\n```\n\n{logs}\n\n```",
# pr_number=pr.number,
# username=attributor,
# repo_full_name=repo.full_name,
# installation_id=request.installation.id,
# tracking_id=tracking_id,
# commit_hash=pr.head.sha,
# )
case "pull_request", "opened":
try:
pr_request = PRRequest(**request_dict)
_, g = get_github_client(request_dict["installation"]["id"])
repo = g.get_repo(request_dict["repository"]["full_name"])
pr = repo.get_pull(request_dict["pull_request"]["number"])
# check if review_pr is restricted
allowed_repos = os.environ.get("PR_REVIEW_REPOS", "")
allowed_repos_set = set(allowed_repos.split(',')) if allowed_repos else set()
allowed_usernames = os.environ.get("PR_REVIEW_USERNAMES", "")
allowed_usernames_set = set(allowed_usernames.split(',')) if allowed_usernames else set()
# only call review pr if user names are allowed
# defaults to all users/repos if not set
if (not allowed_repos or repo.name in allowed_repos_set) and (not allowed_usernames or pr.user.login in allowed_usernames_set):
# run pr review
call_review_pr(
username=pr.user.login,
pr=pr,
repository=repo,
installation_id=pr_request.installation.id,
)
except Exception as e:
logger.exception(f"Failed to review PR: {e}")
raise e
case "pull_request", "labeled":
try:
pr_request = PRLabeledRequest(**request_dict)
# run only if sweep label is added to the pull request
if (
GITHUB_LABEL_NAME in [label.name.lower() for label in pr_request.pull_request.labels]
):
_, g = get_github_client(request_dict["installation"]["id"])
repo = g.get_repo(request_dict["repository"]["full_name"])
pr = repo.get_pull(request_dict["pull_request"]["number"])
# run pr review - no need to check for allowed users/repos if they are adding sweep label
call_review_pr(
username=pr.user.login,
pr=pr,
repository=repo,
installation_id=pr_request.installation.id,
)
else:
logger.info("sweep label not in pull request labels")
except Exception as e:
logger.exception(f"Failed to review PR: {e}")
raise e
case "issues", "opened":
request = IssueRequest(**request_dict)
issue_title_lower = request.issue.title.lower()
if (
issue_title_lower.startswith("sweep")
or "sweep:" in issue_title_lower
):
_, g = get_github_client(request.installation.id)
repo = g.get_repo(request.repository.full_name)
labels = repo.get_labels()
label_names = [label.name for label in labels]
if GITHUB_LABEL_NAME not in label_names:
try:
repo.create_label(
name=GITHUB_LABEL_NAME,
color=GITHUB_LABEL_COLOR,
description=GITHUB_LABEL_DESCRIPTION,
)
except GithubException as e:
if e.status == 422 and any(error.get("code") == "already_exists" for error in e.data.get("errors", [])):
logger.warning(f"Label '{GITHUB_LABEL_NAME}' already exists in the repository")
else:
raise e
current_issue = repo.get_issue(number=request.issue.number)
current_issue.add_to_labels(GITHUB_LABEL_NAME)
case "issue_comment", "edited":
request = IssueCommentRequest(**request_dict)
sweep_labeled_issue = GITHUB_LABEL_NAME in [
label.name.lower() for label in request.issue.labels
]
button_title_match = check_button_title_match(
REVERT_CHANGED_FILES_TITLE,
request.comment.body,
request.changes,
) or check_button_title_match(
RULES_TITLE,
request.comment.body,
request.changes,
)
if (
request.comment.user.type == "Bot"
and GITHUB_BOT_USERNAME in request.comment.user.login
and request.changes.body_from is not None
and button_title_match
and request.sender.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
):
run_on_button_click(request_dict)
restart_sweep = False
if (
request.comment.user.type == "Bot"
and GITHUB_BOT_USERNAME in request.comment.user.login
and request.changes.body_from is not None
and check_button_activated(
RESTART_SWEEP_BUTTON,
request.comment.body,
request.changes,
)
and sweep_labeled_issue
and request.sender.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
):
# Restart Sweep on this issue
restart_sweep = True
if (
request.issue is not None
and sweep_labeled_issue
and request.comment.user.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
and not request.comment.user.login.startswith("sweep")
and not (
request.issue.pull_request and request.issue.pull_request.url
)
or restart_sweep
):
logger.info("New issue comment edited")
request.issue.body = request.issue.body or ""
request.repository.description = (
request.repository.description or ""
)
if (
not request.comment.body.strip()
.lower()
.startswith(GITHUB_LABEL_NAME)
and not restart_sweep
):
logger.info("Comment does not start with 'Sweep', passing")
return {
"success": True,
"reason": "Comment does not start with 'Sweep', passing",
}
call_on_ticket(
title=request.issue.title,
summary=request.issue.body,
issue_number=request.issue.number,
issue_url=request.issue.html_url,
username=request.issue.user.login,
repo_full_name=request.repository.full_name,
repo_description=request.repository.description,
installation_id=request.installation.id,
comment_id=request.comment.id if not restart_sweep else None,
edited=True,
)
elif (
request.issue.pull_request
and request.comment.user.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
):
if should_handle_comment(request):
logger.info(f"Handling comment on PR: {request.issue.pull_request}")
pr_change_request = PRChangeRequest(
params={
"comment_type": "comment",
"repo_full_name": request.repository.full_name,
"repo_description": request.repository.description,
"comment": request.comment.body,
"pr_path": None,
"pr_line_position": None,
"username": request.comment.user.login,
"installation_id": request.installation.id,
"pr_number": request.issue.number,
"comment_id": request.comment.id,
},
)
call_on_comment(**pr_change_request.params)
case "issues", "edited":
request = IssueRequest(**request_dict)
if (
GITHUB_LABEL_NAME
in [label.name.lower() for label in request.issue.labels]
and request.sender.type == "User"
and not request.sender.login.startswith("sweep")
):
logger.info("New issue edited")
call_on_ticket(
title=request.issue.title,
summary=request.issue.body,
issue_number=request.issue.number,
issue_url=request.issue.html_url,
username=request.issue.user.login,
repo_full_name=request.repository.full_name,
repo_description=request.repository.description,
installation_id=request.installation.id,
comment_id=None,
)
else:
logger.info("Issue edited, but not a sweep issue")
case "issues", "labeled":
request = IssueRequest(**request_dict)
if (
any(
label.name.lower() == GITHUB_LABEL_NAME
for label in request.issue.labels
)
and not request.issue.pull_request
):
request.issue.body = request.issue.body or ""
request.repository.description = (
request.repository.description or ""
)
call_on_ticket(
title=request.issue.title,
summary=request.issue.body,
issue_number=request.issue.number,
issue_url=request.issue.html_url,
username=request.issue.user.login,
repo_full_name=request.repository.full_name,
repo_description=request.repository.description,
installation_id=request.installation.id,
comment_id=None,
)
case "issue_comment", "created":
request = IssueCommentRequest(**request_dict)
if (
request.issue is not None
and GITHUB_LABEL_NAME
in [label.name.lower() for label in request.issue.labels]
and request.comment.user.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
and not (
request.issue.pull_request and request.issue.pull_request.url
)
and BOT_SUFFIX not in request.comment.body
):
request.issue.body = request.issue.body or ""
request.repository.description = (
request.repository.description or ""
)
if (
not request.comment.body.strip()
.lower()
.startswith(GITHUB_LABEL_NAME)
):
logger.info("Comment does not start with 'Sweep', passing")
return {
"success": True,
"reason": "Comment does not start with 'Sweep', passing",
}
call_on_ticket(
title=request.issue.title,
summary=request.issue.body,
issue_number=request.issue.number,
issue_url=request.issue.html_url,
username=request.issue.user.login,
repo_full_name=request.repository.full_name,
repo_description=request.repository.description,
installation_id=request.installation.id,
comment_id=request.comment.id,
)
elif (
request.issue.pull_request
and request.comment.user.type == "User"
and request.comment.user.login not in BLACKLISTED_USERS
and BOT_SUFFIX not in request.comment.body
):
if should_handle_comment(request):
pr_change_request = PRChangeRequest(
params={
"comment_type": "comment",
"repo_full_name": request.repository.full_name,
"repo_description": request.repository.description,
"comment": request.comment.body,
"pr_path": None,
"pr_line_position": None,
"username": request.comment.user.login,
"installation_id": request.installation.id,
"pr_number": request.issue.number,
"comment_id": request.comment.id,
},
)
call_on_comment(**pr_change_request.params)
case "pull_request_review_comment", "created":
request = CommentCreatedRequest(**request_dict)
if should_handle_comment(request):
pr_change_request = PRChangeRequest(
params={
"comment_type": "comment",
"repo_full_name": request.repository.full_name,
"repo_description": request.repository.description,
"comment": request.comment.body,
"pr_path": request.comment.path,
"pr_line_position": request.comment.original_line,
"username": request.comment.user.login,
"installation_id": request.installation.id,
"pr_number": request.pull_request.number,
"comment_id": request.comment.id,
},
)
call_on_comment(**pr_change_request.params)
case "pull_request_review_comment", "edited":
request = CommentCreatedRequest(**request_dict)
if should_handle_comment(request):
pr_change_request = PRChangeRequest(
params={
"comment_type": "comment",
"repo_full_name": request.repository.full_name,
"repo_description": request.repository.description,
"comment": request.comment.body,
"pr_path": request.comment.path,
"pr_line_position": request.comment.original_line,
"username": request.comment.user.login,
"installation_id": request.installation.id,
"pr_number": request.pull_request.number,
"comment_id": request.comment.id,
},
)
call_on_comment(**pr_change_request.params)
case "installation_repositories", "added":
# don't do anything for now
pass
case "installation", "created":
# don't do anything for now
pass
case "pull_request", "edited":
request = PREdited(**request_dict)
if (
request.pull_request.user.login == GITHUB_BOT_USERNAME
and not request.sender.login.endswith("[bot]")
):
try:
_, g = get_github_client(request.installation.id)
repo = g.get_repo(request.repository.full_name)
pr = repo.get_pull(request.pull_request.number)
# check if review_pr is restricted
allowed_repos = os.environ.get("PR_REVIEW_REPOS", "")
allowed_repos_set = set(allowed_repos.split(',')) if allowed_repos else set()
if not allowed_repos or repo.name in allowed_repos_set:
# run pr review
call_review_pr(
username=pr.user.login,
pr=pr,
repository=repo,
installation_id=request.installation.id,
)
except Exception as e:
logger.exception(f"Failed to review PR: {e}")
raise e
case "pull_request", "closed":
pr_request = PRRequest(**request_dict)
(
organization,
repo_name,
) = pr_request.repository.full_name.split("/")
commit_author = pr_request.pull_request.user.login
merged_by = (
pr_request.pull_request.merged_by.login
if pr_request.pull_request.merged_by
else None
)
if CURRENT_USERNAME == commit_author and merged_by is not None:
event_name = "merged_sweep_pr"
if pr_request.pull_request.title.startswith("[config]"):
event_name = "config_pr_merged"
elif pr_request.pull_request.title.startswith("[Sweep Rules]"):
event_name = "sweep_rules_pr_merged"
edited_by_developers = False
_token, g = get_github_client(pr_request.installation.id)
pr = g.get_repo(pr_request.repository.full_name).get_pull(
pr_request.number
)
total_lines_in_commit = 0
total_lines_edited_by_developer = 0
edited_by_developers = False
for commit in pr.get_commits():
lines_modified = commit.stats.additions + commit.stats.deletions
total_lines_in_commit += lines_modified
if commit.author.login != CURRENT_USERNAME:
total_lines_edited_by_developer += lines_modified
# this was edited by a developer if at least 25% of the lines were edited by a developer
edited_by_developers = total_lines_in_commit > 0 and (total_lines_edited_by_developer / total_lines_in_commit) >= 0.25
posthog.capture(
merged_by,
event_name,
properties={
"repo_name": repo_name,
"organization": organization,
"repo_full_name": pr_request.repository.full_name,
"username": merged_by,
"additions": pr_request.pull_request.additions,
"deletions": pr_request.pull_request.deletions,
"total_changes": pr_request.pull_request.additions
+ pr_request.pull_request.deletions,
"edited_by_developers": edited_by_developers,
"total_lines_in_commit": total_lines_in_commit,
"total_lines_edited_by_developer": total_lines_edited_by_developer,
},
)
chat_logger = ChatLogger({"username": merged_by})
case "ping", None:
return {"message": "pong"}
case _:

"""
on_comment is responsible for handling PR comments and PR review comments, called from sweepai/api.py.
It is also called in sweepai/handlers/on_ticket.py when Sweep is reviewing its own PRs.
"""
import copy
import re
import time
import traceback
from typing import Any
from loguru import logger
from sentry_sdk import set_user
from sweepai.chat.api import posthog_trace
from sweepai.config.server import (
ENV,
GITHUB_BOT_USERNAME,
MONGODB_URI,
)
from sweepai.core.entities import MockPR, NoFilesException, Snippet, render_fcrs
from sweepai.core.pull_request_bot import PRSummaryBot
from sweepai.core.sweep_bot import get_files_to_change_for_on_comment, set_fcr_change_type
from sweepai.handlers.create_pr import handle_file_change_requests
from sweepai.core.review_utils import format_pr_info, get_pr_changes, smart_prune_file_based_on_patches
from sweepai.utils.chat_logger import ChatLogger
from sweepai.utils.concurrency_utils import fire_and_forget_wrapper
from sweepai.utils.diff import generate_diff
from sweepai.utils.event_logger import posthog
from sweepai.utils.github_utils import ClonedRepo, commit_multi_file_changes, get_github_client, sanitize_string_for_github, validate_and_sanitize_multi_file_changes
from sweepai.utils.str_utils import BOT_SUFFIX, FASTER_MODEL_MESSAGE, add_line_numbers, blockquote
from sweepai.utils.ticket_rendering_utils import center, sweeping_gif
from sweepai.utils.ticket_utils import prep_snippets
from github.Repository import Repository
from github.PullRequest import PullRequest
# Tuning constants and comment templates used by on_comment.
# Upper bound on how many snippets are kept after the search step
# (the combined snippet list is sliced to this length).
num_of_snippets_to_query = 30
# NOTE(review): the next three values are not referenced in the visible part
# of this module -- presumably older search limits; confirm before removing.
total_number_of_snippet_tokens = 15_000
num_full_files = 2
num_extended_snippets = 2
# Template for the failure text written into the bot's comment; {title} is
# filled in with the exception message and stack trace.
ERROR_FORMAT = "❌ {title}\n\nPlease report this on our [community forum](https://community.sweep.dev/)."
# Progress banner that is prepended to the bot's in-progress comments.
SWEEPING_GIF = f"{center(sweeping_gif)}\n\n<div align='center'><h3>Sweep is working on resolving your comment...<h3/></div>\n\n"
@posthog_trace
def on_comment(
    username: str,
    repo_full_name: str,
    repo_description: str,
    comment: str,
    pr_path: str | None,
    pr_line_position: int | None,
    installation_id: int,
    pr_number: int = None,
    comment_id: int | None = None,
    chat_logger: Any = None,
    pr: MockPR = None,  # For on_comment calls before PR is created
    repo: Any = None,
    comment_type: str = "comment",
    type: str = "comment",
    tracking_id: str = None,
):
    """Resolve a PR comment or PR review comment by committing code changes.

    The handler reacts to the triggering comment, posts a progress comment,
    searches the cloned repository for relevant snippets, plans and applies
    file changes, commits them to the PR branch and finally edits the progress
    comment with the resulting diff.

    Args:
        username: Login of the commenting user; used for telemetry and for the
            greeting in the bot's reply.
        repo_full_name: Repository in ``"organization/name"`` form.
        repo_description: Repository description, forwarded to telemetry.
        comment: Body of the comment to resolve.
        pr_path: File the review comment was left on, if any.
        pr_line_position: Line of ``pr_path`` the comment refers to. The
            comment is treated as a file comment only when both ``pr_path``
            and ``pr_line_position`` are truthy.
        installation_id: GitHub App installation used to build the client.
        pr_number: Pull request number; used to fetch the PR when ``pr`` is
            not supplied.
        comment_id: Id of the triggering comment (for reactions/replies).
        chat_logger: Optional pre-built logger; reused only when the PR has an
            assignee or a "Fixes #N." reference, otherwise reset to ``None``.
        pr: Optional PR object (e.g. a ``MockPR``) to use instead of fetching.
        repo: Ignored; the repository is always re-fetched from the client.
        comment_type: Unused in this function body.
        type: Unused in this function body.
        tracking_id: Correlation id attached to logs and telemetry.

    Returns:
        A dict with ``success`` and ``message`` when the PR is closed;
        otherwise ``None``.

    Raises:
        Exception: with ``FASTER_MODEL_MESSAGE`` when the chat logger reports
            that the faster model should be used. Any exception raised while
            searching or making changes is reported to telemetry and to the
            bot comment, then re-raised.
    """
    set_user({"username": username})
    with logger.contextualize(
        tracking_id=tracking_id,
    ):
        # Initialization logic start
        logger.info(
            f"Calling on_comment() with the following arguments: {comment},"
            f" {repo_full_name}, {repo_description}, {pr_path}"
        )
        organization, repo_name = repo_full_name.split("/")
        start_time = time.time()
        _token, g = get_github_client(installation_id)
        repo: Repository = g.get_repo(repo_full_name)
        if pr is None:
            pr: PullRequest = repo.get_pull(pr_number)
        pr_title = pr.title
        # Drop the trailing "Latest improvements" section from the PR body, if present.
        pr_body = (
            pr.body.split("<details>\n<summary><b>🎉 Latest improvements to Sweep:")[0]
            if pr.body
            and "<details>\n<summary><b>🎉 Latest improvements to Sweep:" in pr.body
            else pr.body
        )
        pr_file_path = None
        pr_chunk = None
        formatted_pr_chunk = None
        if pr.state == "closed":
            return {"success": True, "message": "PR is closed. No event fired."}
        # Initialization logic end
        # Payment logic start
        assignee = pr.assignee.login if pr.assignee else None
        issue_number_match = re.search(r"Fixes #(?P<issue_number>\d+).", pr_body or "")
        original_issue = None
        if issue_number_match or assignee:
            # Either condition alone enters this branch, so the regex match may
            # be None when only an assignee is present; guard the .group() call.
            issue_number = (
                issue_number_match.group("issue_number")
                if issue_number_match
                else None
            )
            if not assignee:
                # No assignee implies the match succeeded, so issue_number is set.
                original_issue = repo.get_issue(int(issue_number))
                author = original_issue.user.login
            else:
                author = assignee
            logger.info(f"Author of original issue is {author}")
            chat_logger = (
                chat_logger
                if chat_logger is not None
                else ChatLogger(
                    {
                        "repo_name": repo_name,
                        "title": "(Comment) " + pr_title,
                        "issue_url": pr.html_url,
                        "pr_file_path": pr_file_path,  # may be None
                        "pr_chunk": pr_chunk,  # may be None
                        "repo_full_name": repo_full_name,
                        "repo_description": repo_description,
                        "comment": comment,
                        "pr_path": pr_path,
                        "pr_line_position": pr_line_position,
                        "username": author,
                        "installation_id": installation_id,
                        "pr_number": pr_number,
                        "type": "comment",
                    },
                    active=True,
                )
                if MONGODB_URI
                else None
            )
        else:
            chat_logger = None
        if chat_logger:
            is_paying_user = chat_logger.is_paying_user()
            use_faster_model = chat_logger.use_faster_model()
        else:
            # Todo: chat_logger is None for MockPRs, which will cause all comments to use GPT-4
            is_paying_user = True
            use_faster_model = False
        if use_faster_model:
            raise Exception(FASTER_MODEL_MESSAGE)
        # Payment logic end
        # Telemetry logic start
        assignee = pr.assignee.login if pr.assignee else None
        metadata = {
            "repo_full_name": repo_full_name,
            "repo_name": repo_name,
            "organization": organization,
            "repo_description": repo_description,
            "installation_id": installation_id,
            "username": username if not username.startswith("sweep") else assignee,
            "function": "on_comment",
            "model": "gpt-4",
            "tier": "pro" if is_paying_user else "free",
            "mode": ENV,
            "pr_path": pr_path,
            "pr_line_position": pr_line_position,
            "pr_number": pr_number or pr.id,
            "pr_html_url": pr.html_url,
            "comment_id": comment_id,
            "comment": comment,
            "issue_number": issue_number if issue_number_match else "",
            "tracking_id": tracking_id,
        }
        # NOTE(review): loguru's bind() returns a new logger and the result is
        # discarded here, so this likely has no lasting effect -- confirm intent.
        logger.bind(**metadata)
        logger.info(f"Getting repo {repo_full_name}")
        # Telemetry logic end
        file_comment = bool(pr_path) and bool(pr_line_position)
        item_to_react_to = None
        reaction = None
        bot_comment = None

        def edit_comment(new_comment: str) -> None:
            """Overwrite the bot's progress comment, if one has been created."""
            new_comment = sanitize_string_for_github(new_comment)
            if bot_comment is not None:
                bot_comment.edit(new_comment + "\n" + BOT_SUFFIX)

        try:
            if comment_id:
                # The id may belong to an issue comment or a review comment;
                # try both and silently give up on the reaction if neither works.
                try:
                    item_to_react_to = pr.get_issue_comment(comment_id)
                    reaction = item_to_react_to.create_reaction("eyes")
                except Exception:
                    try:
                        item_to_react_to = pr.get_review_comment(comment_id)
                        reaction = item_to_react_to.create_reaction("eyes")
                    except Exception:
                        pass
                if reaction is not None:
                    # Delete rocket reaction
                    reactions = item_to_react_to.get_reactions()
                    for r in reactions:
                        if (
                            r.content == "rocket"
                            and r.user.login == GITHUB_BOT_USERNAME
                        ):
                            item_to_react_to.delete_reaction(r.id)
            branch_name = (
                pr.head.ref if pr_number else pr.pr_head  # pylint: disable=no-member
            )
            cloned_repo = ClonedRepo(
                repo_full_name,
                installation_id,
                branch=branch_name,
                repo=repo,
                token=_token,
            )
            # Generate diffs for this PR
            pr_diff_string = ""
            if pr_number:
                pr_changes, _dropped_files, _unsuitable_files = get_pr_changes(
                    repo, pr, cloned_repo
                )
                patches = []
                source_codes = []
                for file_name, pr_change in pr_changes.items():
                    if pr_change.status == "modified":
                        # Get the entire file contents, not just the patch
                        numbered_source_code = add_line_numbers(pr_change.new_code, start=1)
                        pruned_source_code = smart_prune_file_based_on_patches(numbered_source_code, pr_change.patches)
                        source_codes.append(f'<file_with_patches_applied file_name="{file_name}">\n{pruned_source_code}\n</file>')
                    # Patches are collected for every changed file; the full
                    # source above is only included for modified files.
                    patch_changes = [patch.changes for patch in pr_change.patches]
                    patch_annotations = pr_change.annotations
                    patches_with_annotations = [f'<patch index="{i}">\n{patch}\n</patch>\n<patch_description index="{i}">\n{patch_annotations[i]}\n<patch_description>' for i, patch in enumerate(patch_changes)]
                    patches_string = "\n".join(patches_with_annotations)
                    patches.append(
                        f'<patches file_name="{file_name}">\n{patches_string}\n</patches>'
                    )
                # create source code string
                source_code_string = (
                    "<code_files_with_patches_applied>\n" + "\n".join(source_codes) + "\n</code_files_with_patches_applied>"
                )
                pr_diff_string = (
                    "<pr_changes>\n" + "\n".join(patches) + "\n\n# Here is the current state of the codebase with the above patches applied:\n\n" + source_code_string + "</pr_changes>"
                )
            # This means it's a comment on a file
            if file_comment:
                pr_file = repo.get_contents(
                    pr_path, ref=branch_name
                ).decoded_content.decode("utf-8")
                # splitlines returns empty array if the string is empty, split(\n) returns ['']
                pr_lines = pr_file.split('\n')
                # Window of roughly ten lines on either side of the commented line.
                start = max(0, pr_line_position - 11)
                end = min(len(pr_lines), pr_line_position + 10)
                pr_chunk = "\n".join(pr_lines[start:end])
                pr_file_path = pr_path.strip()
                formatted_pr_chunk = (
                    "\n".join(pr_lines[start : pr_line_position - 1])
                    + f"\n{pr_lines[pr_line_position - 1]} <--- GITHUB COMMENT: {comment.strip()} --->\n"
                    + "\n".join(pr_lines[pr_line_position:end])
                )
                if comment_id:
                    bot_comment = pr.create_review_comment_reply(
                        comment_id, SWEEPING_GIF + "Searching for relevant snippets..." + BOT_SUFFIX
                    )
            else:
                formatted_pr_chunk = None  # pr_file
                bot_comment = pr.create_issue_comment(SWEEPING_GIF + "Searching for relevant snippets..." + BOT_SUFFIX)
            search_query = comment.strip("\n")
            formatted_query = comment.strip("\n")
            snippets = prep_snippets(
                cloned_repo, search_query, use_multi_query=False
            )
            pr_diffs, _dropped_files, _unsuitable_files = get_pr_changes(repo, pr, cloned_repo)
            snippets_modified = [Snippet.from_file(
                pr_diff, cloned_repo.get_file_contents(pr_diff)
            ) for pr_diff in pr_diffs]
            # Files touched by the PR come first so they survive the cap below.
            snippets = snippets_modified + snippets
            snippets = snippets[:num_of_snippets_to_query]
        except Exception as e:
            stack_trace = traceback.format_exc()
            logger.exception(e)
            elapsed_time = time.time() - start_time
            posthog.capture(
                username,
                "failed",
                properties={
                    "error": str(e),
                    "traceback": f"An error occured during the search! The stack trace is below:\n\n{stack_trace}",
                    "duration": elapsed_time,
                    "tracking_id": tracking_id,
                    **metadata,
                },
            )
            edit_comment(ERROR_FORMAT.format(title=f"An error occured!\n\nThe exception message is:{str(e)}\n\nThe stack trace is:{stack_trace}"))
            raise e
        try:
            logger.info("Fetching files to modify/create...")
            edit_comment(SWEEPING_GIF + "I just completed searching for relevant files, now I'm making changes...")
            if file_comment:
                formatted_query = f"The user left this GitHub PR Review comment in `{pr_path}`:\n<comment>\n{comment}\n</comment>\nThis was where they left their comment on the PR:\n<review_code_chunk>\n{formatted_pr_chunk}\n</review_code_chunk>.\n\nResolve their comment."
            pull_request_info = format_pr_info(pr)
            renames_dict, file_change_requests, plan = get_files_to_change_for_on_comment(
                relevant_snippets=snippets,
                read_only_snippets=[],
                problem_statement=formatted_query,
                repo_name=repo_name,
                pr_info=pull_request_info,
                pr_diffs=pr_diff_string,
                cloned_repo=cloned_repo,
            )
            set_fcr_change_type(file_change_requests, cloned_repo)
            # NOTE(review): assert statements are stripped under `python -O`;
            # confirm whether this check must survive optimized runs.
            assert file_change_requests, NoFilesException("I couldn't find any relevant files to change.")
            planning_markdown = render_fcrs(file_change_requests)
            sweep_response = f"I'm going to make the following changes:\n\n{planning_markdown}\n\nI'm currently validating these changes using parsers and linters to check for syntax errors and undefined variables..."
            quoted_comment = blockquote(comment) + "\n\n"
            response_for_user = (
                f"{quoted_comment}\n\nHi @{username},\n\n{sweep_response}"
            )
            edit_comment(SWEEPING_GIF + response_for_user)
            modify_files_dict, changes_made, file_change_requests = handle_file_change_requests(
                file_change_requests=file_change_requests,
                request=file_comment,
                cloned_repo=cloned_repo,
                username=username,
                installation_id=installation_id,
                renames_dict=renames_dict,
            )
            logger.info("\n".join(generate_diff(file_data["original_contents"], file_data["contents"]) for file_data in modify_files_dict.values()))
            pull_request_bot = PRSummaryBot()
            # Commit message is capped at 50 characters.
            commit_message = pull_request_bot.get_commit_message(modify_files_dict, renames_dict=renames_dict, chat_logger=chat_logger)[:50]
            new_file_contents_to_commit = {file_path: file_data["contents"] for file_path, file_data in modify_files_dict.items()}
            # Keep the pre-validation file set so dropped files can be reported.
            previous_file_contents_to_commit = copy.deepcopy(new_file_contents_to_commit)
            new_file_contents_to_commit, files_removed = validate_and_sanitize_multi_file_changes(cloned_repo.repo, new_file_contents_to_commit, file_change_requests)
            if files_removed and username:
                posthog.capture(
                    username,
                    "polluted_commits_error",
                    properties={
                        "old_keys": ",".join(previous_file_contents_to_commit.keys()),
                        "new_keys": ",".join(new_file_contents_to_commit.keys())
                    },
                )
            commit = commit_multi_file_changes(cloned_repo, new_file_contents_to_commit, commit_message, branch_name)
            logger.info("Done!")
        except Exception as e:
            stack_trace = traceback.format_exc()
            logger.error(stack_trace)
            elapsed_time = time.time() - start_time
            posthog.capture(
                username,
                "failed",
                properties={
                    "error": str(e),
                    "reason": "Failed to make changes",
                    "duration": elapsed_time,
                    **metadata,
                },
            )
            edit_comment(ERROR_FORMAT.format(title=f"Failed to make changes:\n\nThe exception message is:{str(e)}\n\nThe stack trace is:{stack_trace}"))
            raise e
        # Delete eyes
        if reaction is not None:
            item_to_react_to.delete_reaction(reaction.id)
        # Best-effort "rocket" reaction on the triggering comment; failures
        # (including a missing comment_id) are deliberately ignored.
        try:
            item_to_react_to = pr.get_issue_comment(comment_id)
            reaction = item_to_react_to.create_reaction("rocket")
        except Exception:
            try:
                item_to_react_to = pr.get_review_comment(comment_id)
                reaction = item_to_react_to.create_reaction("rocket")
            except Exception:
                pass
        patch_diff = ""
        for file_path, file_data in modify_files_dict.items():
            # Only show diffs for files that survived validation and were committed.
            if file_path in new_file_contents_to_commit:
                patch_diff += f"--- {file_path}\n+++ {file_path}\n{generate_diff(file_data['original_contents'], file_data['contents'])}\n\n"
        if patch_diff:
            edit_comment(f"### 🚀 Resolved via [{commit.sha[:7]}](https://github.com/{repo_full_name}/commit/{commit.sha})\n\nHere were the changes I made:\n```diff\n{patch_diff}\n```")
        else:
            edit_comment(f"### 🚀 Resolved via [{commit.sha[:7]}](https://github.com/{repo_full_name}/commit/{commit.sha})")
        elapsed_time = time.time() - start_time
        # make async
        fire_and_forget_wrapper(posthog.capture)(
            username,
            "success",
            properties={
                **metadata,
                "tracking_id": tracking_id,
                "duration": elapsed_time,
            },
        )

Step 2: ⌨️ Coding

I'm going to take the following steps to help you solve the GitHub issue:

  1. Remove the nonlocal statements from the edit_sweep_comment function in on_ticket.py.
  2. Add parameters to the edit_sweep_comment function for each of the variables that were previously accessed via nonlocal.
  3. Update all calls to edit_sweep_comment in on_ticket.py to pass the required arguments.

Here are the changes we decided to make. I'm done making edits and now I'm just validating the changes using a linter to catch any mistakes like syntax errors or undefined variables:

sweepai/handlers/on_ticket.py

1. Remove the `nonlocal` statement from the `edit_sweep_comment` function. 2. Add parameters to the `edit_sweep_comment` function for each of the variables that were previously accessed via `nonlocal`. 3. Update all calls to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -1,12 +1,18 @@
             def edit_sweep_comment(
                 message: str,
                 index: int,
+                current_index: int,
+                user_token: str,
+                g: Github,
+                repo: Repository,
+                issue_comment: IssueComment,
+                initial_sandbox_response: int,
+                initial_sandbox_response_file: str,
                 pr_message="",
                 done=False,
                 step_complete=True,
                 add_bonus_message=True,
             ):
-                nonlocal current_index, user_token, g, repo, issue_comment, initial_sandbox_response, initial_sandbox_response_file
                 message = sanitize_string_for_github(message)
                 if pr_message:
                     pr_message = sanitize_string_for_github(pr_message)

sweepai/handlers/on_ticket.py

Update the first call to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -1,4 +1,11 @@
             edit_sweep_comment(
                 "I've just finished validating the issue. I'm now going to start searching for relevant files.",
-                0
+                0,
+                current_index,
+                user_token,
+                g,
+                repo,
+                issue_comment,
+                initial_sandbox_response,
+                initial_sandbox_response_file
             )

sweepai/handlers/on_ticket.py

Update the second call to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -17,5 +17,12 @@
                                 else ""
                             ),
                             1,
+                            current_index,
+                            user_token,
+                            g,
+                            repo,
+                            issue_comment,
+                            initial_sandbox_response,
+                            initial_sandbox_response_file,
                             step_complete=False
                         )

sweepai/handlers/on_ticket.py

Update the third call to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -17,4 +17,11 @@
                         else ""
                     ),
                     1,
+                    current_index,
+                    user_token,
+                    g,
+                    repo,
+                    issue_comment,
+                    initial_sandbox_response,
+                    initial_sandbox_response_file
                 )

sweepai/handlers/on_ticket.py

Update the fourth call to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -1,4 +1,11 @@
                 edit_sweep_comment(
                     "I'm currently validating your changes using parsers and linters to check for mistakes like syntax errors or undefined variables. If I see any of these errors, I will automatically fix them.",
                     3,
+                    current_index,
+                    user_token,
+                    g,
+                    repo,
+                    issue_comment,
+                    initial_sandbox_response,
+                    initial_sandbox_response_file
                 )

sweepai/handlers/on_ticket.py

Update the fifth call to `edit_sweep_comment` to pass the required arguments.
--- 
+++ 
@@ -1,4 +1,11 @@
                 edit_sweep_comment(
                     f"Your changes have been successfully made to the branch [`{pull_request.branch_name}`](https://github.com/{repo_full_name}/tree/{pull_request.branch_name}). I have validated these changes using a syntax checker and a linter.",
                     3,
+                    current_index,
+                    user_token,
+                    g,
+                    repo,
+                    issue_comment,
+                    initial_sandbox_response,
+                    initial_sandbox_response_file
                 )

Step 3: 🔄️ Validating

Your changes have been successfully made to the branch sweep/refactor_edit_sweep_comment_in_on_ticket_8cd5b. I have validated these changes using a syntax checker and a linter.


Tip

To recreate the pull request, edit the issue title or description.

This is an automated message generated by Sweep AI.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
sweep Assigns Sweep to an issue or pull request.
Projects
None yet
1 participant