From 862f5244acda940f6da0efb739526e55c7edfbbb Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Thu, 11 May 2023 17:12:58 -0400 Subject: [PATCH 01/72] Create _devops.py Create test_devops.py Update pyproject.toml Update python.yml Update devops functions and tests, add cattrs and requests_mock dependencies Refactor Azure DevOps API wrappers and update tests Refactor DevOps client configuration to use environment variables instead of hard-coded values Refactor DevOpsClient class to return GitPullRequestCommentThread instead of Response. "Add ADO_TOKEN to environment variables" Add git push option to commit function. Refactored update_pull_request method and added assertions to test_update_pr_integration. Refactor repository modules and update imports. Refactor DevOpsClient and update PR summary handling in _github.py Refactor DevOps client, add review and comment commands, fix GitHub command group Refactor update PR method in test_devops.py. Add DevOpsCommandGroup and integrate with Azure DevOps repositories Add support for getting patch and context in Azure DevOps client Refactor DevOpsClient methods and improve patch calculation logic Remove commented out code in test_devops.py. Add azure-functions dependency and implement Azure Function for processing comments Refactor _DevOpsClient methods and extract helper functions Add Azure Functions support and refactor DevOps client Refactor GitHub client class and import statements. 
Fix import statement for GitHubCommandGroup in _gpt_cli.py --- .github/workflows/python.yml | 5 +- .gitignore | 12 + azure/README.md | 29 + azure/api/.funcignore | 8 + azure/api/__init__.py | 1 + azure/api/host.json | 22 + azure/api/incoming_msg_handler/__init__.py | 18 + azure/api/incoming_msg_handler/function.json | 12 + azure/api/requirements.txt | 6 + pyproject.toml | 8 +- src/gpt_review/_git.py | 17 +- src/gpt_review/_gpt_cli.py | 5 +- src/gpt_review/main.py | 3 + .../{ => repositories}/_repository.py | 4 +- src/gpt_review/repositories/devops.py | 722 ++++++++++++++++++ .../{_github.py => repositories/github.py} | 17 +- tests/test_devops.py | 303 ++++++++ tests/test_github.py | 6 +- tests/test_review.py | 2 +- 19 files changed, 1181 insertions(+), 19 deletions(-) create mode 100644 azure/README.md create mode 100644 azure/api/.funcignore create mode 100644 azure/api/__init__.py create mode 100644 azure/api/host.json create mode 100644 azure/api/incoming_msg_handler/__init__.py create mode 100644 azure/api/incoming_msg_handler/function.json create mode 100644 azure/api/requirements.txt rename src/gpt_review/{ => repositories}/_repository.py (88%) create mode 100644 src/gpt_review/repositories/devops.py rename src/gpt_review/{_github.py => repositories/github.py} (91%) create mode 100644 tests/test_devops.py diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c6ab3d43..69dd992d 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -18,6 +18,7 @@ jobs: runs-on: ubuntu-latest env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ADO_TOKEN: ${{ secrets.ADO_TOKEN }} strategy: fail-fast: false matrix: @@ -38,7 +39,7 @@ jobs: with: creds: ${{ secrets.AZURE_CREDENTIALS }} - name: ${{ matrix.tools }} - uses: microsoft/action-python@0.6.3 + uses: microsoft/action-python@0.6.4 with: ${{ matrix.tools }}: true args: ${{ matrix.args }} @@ -58,7 +59,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - name: ${{ 
matrix.tools }} - uses: microsoft/action-python@0.6.3 + uses: microsoft/action-python@0.6.4 with: pytest: true args: -m unit diff --git a/.gitignore b/.gitignore index d8782e2c..fccdc32b 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,15 @@ dmypy.json # Default Directory for llama index files storage/ + +# Azure Functions artifacts +bin +obj +appsettings.json +local.settings.json + +# Azurite artifacts +__blobstorage__ +__queuestorage__ +__azurite_db*__.json +.python_packages \ No newline at end of file diff --git a/azure/README.md b/azure/README.md new file mode 100644 index 00000000..fcdea64c --- /dev/null +++ b/azure/README.md @@ -0,0 +1,29 @@ +This requires < Python 3.11 to run. + +Install the [Azure Function Tools](https://learn.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=v4%2Clinux%2Ccsharp%2Cportal%2Cbash#local-settings-file) + +```sh +curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg +sudo mv microsoft.gpg /etc/apt/trusted.gpg.d/microsoft.gpg + +# Debian/Codespace +sudo sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/$(lsb_release -rs | cut -d'.' -f 1)/prod $(lsb_release -cs) main" > /etc/apt/sources.list.d/dotnetdev.list' + +sudo apt-get update +sudo apt-get install azure-functions-core-tools-4 +``` + +Create a new python env when testing the function. 
+ +```sh +python3.9 -m venv .venv/py39 +source .venv/py39/bin/activate + +python3.9 -m pip install flit +python3.9 -m flit install + +cd azure/api +python 3.9 +func start + +``` \ No newline at end of file diff --git a/azure/api/.funcignore b/azure/api/.funcignore new file mode 100644 index 00000000..9966315f --- /dev/null +++ b/azure/api/.funcignore @@ -0,0 +1,8 @@ +.git* +.vscode +__azurite_db*__.json +__blobstorage__ +__queuestorage__ +local.settings.json +test +.venv \ No newline at end of file diff --git a/azure/api/__init__.py b/azure/api/__init__.py new file mode 100644 index 00000000..7f41b1de --- /dev/null +++ b/azure/api/__init__.py @@ -0,0 +1 @@ +from . import incoming_msg_handler diff --git a/azure/api/host.json b/azure/api/host.json new file mode 100644 index 00000000..ff49a6ee --- /dev/null +++ b/azure/api/host.json @@ -0,0 +1,22 @@ +{ + "version": "2.0", + "logging": { + "applicationInsights": { + "samplingSettings": { + "isEnabled": true, + "excludedTypes": "Request" + } + } + }, + "extensionBundle": { + "id": "Microsoft.Azure.Functions.ExtensionBundle", + "version": "[3.15.0, 4.0.0)" + }, + "extensions": { + "serviceBus": { + "messageHandlerOptions": { + "autoComplete": false + } + } + } +} \ No newline at end of file diff --git a/azure/api/incoming_msg_handler/__init__.py b/azure/api/incoming_msg_handler/__init__.py new file mode 100644 index 00000000..cb2786bb --- /dev/null +++ b/azure/api/incoming_msg_handler/__init__.py @@ -0,0 +1,18 @@ +"""Azure DevOps API incoming message handler.""" +import os +from gpt_review.repositories.devops import _DevOpsClient + +import azure.functions as func + + +CLIENT = _DevOpsClient( + pat=os.environ["ADO_TOKEN"], + org=os.environ["ADO_ORG"], + project=os.environ["ADO_PROJECT"], + repository_id=os.environ["ADO_REPO"], +) + + +def main(msg: func.ServiceBusMessage) -> None: + """Handle an incoming message.""" + CLIENT.handle(msg) diff --git a/azure/api/incoming_msg_handler/function.json 
b/azure/api/incoming_msg_handler/function.json new file mode 100644 index 00000000..5c7d1886 --- /dev/null +++ b/azure/api/incoming_msg_handler/function.json @@ -0,0 +1,12 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "name": "msg", + "type": "serviceBusTrigger", + "direction": "in", + "queueName": "ado-gpt-review", + "connection": "AzureServiceBusConnectionString" + } + ] +} \ No newline at end of file diff --git a/azure/api/requirements.txt b/azure/api/requirements.txt new file mode 100644 index 00000000..918ea384 --- /dev/null +++ b/azure/api/requirements.txt @@ -0,0 +1,6 @@ +# DO NOT include azure-functions-worker in this file +# The Python Worker is managed by Azure Functions platform +# Manually managing azure-functions-worker may cause unexpected issues + +azure-functions +gpt-review>=0.7.0 diff --git a/pyproject.toml b/pyproject.toml index e3dddbfd..09ab24fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,8 @@ classifiers = [ requires-python = ">=3.8.1" dynamic = ["version"] dependencies = [ + 'azure-devops', + 'azure-functions; python_version <= "3.10"', 'azure-identity', 'azure-keyvault-secrets', 'llama-index>=0.6.0,<=0.6.5', @@ -39,6 +41,7 @@ dependencies = [ test = [ "bandit[toml]==1.7.5", "black==23.3.0", + "cattrs", "check-manifest==0.49", "flake8-bugbear==23.5.9", "flake8-docstrings", @@ -54,7 +57,8 @@ test = [ "pytest-xdist", "pytest>=7.2.2", "pytest-github-actions-annotate-failures", - "shellcheck-py==0.9.0.2" + "shellcheck-py==0.9.0.2", + "requests_mock" ] [project.scripts] @@ -136,7 +140,7 @@ executionEnvironments = [ ] [tool.pytest.ini_options] -addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -n auto" +addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -m unit" pythonpath = [ "src" ] diff --git a/src/gpt_review/_git.py b/src/gpt_review/_git.py index ee08e06f..7d37068c 100644 --- a/src/gpt_review/_git.py +++ b/src/gpt_review/_git.py @@ -61,7 +61,14 @@ 
def _commit_message(gpt4: bool = False, large: bool = False) -> str: return _request_goal(diff, goal, fast=not gpt4, large=large) -def _commit(gpt4: bool = False, large: bool = False) -> Dict[str, str]: +def _push(): + """Run git push.""" + logging.debug("Pushing commit to remote.") + repo = Repo.init(_find_git_dir()) + return repo.git.push() + + +def _commit(gpt4: bool = False, large: bool = False, push: bool = False) -> Dict[str, str]: """Run git commit with a commit message generated by GPT. Args: @@ -75,6 +82,8 @@ def _commit(gpt4: bool = False, large: bool = False) -> Dict[str, str]: logging.debug("Commit Message: %s", message) repo = Repo.init(_find_git_dir()) commit = repo.git.commit(message=message) + if push: + commit += f"\n{_push()}" return {"response": commit} @@ -101,3 +110,9 @@ def load_arguments(loader: CLICommandsLoader) -> None: default=False, action="store_true", ) + args.argument( + "push", + help="Push the commit to the remote.", + default=False, + action="store_true", + ) diff --git a/src/gpt_review/_gpt_cli.py b/src/gpt_review/_gpt_cli.py index e9e682fa..17fc9dff 100644 --- a/src/gpt_review/_gpt_cli.py +++ b/src/gpt_review/_gpt_cli.py @@ -8,7 +8,8 @@ from gpt_review import __version__ from gpt_review._ask import AskCommandGroup from gpt_review._git import GitCommandGroup -from gpt_review._github import GitHubCommandGroup +from gpt_review.repositories.devops import DevOpsCommandGroup +from gpt_review.repositories.github import GitHubCommandGroup from gpt_review._review import ReviewCommandGroup CLI_NAME = "gpt" @@ -24,7 +25,7 @@ def get_cli_version(self) -> str: class GPTCommandsLoader(CLICommandsLoader): """The GPT CLI Commands Loader.""" - _CommandGroups = [AskCommandGroup, GitHubCommandGroup, GitCommandGroup, ReviewCommandGroup] + _CommandGroups = [AskCommandGroup, DevOpsCommandGroup, GitHubCommandGroup, GitCommandGroup, ReviewCommandGroup] def load_command_table(self, args) -> OrderedDict: for command_group in self._CommandGroups: diff 
--git a/src/gpt_review/main.py b/src/gpt_review/main.py index e1fa07ba..ebec1182 100644 --- a/src/gpt_review/main.py +++ b/src/gpt_review/main.py @@ -14,6 +14,9 @@ def _help_text(help_type, short_summary) -> str: helps[""] = _help_text("group", "Easily interact with GPT APIs.") +helps["ado"] = _help_text("group", "Use GPT with Azure Devops Repositories.") +helps["ado review"] = _help_text("command", "Review Azure Devops PR with Open AI, and post response as a comment.") +helps["ado comment"] = _help_text("command", "Comment on Azure Devops PR with Open AI.") helps["ask"] = _help_text("group", "Use GPT to ask questions.") helps["git"] = _help_text("group", "Use GPT enchanced git commands.") helps["git commit"] = _help_text("command", "Run git commit with a commit message generated by GPT.") diff --git a/src/gpt_review/_repository.py b/src/gpt_review/repositories/_repository.py similarity index 88% rename from src/gpt_review/_repository.py rename to src/gpt_review/repositories/_repository.py index c019922b..ffabba36 100644 --- a/src/gpt_review/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -22,12 +22,12 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: @staticmethod @abstractmethod - def post_pr_summary(pr_patch) -> None: + def post_pr_summary(diff) -> None: """ Post a summary to a PR. Args: - pr_patch (str): The patch of the PR. + diff (str): The diff of the PR. Returns: str: The review of the PR. 
diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py new file mode 100644 index 00000000..ba08ee9d --- /dev/null +++ b/src/gpt_review/repositories/devops.py @@ -0,0 +1,722 @@ +"""Azure DevOps Package Wrappers to Simplify Usage.""" +import abc +import itertools +import logging +import json +import os +from typing import Dict, Iterator, List, Optional, Iterable + +from msrest.authentication import BasicAuthentication + +from knack.arguments import ArgumentsContext +from knack import CLICommandsLoader +from knack.commands import CommandGroup + +from azure.devops.connection import Connection +from azure.devops.v7_1.git.models import ( + Comment, + GitCommitDiffs, + GitBaseVersionDescriptor, + GitTargetVersionDescriptor, + GitBlobRef, + GitVersionDescriptor, + GitPullRequest, + GitPullRequestCommentThread, +) +from azure.devops.v7_1.git.git_client import GitClient + +from gpt_review._ask import _ask +from gpt_review._command import GPTCommandGroup +from gpt_review._review import _summarize_files +from gpt_review.repositories._repository import _RepositoryClient + + +MIN_CONTEXT_LINES = 5 +SURROUNDING_CONTEXT = 5 + + +class _DevOpsClient(_RepositoryClient, abc.ABC): + """Azure DevOps API Client Wrapper.""" + + def __init__(self, pat, org, project, repository_id) -> None: + """ + Initialize the client. + + Args: + pat (str): The Azure DevOps personal access token. + org (str): The Azure DevOps organization. + project (str): The Azure DevOps project. + repository_id (str): The Azure DevOps repository ID. 
+ """ + self.pat = pat + self.org = org + self.project = project + self.repository_id = repository_id + + personal_access_token = pat + organization_url = f"https://dev.azure.com/{org}" + + # Create a connection to the org + credentials = BasicAuthentication("", personal_access_token) + connection = Connection(base_url=organization_url, creds=credentials) + + # Get a client (the "core" client provides access to projects, teams, etc) + self.connection = connection + self.client: GitClient = connection.clients_v7_1.get_git_client() + self.project = project + self.repository_id = repository_id + + def create_comment(self, pull_request_id: int, comment_id: int, text) -> Comment: + """ + Create a comment on a pull request. + + Args: + token (str): The Azure DevOps token. + org (str): The Azure DevOps organization. + project (str): The Azure DevOps project. + repository_id (str): The Azure DevOps repository ID. + pull_request_id (int): The Azure DevOps pull request ID. + comment_id (int): The Azure DevOps comment ID. + text (str): The text of the comment. + + Returns: + Comment: The response from the API. + """ + new_comment = Comment(content=text) + return self.client.create_comment( + new_comment, self.repository_id, pull_request_id, comment_id, project=self.project + ) + + def _get_comment_thread(self, pull_request_id: str, thread_id: str) -> GitPullRequestCommentThread: + """ + Get a comment thread. + + Args: + pull_request_id (str): The Azure DevOps pull request ID. + thread_id (str): The Azure DevOps thread ID. + + Returns: + GitPullRequestCommentThread: The response from the API. + """ + return self.client.get_pull_request_thread( + repository_id=self.repository_id, pull_request_id=pull_request_id, thread_id=thread_id, project=self.project + ) + + def _get_changed_blobs( + self, + sha1: str, + download: bool = None, + file_name: str = None, + resolve_lfs: bool = None, + ) -> GitBlobRef: + """ + Get the changed blobs in a commit. 
+ + Args: + sha1 (str): The SHA1 of the commit. + download (bool): Whether to download the blob. + file_name (str): The name of the file. + resolve_lfs (bool): Whether to resolve LFS. + + Returns: + GitBlobRef: The response from the API. + """ + return self.client.get_blob( + repository_id=self.repository_id, + project=self.project, + sha1=sha1, + download=download, + file_name=file_name, + resolve_lfs=resolve_lfs, + ) + + def update_pr(self, pull_request_id, title=None, description=None) -> GitPullRequest: + """ + Update a pull request. + + Args: + pull_request_id (str): The Azure DevOps pull request ID. + title (str): The title of the pull request. + description (str): The description of the pull request. + + Returns: + GitPullRequest: The response from the API. + """ + return self.client.update_pull_request( + git_pull_request_to_update=GitPullRequest(title=title, description=description), + repository_id=self.repository_id, + project=self.project, + pull_request_id=pull_request_id, + ) + + def _get_commit_diff( + self, + diff_common_commit: bool, + base_version: GitBaseVersionDescriptor, + target_version: GitTargetVersionDescriptor, + ) -> GitCommitDiffs: + """ + Get the diff between two commits. + + Args: + diff_common_commit (bool): Whether to diff the common commit. + base_version (GitBaseVersionDescriptor): The base version. + target_version (GitTargetVersionDescriptor): The target version. + + Returns: + Response: The response from the API. + """ + return self.client.get_commit_diffs( + repository_id=self.repository_id, + project=self.project, + diff_common_commit=diff_common_commit, + base_version_descriptor=base_version, + target_version_descriptor=target_version, + ) + + def read_all_text( + self, + path: str, + commit_id: str = None, + **kwargs, + ) -> str: + """ + Read all text from a file. + + Args: + path (str): The path to the file. + commit_id (str): The commit ID. + **kwargs: Any additional keyword arguments. 
+ + Returns: + str: The text of the file. + """ + byte_iterator = self.client.get_item_content( + repository_id=self.repository_id, + path=path, + project=self.project, + version_descriptor=GitVersionDescriptor(commit_id, version_type="commit") if commit_id else None, + **kwargs, + ) + return b"".join(byte_iterator).decode("utf-8") # join before decoding: a multi-byte character may span two chunks + + async def read_all_text_async(self, path: str, commit_id: str = None, **kwargs) -> str: + """ + Read all text from a file; awaitable wrapper that delegates to read_all_text. + + Args: + path (str): The path to the file. + commit_id (str): The commit ID; when omitted the default version is read. + **kwargs: Any additional keyword arguments. + + Returns: + str: The text of the file. + """ + return self.read_all_text(path=path, commit_id=commit_id, **kwargs) + + @staticmethod + def process_comment_payload(payload: str) -> str: + """ + Extract question from Service Bus payload. + + Args: + payload (str): The Service Bus payload. + + Returns: + str: The question from the Azure DevOps Comment. + """ + payload = json.loads(payload) + return payload["resource"]["comment"]["content"] + + def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str]: + """ + Get the diff of a pull request. + + Args: + pull_request_event (dict): The pull request event. + pull_request_id (str): The Azure DevOps pull request ID. + comment_id (str): The Azure DevOps comment ID. + + Returns: + List[str]: The diff of the pull request. + """ + context = ContextProvider(self) + thread = self._get_comment_thread(pull_request_id=pull_request_id, thread_id=comment_id) + + return context.get_patch(thread_context=thread.thread_context, pull_request_event=pull_request_event) + + def handle(self, msg) -> None: + """ + The main function for the Azure Function. + + Args: + msg (func.ServiceBusMessage): The Service Bus message. 
+ """ + body = msg.get_body().decode("utf-8") + logging.info("Python ServiceBus queue trigger processed message: %s", body) + if "copilot:summary" in body: + self._process_summary(body) + elif "copilot:" in body: + self._process_comment(body) + + def _process_comment(self, body) -> None: + """ + Process a comment from Copilot. + + Args: + body (str): The Service Bus payload. + """ + logging.info("Copilot Comment Alert Triggered") + payload = json.loads(body) + + pr_id = self._get_pr_id(payload) + + comment_id = self._get_comment_id(payload) + + diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + diff = "\n".join(diff) + + question = f""" + {diff} + + {_DevOpsClient.process_comment_payload(body)} + """ + + logging.info("Copilot diff: %s", diff) + response = _ask( + question=question, + max_tokens=500, + ) + self.create_comment(pull_request_id=pr_id, comment_id=comment_id, text=response["response"]) + + def _get_comment_id(self, payload) -> int: + """ + Get the comment ID from the payload. + + Args: + payload (dict): The payload from the Service Bus. + + Returns: + int: The comment ID. + """ + comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] + logging.info("Copilot Commet ID: %s", comment_id) + return comment_id + + def _process_summary(self, body) -> None: + """ + Process a summary from Copilot. + + Args: + body (str): The Service Bus payload. 
+ """ + logging.info("Copilot Summary Alert Triggered") + payload = json.loads(body) + + pr_id = self._get_pr_id(payload) + + link = self._get_link(pr_id) + + if "comment" in payload["resource"]: + self._post_summary(payload, pr_id, link) + else: + logging.info("Copilot Update from Updated PR") + + def _get_link(self, pr_id) -> str: + link = f"https://{self.org}.visualstudio.com/{self.project}/_git/{self.repository_id}/pullrequest/{pr_id}" + logging.info("Copilot Link: %s", link) + return link + + def _get_pr_id(self, payload) -> int: + """ + Get the pull request ID from the Service Bus payload. + + Args: + payload (dict): The Service Bus payload. + + Returns: + int: The pull request ID, read from resource.pullRequestId or resource.pullRequest.pullRequestId. + """ + if "pullRequestId" in payload["resource"]: + pr_id = payload["resource"]["pullRequestId"] + else: + pr_id = payload["resource"]["pullRequest"]["pullRequestId"] + logging.info("Copilot PR ID: %s", pr_id) + return pr_id + + def _post_summary(self, payload, pr_id, link) -> None: + """ + Process a summary from Copilot. + + Args: + payload (dict): The Service Bus payload. + pr_id (str): The Azure DevOps pull request ID. + link (str): The link to the PR. + """ + comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] + logging.info("Copilot Commet ID: %s", comment_id) + + os.environ["RISK_SUMMARY"] = "false" # os.putenv() would not update os.environ, so os.getenv() would never see it + os.environ["FILE_SUMMARY_FULL"] = "false" + os.environ["TEST_SUMMARY"] = "false" + os.environ["BUG_SUMMARY"] = "false" + os.environ["SUMMARY_SUGGEST"] = "false" + + diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + diff = "\n".join(diff) + logging.info("Copilot diff: %s", diff) + + self.post_pr_summary(diff, link=link) + + +class DevOpsClient(_DevOpsClient): + """Azure DevOps client wrapper that posts GPT summaries to pull requests.""" + + @staticmethod + def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: + """ + Get a review of a PR. 
+ + Requires the following environment variables: + - LINK: The link to the PR. + Example: https://.visualstudio.com//_git//pullrequest/ + or https://dev.azure.com///_git//pullrequest/ + - ADO_TOKEN: The GitHub access token. + + Args: + diff (str): The patch of the PR. + + Returns: + Dict[str, str]: The review. + """ + link = os.getenv("LINK", link) + access_token = os.getenv("ADO_TOKEN", access_token) + + if link and access_token: + review = _summarize_files(diff) + + if "dev.azure.com" in link: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + + _DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( + pull_request_id=pr_id, + description=review, + ) + return {"response": "PR posted"} + + logging.warning("No PR to post too") + return {"response": "No PR to post too"} + + @staticmethod + def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: + """ + Get the diff of a PR. + + Args: + patch_repo (str): The repo. + patch_pr (str): The PR. + access_token (str): The GitHub access token. + + Returns: + str: The diff of the PR. + """ + + +def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review Azure DevOps PR with Open AI, and post response as a comment. + + Args: + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. + + Returns: + Dict[str, str]: The response. 
+ """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() + + DevOpsClient.post_pr_summary(diff_contents, link, access_token) + return {"response": "Review posted as a comment."} + + +def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Ask Open AI a question about an Azure DevOps PR, and post the response as a comment. + + Args: + question (str): The question to ask. + comment_id (int): The comment ID. + diff(str): The diff file. + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. + + Returns: + Dict[str, str]: The response. + """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + + if os.path.exists(diff): + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() + question = f"{diff_contents}\n{question}" + + link = os.getenv("LINK", link) + access_token = os.getenv("ADO_TOKEN", access_token) + + if link and access_token: + response = _ask( + question=question, + ) + if "dev.azure.com" in link: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + + _DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( + pull_request_id=pr_id, comment_id=comment_id, text=response["response"] + ) + return {"response": "Review posted as a comment.", "text": response["response"]} + + +class DevOpsCommandGroup(GPTCommandGroup): + """Azure DevOps Command Group.""" + + @staticmethod + def load_command_table(loader: CLICommandsLoader) -> None: + with CommandGroup(loader, "ado", "gpt_review.repositories.devops#{}", is_preview=True) as group: + group.command("review", "_review", is_preview=True) + group.command("comment", 
"_comment", is_preview=True) + + @staticmethod + def load_arguments(loader: CLICommandsLoader) -> None: + """Add patch_repo, patch_pr, and access_token arguments.""" + with ArgumentsContext(loader, "ado") as args: + args.argument( + "diff", + type=str, + help="Git diff to review.", + default=".diff", + ) + args.argument( + "access_token", + type=str, + help="The Azure DevOps access token, or set ADO_TOKEN", + default=None, + ) + args.argument( + "link", + type=str, + help="The link to the PR.", + default=None, + ) + + with ArgumentsContext(loader, "ado comment") as args: + args.positional("question", type=str, nargs="+", help="Provide a question to ask GPT.") + args.argument( + "comment_id", + type=int, + help="The comment ID of Azure DevOps Pull Request Comment.", + default=None, + ) + + +class ContextProvider: + """Provides context for a given line in a file.""" + + def __init__(self, devops_client: _DevOpsClient) -> None: + """ + Initialize a new instance of ContextProvider. + + Args: + devops_client (_DevOpsClient): The DevOps client. + """ + self.devops_client = devops_client + + def get_patch(self, thread_context, pull_request_event) -> List[str]: + """ + Get the patch for a given thread context. + + Args: + thread_context (ThreadContext): The thread context. + pull_request_event (PullRequestEvent): The pull request event. + + Returns: + List[str]: The patch. 
+ """ + pull_request = pull_request_event["pullRequest"] + if not pull_request: + raise ValueError("pull_request_event.pullRequest is required") + + original_content_task = self.devops_client.read_all_text(path=thread_context.file_path, check_if_exists=True) + changed_content_task = self.devops_client.read_all_text( + path=thread_context.file_path, + commit_id=pull_request["lastMergeSourceCommit"]["commitId"], + check_if_exists=True, + ) + # original_content = await original_content_task + # changed_content = await changed_content_task + original_content = original_content_task + changed_content = changed_content_task + + left_selection, right_selection = self._calculate_selection(thread_context, original_content, changed_content) + + return self._create_patch(left_selection or [], right_selection or [], thread_context.file_path) + + def _calculate_selection(self, thread_context, original_content, changed_content): + left_selection = None + right_selection = None + if original_content and thread_context.left_file_start and thread_context.left_file_end: + left_selection = self._get_selection( + original_content, thread_context.left_file_start.line, thread_context.left_file_end.line + ) + + if not changed_content or not thread_context.right_file_start or not thread_context.right_file_end: + raise ValueError("Both left and right selection cannot be None") + + right_selection = self._get_selection( + changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line + ) + + if changed_content and thread_context.right_file_start and thread_context.right_file_end: + right_selection = self._get_selection( + changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line + ) + + if left_selection or right_selection: + return left_selection, right_selection + raise ValueError("Both left and right selection cannot be None") + + async def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: + """ + Get 
the patches for a given pull request event. + + Args: + pull_request_event (Any): The pull request event to retrieve patches for. + condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. + + Returns: + Iterable[List[str]]: An iterable of lists containing the patches for the pull request event. + """ + pull_request_id = pull_request_event["pullRequest"]["pullRequestId"] + if not pull_request_id: + raise ValueError("pull_request_event.pullRequest is required") + + git_changes = await self.devops_client.get_changed_blobs_async(pull_request_event["pullRequest"]) + all_patches = [] + + for git_change in git_changes: + all_patches.append( + await self._get_change_async( + git_change, pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], condensed + ) + ) + + return all_patches + + def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: + lines = file_contents.splitlines() + + if line_end - line_start < MIN_CONTEXT_LINES: + return lines + + if line_start < 1 or line_start > len(lines) or line_end < 1 or line_end > len(lines): + raise ValueError( + f"Selection region lineStart = {line_start}, lineEnd = {line_end}, lines length = {len(lines)}" + ) + + if line_start == line_end: + return [lines[line_start - 1]] + + return lines[line_start - 1 : line_end] + + async def _get_change_async(self, git_change, source_commit_head, condensed=False) -> List[str]: + return await self._get_git_change_async(self.devops_client, git_change.item.path, source_commit_head, condensed) + + async def _get_git_change_async(self, git_client, file_path, source_commit_head, condensed=False) -> List[str]: + original_content = git_client.read_all_text_async(file_path, check_if_exists=True) + changed_content = git_client.read_all_text_async(file_path, commit_id=source_commit_head, check_if_exists=True) + return self._create_patch(await original_content, await changed_content, file_path, condensed) + + def 
_create_patch( + self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False + ) -> List[str]: + left = original_content.splitlines() if isinstance(original_content, str) else list(original_content or []) + right = changed_content.splitlines() if isinstance(changed_content, str) else list(changed_content or []) + return self._create_patch_list(left, right, file_path, condensed) + + def _create_patch_list(self, left: List[str], right: List[str], file_path: str, condensed=False) -> List[str]: + needed_changes = self._calculate_minimum_change_needed(left, right) + line, row = 1, 1 + patch = [] + + while line <= len(left) and row <= len(right): # line/row are 1-based, so <= is needed to compare the last lines + if left[line - 1] == right[row - 1]: # only identical lines may be emitted as unchanged context + patch.append(left[line - 1]) + line += 1 + row += 1 + elif needed_changes[line - 1][row] < needed_changes[line][row - 1]: + patch.append(f"- {left[line - 1]}") + line += 1 + else: + patch.append(f"+ {right[row - 1]}") + row += 1 + + while line <= len(left): + patch.append(f"- {left[line - 1]}") + line += 1 + + while row <= len(right): + patch.append(f"+ {right[row - 1]}") + row += 1 + + if condensed: + patch = self._get_condensed_patch(patch) + + patch.insert(0, file_path) + return patch + + def _get_condensed_patch(self, patch: List[str]) -> List[str]: + buffer = [] + result = [] + trailing_context = 0 + + for line in patch: + if line.startswith("+") or line.startswith("-"): + result.extend(buffer[-SURROUNDING_CONTEXT:]) + buffer.clear() + result.append(line) + trailing_context = SURROUNDING_CONTEXT + elif trailing_context > 0: + result.append(line) + trailing_context -= 1 + else: + buffer.append(line) + + return result + + def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> List[List[int]]: + changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] + + for i, j in itertools.product(range(len(left) + 1), range(len(right) + 1)): + if i == 0 or j == 0: + changes[i][j] = 0 + elif left[i - 1] == right[j - 1]: + changes[i][j] = changes[i - 1][j - 1] + else: + changes[i][j] = 1 + 
min(changes[i - 1][j], changes[i][j - 1], changes[i - 1][j - 1]) + + return changes diff --git a/src/gpt_review/_github.py b/src/gpt_review/repositories/github.py similarity index 91% rename from src/gpt_review/_github.py rename to src/gpt_review/repositories/github.py index 676b7363..f5fdce73 100644 --- a/src/gpt_review/_github.py +++ b/src/gpt_review/repositories/github.py @@ -10,11 +10,11 @@ from knack.commands import CommandGroup from gpt_review._command import GPTCommandGroup -from gpt_review._repository import _RepositoryClient from gpt_review._review import _summarize_files +from gpt_review.repositories._repository import _RepositoryClient -class _GitHubClient(_RepositoryClient): +class GitHubClient(_RepositoryClient): """GitHub client.""" @staticmethod @@ -104,17 +104,22 @@ def _post_pr_comment(review, git_commit_hash: str, link: str, access_token: str) return response @staticmethod - def post_pr_summary(pr_patch) -> Dict[str, str]: + def post_pr_summary(diff) -> Dict[str, str]: """ Get a review of a PR. + Requires the following environment variables: + - LINK: The link to the PR. + - GIT_COMMIT_HASH: The git commit hash. + - GITHUB_TOKEN: The GitHub access token. + Args: - pr_patch (str): The patch of the PR. + diff (str): The patch of the PR. Returns: Dict[str, str]: The review. 
""" - review = _summarize_files(pr_patch) + review = _summarize_files(diff) logging.debug(review) link = os.getenv("LINK") @@ -152,7 +157,7 @@ class GitHubCommandGroup(GPTCommandGroup): @staticmethod def load_command_table(loader: CLICommandsLoader) -> None: - with CommandGroup(loader, "github", "gpt_review._github#{}") as group: + with CommandGroup(loader, "github", "gpt_review.repositories._github#{}", is_preview=True) as group: group.command("review", "_github_review", is_preview=True) @staticmethod diff --git a/tests/test_devops.py b/tests/test_devops.py new file mode 100644 index 00000000..4c10b331 --- /dev/null +++ b/tests/test_devops.py @@ -0,0 +1,303 @@ +import os +import pytest +import requests_mock + +from dataclasses import dataclass +from azure.devops.v7_1.git.models import ( + GitBaseVersionDescriptor, + GitTargetVersionDescriptor, + GitCommitDiffs, + GitPullRequest, + Comment, + GitPullRequestCommentThread, +) + +from gpt_review.repositories.devops import _DevOpsClient, _comment + +# Azure Devops PAT requires +# - Code: 'Read','Write' +# - Pull Request Threads: 'Read & Write' +TOKEN = os.getenv("ADO_TOKEN", "token1") + +ORG = os.getenv("ADO_ORG", "msazure") +PROJECT = os.getenv("ADO_PROJECT", "one") +REPO = os.getenv("ADO_REPO", "azure-gaming") +PR_ID = int(os.getenv("ADO_PR_ID", 8063875)) +COMMENT_ID = int(os.getenv("ADO_COMMENT_ID", 141344325)) + +SOURCE = os.getenv("ADO_COMMIT_SOURCE", "36f9a015ee220516f5f553faaa1898ab10972536") +TARGET = os.getenv("ADO_COMMIT_TARGET", "ecea1ea7db038317e94b45e090781410dc519b85") + +SAMPLE_PAYLOAD = """{ + "resource": { + "comment": { + "content": "copilot: summary of this changed code" + } + } +} +""" + +LONG_PAYLOAD = { + "id": "e89fa09c-f412-4167-a2cd-f6a5bb8aef56", + "eventType": "ms.vss-code.git-pullrequest-comment-event", + "publisherId": "tfs", + "message": {"text": "Daniel Ciborowski has replied to a pull request comment"}, + "detailedMessage": { + "text": 'Daniel Ciborowski has replied to a pull request 
comment\r\n```suggestion\n inlineScript: | \n echo "##[section] Summarize Pull Request with Open AI"\n\n echo "##[command]python3 -m pip install --upgrade pip"\n python3 -m pip install --upgrade pip --quiet\n```\nhow could i update this code?\r\n' + }, + "resource": { + "comment": { + "id": 2, + "parentCommentId": 1, + "author": { + "displayName": "Daniel Ciborowski", + "url": "https://spsprodwus23.vssps.visualstudio.com/A41b4f3ee-c651-4a14-9847-b7cbb5315b80/_apis/Identities/0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "_links": { + "avatar": { + "href": "https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4" + } + }, + "id": "0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "uniqueName": "dciborow@microsoft.com", + "imageUrl": "https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + "descriptor": "aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + }, + "content": '```suggestion\n inlineScript: | \n echo "##[section] Summarize Pull Request with Open AI"\n\n echo "##[command]python3 -m pip install --upgrade pip"\n python3 -m pip install --upgrade pip --quiet\n```\nhow could i update this code?', + "publishedDate": "2023-05-13T00:30:56.68Z", + "lastUpdatedDate": "2023-05-13T00:30:56.68Z", + "lastContentUpdatedDate": "2023-05-13T00:30:56.68Z", + "commentType": "text", + "usersLiked": [], + "_links": { + "self": { + "href": "https://msazure.visualstudio.com/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8063875/threads/141415813/comments/2" + }, + "repository": { + "href": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3" + }, + "threads": { + "href": "https://msazure.visualstudio.com/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8063875/threads/141415813" + }, + "pullRequests": {"href": 
"https://msazure.visualstudio.com/_apis/git/pullRequests/8063875"}, + }, + }, + "pullRequest": { + "repository": { + "id": "612d9367-8ab6-4929-abe6-b5b5ad7b5ad3", + "name": "Azure-Gaming", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3", + "project": { + "id": "b32aa71e-8ed2-41b2-9d77-5bc261222004", + "name": "One", + "description": "MSAzure/One is the VSTS project containing all Azure team code bases and work items.\nPlease see https://aka.ms/azaccess for work item and source access policies.", + "url": "https://msazure.visualstudio.com/_apis/projects/b32aa71e-8ed2-41b2-9d77-5bc261222004", + "state": "wellFormed", + "revision": 307061, + "visibility": "organization", + "lastUpdateTime": "2023-05-12T17:40:59.963Z", + }, + "size": 508859977, + "remoteUrl": "https://msazure.visualstudio.com/DefaultCollection/One/_git/Azure-Gaming", + "sshUrl": "msazure@vs-ssh.visualstudio.com:v3/msazure/One/Azure-Gaming", + "webUrl": "https://msazure.visualstudio.com/DefaultCollection/One/_git/Azure-Gaming", + "isDisabled": False, + "isInMaintenance": False, + }, + "pullRequestId": 8063875, + "codeReviewId": 8836473, + "status": "active", + "createdBy": { + "displayName": "Daniel Ciborowski", + "url": "https://spsprodwus23.vssps.visualstudio.com/A41b4f3ee-c651-4a14-9847-b7cbb5315b80/_apis/Identities/0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "_links": { + "avatar": { + "href": "https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4" + } + }, + "id": "0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "uniqueName": "dciborow@microsoft.com", + "imageUrl": "https://msazure.visualstudio.com/_api/_common/identityImage?id=0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "descriptor": "aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + }, + "creationDate": "2023-05-05T03:11:26.8599393Z", + "title": "Sample PR Title", + "description": "description1", 
+ "sourceRefName": "refs/heads/dciborow/update-pr", + "targetRefName": "refs/heads/main", + "mergeStatus": "succeeded", + "isDraft": False, + "mergeId": "0e7397c6-5f11-402c-a5c6-c5a12b105350", + "lastMergeSourceCommit": { + "commitId": "ecea1ea7db038317e94b45e090781410dc519b85", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/ecea1ea7db038317e94b45e090781410dc519b85", + }, + "lastMergeTargetCommit": { + "commitId": "36f9a015ee220516f5f553faaa1898ab10972536", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/36f9a015ee220516f5f553faaa1898ab10972536", + }, + "lastMergeCommit": { + "commitId": "d5fc735b618647a78a0aff006445b67bfe4e8185", + "author": { + "name": "Daniel Ciborowski", + "email": "dciborow@microsoft.com", + "date": "2023-05-05T14:23:49Z", + }, + "committer": { + "name": "Daniel Ciborowski", + "email": "dciborow@microsoft.com", + "date": "2023-05-05T14:23:49Z", + }, + "comment": "Merge pull request 8063875 from dciborow/update-pr into main", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/d5fc735b618647a78a0aff006445b67bfe4e8185", + }, + "reviewers": [], + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8063875", + "supportsIterations": True, + "artifactId": "vstfs:///Git/PullRequestId/b32aa71e-8ed2-41b2-9d77-5bc261222004%2f612d9367-8ab6-4929-abe6-b5b5ad7b5ad3%2f8063875", + }, + }, + "resourceVersion": "2.0", + "resourceContainers": { + "collection": {"id": "41bf5486-7392-4b7a-a7e3-a735c767e3b3", "baseUrl": "https://msazure.visualstudio.com/"}, + "account": {"id": "41b4f3ee-c651-4a14-9847-b7cbb5315b80", "baseUrl": "https://msazure.visualstudio.com/"}, + "project": 
{"id": "b32aa71e-8ed2-41b2-9d77-5bc261222004", "baseUrl": "https://msazure.visualstudio.com/"}, + }, + "createdDate": "2023-05-13T00:31:02.6421816Z", +} + + +@pytest.fixture +def mock_req(): + with requests_mock.Mocker() as m: + yield m + + +@pytest.fixture +def mock_ado_client(monkeypatch) -> None: + @dataclass + class MockResponse: + text: str + status_code: int = 203 + + def mock_update_thread(self, text, repository_id, pull_request_id, comment_id) -> MockResponse: + return MockResponse("mock response") + + monkeypatch.setattr("azure.devops.v7_1.git.git_client_base.GitClientBase.update_thread", mock_update_thread) + + class MockDevOpsClient: + def get_git_client(self) -> "MockDevOpsClient": + return MockDevOpsClient() + + def update_thread(self, text, repository_id, pull_request_id, comment_id) -> MockResponse: + return MockResponse("mock response") + + def create_comment(self, comment, repository_id, pull_request_id, thread_id, project=None) -> Comment: + return Comment() + + def get_pull_request_thread( + self, repository_id, pull_request_id, thread_id, project=None, iteration=None, base_iteration=None + ) -> GitPullRequestCommentThread: + return GitPullRequestCommentThread() + + def update_pull_request( + self, git_pull_request_to_update, repository_id, pull_request_id, project=None + ) -> GitPullRequest: + return GitPullRequest() + + def get_commit_diffs( + self, + repository_id, + project=None, + diff_common_commit=None, + top=None, + skip=None, + base_version_descriptor=None, + target_version_descriptor=None, + ) -> GitCommitDiffs: + return GitCommitDiffs() + + def mock_client(self) -> MockDevOpsClient: + return MockDevOpsClient() + + monkeypatch.setattr("azure.devops.released.client_factory.ClientFactory.get_core_client", mock_client) + monkeypatch.setattr("azure.devops.v7_1.client_factory.ClientFactoryV7_1.get_git_client", mock_client) + + +@pytest.fixture +def devops_client() -> _DevOpsClient: + return _DevOpsClient(TOKEN, ORG, PROJECT, REPO) + + +def 
test_create_comment(devops_client: _DevOpsClient, mock_ado_client: None) -> None: + response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") + assert isinstance(response, Comment) + + +def test_update_pr(devops_client: _DevOpsClient, mock_ado_client: None) -> None: + response = devops_client.update_pr(pull_request_id=PR_ID, title="title1", description="description1") + assert isinstance(response, GitPullRequest) + + +def test_get_diff(devops_client: _DevOpsClient, mock_ado_client: None) -> None: + response = devops_client._get_commit_diff( + diff_common_commit=True, + base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), + target_version=GitTargetVersionDescriptor(target_version=TARGET, target_version_type="commit"), + ) + assert isinstance(response, GitCommitDiffs) + + +@pytest.mark.integration +def test_create_comment_integration(devops_client: _DevOpsClient) -> None: + response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") + assert isinstance(response, Comment) + + +@pytest.mark.integration +def test_update_pr_integration(devops_client: _DevOpsClient) -> None: + response = devops_client.update_pr(PR_ID, description="description1") + assert isinstance(response, GitPullRequest) + response = devops_client.update_pr(PR_ID, title="Sample PR Title") + assert isinstance(response, GitPullRequest) + + +@pytest.mark.integration +def test_get_diff_integration(devops_client: _DevOpsClient) -> None: + response = devops_client._get_commit_diff( + diff_common_commit=True, + base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), + target_version=GitTargetVersionDescriptor(target_version=TARGET, target_version_type="commit"), + ) + assert isinstance(response, GitCommitDiffs) + + +def process_payload_test() -> None: + question = _DevOpsClient.process_comment_payload(SAMPLE_PAYLOAD) + link = 
"https://msazure.visualstudio.com/One/_git/Azure-Gaming/pullrequest/8063875" + _comment(question, comment_id=COMMENT_ID, link=link) + + +def test_process_payload(mock_ado_client: None) -> None: + process_payload_test() + + +@pytest.mark.integration +def test_process_payload_integration() -> None: + process_payload_test() + + +def get_patch_test(devops_client: _DevOpsClient) -> None: + comment_id = LONG_PAYLOAD["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] + patch = devops_client.get_patch( + pull_request_event=LONG_PAYLOAD["resource"], pull_request_id=PR_ID, comment_id=comment_id + ) + assert len(patch) == 64 + + +def test_get_patch(devops_client: _DevOpsClient) -> None: + get_patch_test(devops_client) + + +@pytest.mark.integration +def test_get_patch_integration(devops_client: _DevOpsClient) -> None: + get_patch_test(devops_client) diff --git a/tests/test_github.py b/tests/test_github.py index 88ecea81..031ec54d 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -1,17 +1,17 @@ import pytest -from gpt_review._github import _GitHubClient +from gpt_review.repositories.github import GitHubClient def get_pr_diff_test(starts_with, patch_repo=None, patch_pr=None) -> None: """Test the GitHub API call.""" - diff = _GitHubClient.get_pr_diff(patch_repo=patch_repo, patch_pr=patch_pr) + diff = GitHubClient.get_pr_diff(patch_repo=patch_repo, patch_pr=patch_pr) assert diff.startswith(starts_with) def post_pr_comment_test() -> None: """Test the GitHub API call.""" - response = _GitHubClient.post_pr_summary("test") + response = GitHubClient.post_pr_summary("test") assert response diff --git a/tests/test_review.py b/tests/test_review.py index 51a89bbe..72deb744 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -1,6 +1,6 @@ import pytest -from gpt_review._github import _GitHubClient +from gpt_review.repositories._github import _GitHubClient def test_get_review(mock_openai) -> None: From 845dc0dcc165d3ed980b7d834b519b05353b644b Mon Sep 
17 00:00:00 2001 From: Daniel Ciborowski Date: Mon, 15 May 2023 23:16:45 +0000 Subject: [PATCH 02/72] "Add .deb and .venvs to .gitignore" --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index fccdc32b..a54729a8 100644 --- a/.gitignore +++ b/.gitignore @@ -137,6 +137,8 @@ bin obj appsettings.json local.settings.json +*.deb +.venvs # Azurite artifacts __blobstorage__ From bb97e2a910dde107ea868bdfa693ecb280af8651 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Mon, 15 May 2023 23:20:21 +0000 Subject: [PATCH 03/72] Refactor DevOpsClient instantiation in update_pr method. --- src/gpt_review/repositories/devops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index ba08ee9d..05325765 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -406,7 +406,7 @@ def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: repo = link.split("/")[5] pr_id = link.split("/")[7] - _DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( pull_request_id=pr_id, description=review, ) From 5b89ec0b466a4f818cdad627f388b00b7b24ed3a Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Mon, 15 May 2023 23:37:10 +0000 Subject: [PATCH 04/72] Add review and comment methods for DevOpsClient and refactor GitHubClient --- src/gpt_review/repositories/_repository.py | 31 ++ src/gpt_review/repositories/devops.py | 330 ++++++++++----------- src/gpt_review/repositories/github.py | 35 ++- 3 files changed, 216 insertions(+), 180 deletions(-) diff --git a/src/gpt_review/repositories/_repository.py b/src/gpt_review/repositories/_repository.py index ffabba36..04e17693 100644 --- a/src/gpt_review/repositories/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -1,5 +1,6 
@@ """Abstract class for a repository client.""" from abc import abstractmethod +from typing import Dict class _RepositoryClient: @@ -32,3 +33,33 @@ def post_pr_summary(diff) -> None: Returns: str: The review of the PR. """ + + @staticmethod + @abstractmethod + def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: + """Review PR with Open AI, and post response as a comment. + + Args: + repository (str): The repo of the PR. + pull_request (str): The PR number. + access_token (str): The GitHub access token. + + Returns: + Dict[str, str]: The response. + """ + + @staticmethod + @abstractmethod + def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review PR with Open AI, and post response as a comment. + + Args: + question (str): The question to ask. + comment_id (int): The comment ID. + diff(str): The diff file. + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. + + Returns: + Dict[str, str]: The response. + """ diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 05325765..421c272f 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -369,171 +369,6 @@ def _post_summary(self, payload, pr_id, link) -> None: self.post_pr_summary(diff, link=link) -class DevOpsClient(_DevOpsClient): - """Azure DevOps client Wrapper for working with.""" - - @staticmethod - def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: - """ - Get a review of a PR. - - Requires the following environment variables: - - LINK: The link to the PR. - Example: https://.visualstudio.com//_git//pullrequest/ - or https://dev.azure.com///_git//pullrequest/ - - ADO_TOKEN: The GitHub access token. - - Args: - diff (str): The patch of the PR. - - Returns: - Dict[str, str]: The review. 
- """ - link = os.getenv("LINK", link) - access_token = os.getenv("ADO_TOKEN", access_token) - - if link and access_token: - review = _summarize_files(diff) - - if "dev.azure.com" in link: - org = link.split("/")[3] - project = link.split("/")[4] - repo = link.split("/")[6] - pr_id = link.split("/")[8] - else: - org = link.split("/")[2].split(".")[0] - project = link.split("/")[3] - repo = link.split("/")[5] - pr_id = link.split("/")[7] - - DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( - pull_request_id=pr_id, - description=review, - ) - return {"response": "PR posted"} - - logging.warning("No PR to post too") - return {"response": "No PR to post too"} - - @staticmethod - def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: - """ - Get the diff of a PR. - - Args: - patch_repo (str): The repo. - patch_pr (str): The PR. - access_token (str): The GitHub access token. - - Returns: - str: The diff of the PR. - """ - - -def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """Review Azure DevOps PR with Open AI, and post response as a comment. - - Args: - link (str): The link to the PR. - access_token (str): The Azure DevOps access token. - - Returns: - Dict[str, str]: The response. - """ - # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) - with open(diff, "r", encoding="utf8") as file: - diff_contents = file.read() - - _DevOpsClient.post_pr_summary(diff_contents, link, access_token) - return {"response": "Review posted as a comment."} - - -def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """Review Azure DevOps PR with Open AI, and post response as a comment. - - Args: - question (str): The question to ask. - comment_id (int): The comment ID. - diff(str): The diff file. - link (str): The link to the PR. - access_token (str): The Azure DevOps access token. 
- - Returns: - Dict[str, str]: The response. - """ - # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) - - if os.path.exists(diff): - with open(diff, "r", encoding="utf8") as file: - diff_contents = file.read() - question = f"{diff_contents}\n{question}" - - link = os.getenv("LINK", link) - access_token = os.getenv("ADO_TOKEN", access_token) - - if link and access_token: - response = _ask( - question=question, - ) - if "dev.azure.com" in link: - org = link.split("/")[3] - project = link.split("/")[4] - repo = link.split("/")[6] - pr_id = link.split("/")[8] - else: - org = link.split("/")[2].split(".")[0] - project = link.split("/")[3] - repo = link.split("/")[5] - pr_id = link.split("/")[7] - - _DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( - pull_request_id=pr_id, comment_id=comment_id, text=response["response"] - ) - return {"response": "Review posted as a comment.", "text": response["response"]} - - -class DevOpsCommandGroup(GPTCommandGroup): - """Ask Command Group.""" - - @staticmethod - def load_command_table(loader: CLICommandsLoader) -> None: - with CommandGroup(loader, "ado", "gpt_review.repositories._devops#{}", is_preview=True) as group: - group.command("review", "_review", is_preview=True) - group.command("comment", "_comment", is_preview=True) - - @staticmethod - def load_arguments(loader: CLICommandsLoader) -> None: - """Add patch_repo, patch_pr, and access_token arguments.""" - with ArgumentsContext(loader, "ado") as args: - args.argument( - "diff", - type=str, - help="Git diff to review.", - default=".diff", - ) - args.argument( - "access_token", - type=str, - help="The Azure DevOps access token, or set ADO_TOKEN", - default=None, - ) - args.argument( - "link", - type=str, - help="The link to the PR.", - default=None, - ) - - with ArgumentsContext(loader, "ado comment") as args: - args.positional("question", type=str, nargs="+", help="Provide a question to ask GPT.") - 
args.argument( - "comment_id", - type=int, - help="The comment ID of Azure DevOps Pull Request Comment.", - default=None, - ) - - class ContextProvider: """Provides context for a given line in a file.""" @@ -720,3 +555,168 @@ def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> changes[i][j] = 1 + min(changes[i - 1][j], changes[i][j - 1], changes[i - 1][j - 1]) return changes + + +class DevOpsClient(_DevOpsClient): + """Azure DevOps client Wrapper for working with.""" + + @staticmethod + def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: + """ + Get a review of a PR. + + Requires the following environment variables: + - LINK: The link to the PR. + Example: https://.visualstudio.com//_git//pullrequest/ + or https://dev.azure.com///_git//pullrequest/ + - ADO_TOKEN: The GitHub access token. + + Args: + diff (str): The patch of the PR. + + Returns: + Dict[str, str]: The review. + """ + link = os.getenv("LINK", link) + access_token = os.getenv("ADO_TOKEN", access_token) + + if link and access_token: + review = _summarize_files(diff) + + if "dev.azure.com" in link: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( + pull_request_id=pr_id, + description=review, + ) + return {"response": "PR posted"} + + logging.warning("No PR to post too") + return {"response": "No PR to post too"} + + @staticmethod + def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: + """ + Get the diff of a PR. + + Args: + patch_repo (str): The repo. + patch_pr (str): The PR. + access_token (str): The GitHub access token. + + Returns: + str: The diff of the PR. 
+ """ + + @staticmethod + def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review Azure DevOps PR with Open AI, and post response as a comment. + + Args: + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. + + Returns: + Dict[str, str]: The response. + """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() + + DevOpsClient.post_pr_summary(diff_contents, link, access_token) + return {"response": "Review posted as a comment."} + + @staticmethod + def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review Azure DevOps PR with Open AI, and post response as a comment. + + Args: + question (str): The question to ask. + comment_id (int): The comment ID. + diff(str): The diff file. + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. + + Returns: + Dict[str, str]: The response. 
+ """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + + if os.path.exists(diff): + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() + question = f"{diff_contents}\n{question}" + + link = os.getenv("LINK", link) + access_token = os.getenv("ADO_TOKEN", access_token) + + if link and access_token: + response = _ask( + question=question, + ) + if "dev.azure.com" in link: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( + pull_request_id=pr_id, comment_id=comment_id, text=response["response"] + ) + return {"response": "Review posted as a comment.", "text": response["response"]} + + +class DevOpsCommandGroup(GPTCommandGroup): + """Ask Command Group.""" + + @staticmethod + def load_command_table(loader: CLICommandsLoader) -> None: + with CommandGroup(loader, "ado", "gpt_review.repositories.devops.DevOpsClient#{}", is_preview=True) as group: + group.command("review", "_review", is_preview=True) + group.command("comment", "_comment", is_preview=True) + + @staticmethod + def load_arguments(loader: CLICommandsLoader) -> None: + """Add patch_repo, patch_pr, and access_token arguments.""" + with ArgumentsContext(loader, "ado") as args: + args.argument( + "diff", + type=str, + help="Git diff to review.", + default=".diff", + ) + args.argument( + "access_token", + type=str, + help="The Azure DevOps access token, or set ADO_TOKEN", + default=None, + ) + args.argument( + "link", + type=str, + help="The link to the PR.", + default=None, + ) + + with ArgumentsContext(loader, "ado comment") as args: + args.positional("question", type=str, nargs="+", help="Provide a question to ask GPT.") + args.argument( + "comment_id", + type=int, + 
help="The comment ID of Azure DevOps Pull Request Comment.", + default=None, + ) diff --git a/src/gpt_review/repositories/github.py b/src/gpt_review/repositories/github.py index f5fdce73..2de9d024 100644 --- a/src/gpt_review/repositories/github.py +++ b/src/gpt_review/repositories/github.py @@ -127,7 +127,7 @@ def post_pr_summary(diff) -> Dict[str, str]: access_token = os.getenv("GITHUB_TOKEN") if link and git_commit_hash and access_token: - _GitHubClient._post_pr_comment( + GitHubClient._post_pr_comment( review=review, git_commit_hash=git_commit_hash, link=link, access_token=access_token ) return {"response": "PR posted"} @@ -135,21 +135,26 @@ def post_pr_summary(diff) -> Dict[str, str]: logging.warning("No PR to post too") return {"response": "No PR to post too"} + @staticmethod + def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: + """Review GitHub PR with Open AI, and post response as a comment. -def _github_review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: - """Review GitHub PR with Open AI, and post response as a comment. + Args: + repository (str): The repo of the PR. + pull_request (str): The PR number. + access_token (str): The GitHub access token. - Args: - repository (str): The repo of the PR. - pull_request (str): The PR number. - access_token (str): The GitHub access token. + Returns: + Dict[str, str]: The response. + """ + diff = GitHubClient.get_pr_diff(repository, pull_request, access_token) + GitHubClient.post_pr_summary(diff) + return {"response": "Review posted as a comment."} - Returns: - Dict[str, str]: The response. 
- """ - diff = _GitHubClient.get_pr_diff(repository, pull_request, access_token) - _GitHubClient.post_pr_summary(diff) - return {"response": "Review posted as a comment."} + @staticmethod + def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """""" + raise NotImplementedError class GitHubCommandGroup(GPTCommandGroup): @@ -157,8 +162,8 @@ class GitHubCommandGroup(GPTCommandGroup): @staticmethod def load_command_table(loader: CLICommandsLoader) -> None: - with CommandGroup(loader, "github", "gpt_review.repositories._github#{}", is_preview=True) as group: - group.command("review", "_github_review", is_preview=True) + with CommandGroup(loader, "github", "gpt_review.repositories.github.GitHubClient#{}", is_preview=True) as group: + group.command("review", "_review", is_preview=True) @staticmethod def load_arguments(loader: CLICommandsLoader) -> None: From 8f77ca2cd39090e1b8ccbc9fc6c2dc1563681082 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Mon, 15 May 2023 23:38:12 +0000 Subject: [PATCH 05/72] Refactor GitHubClient import in test_review.py file. 
--- tests/test_review.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_review.py b/tests/test_review.py index 72deb744..91813015 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -1,6 +1,6 @@ import pytest -from gpt_review.repositories._github import _GitHubClient +from gpt_review.repositories.github import GitHubClient def test_get_review(mock_openai) -> None: @@ -18,7 +18,7 @@ def get_review_test() -> None: with open("tests/mock.diff", "r") as f: diff = f.read() - _GitHubClient.post_pr_summary(diff) + GitHubClient.post_pr_summary(diff) def test_empty_summary(empty_summary, mock_openai) -> None: From 619a5e5539552ce8e6deee6b3489431f73a285a6 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Mon, 15 May 2023 23:45:59 +0000 Subject: [PATCH 06/72] Refactor: Rename _DevOpsClient to DevOpsClient and update tests --- tests/test_devops.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_devops.py b/tests/test_devops.py index 4c10b331..5ad943ce 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -12,7 +12,7 @@ GitPullRequestCommentThread, ) -from gpt_review.repositories.devops import _DevOpsClient, _comment +from gpt_review.repositories.devops import DevOpsClient # Azure Devops PAT requires # - Code: 'Read','Write' @@ -224,21 +224,21 @@ def mock_client(self) -> MockDevOpsClient: @pytest.fixture -def devops_client() -> _DevOpsClient: - return _DevOpsClient(TOKEN, ORG, PROJECT, REPO) +def devops_client() -> DevOpsClient: + return DevOpsClient(TOKEN, ORG, PROJECT, REPO) -def test_create_comment(devops_client: _DevOpsClient, mock_ado_client: None) -> None: +def test_create_comment(devops_client: DevOpsClient, mock_ado_client: None) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") assert isinstance(response, Comment) -def test_update_pr(devops_client: _DevOpsClient, mock_ado_client: None) -> 
None: +def test_update_pr(devops_client: DevOpsClient, mock_ado_client: None) -> None: response = devops_client.update_pr(pull_request_id=PR_ID, title="title1", description="description1") assert isinstance(response, GitPullRequest) -def test_get_diff(devops_client: _DevOpsClient, mock_ado_client: None) -> None: +def test_get_diff(devops_client: DevOpsClient, mock_ado_client: None) -> None: response = devops_client._get_commit_diff( diff_common_commit=True, base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), @@ -248,13 +248,13 @@ def test_get_diff(devops_client: _DevOpsClient, mock_ado_client: None) -> None: @pytest.mark.integration -def test_create_comment_integration(devops_client: _DevOpsClient) -> None: +def test_create_comment_integration(devops_client: DevOpsClient) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") assert isinstance(response, Comment) @pytest.mark.integration -def test_update_pr_integration(devops_client: _DevOpsClient) -> None: +def test_update_pr_integration(devops_client: DevOpsClient) -> None: response = devops_client.update_pr(PR_ID, description="description1") assert isinstance(response, GitPullRequest) response = devops_client.update_pr(PR_ID, title="Sample PR Title") @@ -262,7 +262,7 @@ def test_update_pr_integration(devops_client: _DevOpsClient) -> None: @pytest.mark.integration -def test_get_diff_integration(devops_client: _DevOpsClient) -> None: +def test_get_diff_integration(devops_client: DevOpsClient) -> None: response = devops_client._get_commit_diff( diff_common_commit=True, base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), @@ -272,9 +272,9 @@ def test_get_diff_integration(devops_client: _DevOpsClient) -> None: def process_payload_test() -> None: - question = _DevOpsClient.process_comment_payload(SAMPLE_PAYLOAD) + question = DevOpsClient.process_comment_payload(SAMPLE_PAYLOAD) link = 
"https://msazure.visualstudio.com/One/_git/Azure-Gaming/pullrequest/8063875" - _comment(question, comment_id=COMMENT_ID, link=link) + DevOpsClient._comment(question, comment_id=COMMENT_ID, link=link) def test_process_payload(mock_ado_client: None) -> None: @@ -286,7 +286,7 @@ def test_process_payload_integration() -> None: process_payload_test() -def get_patch_test(devops_client: _DevOpsClient) -> None: +def get_patch_test(devops_client: DevOpsClient) -> None: comment_id = LONG_PAYLOAD["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] patch = devops_client.get_patch( pull_request_event=LONG_PAYLOAD["resource"], pull_request_id=PR_ID, comment_id=comment_id @@ -294,10 +294,10 @@ def get_patch_test(devops_client: _DevOpsClient) -> None: assert len(patch) == 64 -def test_get_patch(devops_client: _DevOpsClient) -> None: +def test_get_patch(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) @pytest.mark.integration -def test_get_patch_integration(devops_client: _DevOpsClient) -> None: +def test_get_patch_integration(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) From 2d1ac9fcd4eb6df2bb09ce66c2a533e659bda4e6 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 00:13:18 +0000 Subject: [PATCH 07/72] Refactor: Update function signatures and improve Azure context handling --- src/gpt_review/_openai.py | 2 +- src/gpt_review/context.py | 7 +- src/gpt_review/repositories/_repository.py | 30 ------ src/gpt_review/repositories/devops.py | 102 ++++++++++----------- src/gpt_review/repositories/github.py | 34 ++++--- tests/test_devops.py | 2 +- tests/test_gpt_cli.py | 7 +- 7 files changed, 80 insertions(+), 104 deletions(-) diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index a48d3e83..66512f4c 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -85,7 +85,7 @@ def _call_gpt( logging.info("Prompt sent to GPT: %s\n", prompt) completion = openai.ChatCompletion.create( - 
model=model, + deployment_id=model, messages=messages, max_tokens=max_tokens, temperature=temperature, diff --git a/src/gpt_review/context.py b/src/gpt_review/context.py index f03c7a51..cf7bc72e 100644 --- a/src/gpt_review/context.py +++ b/src/gpt_review/context.py @@ -42,16 +42,19 @@ def _load_azure_openai_context() -> Context: if azure_config.get("azure_api_type"): openai.api_type = os.environ["OPENAI_API_TYPE"] = azure_config.get("azure_api_type") + elif os.getenv("AZURE_OPENAI_API"): + openai.api_type = os.environ["OPENAI_API_TYPE"] = C.AZURE_API_TYPE elif "OPENAI_API_TYPE" in os.environ: openai.api_type = os.environ["OPENAI_API_TYPE"] if azure_config.get("azure_api_version"): openai.api_version = os.environ["OPENAI_API_VERSION"] = azure_config.get("azure_api_version") + elif os.getenv("AZURE_OPENAI_API"): + openai.api_version = os.environ["OPENAI_API_VERSION"] = C.AZURE_API_VERSION elif "OPENAI_API_VERSION" in os.environ: openai.api_version = os.environ["OPENAI_API_VERSION"] - if openai.api_type == C.AZURE_API_TYPE or (not openai.api_type and os.getenv("AZURE_OPENAI_API")): - openai.api_type = os.environ["OPENAI_API_TYPE"] = C.AZURE_API_TYPE + if os.getenv("AZURE_OPENAI_API"): openai.api_base = os.environ["OPENAI_API_BASE"] = os.getenv("AZURE_OPENAI_API") or azure_config.get( "azure_api_base" ) diff --git a/src/gpt_review/repositories/_repository.py b/src/gpt_review/repositories/_repository.py index 04e17693..f3b4341f 100644 --- a/src/gpt_review/repositories/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -33,33 +33,3 @@ def post_pr_summary(diff) -> None: Returns: str: The review of the PR. """ - - @staticmethod - @abstractmethod - def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: - """Review PR with Open AI, and post response as a comment. - - Args: - repository (str): The repo of the PR. - pull_request (str): The PR number. - access_token (str): The GitHub access token. 
- - Returns: - Dict[str, str]: The response. - """ - - @staticmethod - @abstractmethod - def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """Review PR with Open AI, and post response as a comment. - - Args: - question (str): The question to ask. - comment_id (int): The comment ID. - diff(str): The diff file. - link (str): The link to the PR. - access_token (str): The Azure DevOps access token. - - Returns: - Dict[str, str]: The response. - """ diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 421c272f..cc472ec1 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -617,67 +617,67 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: str: The diff of the PR. """ - @staticmethod - def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """Review Azure DevOps PR with Open AI, and post response as a comment. - Args: - link (str): The link to the PR. - access_token (str): The Azure DevOps access token. +def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review Azure DevOps PR with Open AI, and post response as a comment. - Returns: - Dict[str, str]: The response. - """ - # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) - with open(diff, "r", encoding="utf8") as file: - diff_contents = file.read() + Args: + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. - DevOpsClient.post_pr_summary(diff_contents, link, access_token) - return {"response": "Review posted as a comment."} + Returns: + Dict[str, str]: The response. 
+ """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() - @staticmethod - def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """Review Azure DevOps PR with Open AI, and post response as a comment. + DevOpsClient.post_pr_summary(diff_contents, link, access_token) + return {"response": "Review posted as a comment."} - Args: - question (str): The question to ask. - comment_id (int): The comment ID. - diff(str): The diff file. - link (str): The link to the PR. - access_token (str): The Azure DevOps access token. - Returns: - Dict[str, str]: The response. - """ - # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) +def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """Review Azure DevOps PR with Open AI, and post response as a comment. - if os.path.exists(diff): - with open(diff, "r", encoding="utf8") as file: - diff_contents = file.read() - question = f"{diff_contents}\n{question}" + Args: + question (str): The question to ask. + comment_id (int): The comment ID. + diff(str): The diff file. + link (str): The link to the PR. + access_token (str): The Azure DevOps access token. - link = os.getenv("LINK", link) - access_token = os.getenv("ADO_TOKEN", access_token) + Returns: + Dict[str, str]: The response. 
+ """ + # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) - if link and access_token: - response = _ask( - question=question, - ) - if "dev.azure.com" in link: - org = link.split("/")[3] - project = link.split("/")[4] - repo = link.split("/")[6] - pr_id = link.split("/")[8] - else: - org = link.split("/")[2].split(".")[0] - project = link.split("/")[3] - repo = link.split("/")[5] - pr_id = link.split("/")[7] + if os.path.exists(diff): + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() + question = f"{diff_contents}\n{question}" + + link = os.getenv("LINK", link) + access_token = os.getenv("ADO_TOKEN", access_token) - DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( - pull_request_id=pr_id, comment_id=comment_id, text=response["response"] + if link and access_token: + response = _ask( + question=question, ) - return {"response": "Review posted as a comment.", "text": response["response"]} + if "dev.azure.com" in link: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( + pull_request_id=pr_id, comment_id=comment_id, text=response["response"] + ) + return {"response": "Review posted as a comment.", "text": response["response"]} class DevOpsCommandGroup(GPTCommandGroup): @@ -685,7 +685,7 @@ class DevOpsCommandGroup(GPTCommandGroup): @staticmethod def load_command_table(loader: CLICommandsLoader) -> None: - with CommandGroup(loader, "ado", "gpt_review.repositories.devops.DevOpsClient#{}", is_preview=True) as group: + with CommandGroup(loader, "ado", "gpt_review.repositories.devops#{}", is_preview=True) as group: group.command("review", "_review", is_preview=True) 
group.command("comment", "_comment", is_preview=True) diff --git a/src/gpt_review/repositories/github.py b/src/gpt_review/repositories/github.py index 2de9d024..6759afdb 100644 --- a/src/gpt_review/repositories/github.py +++ b/src/gpt_review/repositories/github.py @@ -135,34 +135,32 @@ def post_pr_summary(diff) -> Dict[str, str]: logging.warning("No PR to post too") return {"response": "No PR to post too"} - @staticmethod - def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: - """Review GitHub PR with Open AI, and post response as a comment. - Args: - repository (str): The repo of the PR. - pull_request (str): The PR number. - access_token (str): The GitHub access token. +def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, str]: + """Review GitHub PR with Open AI, and post response as a comment. - Returns: - Dict[str, str]: The response. - """ - diff = GitHubClient.get_pr_diff(repository, pull_request, access_token) - GitHubClient.post_pr_summary(diff) - return {"response": "Review posted as a comment."} + Args: + repository (str): The repo of the PR. + pull_request (str): The PR number. + access_token (str): The GitHub access token. - @staticmethod - def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: - """""" - raise NotImplementedError + Returns: + Dict[str, str]: The response. 
+ """ + diff = GitHubClient.get_pr_diff(repository, pull_request, access_token) + GitHubClient.post_pr_summary(diff) + return {"response": "Review posted as a comment."} +def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: + """""" + raise NotImplementedError class GitHubCommandGroup(GPTCommandGroup): """Ask Command Group.""" @staticmethod def load_command_table(loader: CLICommandsLoader) -> None: - with CommandGroup(loader, "github", "gpt_review.repositories.github.GitHubClient#{}", is_preview=True) as group: + with CommandGroup(loader, "github", "gpt_review.repositories.github#{}", is_preview=True) as group: group.command("review", "_review", is_preview=True) @staticmethod diff --git a/tests/test_devops.py b/tests/test_devops.py index 5ad943ce..34772975 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -277,7 +277,7 @@ def process_payload_test() -> None: DevOpsClient._comment(question, comment_id=COMMENT_ID, link=link) -def test_process_payload(mock_ado_client: None) -> None: +def test_process_payload(mock_openai, mock_ado_client: None) -> None: process_payload_test() diff --git a/tests/test_gpt_cli.py b/tests/test_gpt_cli.py index f41ad92a..93993e71 100644 --- a/tests/test_gpt_cli.py +++ b/tests/test_gpt_cli.py @@ -94,6 +94,11 @@ class CLICase2(CLICase): CLICase("github review"), ] +DEVOPS_COMMANDS = [ + CLICase("ado review --help"), + CLICase("ado review"), +] + GIT_COMMANDS = [ CLICase("git commit --help"), # CLICase("git commit"), @@ -109,7 +114,7 @@ class CLICase2(CLICase): CLICase("review diff --diff tests/mock.diff --config tests/config.summary.extra.yml"), ] -ARGS = ROOT_COMMANDS + ASK_COMMANDS + GIT_COMMANDS + GITHUB_COMMANDS + REVIEW_COMMANDS +ARGS = ROOT_COMMANDS + ASK_COMMANDS + GIT_COMMANDS + GITHUB_COMMANDS + DEVOPS_COMMANDS + REVIEW_COMMANDS ARGS_DICT = {arg.command: arg for arg in ARGS} MODULE_COMMANDS = [ From 3597b61dad94314624cb6239c800244db77b3f1a Mon Sep 17 00:00:00 
2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 00:18:20 +0000 Subject: [PATCH 08/72] Refactor imports and reformat code for better readability --- src/gpt_review/_gpt_cli.py | 2 +- src/gpt_review/_llama_index.py | 15 ++++--------- src/gpt_review/_openai.py | 2 +- src/gpt_review/_review.py | 3 ++- src/gpt_review/repositories/devops.py | 31 +++++++++++---------------- tests/conftest.py | 17 ++++++++------- tests/test_devops.py | 12 +++++------ tests/test_gpt_cli.py | 2 +- 8 files changed, 36 insertions(+), 48 deletions(-) diff --git a/src/gpt_review/_gpt_cli.py b/src/gpt_review/_gpt_cli.py index 17fc9dff..12bdabba 100644 --- a/src/gpt_review/_gpt_cli.py +++ b/src/gpt_review/_gpt_cli.py @@ -8,9 +8,9 @@ from gpt_review import __version__ from gpt_review._ask import AskCommandGroup from gpt_review._git import GitCommandGroup +from gpt_review._review import ReviewCommandGroup from gpt_review.repositories.devops import DevOpsCommandGroup from gpt_review.repositories.github import GitHubCommandGroup -from gpt_review._review import ReviewCommandGroup CLI_NAME = "gpt" diff --git a/src/gpt_review/_llama_index.py b/src/gpt_review/_llama_index.py index 80321206..2a5e306f 100644 --- a/src/gpt_review/_llama_index.py +++ b/src/gpt_review/_llama_index.py @@ -7,17 +7,10 @@ from langchain.chat_models import AzureChatOpenAI, ChatOpenAI from langchain.embeddings import OpenAIEmbeddings from langchain.llms import AzureOpenAI -from llama_index import ( - Document, - GithubRepositoryReader, - GPTVectorStoreIndex, - LangchainEmbedding, - LLMPredictor, - ServiceContext, - SimpleDirectoryReader, - StorageContext, - load_index_from_storage, -) +from llama_index import (Document, GithubRepositoryReader, GPTVectorStoreIndex, + LangchainEmbedding, LLMPredictor, ServiceContext, + SimpleDirectoryReader, StorageContext, + load_index_from_storage) from llama_index.indices.base import BaseGPTIndex from llama_index.storage.storage_context import DEFAULT_PERSIST_DIR from typing_extensions import 
override diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index 66512f4c..c62d30ba 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -5,8 +5,8 @@ from openai.error import RateLimitError import gpt_review.constants as C -from gpt_review.utils import _retry_with_exponential_backoff from gpt_review.context import _load_azure_openai_context +from gpt_review.utils import _retry_with_exponential_backoff def _count_tokens(prompt) -> int: diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index ec723d82..d0d8097f 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -10,7 +10,8 @@ from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup -from gpt_review.prompts._prompt import load_bug_yaml, load_coverage_yaml, load_summary_yaml +from gpt_review.prompts._prompt import (load_bug_yaml, load_coverage_yaml, + load_summary_yaml) _CHECKS = { "SUMMARY_CHECKS": [ diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index cc472ec1..e42ff3f2 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -1,36 +1,29 @@ """Azure DevOps Package Wrappers to Simplify Usage.""" import abc import itertools -import logging import json +import logging import os -from typing import Dict, Iterator, List, Optional, Iterable - -from msrest.authentication import BasicAuthentication - -from knack.arguments import ArgumentsContext -from knack import CLICommandsLoader -from knack.commands import CommandGroup +from typing import Dict, Iterable, Iterator, List, Optional from azure.devops.connection import Connection -from azure.devops.v7_1.git.models import ( - Comment, - GitCommitDiffs, - GitBaseVersionDescriptor, - GitTargetVersionDescriptor, - GitBlobRef, - GitVersionDescriptor, - GitPullRequest, - GitPullRequestCommentThread, -) from azure.devops.v7_1.git.git_client import GitClient +from azure.devops.v7_1.git.models import 
(Comment, GitBaseVersionDescriptor, + GitBlobRef, GitCommitDiffs, + GitPullRequest, + GitPullRequestCommentThread, + GitTargetVersionDescriptor, + GitVersionDescriptor) +from knack import CLICommandsLoader +from knack.arguments import ArgumentsContext +from knack.commands import CommandGroup +from msrest.authentication import BasicAuthentication from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup from gpt_review._review import _summarize_files from gpt_review.repositories._repository import _RepositoryClient - MIN_CONTEXT_LINES = 5 SURROUNDING_CONTEXT = 5 diff --git a/tests/conftest.py b/tests/conftest.py index aaaf6634..957ef994 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ def pytest_collection_modifyitems(items): for item in items: - if "_int_" in item.nodeid: + if "_int_" in item.nodeid or "integration" in item.nodeid: item.add_marker(pytest.mark.integration) elif "_cli_" in item.nodeid: item.add_marker(pytest.mark.cli) @@ -50,13 +50,14 @@ def as_query_engine(self): return self def mock_create( - model, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, + model=None, + deployment_id=None, + messages=[], + temperature=0, + max_tokens=500, + top_p=1, + frequency_penalty=0, + presence_penalty=0, ) -> MockResponse: return MockResponse() diff --git a/tests/test_devops.py b/tests/test_devops.py index 34772975..8d38ef35 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -1,18 +1,18 @@ import os +from dataclasses import dataclass + import pytest import requests_mock - -from dataclasses import dataclass from azure.devops.v7_1.git.models import ( + Comment, GitBaseVersionDescriptor, - GitTargetVersionDescriptor, GitCommitDiffs, GitPullRequest, - Comment, GitPullRequestCommentThread, + GitTargetVersionDescriptor, ) -from gpt_review.repositories.devops import DevOpsClient +from gpt_review.repositories.devops import DevOpsClient, _comment # Azure Devops PAT requires # 
- Code: 'Read','Write' @@ -274,7 +274,7 @@ def test_get_diff_integration(devops_client: DevOpsClient) -> None: def process_payload_test() -> None: question = DevOpsClient.process_comment_payload(SAMPLE_PAYLOAD) link = "https://msazure.visualstudio.com/One/_git/Azure-Gaming/pullrequest/8063875" - DevOpsClient._comment(question, comment_id=COMMENT_ID, link=link) + _comment(question, comment_id=COMMENT_ID, link=link) def test_process_payload(mock_openai, mock_ado_client: None) -> None: diff --git a/tests/test_gpt_cli.py b/tests/test_gpt_cli.py index 93993e71..7418a08d 100644 --- a/tests/test_gpt_cli.py +++ b/tests/test_gpt_cli.py @@ -96,7 +96,7 @@ class CLICase2(CLICase): DEVOPS_COMMANDS = [ CLICase("ado review --help"), - CLICase("ado review"), + CLICase("ado review --diff tests/mock.diff"), ] GIT_COMMANDS = [ From a3083b8801a3c93341dcec2c8fa7b40650de3565 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 00:21:53 +0000 Subject: [PATCH 09/72] Refactor imports and improve code formatting --- src/gpt_review/_llama_index.py | 15 +++++++++++---- src/gpt_review/_review.py | 3 +-- src/gpt_review/repositories/devops.py | 16 ++++++++++------ src/gpt_review/repositories/github.py | 2 ++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/gpt_review/_llama_index.py b/src/gpt_review/_llama_index.py index 2a5e306f..80321206 100644 --- a/src/gpt_review/_llama_index.py +++ b/src/gpt_review/_llama_index.py @@ -7,10 +7,17 @@ from langchain.chat_models import AzureChatOpenAI, ChatOpenAI from langchain.embeddings import OpenAIEmbeddings from langchain.llms import AzureOpenAI -from llama_index import (Document, GithubRepositoryReader, GPTVectorStoreIndex, - LangchainEmbedding, LLMPredictor, ServiceContext, - SimpleDirectoryReader, StorageContext, - load_index_from_storage) +from llama_index import ( + Document, + GithubRepositoryReader, + GPTVectorStoreIndex, + LangchainEmbedding, + LLMPredictor, + ServiceContext, + SimpleDirectoryReader, + 
StorageContext, + load_index_from_storage, +) from llama_index.indices.base import BaseGPTIndex from llama_index.storage.storage_context import DEFAULT_PERSIST_DIR from typing_extensions import override diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index d0d8097f..ec723d82 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -10,8 +10,7 @@ from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup -from gpt_review.prompts._prompt import (load_bug_yaml, load_coverage_yaml, - load_summary_yaml) +from gpt_review.prompts._prompt import load_bug_yaml, load_coverage_yaml, load_summary_yaml _CHECKS = { "SUMMARY_CHECKS": [ diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index e42ff3f2..3c80ef6c 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -8,12 +8,16 @@ from azure.devops.connection import Connection from azure.devops.v7_1.git.git_client import GitClient -from azure.devops.v7_1.git.models import (Comment, GitBaseVersionDescriptor, - GitBlobRef, GitCommitDiffs, - GitPullRequest, - GitPullRequestCommentThread, - GitTargetVersionDescriptor, - GitVersionDescriptor) +from azure.devops.v7_1.git.models import ( + Comment, + GitBaseVersionDescriptor, + GitBlobRef, + GitCommitDiffs, + GitPullRequest, + GitPullRequestCommentThread, + GitTargetVersionDescriptor, + GitVersionDescriptor, +) from knack import CLICommandsLoader from knack.arguments import ArgumentsContext from knack.commands import CommandGroup diff --git a/src/gpt_review/repositories/github.py b/src/gpt_review/repositories/github.py index 6759afdb..7efd7dd4 100644 --- a/src/gpt_review/repositories/github.py +++ b/src/gpt_review/repositories/github.py @@ -151,10 +151,12 @@ def _review(repository=None, pull_request=None, access_token=None) -> Dict[str, GitHubClient.post_pr_summary(diff) return {"response": "Review posted as a comment."} + def _comment(question: str, 
comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: """""" raise NotImplementedError + class GitHubCommandGroup(GPTCommandGroup): """Ask Command Group.""" From 45b773d6344d0fde370e9e278c4b4d22ba51fbd3 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 00:58:23 +0000 Subject: [PATCH 10/72] Refactor DevOpsClient and add DevOpsFunction for Azure Function handling --- azure/api/incoming_msg_handler/__init__.py | 6 +- src/gpt_review/_openai.py | 32 ++- src/gpt_review/repositories/devops.py | 276 ++++++++++----------- tests/conftest.py | 2 +- 4 files changed, 159 insertions(+), 157 deletions(-) diff --git a/azure/api/incoming_msg_handler/__init__.py b/azure/api/incoming_msg_handler/__init__.py index cb2786bb..3e4b5387 100644 --- a/azure/api/incoming_msg_handler/__init__.py +++ b/azure/api/incoming_msg_handler/__init__.py @@ -1,11 +1,11 @@ """Azure DevOps API incoming message handler.""" import os -from gpt_review.repositories.devops import _DevOpsClient +from gpt_review.repositories.devops import DevOpsFunction import azure.functions as func -CLIENT = _DevOpsClient( +HANDLER = DevOpsFunction( pat=os.environ["ADO_TOKEN"], org=os.environ["ADO_ORG"], project=os.environ["ADO_PROJECT"], @@ -15,4 +15,4 @@ def main(msg: func.ServiceBusMessage) -> None: """Handle an incoming message.""" - CLIENT.handle(msg) + HANDLER.handle(msg) diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index c62d30ba..dd59f832 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -1,4 +1,5 @@ """Open AI API Call Wrapper.""" +import os import logging import openai @@ -81,18 +82,29 @@ def _call_gpt( messages = messages or [{"role": "user", "content": prompt}] try: model = _get_model(prompt, max_tokens=max_tokens, fast=fast, large=large) - logging.info(f"Model Selected based on prompt size: {model}") + logging.info("Model Selected based on prompt size: %s", model) logging.info("Prompt sent to GPT: %s\n", prompt) - 
completion = openai.ChatCompletion.create( - deployment_id=model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) + if os.environ["OPENAI_API_TYPE"] == C.AZURE_API_TYPE: + completion = openai.ChatCompletion.create( + deployment_id=model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + else: + completion = openai.ChatCompletion.create( + model=model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) return completion.choices[0].message.content # type: ignore except RateLimitError as error: if retry < C.MAX_RETRIES: diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 3c80ef6c..9ba8e765 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -237,148 +237,11 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str Returns: List[str]: The diff of the pull request. """ - context = ContextProvider(self) thread = self._get_comment_thread(pull_request_id=pull_request_id, thread_id=comment_id) - return context.get_patch(thread_context=thread.thread_context, pull_request_event=pull_request_event) + return self._get_patch(thread_context=thread.thread_context, pull_request_event=pull_request_event) - def handle(self, msg) -> None: - """ - The main function for the Azure Function. - - Args: - msg (func.QueueMessage): The Service Bus message. 
- """ - body = msg.get_body().decode("utf-8") - logging.info("Python ServiceBus queue trigger processed message: %s", body) - if "copilot:summary" in body: - self._process_summary(body) - elif "copilot:" in body: - self._process_comment(body) - - def _process_comment(self, body) -> None: - """ - Process a comment from Copilot. - - Args: - body (str): The Service Bus payload. - """ - logging.info("Copilot Comment Alert Triggered") - payload = json.loads(body) - - pr_id = self._get_pr_id(payload) - - comment_id = self._get_comment_id(payload) - - diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - diff = "\n".join(diff) - - question = f""" - {diff} - - {_DevOpsClient.process_comment_payload(body)} - """ - - logging.info("Copilot diff: %s", diff) - response = _ask( - question=question, - max_tokens=500, - ) - self.create_comment(pull_request_id=pr_id, comment_id=comment_id, text=response["response"]) - - def _get_comment_id(self, payload) -> int: - """ - Get the comment ID from the payload. - - Args: - payload (dict): The payload from the Service Bus. - - Returns: - int: The comment ID. - """ - comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] - logging.info("Copilot Commet ID: %s", comment_id) - return comment_id - - def _process_summary(self, body) -> None: - """ - Process a summary from Copilot. - - Args: - body (str): The Service Bus payload. 
- """ - logging.info("Copilot Summary Alert Triggered") - payload = json.loads(body) - - pr_id = self._get_pr_id(payload) - - link = self._get_link(pr_id) - - if "comment" in payload["resource"]: - self._post_summary(payload, pr_id, link) - else: - logging.info("Copilot Update from Updated PR") - - def _get_link(self, pr_id) -> str: - link = f"https://{self.org}.visualstudio.com/{self.project}/_git/{self.repository_id}/pullrequest/{pr_id}" - logging.info("Copilot Link: %s", link) - return link - - def _get_pr_id(self, payload) -> int: - """ - Get the pull request ID from the Service Bus payload. - - Args: - payload (dict): The Service Bus payload. - - Returns: - int: The pull request ID. - """ - if "pullRequestId" in payload: - pr_id = payload["resource"]["pullRequestId"] - else: - pr_id = payload["resource"]["pullRequest"]["pullRequestId"] - logging.info("Copilot PR ID: %s", pr_id) - return pr_id - - def _post_summary(self, payload, pr_id, link) -> None: - """ - Process a summary from Copilot. - - Args: - payload (dict): The Service Bus payload. - pr_id (str): The Azure DevOps pull request ID. - link (str): The link to the PR. - """ - comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] - logging.info("Copilot Commet ID: %s", comment_id) - - os.putenv("RISK_SUMMARY", "false") - os.putenv("FILE_SUMMARY_FULL", "false") - os.putenv("TEST_SUMMARY", "false") - os.putenv("BUG_SUMMARY", "false") - os.putenv("SUMMARY_SUGGEST", "false") - - diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - diff = "\n".join(diff) - logging.info("Copilot diff: %s", diff) - - self.post_pr_summary(diff, link=link) - - -class ContextProvider: - """Provides context for a given line in a file.""" - - def __init__(self, devops_client: _DevOpsClient) -> None: - """ - Initialize a new instance of ContextProvider. - - Args: - devops_client (_DevOpsClient): The DevOps client. 
- """ - self.devops_client = devops_client - - def get_patch(self, thread_context, pull_request_event) -> List[str]: + def _get_patch(self, thread_context, pull_request_event) -> List[str]: """ Get the patch for a given thread context. @@ -393,8 +256,8 @@ def get_patch(self, thread_context, pull_request_event) -> List[str]: if not pull_request: raise ValueError("pull_request_event.pullRequest is required") - original_content_task = self.devops_client.read_all_text(path=thread_context.file_path, check_if_exists=True) - changed_content_task = self.devops_client.read_all_text( + original_content_task = self.read_all_text(path=thread_context.file_path, check_if_exists=True) + changed_content_task = self.read_all_text( path=thread_context.file_path, commit_id=pull_request["lastMergeSourceCommit"]["commitId"], check_if_exists=True, @@ -447,7 +310,7 @@ async def get_patches(self, pull_request_event, condensed=False) -> Iterable[Lis if not pull_request_id: raise ValueError("pull_request_event.pullRequest is required") - git_changes = await self.devops_client.get_changed_blobs_async(pull_request_event["pullRequest"]) + git_changes = await self.client.get_changed_blobs_async(pull_request_event["pullRequest"]) all_patches = [] for git_change in git_changes: @@ -476,7 +339,7 @@ def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> return lines[line_start - 1 : line_end] async def _get_change_async(self, git_change, source_commit_head, condensed=False) -> List[str]: - return await self._get_git_change_async(self.devops_client, git_change.item.path, source_commit_head, condensed) + return await self._get_git_change_async(self.client, git_change.item.path, source_commit_head, condensed) async def _get_git_change_async(self, git_client, file_path, source_commit_head, condensed=False) -> List[str]: original_content = git_client.read_all_text_async(file_path, check_if_exists=True) @@ -615,6 +478,133 @@ def get_pr_diff(patch_repo=None, patch_pr=None, 
access_token=None) -> str: """ +class DevOpsFunction(DevOpsClient): + """Azure Function for process Service Messages from Azure DevOps.""" + + def handle(self, msg) -> None: + """ + The main function for the Azure Function. + + Args: + msg (func.QueueMessage): The Service Bus message. + """ + body = msg.get_body().decode("utf-8") + logging.info("Python ServiceBus queue trigger processed message: %s", body) + if "copilot:summary" in body: + self._process_summary(body) + elif "copilot:" in body: + self._process_comment(body) + + def _process_comment(self, body) -> None: + """ + Process a comment from Copilot. + + Args: + body (str): The Service Bus payload. + """ + logging.info("Copilot Comment Alert Triggered") + payload = json.loads(body) + + pr_id = self._get_pr_id(payload) + + comment_id = self._get_comment_id(payload) + + diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + diff = "\n".join(diff) + + question = f""" + {diff} + + {_DevOpsClient.process_comment_payload(body)} + """ + + logging.info("Copilot diff: %s", diff) + response = _ask( + question=question, + max_tokens=500, + ) + self.create_comment(pull_request_id=pr_id, comment_id=comment_id, text=response["response"]) + + def _get_comment_id(self, payload) -> int: + """ + Get the comment ID from the payload. + + Args: + payload (dict): The payload from the Service Bus. + + Returns: + int: The comment ID. + """ + comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] + logging.info("Copilot Commet ID: %s", comment_id) + return comment_id + + def _process_summary(self, body) -> None: + """ + Process a summary from Copilot. + + Args: + body (str): The Service Bus payload. 
+ """ + logging.info("Copilot Summary Alert Triggered") + payload = json.loads(body) + + pr_id = self._get_pr_id(payload) + + link = self._get_link(pr_id) + + if "comment" in payload["resource"]: + self._post_summary(payload, pr_id, link) + else: + logging.info("Copilot Update from Updated PR") + + def _get_link(self, pr_id) -> str: + link = f"https://{self.org}.visualstudio.com/{self.project}/_git/{self.repository_id}/pullrequest/{pr_id}" + logging.info("Copilot Link: %s", link) + return link + + def _get_pr_id(self, payload) -> int: + """ + Get the pull request ID from the Service Bus payload. + + Args: + payload (dict): The Service Bus payload. + + Returns: + int: The pull request ID. + """ + if "pullRequestId" in payload: + pr_id = payload["resource"]["pullRequestId"] + else: + pr_id = payload["resource"]["pullRequest"]["pullRequestId"] + logging.info("Copilot PR ID: %s", pr_id) + return pr_id + + def _post_summary(self, payload, pr_id, link) -> None: + """ + Process a summary from Copilot. + + Args: + payload (dict): The Service Bus payload. + pr_id (str): The Azure DevOps pull request ID. + link (str): The link to the PR. + """ + comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] + logging.info("Copilot Commet ID: %s", comment_id) + + os.putenv("RISK_SUMMARY", "false") + os.putenv("FILE_SUMMARY_FULL", "false") + os.putenv("TEST_SUMMARY", "false") + os.putenv("BUG_SUMMARY", "false") + os.putenv("SUMMARY_SUGGEST", "false") + + diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + diff = "\n".join(diff) + logging.info("Copilot diff: %s", diff) + + self.post_pr_summary(diff, link=link) + + def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: """Review Azure DevOps PR with Open AI, and post response as a comment. 
diff --git a/tests/conftest.py b/tests/conftest.py index 957ef994..e18f73ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -52,7 +52,7 @@ def as_query_engine(self): def mock_create( model=None, deployment_id=None, - messages=[], + messages=None, temperature=0, max_tokens=500, top_p=1, From 3edee57ec072899706ec6d51d98a204ad9459514 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 03:16:35 +0000 Subject: [PATCH 11/72] Fix question prompt and improve handling of changed blobs in DevOpsClient --- src/gpt_review/_ask.py | 2 +- src/gpt_review/repositories/devops.py | 95 ++++++++--------- tests/test_devops.py | 140 ++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 46 deletions(-) diff --git a/src/gpt_review/_ask.py b/src/gpt_review/_ask.py index 3753a801..9461de0a 100644 --- a/src/gpt_review/_ask.py +++ b/src/gpt_review/_ask.py @@ -101,7 +101,7 @@ def _ask( """ _load_azure_openai_context() - prompt = " ".join(question) + prompt = "".join(question) if files or directory or repository: response = _query_index( diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 9ba8e765..900e0a0d 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -7,6 +7,7 @@ from typing import Dict, Iterable, Iterator, List, Optional from azure.devops.connection import Connection +from azure.devops.exceptions import AzureDevOpsServiceError from azure.devops.v7_1.git.git_client import GitClient from azure.devops.v7_1.git.models import ( Comment, @@ -31,6 +32,8 @@ MIN_CONTEXT_LINES = 5 SURROUNDING_CONTEXT = 5 +MAX_CHANGES_AT_A_TIME = 100 # Replace with your value + class _DevOpsClient(_RepositoryClient, abc.ABC): """Azure DevOps API Client Wrapper.""" @@ -99,33 +102,29 @@ def _get_comment_thread(self, pull_request_id: str, thread_id: str) -> GitPullRe repository_id=self.repository_id, pull_request_id=pull_request_id, thread_id=thread_id, project=self.project ) - def 
_get_changed_blobs( - self, - sha1: str, - download: bool = None, - file_name: str = None, - resolve_lfs: bool = None, - ) -> GitBlobRef: - """ - Get the changed blobs in a commit. - - Args: - sha1 (str): The SHA1 of the commit. - download (bool): Whether to download the blob. - file_name (str): The name of the file. - resolve_lfs (bool): Whether to resolve LFS. + def get_changed_blobs(self, pull_request: GitPullRequest, cancellation_token=None): + changed_paths = [] + commit_diff_within_pr = None + + skip = 0 + while True: + commit_diff_within_pr = self._get_commit_diff( + diff_common_commit=False, + base_version=GitBaseVersionDescriptor( + base_version=pull_request["lastMergeSourceCommit"]["commitId"], base_version_type="commit" + ), + target_version=GitTargetVersionDescriptor( + target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" + ), + ) + changed_paths.extend( + [change for change in commit_diff_within_pr.changes if "isFolder" not in change["item"]] + ) + skip += len(commit_diff_within_pr.changes) + if commit_diff_within_pr.all_changes_included: + break - Returns: - GitBlobRef: The response from the API. - """ - return self.client.get_blob( - repository_id=self.repository_id, - project=self.project, - sha1=sha1, - download=download, - file_name=file_name, - resolve_lfs=resolve_lfs, - ) + return changed_paths def update_pr(self, pull_request_id, title=None, description=None) -> GitPullRequest: """ @@ -295,7 +294,7 @@ def _calculate_selection(self, thread_context, original_content, changed_content return left_selection, right_selection raise ValueError("Both left and right selection cannot be None") - async def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: + def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: """ Get the patches for a given pull request event. 
@@ -310,17 +309,15 @@ async def get_patches(self, pull_request_event, condensed=False) -> Iterable[Lis if not pull_request_id: raise ValueError("pull_request_event.pullRequest is required") - git_changes = await self.client.get_changed_blobs_async(pull_request_event["pullRequest"]) - all_patches = [] - - for git_change in git_changes: - all_patches.append( - await self._get_change_async( - git_change, pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], condensed - ) + git_changes = self.get_changed_blobs(pull_request_event["pullRequest"]) + return [ + self._get_change( + git_change, + pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], + condensed, ) - - return all_patches + for git_change in git_changes + ] def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: lines = file_contents.splitlines() @@ -338,13 +335,17 @@ def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> return lines[line_start - 1 : line_end] - async def _get_change_async(self, git_change, source_commit_head, condensed=False) -> List[str]: - return await self._get_git_change_async(self.client, git_change.item.path, source_commit_head, condensed) + def _get_change(self, git_change, source_commit_head, condensed=False) -> List[str]: + return self._get_git_change(git_change["item"]["path"], source_commit_head, condensed) - async def _get_git_change_async(self, git_client, file_path, source_commit_head, condensed=False) -> List[str]: - original_content = git_client.read_all_text_async(file_path, check_if_exists=True) - changed_content = git_client.read_all_text_async(file_path, commit_id=source_commit_head, check_if_exists=True) - return self._create_patch(await original_content, await changed_content, file_path, condensed) + def _get_git_change(self, file_path, source_commit_head, condensed=False) -> List[str]: + try: + original_content = self.read_all_text(file_path, check_if_exists=True) + except 
AzureDevOpsServiceError: + # File Not Found + original_content = "" + changed_content = self.read_all_text(file_path, commit_id=source_commit_head, check_if_exists=True) + return self._create_patch(original_content, changed_content, file_path, condensed) def _create_patch( self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False @@ -509,7 +510,12 @@ def _process_comment(self, body) -> None: comment_id = self._get_comment_id(payload) - diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + try: + diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) + except: + diff = self.get_patches(pull_request_event=payload["resource"]) + + logging.info("Copilot diff: %s", diff) diff = "\n".join(diff) question = f""" @@ -518,7 +524,6 @@ def _process_comment(self, body) -> None: {_DevOpsClient.process_comment_payload(body)} """ - logging.info("Copilot diff: %s", diff) response = _ask( question=question, max_tokens=500, diff --git a/tests/test_devops.py b/tests/test_devops.py index 8d38ef35..e4cbe023 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -165,6 +165,132 @@ "createdDate": "2023-05-13T00:31:02.6421816Z", } +PR_COMMENT_PAYLOAD = { + "id": "851991af-ce4b-4463-83d4-eb4733559f14", + "eventType": "ms.vss-code.git-pullrequest-comment-event", + "publisherId": "tfs", + "message": {"text": "Daniel Ciborowski has replied to a pull request comment"}, + "detailedMessage": {"text": "Daniel Ciborowski has replied to a pull request comment\r\ncopilot: test\r\n"}, + "resource": { + "comment": { + "id": 5, + "parentCommentId": 1, + "author": { + "displayName": "Daniel Ciborowski", + "url": "https://spsprodwus23.vssps.visualstudio.com/A41b4f3ee-c651-4a14-9847-b7cbb5315b80/_apis/Identities/0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "_links": { + "avatar": { + "href": 
"https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4" + } + }, + "id": "0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "uniqueName": "dciborow@microsoft.com", + "imageUrl": "https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + "descriptor": "aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + }, + "content": "copilot: test", + "publishedDate": "2023-05-16T01:22:28.67Z", + "lastUpdatedDate": "2023-05-16T01:22:28.67Z", + "lastContentUpdatedDate": "2023-05-16T01:22:28.67Z", + "commentType": "text", + "usersLiked": [], + "_links": { + "self": { + "href": "https://msazure.visualstudio.com/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8111242/threads/141607999/comments/5" + }, + "repository": { + "href": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3" + }, + "threads": { + "href": "https://msazure.visualstudio.com/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8111242/threads/141607999" + }, + "pullRequests": {"href": "https://msazure.visualstudio.com/_apis/git/pullRequests/8111242"}, + }, + }, + "pullRequest": { + "repository": { + "id": "612d9367-8ab6-4929-abe6-b5b5ad7b5ad3", + "name": "Azure-Gaming", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3", + "project": { + "id": "b32aa71e-8ed2-41b2-9d77-5bc261222004", + "name": "One", + "description": "MSAzure/One is the VSTS project containing all Azure team code bases and work items.\nPlease see https://aka.ms/azaccess for work item and source access policies.", + "url": "https://msazure.visualstudio.com/_apis/projects/b32aa71e-8ed2-41b2-9d77-5bc261222004", + "state": "wellFormed", + "revision": 307071, + "visibility": "organization", + "lastUpdateTime": 
"2023-05-15T17:47:30.807Z", + }, + "size": 508859977, + "remoteUrl": "https://msazure.visualstudio.com/DefaultCollection/One/_git/Azure-Gaming", + "sshUrl": "msazure@vs-ssh.visualstudio.com:v3/msazure/One/Azure-Gaming", + "webUrl": "https://msazure.visualstudio.com/DefaultCollection/One/_git/Azure-Gaming", + "isDisabled": False, + "isInMaintenance": False, + }, + "pullRequestId": 8111242, + "codeReviewId": 8886256, + "status": "active", + "createdBy": { + "displayName": "Daniel Ciborowski", + "url": "https://spsprodwus23.vssps.visualstudio.com/A41b4f3ee-c651-4a14-9847-b7cbb5315b80/_apis/Identities/0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "_links": { + "avatar": { + "href": "https://msazure.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4" + } + }, + "id": "0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "uniqueName": "dciborow@microsoft.com", + "imageUrl": "https://msazure.visualstudio.com/_api/_common/identityImage?id=0ef5b3af-3e01-48fd-9bd3-2f701c8fdebe", + "descriptor": "aad.OTgwYzcxNzEtMDI2Ni03YzVmLTk0YzEtMDNlYzU2YjViYjY4", + }, + "creationDate": "2023-05-15T03:32:53.2319611Z", + "title": "Added __init__.py", + "description": "Added __init__.py", + "sourceRefName": "refs/heads/dciborow/python-sample", + "targetRefName": "refs/heads/main", + "mergeStatus": "succeeded", + "isDraft": False, + "mergeId": "762c15e2-0877-45d3-bec1-4257f94438b1", + "lastMergeSourceCommit": { + "commitId": "b7017e51b312116557fa2769a4a8e5310c9d51f4", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/b7017e51b312116557fa2769a4a8e5310c9d51f4", + }, + "lastMergeTargetCommit": { + "commitId": "36f9a015ee220516f5f553faaa1898ab10972536", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/36f9a015ee220516f5f553faaa1898ab10972536", + }, + "lastMergeCommit": 
{ + "commitId": "84a8d5cc827b85271dda7f865c8516ddcc2ba941", + "author": { + "name": "Daniel Ciborowski", + "email": "dciborow@microsoft.com", + "date": "2023-05-15T03:54:44Z", + }, + "committer": { + "name": "Daniel Ciborowski", + "email": "dciborow@microsoft.com", + "date": "2023-05-15T03:54:44Z", + }, + "comment": "Merge pull request 8111242 from dciborow/python-sample into main", + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/commits/84a8d5cc827b85271dda7f865c8516ddcc2ba941", + }, + "reviewers": [], + "url": "https://msazure.visualstudio.com/b32aa71e-8ed2-41b2-9d77-5bc261222004/_apis/git/repositories/612d9367-8ab6-4929-abe6-b5b5ad7b5ad3/pullRequests/8111242", + "supportsIterations": True, + "artifactId": "vstfs:///Git/PullRequestId/b32aa71e-8ed2-41b2-9d77-5bc261222004%2f612d9367-8ab6-4929-abe6-b5b5ad7b5ad3%2f8111242", + }, + }, + "resourceVersion": "2.0", + "resourceContainers": { + "collection": {"id": "41bf5486-7392-4b7a-a7e3-a735c767e3b3", "baseUrl": "https://msazure.visualstudio.com/"}, + "account": {"id": "41b4f3ee-c651-4a14-9847-b7cbb5315b80", "baseUrl": "https://msazure.visualstudio.com/"}, + "project": {"id": "b32aa71e-8ed2-41b2-9d77-5bc261222004", "baseUrl": "https://msazure.visualstudio.com/"}, + }, + "createdDate": "2023-05-16T01:22:34.9492237Z", +} + @pytest.fixture def mock_req(): @@ -301,3 +427,17 @@ def test_get_patch(devops_client: DevOpsClient) -> None: @pytest.mark.integration def test_get_patch_integration(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) + + +def get_patch_pr_comment_test(devops_client: DevOpsClient) -> None: + patch = devops_client.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) + assert len("\n".join(patch)) == 9185 + + +def test_get_patch_pr_comment(devops_client: DevOpsClient) -> None: + get_patch_pr_comment_test(devops_client) + + +@pytest.mark.integration +def 
test_get_patch_pr_comment_integration(devops_client: DevOpsClient) -> None: + get_patch_pr_comment_test(devops_client) From b013eae55d7dc1acfa1ded10c6d665d6f6ea53e2 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 03:39:57 +0000 Subject: [PATCH 12/72] Fix bug in _DevOpsClient method for creating patch list. --- src/gpt_review/repositories/devops.py | 2 +- tests/test_devops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 900e0a0d..93d79aed 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -351,7 +351,7 @@ def _create_patch( self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False ) -> List[str]: left = original_content.splitlines() if original_content else [] - right = changed_content if changed_content else [] + right = changed_content.splitlines() if changed_content else [] return self._create_patch_list(left, right, file_path, condensed) def _create_patch_list(self, left: List[str], right: List[str], file_path: str, condensed=False) -> List[str]: diff --git a/tests/test_devops.py b/tests/test_devops.py index e4cbe023..d02fff39 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -431,7 +431,7 @@ def test_get_patch_integration(devops_client: DevOpsClient) -> None: def get_patch_pr_comment_test(devops_client: DevOpsClient) -> None: patch = devops_client.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) - assert len("\n".join(patch)) == 9185 + assert len(patch) == 3 def test_get_patch_pr_comment(devops_client: DevOpsClient) -> None: From ffd90262ade22e5dc9ecaac0f324318e6a98b05c Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 03:45:19 +0000 Subject: [PATCH 13/72] Refactor _calculate_selection method in devops.py file. 
--- src/gpt_review/repositories/devops.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 93d79aed..30198a2f 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -268,11 +268,13 @@ def _get_patch(self, thread_context, pull_request_event) -> List[str]: left_selection, right_selection = self._calculate_selection(thread_context, original_content, changed_content) - return self._create_patch(left_selection or [], right_selection or [], thread_context.file_path) + return self._create_patch( + "\n".join(left_selection) or [], "\n".join(right_selection) or [], thread_context.file_path + ) def _calculate_selection(self, thread_context, original_content, changed_content): - left_selection = None - right_selection = None + left_selection = [] + right_selection = [] if original_content and thread_context.left_file_start and thread_context.left_file_end: left_selection = self._get_selection( original_content, thread_context.left_file_start.line, thread_context.left_file_end.line From 8ff8cd7e8ce2abe5e0de7480709d08254772a28a Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 03:45:43 +0000 Subject: [PATCH 14/72] Refactor _DevOpsClient to simplify code and improve readability. 
--- src/gpt_review/repositories/devops.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 30198a2f..7bbe7a55 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -255,16 +255,12 @@ def _get_patch(self, thread_context, pull_request_event) -> List[str]: if not pull_request: raise ValueError("pull_request_event.pullRequest is required") - original_content_task = self.read_all_text(path=thread_context.file_path, check_if_exists=True) - changed_content_task = self.read_all_text( + original_content = self.read_all_text(path=thread_context.file_path, check_if_exists=True) + changed_content = self.read_all_text( path=thread_context.file_path, commit_id=pull_request["lastMergeSourceCommit"]["commitId"], check_if_exists=True, ) - # original_content = await original_content_task - # changed_content = await changed_content_task - original_content = original_content_task - changed_content = changed_content_task left_selection, right_selection = self._calculate_selection(thread_context, original_content, changed_content) From 7792026c5faec54ab7a77e0cc500f870f9043250 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 03:55:29 +0000 Subject: [PATCH 15/72] Refactor DevOpsClient get_changed_blobs method and update test. 
--- src/gpt_review/repositories/devops.py | 6 ++++-- tests/test_devops.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 7bbe7a55..ba56e7a6 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -308,7 +308,7 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] raise ValueError("pull_request_event.pullRequest is required") git_changes = self.get_changed_blobs(pull_request_event["pullRequest"]) - return [ + changes = [ self._get_change( git_change, pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], @@ -317,6 +317,8 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] for git_change in git_changes ] + return changes + def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: lines = file_contents.splitlines() @@ -334,7 +336,7 @@ def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> return lines[line_start - 1 : line_end] def _get_change(self, git_change, source_commit_head, condensed=False) -> List[str]: - return self._get_git_change(git_change["item"]["path"], source_commit_head, condensed) + return "\n".join(self._get_git_change(git_change["item"]["path"], source_commit_head, condensed)) def _get_git_change(self, file_path, source_commit_head, condensed=False) -> List[str]: try: diff --git a/tests/test_devops.py b/tests/test_devops.py index d02fff39..f539f5a2 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -431,7 +431,8 @@ def test_get_patch_integration(devops_client: DevOpsClient) -> None: def get_patch_pr_comment_test(devops_client: DevOpsClient) -> None: patch = devops_client.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) - assert len(patch) == 3 + patch = "\n".join(patch) + assert len(patch) == 3348 def test_get_patch_pr_comment(devops_client: 
DevOpsClient) -> None: From 155dcc4d6dadd51720b206d8a52e439c7556b106 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 04:02:05 +0000 Subject: [PATCH 16/72] Refactor imports and remove unused variables. --- src/gpt_review/_openai.py | 2 +- src/gpt_review/repositories/devops.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index dd59f832..72853d8c 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -1,6 +1,6 @@ """Open AI API Call Wrapper.""" -import os import logging +import os import openai from openai.error import RateLimitError diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index ba56e7a6..2fc6cf22 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -12,7 +12,6 @@ from azure.devops.v7_1.git.models import ( Comment, GitBaseVersionDescriptor, - GitBlobRef, GitCommitDiffs, GitPullRequest, GitPullRequestCommentThread, @@ -32,8 +31,6 @@ MIN_CONTEXT_LINES = 5 SURROUNDING_CONTEXT = 5 -MAX_CHANGES_AT_A_TIME = 100 # Replace with your value - class _DevOpsClient(_RepositoryClient, abc.ABC): """Azure DevOps API Client Wrapper.""" @@ -102,7 +99,16 @@ def _get_comment_thread(self, pull_request_id: str, thread_id: str) -> GitPullRe repository_id=self.repository_id, pull_request_id=pull_request_id, thread_id=thread_id, project=self.project ) - def get_changed_blobs(self, pull_request: GitPullRequest, cancellation_token=None): + def get_changed_blobs(self, pull_request: GitPullRequest): + """ + Get the changed blobs in a pull request. + + Args: + pull_request (GitPullRequest): The pull request. + + Returns: + List[Dict[str, str]]: The changed blobs. 
+ """ changed_paths = [] commit_diff_within_pr = None @@ -308,7 +314,7 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] raise ValueError("pull_request_event.pullRequest is required") git_changes = self.get_changed_blobs(pull_request_event["pullRequest"]) - changes = [ + return [ self._get_change( git_change, pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], @@ -317,8 +323,6 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] for git_change in git_changes ] - return changes - def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: lines = file_contents.splitlines() From c865971a7b3a7e6fecd6eedb36c60571f645875c Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 04:16:56 +0000 Subject: [PATCH 17/72] Refactored DevOpsFunction class and added new test cases. --- src/gpt_review/repositories/devops.py | 2 +- tests/test_devops.py | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 2fc6cf22..63d7b713 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -516,7 +516,7 @@ def _process_comment(self, body) -> None: try: diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - except: + except TypeError: diff = self.get_patches(pull_request_event=payload["resource"]) logging.info("Copilot diff: %s", diff) diff --git a/tests/test_devops.py b/tests/test_devops.py index f539f5a2..49eb42b2 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -12,7 +12,7 @@ GitTargetVersionDescriptor, ) -from gpt_review.repositories.devops import DevOpsClient, _comment +from gpt_review.repositories.devops import DevOpsClient, DevOpsFunction, _comment # Azure Devops PAT requires # - Code: 'Read','Write' @@ -354,6 +354,11 @@ def 
devops_client() -> DevOpsClient: return DevOpsClient(TOKEN, ORG, PROJECT, REPO) +@pytest.fixture +def devops_function() -> DevOpsFunction: + return DevOpsFunction(TOKEN, ORG, PROJECT, REPO) + + def test_create_comment(devops_client: DevOpsClient, mock_ado_client: None) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") assert isinstance(response, Comment) @@ -420,7 +425,7 @@ def get_patch_test(devops_client: DevOpsClient) -> None: assert len(patch) == 64 -def test_get_patch(devops_client: DevOpsClient) -> None: +def test_get_patch(mock_openai, devops_client: DevOpsClient) -> None: get_patch_test(devops_client) @@ -429,16 +434,16 @@ def test_get_patch_integration(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) -def get_patch_pr_comment_test(devops_client: DevOpsClient) -> None: - patch = devops_client.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) +def get_patch_pr_comment_test(devops_function: DevOpsFunction) -> None: + patch = devops_function.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) patch = "\n".join(patch) assert len(patch) == 3348 -def test_get_patch_pr_comment(devops_client: DevOpsClient) -> None: - get_patch_pr_comment_test(devops_client) +def test_get_patch_pr_comment(mock_openai, devops_function: DevOpsFunction) -> None: + get_patch_pr_comment_test(devops_function) @pytest.mark.integration -def test_get_patch_pr_comment_integration(devops_client: DevOpsClient) -> None: - get_patch_pr_comment_test(devops_client) +def test_get_patch_pr_comment_integration(devops_function: DevOpsFunction) -> None: + get_patch_pr_comment_test(devops_function) From e3703507db46298167b2aebd1ed9f71b011a3793 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 04:23:37 +0000 Subject: [PATCH 18/72] Refactor test functions and add mock parameters. 
--- tests/test_devops.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_devops.py b/tests/test_devops.py index 49eb42b2..a9431da4 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -359,17 +359,17 @@ def devops_function() -> DevOpsFunction: return DevOpsFunction(TOKEN, ORG, PROJECT, REPO) -def test_create_comment(devops_client: DevOpsClient, mock_ado_client: None) -> None: +def test_create_comment(mock_ado_client: None, devops_client: DevOpsClient) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") assert isinstance(response, Comment) -def test_update_pr(devops_client: DevOpsClient, mock_ado_client: None) -> None: +def test_update_pr(mock_ado_client: None, devops_client: DevOpsClient) -> None: response = devops_client.update_pr(pull_request_id=PR_ID, title="title1", description="description1") assert isinstance(response, GitPullRequest) -def test_get_diff(devops_client: DevOpsClient, mock_ado_client: None) -> None: +def test_get_diff(mock_ado_client: None, devops_client: DevOpsClient) -> None: response = devops_client._get_commit_diff( diff_common_commit=True, base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), @@ -425,7 +425,7 @@ def get_patch_test(devops_client: DevOpsClient) -> None: assert len(patch) == 64 -def test_get_patch(mock_openai, devops_client: DevOpsClient) -> None: +def test_get_patch(mock_openai, mock_ado_client: None, devops_client: DevOpsClient) -> None: get_patch_test(devops_client) @@ -434,7 +434,7 @@ def test_get_patch_integration(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) -def get_patch_pr_comment_test(devops_function: DevOpsFunction) -> None: +def get_patch_pr_comment_test(mock_ado_client: None, devops_function: DevOpsFunction) -> None: patch = devops_function.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) patch = "\n".join(patch) assert len(patch) == 3348 From 
fc66292c486feae1f82ca353fc62bf3dfd5fc331 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 04:29:09 +0000 Subject: [PATCH 19/72] Fix exception handling in DevOpsFunction class. --- src/gpt_review/repositories/devops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 63d7b713..2af189f7 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -516,7 +516,7 @@ def _process_comment(self, body) -> None: try: diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - except TypeError: + except AttributeError: diff = self.get_patches(pull_request_event=payload["resource"]) logging.info("Copilot diff: %s", diff) From 67609781c22e9679ca828c4186bb3f4520a50d1d Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 04:47:42 +0000 Subject: [PATCH 20/72] Refactor DevOpsFunction get_patch method and add tests. 
--- src/gpt_review/repositories/devops.py | 2 +- tests/test_devops.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 2af189f7..2fc6cf22 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -516,7 +516,7 @@ def _process_comment(self, body) -> None: try: diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - except AttributeError: + except: diff = self.get_patches(pull_request_event=payload["resource"]) logging.info("Copilot diff: %s", diff) diff --git a/tests/test_devops.py b/tests/test_devops.py index a9431da4..57fe5230 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -340,7 +340,7 @@ def get_commit_diffs( base_version_descriptor=None, target_version_descriptor=None, ) -> GitCommitDiffs: - return GitCommitDiffs() + return GitCommitDiffs(changes=[], all_changes_included=True) def mock_client(self) -> MockDevOpsClient: return MockDevOpsClient() @@ -425,7 +425,7 @@ def get_patch_test(devops_client: DevOpsClient) -> None: assert len(patch) == 64 -def test_get_patch(mock_openai, mock_ado_client: None, devops_client: DevOpsClient) -> None: +def test_get_patch(mock_openai, devops_client: DevOpsClient) -> None: get_patch_test(devops_client) @@ -434,16 +434,16 @@ def test_get_patch_integration(devops_client: DevOpsClient) -> None: get_patch_test(devops_client) -def get_patch_pr_comment_test(mock_ado_client: None, devops_function: DevOpsFunction) -> None: +def get_patch_pr_comment_test(devops_function: DevOpsFunction, expected_len: int) -> None: patch = devops_function.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) patch = "\n".join(patch) - assert len(patch) == 3348 + assert len(patch) == expected_len -def test_get_patch_pr_comment(mock_openai, devops_function: DevOpsFunction) -> None: - 
get_patch_pr_comment_test(devops_function) +def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_function: DevOpsFunction) -> None: + get_patch_pr_comment_test(devops_function, 0) @pytest.mark.integration def test_get_patch_pr_comment_integration(devops_function: DevOpsFunction) -> None: - get_patch_pr_comment_test(devops_function) + get_patch_pr_comment_test(devops_function, 3348) From 895ac84dece02a8a1958381469ba9ebeb0bd9df9 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 05:17:26 +0000 Subject: [PATCH 21/72] Add CommentThreadContext typing and fix get_patch return --- src/gpt_review/repositories/devops.py | 7 +++++-- tests/test_devops.py | 17 +++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 2fc6cf22..7680da96 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -17,6 +17,7 @@ GitPullRequestCommentThread, GitTargetVersionDescriptor, GitVersionDescriptor, + CommentThreadContext, ) from knack import CLICommandsLoader from knack.arguments import ArgumentsContext @@ -246,12 +247,12 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str return self._get_patch(thread_context=thread.thread_context, pull_request_event=pull_request_event) - def _get_patch(self, thread_context, pull_request_event) -> List[str]: + def _get_patch(self, thread_context: CommentThreadContext, pull_request_event) -> List[str]: """ Get the patch for a given thread context. Args: - thread_context (ThreadContext): The thread context. + thread_context (CommentThreadContext): The thread context. pull_request_event (PullRequestEvent): The pull request event. 
Returns: @@ -294,6 +295,8 @@ def _calculate_selection(self, thread_context, original_content, changed_content changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line ) + return left_selection, right_selection + if left_selection or right_selection: return left_selection, right_selection raise ValueError("Both left and right selection cannot be None") diff --git a/tests/test_devops.py b/tests/test_devops.py index 57fe5230..8604c001 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -1,5 +1,6 @@ import os from dataclasses import dataclass +from typing import Literal import pytest import requests_mock @@ -10,6 +11,7 @@ GitPullRequest, GitPullRequestCommentThread, GitTargetVersionDescriptor, + CommentThreadContext, ) from gpt_review.repositories.devops import DevOpsClient, DevOpsFunction, _comment @@ -323,13 +325,16 @@ def create_comment(self, comment, repository_id, pull_request_id, thread_id, pro def get_pull_request_thread( self, repository_id, pull_request_id, thread_id, project=None, iteration=None, base_iteration=None ) -> GitPullRequestCommentThread: - return GitPullRequestCommentThread() + return GitPullRequestCommentThread(thread_context=CommentThreadContext()) def update_pull_request( self, git_pull_request_to_update, repository_id, pull_request_id, project=None ) -> GitPullRequest: return GitPullRequest() + def get_item_content(self, repository_id="", path="", project="", version_descriptor=None, **kwargs): + return bytes("mock content", "utf-8").split() + def get_commit_diffs( self, repository_id, @@ -417,21 +422,21 @@ def test_process_payload_integration() -> None: process_payload_test() -def get_patch_test(devops_client: DevOpsClient) -> None: +def get_patch_test(devops_client: DevOpsClient, expected_len: int) -> None: comment_id = LONG_PAYLOAD["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] patch = devops_client.get_patch( pull_request_event=LONG_PAYLOAD["resource"], pull_request_id=PR_ID, 
comment_id=comment_id ) - assert len(patch) == 64 + assert len(patch) == expected_len -def test_get_patch(mock_openai, devops_client: DevOpsClient) -> None: - get_patch_test(devops_client) +def test_get_patch(mock_openai, mock_ado_client: None, devops_client: DevOpsClient) -> None: + get_patch_test(devops_client, 1) @pytest.mark.integration def test_get_patch_integration(devops_client: DevOpsClient) -> None: - get_patch_test(devops_client) + get_patch_test(devops_client, 64) def get_patch_pr_comment_test(devops_function: DevOpsFunction, expected_len: int) -> None: From c8b75a8289a551f34643f032ecc7a639e64ef9c8 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 05:25:49 +0000 Subject: [PATCH 22/72] Refactor code and add comments for readability. --- src/gpt_review/_openai.py | 1 - src/gpt_review/repositories/devops.py | 11 ++++++----- tests/test_devops.py | 7 +++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index 29c313a0..43bcf84a 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -1,5 +1,4 @@ """Open AI API Call Wrapper.""" -import os import logging import os diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 7680da96..05114bd8 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -11,13 +11,13 @@ from azure.devops.v7_1.git.git_client import GitClient from azure.devops.v7_1.git.models import ( Comment, + CommentThreadContext, GitBaseVersionDescriptor, GitCommitDiffs, GitPullRequest, GitPullRequestCommentThread, GitTargetVersionDescriptor, GitVersionDescriptor, - CommentThreadContext, ) from knack import CLICommandsLoader from knack.arguments import ArgumentsContext @@ -673,10 +673,11 @@ def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, acc repo = link.split("/")[5] pr_id = link.split("/")[7] - DevOpsClient(pat=access_token, org=org, 
project=project, repository_id=repo).create_comment( - pull_request_id=pr_id, comment_id=comment_id, text=response["response"] - ) - return {"response": "Review posted as a comment.", "text": response["response"]} + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).create_comment( + pull_request_id=pr_id, comment_id=comment_id, text=response["response"] + ) + return {"response": "Review posted as a comment.", "text": response["response"]} + raise ValueError("LINK and ADO_TOKEN must be set.") class DevOpsCommandGroup(GPTCommandGroup): diff --git a/tests/test_devops.py b/tests/test_devops.py index 8604c001..686866b1 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -1,17 +1,16 @@ import os from dataclasses import dataclass -from typing import Literal import pytest import requests_mock from azure.devops.v7_1.git.models import ( Comment, + CommentThreadContext, GitBaseVersionDescriptor, GitCommitDiffs, GitPullRequest, GitPullRequestCommentThread, GitTargetVersionDescriptor, - CommentThreadContext, ) from gpt_review.repositories.devops import DevOpsClient, DevOpsFunction, _comment @@ -413,7 +412,7 @@ def process_payload_test() -> None: _comment(question, comment_id=COMMENT_ID, link=link) -def test_process_payload(mock_openai, mock_ado_client: None) -> None: +def test_process_payload(mock_openai: None, mock_ado_client: None) -> None: process_payload_test() @@ -430,7 +429,7 @@ def get_patch_test(devops_client: DevOpsClient, expected_len: int) -> None: assert len(patch) == expected_len -def test_get_patch(mock_openai, mock_ado_client: None, devops_client: DevOpsClient) -> None: +def test_get_patch(mock_openai: None, mock_ado_client: None, devops_client: DevOpsClient) -> None: get_patch_test(devops_client, 1) From b1ed6f301d6026f198e40bea3c577f097016a177 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 05:27:24 +0000 Subject: [PATCH 23/72] "Set ADO_TOKEN environment variable in mock_ado_client fixture" --- 
tests/test_devops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_devops.py b/tests/test_devops.py index 686866b1..e5d5e673 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -301,6 +301,8 @@ def mock_req(): @pytest.fixture def mock_ado_client(monkeypatch) -> None: + monkeypatch.setenv("ADO_TOKEN", "MOCK_TOKEN") + @dataclass class MockResponse: text: str From 3fb7287066b5c5df47e3c76d05d4933d5feef42c Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 05:28:54 +0000 Subject: [PATCH 24/72] Refactor logging statement in _call_gpt function. --- src/gpt_review/_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index 43bcf84a..3b25b742 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -84,7 +84,7 @@ def _call_gpt( try: model = _get_model(prompt, max_tokens=max_tokens, fast=fast, large=large) - logging.debug(f"Model Selected based on prompt size: {model}") + logging.debug("Model Selected based on prompt size: %s", model) if os.environ["OPENAI_API_TYPE"] == C.AZURE_API_TYPE: logging.debug("Using Azure Open AI.") From 57bd4790625cc52af9049bb04cddeb5de21ab121 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 05:45:35 +0000 Subject: [PATCH 25/72] Refactor and reorganize devops.py, update tests --- src/gpt_review/repositories/devops.py | 205 ++++++++++++-------------- tests/test_devops.py | 6 +- 2 files changed, 102 insertions(+), 109 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 05114bd8..ed5cb560 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -4,7 +4,7 @@ import json import logging import os -from typing import Dict, Iterable, Iterator, List, Optional +from typing import Dict, Iterable, List, Tuple, Optional from azure.devops.connection import Connection from azure.devops.exceptions 
import AzureDevOpsServiceError @@ -85,54 +85,6 @@ def create_comment(self, pull_request_id: int, comment_id: int, text) -> Comment new_comment, self.repository_id, pull_request_id, comment_id, project=self.project ) - def _get_comment_thread(self, pull_request_id: str, thread_id: str) -> GitPullRequestCommentThread: - """ - Get a comment thread. - - Args: - pull_request_id (str): The Azure DevOps pull request ID. - thread_id (str): The Azure DevOps thread ID. - - Returns: - GitPullRequestCommentThread: The response from the API. - """ - return self.client.get_pull_request_thread( - repository_id=self.repository_id, pull_request_id=pull_request_id, thread_id=thread_id, project=self.project - ) - - def get_changed_blobs(self, pull_request: GitPullRequest): - """ - Get the changed blobs in a pull request. - - Args: - pull_request (GitPullRequest): The pull request. - - Returns: - List[Dict[str, str]]: The changed blobs. - """ - changed_paths = [] - commit_diff_within_pr = None - - skip = 0 - while True: - commit_diff_within_pr = self._get_commit_diff( - diff_common_commit=False, - base_version=GitBaseVersionDescriptor( - base_version=pull_request["lastMergeSourceCommit"]["commitId"], base_version_type="commit" - ), - target_version=GitTargetVersionDescriptor( - target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" - ), - ) - changed_paths.extend( - [change for change in commit_diff_within_pr.changes if "isFolder" not in change["item"]] - ) - skip += len(commit_diff_within_pr.changes) - if commit_diff_within_pr.all_changes_included: - break - - return changed_paths - def update_pr(self, pull_request_id, title=None, description=None) -> GitPullRequest: """ Update a pull request. 
@@ -152,31 +104,6 @@ def update_pr(self, pull_request_id, title=None, description=None) -> GitPullReq pull_request_id=pull_request_id, ) - def _get_commit_diff( - self, - diff_common_commit: bool, - base_version: GitBaseVersionDescriptor, - target_version: GitTargetVersionDescriptor, - ) -> GitCommitDiffs: - """ - Get the diff between two commits. - - Args: - diff_common_commit (bool): Whether to diff the common commit. - base_version (GitBaseVersionDescriptor): The base version. - target_version (GitTargetVersionDescriptor): The target version. - - Returns: - Response: The response from the API. - """ - return self.client.get_commit_diffs( - repository_id=self.repository_id, - project=self.project, - diff_common_commit=diff_common_commit, - base_version_descriptor=base_version, - target_version_descriptor=target_version, - ) - def read_all_text( self, path: str, @@ -203,20 +130,6 @@ def read_all_text( ) return "".join(byte.decode("utf-8") for byte in byte_iterator) - async def read_all_text_async(self, path: str, commit_id, **kwargs) -> Iterator[bytes]: - """ - Read all text from a file asynchronously. - - Args: - path (str): The path to the file. - commit_id (str): The commit ID. - **kwargs: Any additional keyword arguments. - - Returns: - Iterator[bytes]: The bytes of the file. - """ - return await self.client.read_all_text(path=path, commit_id=commit_id, **kwargs) - @staticmethod def process_comment_payload(payload: str) -> str: """ @@ -243,21 +156,14 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str Returns: List[str]: The diff of the pull request. """ - thread = self._get_comment_thread(pull_request_id=pull_request_id, thread_id=comment_id) - - return self._get_patch(thread_context=thread.thread_context, pull_request_event=pull_request_event) - - def _get_patch(self, thread_context: CommentThreadContext, pull_request_event) -> List[str]: - """ - Get the patch for a given thread context. 
- - Args: - thread_context (CommentThreadContext): The thread context. - pull_request_event (PullRequestEvent): The pull request event. + thread = self.client.get_pull_request_thread( + repository_id=self.repository_id, + pull_request_id=pull_request_id, + thread_id=comment_id, + project=self.project, + ) + thread_context = thread.thread_context - Returns: - List[str]: The patch. - """ pull_request = pull_request_event["pullRequest"] if not pull_request: raise ValueError("pull_request_event.pullRequest is required") @@ -275,7 +181,18 @@ def _get_patch(self, thread_context: CommentThreadContext, pull_request_event) - "\n".join(left_selection) or [], "\n".join(right_selection) or [], thread_context.file_path ) - def _calculate_selection(self, thread_context, original_content, changed_content): + def _calculate_selection(self, thread_context, original_content, changed_content) -> Tuple[List[str], List[str]]: + """ + Calculate the selection for a given thread context. + + Args: + thread_context (CommentThreadContext): The thread context. + original_content (str): The original content. + changed_content (str): The changed content. + + Returns: + Tuple[List[str], List[str]]: The left and right selections. + """ left_selection = [] right_selection = [] if original_content and thread_context.left_file_start and thread_context.left_file_end: @@ -297,10 +214,6 @@ def _calculate_selection(self, thread_context, original_content, changed_content return left_selection, right_selection - if left_selection or right_selection: - return left_selection, right_selection - raise ValueError("Both left and right selection cannot be None") - def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: """ Get the patches for a given pull request event. 
@@ -326,6 +239,41 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] for git_change in git_changes ] + def get_changed_blobs(self, pull_request: GitPullRequest): + """ + Get the changed blobs in a pull request. + + Args: + pull_request (GitPullRequest): The pull request. + + Returns: + List[Dict[str, str]]: The changed blobs. + """ + changed_paths = [] + commit_diff_within_pr = None + + skip = 0 + while True: + commit_diff_within_pr = self.client.get_commit_diffs( + repository_id=self.repository_id, + project=self.project, + diff_common_commit=False, + base_version=GitBaseVersionDescriptor( + base_version=pull_request["lastMergeSourceCommit"]["commitId"], base_version_type="commit" + ), + target_version=GitTargetVersionDescriptor( + target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" + ), + ) + changed_paths.extend( + [change for change in commit_diff_within_pr.changes if "isFolder" not in change["item"]] + ) + skip += len(commit_diff_within_pr.changes) + if commit_diff_within_pr.all_changes_included: + break + + return changed_paths + def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: lines = file_contents.splitlines() @@ -357,11 +305,35 @@ def _get_git_change(self, file_path, source_commit_head, condensed=False) -> Lis def _create_patch( self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False ) -> List[str]: + """ + Create a patch for a given file. + + Args: + original_content (Optional[str]): The original content. + changed_content (Optional[str]): The changed content. + file_path (str): The file path. + condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. + + Returns: + List[str]: The patch. 
+ """ left = original_content.splitlines() if original_content else [] right = changed_content.splitlines() if changed_content else [] return self._create_patch_list(left, right, file_path, condensed) def _create_patch_list(self, left: List[str], right: List[str], file_path: str, condensed=False) -> List[str]: + """ + Create a patch list for a given file. + + Args: + left (List[str]): The left side of the patch. + right (List[str]): The right side of the patch. + file_path (str): The file path. + condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. + + Returns: + List[str]: The patch list. + """ needed_changes = self._calculate_minimum_change_needed(left, right) line, row = 1, 1 patch = [] @@ -393,6 +365,15 @@ def _create_patch_list(self, left: List[str], right: List[str], file_path: str, return patch def _get_condensed_patch(self, patch: List[str]) -> List[str]: + """ + Get a condensed version of the patch. + + Args: + patch (List[str]): The patch. + + Returns: + List[str]: The condensed patch. + """ buffer = [] result = [] trailing_context = 0 @@ -412,6 +393,16 @@ def _get_condensed_patch(self, patch: List[str]) -> List[str]: return result def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> List[List[int]]: + """ + Calculate the minimum change needed to transform the left side to the right side. + + Args: + left (List[str]): The left side of the patch. + right (List[str]): The right side of the patch. + + Returns: + List[List[int]]: The minimum change needed. 
+ """ changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] for i, j in itertools.product(range(len(left) + 1), range(len(right) + 1)): diff --git a/tests/test_devops.py b/tests/test_devops.py index e5d5e673..c374bd7b 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -338,13 +338,15 @@ def get_item_content(self, repository_id="", path="", project="", version_descri def get_commit_diffs( self, - repository_id, + repository_id="", project=None, diff_common_commit=None, top=None, skip=None, base_version_descriptor=None, target_version_descriptor=None, + base_version=None, + target_version=None, ) -> GitCommitDiffs: return GitCommitDiffs(changes=[], all_changes_included=True) @@ -376,7 +378,7 @@ def test_update_pr(mock_ado_client: None, devops_client: DevOpsClient) -> None: def test_get_diff(mock_ado_client: None, devops_client: DevOpsClient) -> None: - response = devops_client._get_commit_diff( + response = devops_client.client.get_commit_diffs( diff_common_commit=True, base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), target_version=GitTargetVersionDescriptor(target_version=TARGET, target_version_type="commit"), From a0bd2782f22278d1ac285aff20518a00657c92c7 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 06:05:59 +0000 Subject: [PATCH 26/72] Add support for parsing Azure DevOps URLs in devops.py. 
--- src/gpt_review/repositories/devops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index ed5cb560..d364ca8d 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -5,6 +5,7 @@ import logging import os from typing import Dict, Iterable, List, Tuple, Optional +from urllib.parse import urlparse from azure.devops.connection import Connection from azure.devops.exceptions import AzureDevOpsServiceError @@ -442,7 +443,9 @@ def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: if link and access_token: review = _summarize_files(diff) - if "dev.azure.com" in link: + parsed_url = urlparse(link) + + if "dev.azure.com" in parsed_url.netloc: org = link.split("/")[3] project = link.split("/")[4] repo = link.split("/")[6] @@ -653,7 +656,9 @@ def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, acc response = _ask( question=question, ) - if "dev.azure.com" in link: + parsed_url = urlparse(link) + + if "dev.azure.com" in parsed_url.netloc: org = link.split("/")[3] project = link.split("/")[4] repo = link.split("/")[6] From 2b9ddf94bd7fd2076ca267e8779b81fb03f7d4b2 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 07:15:14 +0000 Subject: [PATCH 27/72] Refactor test_devops.py and test_gpt_cli.py files. 
--- tests/test_devops.py | 10 ---------- tests/test_gpt_cli.py | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/test_devops.py b/tests/test_devops.py index c374bd7b..271caf5d 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -400,16 +400,6 @@ def test_update_pr_integration(devops_client: DevOpsClient) -> None: assert isinstance(response, GitPullRequest) -@pytest.mark.integration -def test_get_diff_integration(devops_client: DevOpsClient) -> None: - response = devops_client._get_commit_diff( - diff_common_commit=True, - base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), - target_version=GitTargetVersionDescriptor(target_version=TARGET, target_version_type="commit"), - ) - assert isinstance(response, GitCommitDiffs) - - def process_payload_test() -> None: question = DevOpsClient.process_comment_payload(SAMPLE_PAYLOAD) link = "https://msazure.visualstudio.com/One/_git/Azure-Gaming/pullrequest/8063875" diff --git a/tests/test_gpt_cli.py b/tests/test_gpt_cli.py index 7418a08d..30e16ea4 100644 --- a/tests/test_gpt_cli.py +++ b/tests/test_gpt_cli.py @@ -85,8 +85,8 @@ class CLICase2(CLICase): ), CLICase(f"ask --files {SAMPLE_FILE} --files {SAMPLE_FILE} {WHAT_LANGUAGE} --reset"), CLICase(f"ask --fast -f {SAMPLE_FILE} {WHAT_LANGUAGE}"), - CLICase(f"ask --fast -d src/gpt_review --reset --recursive --hidden --required-exts .py {WHAT_LANGUAGE}"), - CLICase(f"ask --fast -repo microsoft/gpt-review --branch main {WHAT_LANGUAGE}"), + # CLICase(f"ask --fast -d src/gpt_review --reset --recursive --hidden --required-exts .py {WHAT_LANGUAGE}"), + # CLICase(f"ask --fast -repo microsoft/gpt-review --branch main {WHAT_LANGUAGE}"), ] GITHUB_COMMANDS = [ From a096f990a0b976d341d307ffd4e036c268ec49f9 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 07:16:25 +0000 Subject: [PATCH 28/72] Refactor test_devops.py and remove unused test_get_diff function. 
--- tests/test_devops.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/tests/test_devops.py b/tests/test_devops.py index 271caf5d..7111939f 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -336,20 +336,6 @@ def update_pull_request( def get_item_content(self, repository_id="", path="", project="", version_descriptor=None, **kwargs): return bytes("mock content", "utf-8").split() - def get_commit_diffs( - self, - repository_id="", - project=None, - diff_common_commit=None, - top=None, - skip=None, - base_version_descriptor=None, - target_version_descriptor=None, - base_version=None, - target_version=None, - ) -> GitCommitDiffs: - return GitCommitDiffs(changes=[], all_changes_included=True) - def mock_client(self) -> MockDevOpsClient: return MockDevOpsClient() @@ -377,15 +363,6 @@ def test_update_pr(mock_ado_client: None, devops_client: DevOpsClient) -> None: assert isinstance(response, GitPullRequest) -def test_get_diff(mock_ado_client: None, devops_client: DevOpsClient) -> None: - response = devops_client.client.get_commit_diffs( - diff_common_commit=True, - base_version=GitBaseVersionDescriptor(version=SOURCE, version_type="commit"), - target_version=GitTargetVersionDescriptor(target_version=TARGET, target_version_type="commit"), - ) - assert isinstance(response, GitCommitDiffs) - - @pytest.mark.integration def test_create_comment_integration(devops_client: DevOpsClient) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") From 60c2080c4b971f679fea50a5e04e9beefaeabd74 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 07:19:12 +0000 Subject: [PATCH 29/72] Add mock method for getting commit diffs in test_devops.py. 
--- tests/test_devops.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_devops.py b/tests/test_devops.py index 7111939f..3ef30ab1 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -336,6 +336,20 @@ def update_pull_request( def get_item_content(self, repository_id="", path="", project="", version_descriptor=None, **kwargs): return bytes("mock content", "utf-8").split() + def get_commit_diffs( + self, + repository_id="", + project=None, + diff_common_commit=None, + top=None, + skip=None, + base_version_descriptor=None, + target_version_descriptor=None, + base_version=None, + target_version=None, + ) -> GitCommitDiffs: + return GitCommitDiffs(changes=[], all_changes_included=True) + def mock_client(self) -> MockDevOpsClient: return MockDevOpsClient() From 2ae71b185cb33168154d45a49148e822f3c022cd Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 15:22:57 +0000 Subject: [PATCH 30/72] Add __future__ import annotations and minor code formatting --- src/gpt/__init__.py | 1 - src/gpt/__main__.py | 2 ++ src/gpt_review/__main__.py | 2 ++ src/gpt_review/_ask.py | 2 ++ src/gpt_review/_command.py | 2 ++ src/gpt_review/_git.py | 2 ++ src/gpt_review/_gpt_cli.py | 2 ++ src/gpt_review/_llama_index.py | 2 ++ src/gpt_review/_openai.py | 2 ++ src/gpt_review/_review.py | 8 +++++++- src/gpt_review/constants.py | 2 ++ src/gpt_review/context.py | 2 ++ src/gpt_review/main.py | 2 ++ src/gpt_review/prompts/__init__.py | 2 ++ src/gpt_review/prompts/_prompt.py | 2 ++ src/gpt_review/repositories/_repository.py | 2 ++ src/gpt_review/repositories/devops.py | 14 +++++++------- src/gpt_review/repositories/github.py | 2 ++ src/gpt_review/utils.py | 2 ++ 19 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/gpt/__init__.py b/src/gpt/__init__.py index 8b137891..e69de29b 100644 --- a/src/gpt/__init__.py +++ b/src/gpt/__init__.py @@ -1 +0,0 @@ - diff --git a/src/gpt/__main__.py b/src/gpt/__main__.py index bd149a6a..890f2cc0 
100644 --- a/src/gpt/__main__.py +++ b/src/gpt/__main__.py @@ -1,4 +1,6 @@ """The GPT CLI entry point for python -m gpt""" +from __future__ import annotations + import sys from gpt_review._gpt_cli import cli diff --git a/src/gpt_review/__main__.py b/src/gpt_review/__main__.py index bd149a6a..890f2cc0 100644 --- a/src/gpt_review/__main__.py +++ b/src/gpt_review/__main__.py @@ -1,4 +1,6 @@ """The GPT CLI entry point for python -m gpt""" +from __future__ import annotations + import sys from gpt_review._gpt_cli import cli diff --git a/src/gpt_review/_ask.py b/src/gpt_review/_ask.py index 9461de0a..6b646edd 100644 --- a/src/gpt_review/_ask.py +++ b/src/gpt_review/_ask.py @@ -1,4 +1,6 @@ """Ask GPT a question.""" +from __future__ import annotations + import logging from typing import Dict, List, Optional diff --git a/src/gpt_review/_command.py b/src/gpt_review/_command.py index acd68bbb..82c6e44f 100644 --- a/src/gpt_review/_command.py +++ b/src/gpt_review/_command.py @@ -1,4 +1,6 @@ """Interface for GPT CLI command groups.""" +from __future__ import annotations + from knack import CLICommandsLoader diff --git a/src/gpt_review/_git.py b/src/gpt_review/_git.py index 1f8729df..0db7004e 100644 --- a/src/gpt_review/_git.py +++ b/src/gpt_review/_git.py @@ -1,4 +1,6 @@ """Basic Shell Commands for Git.""" +from __future__ import annotations + import logging import os from typing import Dict diff --git a/src/gpt_review/_gpt_cli.py b/src/gpt_review/_gpt_cli.py index 12bdabba..c8d7bb12 100644 --- a/src/gpt_review/_gpt_cli.py +++ b/src/gpt_review/_gpt_cli.py @@ -1,4 +1,6 @@ """The GPT CLI configuration and utilities.""" +from __future__ import annotations + import os import sys from collections import OrderedDict diff --git a/src/gpt_review/_llama_index.py b/src/gpt_review/_llama_index.py index 80321206..a261dc73 100644 --- a/src/gpt_review/_llama_index.py +++ b/src/gpt_review/_llama_index.py @@ -1,4 +1,6 @@ """Wrapper for Llama Index.""" +from __future__ import annotations + 
import logging import os from typing import List, Optional diff --git a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index 3b25b742..99c20e66 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -1,4 +1,6 @@ """Open AI API Call Wrapper.""" +from __future__ import annotations + import logging import os diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index ec723d82..5376d2db 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -1,4 +1,6 @@ """Basic functions for requesting review based goals from GPT-4.""" +from __future__ import annotations + import os from dataclasses import dataclass from typing import Dict @@ -10,7 +12,11 @@ from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup -from gpt_review.prompts._prompt import load_bug_yaml, load_coverage_yaml, load_summary_yaml +from gpt_review.prompts._prompt import ( + load_bug_yaml, + load_coverage_yaml, + load_summary_yaml, +) _CHECKS = { "SUMMARY_CHECKS": [ diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index 08ff0f73..68c2a605 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -1,4 +1,6 @@ """Contains constants for minimum and maximum values of various parameters used in GPT Review.""" +from __future__ import annotations + import os import sys diff --git a/src/gpt_review/context.py b/src/gpt_review/context.py index cf7bc72e..8898a007 100644 --- a/src/gpt_review/context.py +++ b/src/gpt_review/context.py @@ -1,4 +1,6 @@ """Context for the Azure OpenAI API and the models.""" +from __future__ import annotations + import os from dataclasses import dataclass diff --git a/src/gpt_review/main.py b/src/gpt_review/main.py index ebec1182..2e29e161 100644 --- a/src/gpt_review/main.py +++ b/src/gpt_review/main.py @@ -1,4 +1,6 @@ """The GPT CLI entry point.""" +from __future__ import annotations + import sys from knack.help_files import helps diff --git 
a/src/gpt_review/prompts/__init__.py b/src/gpt_review/prompts/__init__.py index 5392fb72..98ae4bca 100644 --- a/src/gpt_review/prompts/__init__.py +++ b/src/gpt_review/prompts/__init__.py @@ -1 +1,3 @@ """Collection of GPT Prompts.""" + +from __future__ import annotations diff --git a/src/gpt_review/prompts/_prompt.py b/src/gpt_review/prompts/_prompt.py index 12fe4fae..f3e7c535 100644 --- a/src/gpt_review/prompts/_prompt.py +++ b/src/gpt_review/prompts/_prompt.py @@ -1,4 +1,6 @@ """Interface for a GPT Prompts.""" +from __future__ import annotations + import os import sys from dataclasses import dataclass diff --git a/src/gpt_review/repositories/_repository.py b/src/gpt_review/repositories/_repository.py index f3b4341f..3cee07ae 100644 --- a/src/gpt_review/repositories/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -1,4 +1,6 @@ """Abstract class for a repository client.""" +from __future__ import annotations + from abc import abstractmethod from typing import Dict diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index d364ca8d..c8ee2bc0 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -1,10 +1,12 @@ """Azure DevOps Package Wrappers to Simplify Usage.""" +from __future__ import annotations + import abc import itertools import json import logging import os -from typing import Dict, Iterable, List, Tuple, Optional +from typing import Dict, Iterable, List, Optional, Tuple from urllib.parse import urlparse from azure.devops.connection import Connection @@ -12,11 +14,8 @@ from azure.devops.v7_1.git.git_client import GitClient from azure.devops.v7_1.git.models import ( Comment, - CommentThreadContext, GitBaseVersionDescriptor, - GitCommitDiffs, GitPullRequest, - GitPullRequestCommentThread, GitTargetVersionDescriptor, GitVersionDescriptor, ) @@ -25,6 +24,7 @@ from knack.commands import CommandGroup from msrest.authentication import BasicAuthentication + from 
gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup from gpt_review._review import _summarize_files @@ -259,10 +259,10 @@ def get_changed_blobs(self, pull_request: GitPullRequest): repository_id=self.repository_id, project=self.project, diff_common_commit=False, - base_version=GitBaseVersionDescriptor( + base_version_descriptor=GitBaseVersionDescriptor( base_version=pull_request["lastMergeSourceCommit"]["commitId"], base_version_type="commit" ), - target_version=GitTargetVersionDescriptor( + target_version_descriptor=GitTargetVersionDescriptor( target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" ), ) @@ -513,7 +513,7 @@ def _process_comment(self, body) -> None: try: diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) - except: + except Exception: diff = self.get_patches(pull_request_event=payload["resource"]) logging.info("Copilot diff: %s", diff) diff --git a/src/gpt_review/repositories/github.py b/src/gpt_review/repositories/github.py index 7efd7dd4..6cfce6d1 100644 --- a/src/gpt_review/repositories/github.py +++ b/src/gpt_review/repositories/github.py @@ -1,4 +1,6 @@ """GitHub API helpers.""" +from __future__ import annotations + import json import logging import os diff --git a/src/gpt_review/utils.py b/src/gpt_review/utils.py index 7d6d79df..67045fee 100644 --- a/src/gpt_review/utils.py +++ b/src/gpt_review/utils.py @@ -1,4 +1,6 @@ """Utility functions""" +from __future__ import annotations + import logging import time from typing import Optional From de24fb59a5222cae54adb43cf05ac6c76507084c Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 15:28:13 +0000 Subject: [PATCH 31/72] Update VSCode settings, tasks, and improve Azure Functions integration --- .devcontainer/devcontainer.json | 2 +- .../on-push-create-draft-release.yml | 4 +-- .gitignore | 2 +- .vscode/extensions.json | 6 ++++ .vscode/launch.json | 27 
++++++--------- .vscode/settings.json | 9 ++++- .vscode/tasks.json | 33 +++++++++++++++++++ action.yml | 1 - azure.yaml.template | 2 +- azure/README.md | 2 +- azure/api/.funcignore | 2 +- azure/api/__init__.py | 2 ++ azure/api/host.json | 2 +- azure/api/incoming_msg_handler/__init__.py | 4 ++- azure/api/incoming_msg_handler/function.json | 2 +- pyproject.toml | 4 +++ 16 files changed, 75 insertions(+), 29 deletions(-) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/tasks.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 0977df46..bbe0d783 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -52,4 +52,4 @@ "azure-cli": "latest" }, "onCreateCommand": "pip install -e ." -} \ No newline at end of file +} diff --git a/.github/workflows/on-push-create-draft-release.yml b/.github/workflows/on-push-create-draft-release.yml index 7b5eeef1..3df08b25 100644 --- a/.github/workflows/on-push-create-draft-release.yml +++ b/.github/workflows/on-push-create-draft-release.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 with: token: ${{ secrets.PAT }} - + - name: Bump version and push tag id: tag_version uses: mathieudutour/github-tag-action@v6.0 @@ -50,7 +50,7 @@ jobs: python -m pip install flit python -m flit build - + - if: steps.tag_version.outputs.release_type run: | gh release delete $TAG || echo "no tag existed" diff --git a/.gitignore b/.gitignore index a54729a8..61684247 100644 --- a/.gitignore +++ b/.gitignore @@ -144,4 +144,4 @@ local.settings.json __blobstorage__ __queuestorage__ __azurite_db*__.json -.python_packages \ No newline at end of file +.python_packages diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..3f63eb97 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "ms-azuretools.vscode-azurefunctions", + "ms-python.python" + ] +} \ No newline at end of file diff --git 
a/.vscode/launch.json b/.vscode/launch.json index 8d28b0c6..ea3e0f1d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,19 +1,12 @@ { - "version": "0.1.0", - "configurations": [ - { - "name": "Debug Tests", - "type": "python", - "request": "launch", - "program": "${file}", - "purpose": [ - "debug-test" - ], - "console": "integratedTerminal", - "justMyCode": false, - "env": { - "PYTEST_ADDOPTS": "--no-cov -n0 --dist no" - } - } - ] + "version": "0.2.0", + "configurations": [ + { + "name": "Attach to Python Functions", + "type": "python", + "request": "attach", + "port": 9091, + "preLaunchTask": "func: host start" + } + ] } \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 7b019787..4df9b6c9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -34,5 +34,12 @@ "python.analysis.inlayHints.functionReturnTypes": true, "python.analysis.diagnosticSeverityOverrides": { "reportUndefinedVariable": "none" // Covered by Ruff F821 - } + }, + "azureFunctions.deploySubpath": "azure/api", + "azureFunctions.scmDoBuildDuringDeployment": true, + "azureFunctions.pythonVenv": ".venv", + "azureFunctions.projectLanguage": "Python", + "azureFunctions.projectRuntime": "~4", + "debug.internalConsoleOptions": "neverOpen", + "azureFunctions.projectSubpath": "azure/api" } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 00000000..3c8ec2ef --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,33 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "func", + "label": "func: host start", + "command": "host start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + "dependsOn": "pip install (functions)", + "options": { + "cwd": "${workspaceFolder}/azure/api" + } + }, + { + "label": "pip install (functions)", + "type": "shell", + "osx": { + "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" + }, + "windows": { + 
"command": "${config:azureFunctions.pythonVenv}/Scripts/python -m pip install -r requirements.txt" + }, + "linux": { + "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt" + }, + "problemMatcher": [], + "options": { + "cwd": "${workspaceFolder}/azure/api" + } + } + ] +} \ No newline at end of file diff --git a/action.yml b/action.yml index 4b15c2a9..6c021480 100644 --- a/action.yml +++ b/action.yml @@ -70,4 +70,3 @@ runs: BUG_SUMMARY: false RISK_SUMMARY: false RISK_BREAKING: false - diff --git a/azure.yaml.template b/azure.yaml.template index 201cb55d..76d02fae 100644 --- a/azure.yaml.template +++ b/azure.yaml.template @@ -5,4 +5,4 @@ azure_model_map: turbo_llm_model_deployment_id: gpt-35-turbo smart_llm_model_deployment_id: gpt-4 large_llm_model_deployment_id: gpt-4-32k - embedding_model_deployment_id: text-embedding-ada-002 \ No newline at end of file + embedding_model_deployment_id: text-embedding-ada-002 diff --git a/azure/README.md b/azure/README.md index fcdea64c..6b9772f2 100644 --- a/azure/README.md +++ b/azure/README.md @@ -26,4 +26,4 @@ cd azure/api python 3.9 func start -``` \ No newline at end of file +``` diff --git a/azure/api/.funcignore b/azure/api/.funcignore index 9966315f..f1110d33 100644 --- a/azure/api/.funcignore +++ b/azure/api/.funcignore @@ -5,4 +5,4 @@ __blobstorage__ __queuestorage__ local.settings.json test -.venv \ No newline at end of file +.venv diff --git a/azure/api/__init__.py b/azure/api/__init__.py index 7f41b1de..b2127d67 100644 --- a/azure/api/__init__.py +++ b/azure/api/__init__.py @@ -1 +1,3 @@ +from __future__ import annotations + from . 
import incoming_msg_handler diff --git a/azure/api/host.json b/azure/api/host.json index ff49a6ee..aa694f24 100644 --- a/azure/api/host.json +++ b/azure/api/host.json @@ -19,4 +19,4 @@ } } } -} \ No newline at end of file +} diff --git a/azure/api/incoming_msg_handler/__init__.py b/azure/api/incoming_msg_handler/__init__.py index 3e4b5387..fb162314 100644 --- a/azure/api/incoming_msg_handler/__init__.py +++ b/azure/api/incoming_msg_handler/__init__.py @@ -1,9 +1,11 @@ """Azure DevOps API incoming message handler.""" +from __future__ import annotations + import os -from gpt_review.repositories.devops import DevOpsFunction import azure.functions as func +from gpt_review.repositories.devops import DevOpsFunction HANDLER = DevOpsFunction( pat=os.environ["ADO_TOKEN"], diff --git a/azure/api/incoming_msg_handler/function.json b/azure/api/incoming_msg_handler/function.json index 5c7d1886..9b312de1 100644 --- a/azure/api/incoming_msg_handler/function.json +++ b/azure/api/incoming_msg_handler/function.json @@ -9,4 +9,4 @@ "connection": "AzureServiceBusConnectionString" } ] -} \ No newline at end of file +} diff --git a/pyproject.toml b/pyproject.toml index 2c15619c..0f3c1cef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,10 @@ ignore = [ "W391", # Covered by Pylint trailing-newlines ] +[tool.isort] +profile = "black" +src_paths = ["src", "tests", "azure"] + [tool.pyright] include = ["src"] exclude = [ From cb8efd17849e0b7f774b5e8006720149d8bc5135 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 21:23:12 +0000 Subject: [PATCH 32/72] Update devcontainer, vscode config, remove future imports, and add docx2txt dependency --- .devcontainer/devcontainer.json | 2 +- .vscode/launch.json | 14 ++++++++++++++ azure/api/__init__.py | 2 -- azure/api/incoming_msg_handler/__init__.py | 2 -- pyproject.toml | 3 ++- src/gpt/__main__.py | 2 -- src/gpt_review/__init__.py | 2 -- src/gpt_review/__main__.py | 2 -- src/gpt_review/_ask.py | 2 -- 
src/gpt_review/_command.py | 2 -- src/gpt_review/_git.py | 2 -- src/gpt_review/_gpt_cli.py | 2 -- src/gpt_review/_llama_index.py | 2 -- src/gpt_review/_openai.py | 2 -- src/gpt_review/_review.py | 2 -- src/gpt_review/constants.py | 2 -- src/gpt_review/context.py | 2 -- src/gpt_review/main.py | 2 -- src/gpt_review/prompts/_prompt.py | 2 -- src/gpt_review/repositories/_repository.py | 2 -- src/gpt_review/repositories/devops.py | 2 -- src/gpt_review/repositories/github.py | 2 -- src/gpt_review/utils.py | 2 -- 23 files changed, 17 insertions(+), 42 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbe0d783..0977df46 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -52,4 +52,4 @@ "azure-cli": "latest" }, "onCreateCommand": "pip install -e ." -} +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index ea3e0f1d..839cc38b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,6 +1,20 @@ { "version": "0.2.0", "configurations": [ + { + "name": "Debug Tests", + "type": "python", + "request": "launch", + "program": "${file}", + "purpose": [ + "debug-test" + ], + "console": "integratedTerminal", + "justMyCode": false, + "env": { + "PYTEST_ADDOPTS": "--no-cov -n0 --dist no" + } + }, { "name": "Attach to Python Functions", "type": "python", diff --git a/azure/api/__init__.py b/azure/api/__init__.py index b2127d67..7f41b1de 100644 --- a/azure/api/__init__.py +++ b/azure/api/__init__.py @@ -1,3 +1 @@ -from __future__ import annotations - from . 
import incoming_msg_handler diff --git a/azure/api/incoming_msg_handler/__init__.py b/azure/api/incoming_msg_handler/__init__.py index fb162314..c40aef19 100644 --- a/azure/api/incoming_msg_handler/__init__.py +++ b/azure/api/incoming_msg_handler/__init__.py @@ -1,6 +1,4 @@ """Azure DevOps API incoming message handler.""" -from __future__ import annotations - import os import azure.functions as func diff --git a/pyproject.toml b/pyproject.toml index 0f3c1cef..af1f6e10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ test = [ "bandit[toml]==1.7.5", "black==23.3.0", "cattrs", + "docx2txt", "check-manifest==0.49", "flake8-bugbear==23.5.9", "flake8-docstrings", @@ -144,7 +145,7 @@ executionEnvironments = [ ] [tool.pytest.ini_options] -addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -m unit" +addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -n auto" pythonpath = [ "src" ] diff --git a/src/gpt/__main__.py b/src/gpt/__main__.py index 890f2cc0..bd149a6a 100644 --- a/src/gpt/__main__.py +++ b/src/gpt/__main__.py @@ -1,6 +1,4 @@ """The GPT CLI entry point for python -m gpt""" -from __future__ import annotations - import sys from gpt_review._gpt_cli import cli diff --git a/src/gpt_review/__init__.py b/src/gpt_review/__init__.py index ae089764..4aeff20b 100644 --- a/src/gpt_review/__init__.py +++ b/src/gpt_review/__init__.py @@ -3,6 +3,4 @@ # Licensed under the MIT License. See LICENSE in project root for information. 
# ------------------------------------------------------------- """Easy GPT CLI""" -from __future__ import annotations - __version__ = "0.9.2" diff --git a/src/gpt_review/__main__.py b/src/gpt_review/__main__.py index 890f2cc0..bd149a6a 100644 --- a/src/gpt_review/__main__.py +++ b/src/gpt_review/__main__.py @@ -1,6 +1,4 @@ """The GPT CLI entry point for python -m gpt""" -from __future__ import annotations - import sys from gpt_review._gpt_cli import cli diff --git a/src/gpt_review/_ask.py b/src/gpt_review/_ask.py index 6b646edd..9461de0a 100644 --- a/src/gpt_review/_ask.py +++ b/src/gpt_review/_ask.py @@ -1,6 +1,4 @@ """Ask GPT a question.""" -from __future__ import annotations - import logging from typing import Dict, List, Optional diff --git a/src/gpt_review/_command.py b/src/gpt_review/_command.py index 82c6e44f..acd68bbb 100644 --- a/src/gpt_review/_command.py +++ b/src/gpt_review/_command.py @@ -1,6 +1,4 @@ """Interface for GPT CLI command groups.""" -from __future__ import annotations - from knack import CLICommandsLoader diff --git a/src/gpt_review/_git.py b/src/gpt_review/_git.py index 0db7004e..1f8729df 100644 --- a/src/gpt_review/_git.py +++ b/src/gpt_review/_git.py @@ -1,6 +1,4 @@ """Basic Shell Commands for Git.""" -from __future__ import annotations - import logging import os from typing import Dict diff --git a/src/gpt_review/_gpt_cli.py b/src/gpt_review/_gpt_cli.py index c8d7bb12..12bdabba 100644 --- a/src/gpt_review/_gpt_cli.py +++ b/src/gpt_review/_gpt_cli.py @@ -1,6 +1,4 @@ """The GPT CLI configuration and utilities.""" -from __future__ import annotations - import os import sys from collections import OrderedDict diff --git a/src/gpt_review/_llama_index.py b/src/gpt_review/_llama_index.py index a261dc73..80321206 100644 --- a/src/gpt_review/_llama_index.py +++ b/src/gpt_review/_llama_index.py @@ -1,6 +1,4 @@ """Wrapper for Llama Index.""" -from __future__ import annotations - import logging import os from typing import List, Optional diff --git 
a/src/gpt_review/_openai.py b/src/gpt_review/_openai.py index 99c20e66..3b25b742 100644 --- a/src/gpt_review/_openai.py +++ b/src/gpt_review/_openai.py @@ -1,6 +1,4 @@ """Open AI API Call Wrapper.""" -from __future__ import annotations - import logging import os diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index 5376d2db..c4256b7a 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -1,6 +1,4 @@ """Basic functions for requesting review based goals from GPT-4.""" -from __future__ import annotations - import os from dataclasses import dataclass from typing import Dict diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index 68c2a605..08ff0f73 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -1,6 +1,4 @@ """Contains constants for minimum and maximum values of various parameters used in GPT Review.""" -from __future__ import annotations - import os import sys diff --git a/src/gpt_review/context.py b/src/gpt_review/context.py index 8898a007..cf7bc72e 100644 --- a/src/gpt_review/context.py +++ b/src/gpt_review/context.py @@ -1,6 +1,4 @@ """Context for the Azure OpenAI API and the models.""" -from __future__ import annotations - import os from dataclasses import dataclass diff --git a/src/gpt_review/main.py b/src/gpt_review/main.py index 2e29e161..ebec1182 100644 --- a/src/gpt_review/main.py +++ b/src/gpt_review/main.py @@ -1,6 +1,4 @@ """The GPT CLI entry point.""" -from __future__ import annotations - import sys from knack.help_files import helps diff --git a/src/gpt_review/prompts/_prompt.py b/src/gpt_review/prompts/_prompt.py index f3e7c535..12fe4fae 100644 --- a/src/gpt_review/prompts/_prompt.py +++ b/src/gpt_review/prompts/_prompt.py @@ -1,6 +1,4 @@ """Interface for a GPT Prompts.""" -from __future__ import annotations - import os import sys from dataclasses import dataclass diff --git a/src/gpt_review/repositories/_repository.py b/src/gpt_review/repositories/_repository.py 
index 3cee07ae..f3b4341f 100644 --- a/src/gpt_review/repositories/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -1,6 +1,4 @@ """Abstract class for a repository client.""" -from __future__ import annotations - from abc import abstractmethod from typing import Dict diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index c8ee2bc0..356a62e7 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -1,6 +1,4 @@ """Azure DevOps Package Wrappers to Simplify Usage.""" -from __future__ import annotations - import abc import itertools import json diff --git a/src/gpt_review/repositories/github.py b/src/gpt_review/repositories/github.py index 6cfce6d1..7efd7dd4 100644 --- a/src/gpt_review/repositories/github.py +++ b/src/gpt_review/repositories/github.py @@ -1,6 +1,4 @@ """GitHub API helpers.""" -from __future__ import annotations - import json import logging import os diff --git a/src/gpt_review/utils.py b/src/gpt_review/utils.py index 67045fee..7d6d79df 100644 --- a/src/gpt_review/utils.py +++ b/src/gpt_review/utils.py @@ -1,6 +1,4 @@ """Utility functions""" -from __future__ import annotations - import logging import time from typing import Optional From 6781b4076bea9daf816e1be747c0f2ba14849621 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 17:24:32 -0400 Subject: [PATCH 33/72] Apply suggestions from code review --- src/gpt_review/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gpt_review/__init__.py b/src/gpt_review/__init__.py index 4aeff20b..ae089764 100644 --- a/src/gpt_review/__init__.py +++ b/src/gpt_review/__init__.py @@ -3,4 +3,6 @@ # Licensed under the MIT License. See LICENSE in project root for information. 
# ------------------------------------------------------------- """Easy GPT CLI""" +from __future__ import annotations + __version__ = "0.9.2" From cfb610c3c1952222990322be4e7592be9bc5add0 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 17:26:06 -0400 Subject: [PATCH 34/72] Apply suggestions from code review --- src/gpt_review/context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gpt_review/context.py b/src/gpt_review/context.py index cf7bc72e..12615f7d 100644 --- a/src/gpt_review/context.py +++ b/src/gpt_review/context.py @@ -55,6 +55,7 @@ def _load_azure_openai_context() -> Context: openai.api_version = os.environ["OPENAI_API_VERSION"] if os.getenv("AZURE_OPENAI_API"): + openai.api_type = os.environ["OPENAI_API_TYPE"] = C.AZURE_API_TYPE openai.api_base = os.environ["OPENAI_API_BASE"] = os.getenv("AZURE_OPENAI_API") or azure_config.get( "azure_api_base" ) From f27d5aa488f52c387b86cee3faee6b313e8ffc32 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 17:29:13 -0400 Subject: [PATCH 35/72] Apply suggestions from code review --- src/gpt_review/prompts/__init__.py | 2 -- tests/conftest.py | 2 +- tests/test_devops.py | 10 +++++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/gpt_review/prompts/__init__.py b/src/gpt_review/prompts/__init__.py index 98ae4bca..5392fb72 100644 --- a/src/gpt_review/prompts/__init__.py +++ b/src/gpt_review/prompts/__init__.py @@ -1,3 +1 @@ """Collection of GPT Prompts.""" - -from __future__ import annotations diff --git a/tests/conftest.py b/tests/conftest.py index e18f73ac..7bb57e47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ def pytest_collection_modifyitems(items): for item in items: - if "_int_" in item.nodeid or "integration" in item.nodeid: + if "_int_" in item.nodeid: item.add_marker(pytest.mark.integration) elif "_cli_" in item.nodeid: item.add_marker(pytest.mark.cli) diff --git a/tests/test_devops.py b/tests/test_devops.py index 
3ef30ab1..a76edb3c 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -378,13 +378,13 @@ def test_update_pr(mock_ado_client: None, devops_client: DevOpsClient) -> None: @pytest.mark.integration -def test_create_comment_integration(devops_client: DevOpsClient) -> None: +def test_int_create_comment(devops_client: DevOpsClient) -> None: response = devops_client.create_comment(pull_request_id=PR_ID, comment_id=COMMENT_ID, text="text1") assert isinstance(response, Comment) @pytest.mark.integration -def test_update_pr_integration(devops_client: DevOpsClient) -> None: +def test_int_update_pr(devops_client: DevOpsClient) -> None: response = devops_client.update_pr(PR_ID, description="description1") assert isinstance(response, GitPullRequest) response = devops_client.update_pr(PR_ID, title="Sample PR Title") @@ -402,7 +402,7 @@ def test_process_payload(mock_openai: None, mock_ado_client: None) -> None: @pytest.mark.integration -def test_process_payload_integration() -> None: +def test_int_process_payload() -> None: process_payload_test() @@ -419,7 +419,7 @@ def test_get_patch(mock_openai: None, mock_ado_client: None, devops_client: DevO @pytest.mark.integration -def test_get_patch_integration(devops_client: DevOpsClient) -> None: +def test_int_get_patch(devops_client: DevOpsClient) -> None: get_patch_test(devops_client, 64) @@ -434,5 +434,5 @@ def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_f @pytest.mark.integration -def test_get_patch_pr_comment_integration(devops_function: DevOpsFunction) -> None: +def test_int_get_patch_pr_comment(devops_function: DevOpsFunction) -> None: get_patch_pr_comment_test(devops_function, 3348) From cb9d5cb04afa0983c9cf0b78b1a754abead5313b Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 17:29:44 -0400 Subject: [PATCH 36/72] Apply suggestions from code review --- tests/test_gpt_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_gpt_cli.py 
b/tests/test_gpt_cli.py index 30e16ea4..7418a08d 100644 --- a/tests/test_gpt_cli.py +++ b/tests/test_gpt_cli.py @@ -85,8 +85,8 @@ class CLICase2(CLICase): ), CLICase(f"ask --files {SAMPLE_FILE} --files {SAMPLE_FILE} {WHAT_LANGUAGE} --reset"), CLICase(f"ask --fast -f {SAMPLE_FILE} {WHAT_LANGUAGE}"), - # CLICase(f"ask --fast -d src/gpt_review --reset --recursive --hidden --required-exts .py {WHAT_LANGUAGE}"), - # CLICase(f"ask --fast -repo microsoft/gpt-review --branch main {WHAT_LANGUAGE}"), + CLICase(f"ask --fast -d src/gpt_review --reset --recursive --hidden --required-exts .py {WHAT_LANGUAGE}"), + CLICase(f"ask --fast -repo microsoft/gpt-review --branch main {WHAT_LANGUAGE}"), ] GITHUB_COMMANDS = [ From 40a7a01e29c480ced8d99c1ea8ef59cc3e177945 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 18:16:04 -0400 Subject: [PATCH 37/72] Create __init__.py --- src/gpt_review/repositories/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/gpt_review/repositories/__init__.py diff --git a/src/gpt_review/repositories/__init__.py b/src/gpt_review/repositories/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/gpt_review/repositories/__init__.py @@ -0,0 +1 @@ + From 974ede5b34f64891a68ee9fa7ab59b2102a7f1fe Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Tue, 16 May 2023 19:52:57 -0400 Subject: [PATCH 38/72] Apply suggestions from code review --- src/gpt_review/repositories/_repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gpt_review/repositories/_repository.py b/src/gpt_review/repositories/_repository.py index f3b4341f..ffabba36 100644 --- a/src/gpt_review/repositories/_repository.py +++ b/src/gpt_review/repositories/_repository.py @@ -1,6 +1,5 @@ """Abstract class for a repository client.""" from abc import abstractmethod -from typing import Dict class _RepositoryClient: From 8e32cdcc71934d74435450025cec9fe4de0c7ede Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 17 
May 2023 04:06:27 -0400 Subject: [PATCH 39/72] Apply suggestions from code review --- src/gpt_review/repositories/devops.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 356a62e7..f47cfb16 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -55,11 +55,10 @@ def __init__(self, pat, org, project, repository_id) -> None: # Create a connection to the org credentials = BasicAuthentication("", personal_access_token) - connection = Connection(base_url=organization_url, creds=credentials) + self.connection = Connection(base_url=organization_url, creds=credentials) # Get a client (the "core" client provides access to projects, teams, etc) - self.connection = connection - self.client: GitClient = connection.clients_v7_1.get_git_client() + self.client: GitClient = self.connection.clients_v7_1.get_git_client() self.project = project self.repository_id = repository_id @@ -140,8 +139,7 @@ def process_comment_payload(payload: str) -> str: Returns: str: The question from the Azure DevOps Comment. 
""" - payload = json.loads(payload) - return payload["resource"]["comment"]["content"] + return json.loads(payload)["resource"]["comment"]["content"] def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str]: """ @@ -525,7 +523,7 @@ def _process_comment(self, body) -> None: response = _ask( question=question, - max_tokens=500, + max_tokens=1000, ) self.create_comment(pull_request_id=pr_id, comment_id=comment_id, text=response["response"]) From 7e34fd96feac93956d01af7564f7ce8ec116ec6e Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 17 May 2023 20:25:04 +0000 Subject: [PATCH 40/72] Add azure-functions-core-tools-4, update methods in DevOpsClient, and improve logging --- .devcontainer/Dockerfile | 6 + azure/api/incoming_msg_handler/__init__.py | 6 + pyproject.toml | 5 +- src/gpt_review/repositories/devops.py | 147 +++++++++------------ 4 files changed, 77 insertions(+), 87 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 025eb2d7..dd07b8bb 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,6 +3,12 @@ FROM mcr.microsoft.com/devcontainers/python:3 RUN python -m pip install --upgrade pip \ && python -m pip install 'flit>=3.8.0' +RUN curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg \ + && mv microsoft.gpg /etc/apt/trusted.gpg.d/microsoft.gpg \ + && sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/$(lsb_release -rs | cut -d'.' -f 1)/prod $(lsb_release -cs) main" > /etc/apt/sources.list.d/dotnetdev.list' \ + && apt-get update \ + && apt-get install azure-functions-core-tools-4 + ENV FLIT_ROOT_INSTALL=1 COPY pyproject.toml . 
diff --git a/azure/api/incoming_msg_handler/__init__.py b/azure/api/incoming_msg_handler/__init__.py index c40aef19..e060888b 100644 --- a/azure/api/incoming_msg_handler/__init__.py +++ b/azure/api/incoming_msg_handler/__init__.py @@ -12,6 +12,12 @@ repository_id=os.environ["ADO_REPO"], ) +os.putenv("RISK_SUMMARY", "false") +os.putenv("FILE_SUMMARY_FULL", "false") +os.putenv("TEST_SUMMARY", "false") +os.putenv("BUG_SUMMARY", "false") +os.putenv("SUMMARY_SUGGEST", "false") + def main(msg: func.ServiceBusMessage) -> None: """Handle an incoming message.""" diff --git a/pyproject.toml b/pyproject.toml index af1f6e10..1e90e0d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,8 +23,9 @@ requires-python = ">=3.8.1" dynamic = ["version"] dependencies = [ 'azure-devops', - 'azure-functions; python_version <= "3.10"', + 'azure-functions', 'azure-identity', + 'azure-keyvault', 'azure-keyvault-secrets', 'llama-index>=0.6.0,<=0.6.8', 'httpx', @@ -145,7 +146,7 @@ executionEnvironments = [ ] [tool.pytest.ini_options] -addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -n auto" +addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -m unit" pythonpath = [ "src" ] diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index f47cfb16..9d7d07ea 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -62,7 +62,7 @@ def __init__(self, pat, org, project, repository_id) -> None: self.project = project self.repository_id = repository_id - def create_comment(self, pull_request_id: int, comment_id: int, text) -> Comment: + def create_comment(self, pull_request_id: int, comment_id: int, text: str, **kwargs) -> Comment: """ Create a comment on a pull request. @@ -74,16 +74,17 @@ def create_comment(self, pull_request_id: int, comment_id: int, text) -> Comment pull_request_id (int): The Azure DevOps pull request ID. 
comment_id (int): The Azure DevOps comment ID. text (str): The text of the comment. + **kwargs: Any additional keyword arguments. Returns: Comment: The response from the API. """ new_comment = Comment(content=text) return self.client.create_comment( - new_comment, self.repository_id, pull_request_id, comment_id, project=self.project + new_comment, self.repository_id, pull_request_id, comment_id, project=self.project, **kwargs ) - def update_pr(self, pull_request_id, title=None, description=None) -> GitPullRequest: + def update_pr(self, pull_request_id, title=None, description=None, **kwargs) -> GitPullRequest: """ Update a pull request. @@ -91,6 +92,7 @@ def update_pr(self, pull_request_id, title=None, description=None) -> GitPullReq pull_request_id (str): The Azure DevOps pull request ID. title (str): The title of the pull request. description (str): The description of the pull request. + **kwargs: Any additional keyword arguments. Returns: GitPullRequest: The response from the API. @@ -100,6 +102,7 @@ def update_pr(self, pull_request_id, title=None, description=None) -> GitPullReq repository_id=self.repository_id, project=self.project, pull_request_id=pull_request_id, + **kwargs, ) def read_all_text( @@ -153,32 +156,18 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str Returns: List[str]: The diff of the pull request. 
""" - thread = self.client.get_pull_request_thread( + thread_context = self.client.get_pull_request_thread( repository_id=self.repository_id, pull_request_id=pull_request_id, thread_id=comment_id, project=self.project, - ) - thread_context = thread.thread_context + ).thread_context - pull_request = pull_request_event["pullRequest"] - if not pull_request: - raise ValueError("pull_request_event.pullRequest is required") + left_selection, right_selection = self._calculate_selection(thread_context, pull_request_event) - original_content = self.read_all_text(path=thread_context.file_path, check_if_exists=True) - changed_content = self.read_all_text( - path=thread_context.file_path, - commit_id=pull_request["lastMergeSourceCommit"]["commitId"], - check_if_exists=True, - ) - - left_selection, right_selection = self._calculate_selection(thread_context, original_content, changed_content) - - return self._create_patch( - "\n".join(left_selection) or [], "\n".join(right_selection) or [], thread_context.file_path - ) + return self._create_patch(left_selection, right_selection, thread_context.file_path) - def _calculate_selection(self, thread_context, original_content, changed_content) -> Tuple[List[str], List[str]]: + def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: """ Calculate the selection for a given thread context. @@ -190,27 +179,52 @@ def _calculate_selection(self, thread_context, original_content, changed_content Returns: Tuple[List[str], List[str]]: The left and right selections. 
""" - left_selection = [] - right_selection = [] - if original_content and thread_context.left_file_start and thread_context.left_file_end: - left_selection = self._get_selection( - original_content, thread_context.left_file_start.line, thread_context.left_file_end.line - ) - if not changed_content or not thread_context.right_file_start or not thread_context.right_file_end: - raise ValueError("Both left and right selection cannot be None") + original_content = self.read_all_text(path=thread_context.file_path, check_if_exists=True) + changed_content = self.read_all_text( + path=thread_context.file_path, + commit_id=pull_request["pullRequest"]["lastMergeSourceCommit"]["commitId"], + check_if_exists=True, + ) - right_selection = self._get_selection( - changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line + if not original_content and not changed_content: + raise ValueError("Both left and right selection cannot be None") + + left_selection = ( + self._get_selection( + original_content, thread_context.left_file_start.line, thread_context.left_file_end.line ) + if original_content and thread_context.left_file_start and thread_context.left_file_end + else None + ) - if changed_content and thread_context.right_file_start and thread_context.right_file_end: - right_selection = self._get_selection( + right_selection = ( + self._get_selection( changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line ) + if changed_content and thread_context.right_file_start and thread_context.right_file_end + else None + ) return left_selection, right_selection + def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> str: + lines = file_contents.splitlines() + + if line_end - line_start < MIN_CONTEXT_LINES: + return lines + + if line_start < 1 or line_start > len(lines) or line_end < 1 or line_end > len(lines): + raise ValueError( + f"Selection region lineStart = {line_start}, lineEnd = {line_end}, 
lines length = {len(lines)}" + ) + + if line_start == line_end: + return [lines[line_start - 1]] + + selection = lines[line_start - 1 : line_end] + return "\n".join(selection) + def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: """ Get the patches for a given pull request event. @@ -236,7 +250,7 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] for git_change in git_changes ] - def get_changed_blobs(self, pull_request: GitPullRequest): + def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: """ Get the changed blobs in a pull request. @@ -271,33 +285,16 @@ def get_changed_blobs(self, pull_request: GitPullRequest): return changed_paths - def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> List[str]: - lines = file_contents.splitlines() - - if line_end - line_start < MIN_CONTEXT_LINES: - return lines - - if line_start < 1 or line_start > len(lines) or line_end < 1 or line_end > len(lines): - raise ValueError( - f"Selection region lineStart = {line_start}, lineEnd = {line_end}, lines length = {len(lines)}" - ) - - if line_start == line_end: - return [lines[line_start - 1]] - - return lines[line_start - 1 : line_end] - def _get_change(self, git_change, source_commit_head, condensed=False) -> List[str]: - return "\n".join(self._get_git_change(git_change["item"]["path"], source_commit_head, condensed)) - - def _get_git_change(self, file_path, source_commit_head, condensed=False) -> List[str]: + file_path = git_change["item"]["path"] try: original_content = self.read_all_text(file_path, check_if_exists=True) except AzureDevOpsServiceError: # File Not Found original_content = "" changed_content = self.read_all_text(file_path, commit_id=source_commit_head, check_if_exists=True) - return self._create_patch(original_content, changed_content, file_path, condensed) + patch = self._create_patch(original_content, changed_content, file_path, condensed) + return 
"\n".join(patch) def _create_patch( self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False @@ -316,21 +313,7 @@ def _create_patch( """ left = original_content.splitlines() if original_content else [] right = changed_content.splitlines() if changed_content else [] - return self._create_patch_list(left, right, file_path, condensed) - - def _create_patch_list(self, left: List[str], right: List[str], file_path: str, condensed=False) -> List[str]: - """ - Create a patch list for a given file. - - Args: - left (List[str]): The left side of the patch. - right (List[str]): The right side of the patch. - file_path (str): The file path. - condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. - Returns: - List[str]: The patch list. - """ needed_changes = self._calculate_minimum_change_needed(left, right) line, row = 1, 1 patch = [] @@ -487,7 +470,7 @@ def handle(self, msg) -> None: msg (func.QueueMessage): The Service Bus message. """ body = msg.get_body().decode("utf-8") - logging.info("Python ServiceBus queue trigger processed message: %s", body) + logging.debug("Python ServiceBus queue trigger processed message: %s", body) if "copilot:summary" in body: self._process_summary(body) elif "copilot:" in body: @@ -500,7 +483,7 @@ def _process_comment(self, body) -> None: Args: body (str): The Service Bus payload. """ - logging.info("Copilot Comment Alert Triggered") + logging.debug("Copilot Comment Alert Triggered") payload = json.loads(body) pr_id = self._get_pr_id(payload) @@ -512,7 +495,7 @@ def _process_comment(self, body) -> None: except Exception: diff = self.get_patches(pull_request_event=payload["resource"]) - logging.info("Copilot diff: %s", diff) + logging.debug("Copilot diff: %s", diff) diff = "\n".join(diff) question = f""" @@ -538,7 +521,7 @@ def _get_comment_id(self, payload) -> int: int: The comment ID. 
""" comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] - logging.info("Copilot Commet ID: %s", comment_id) + logging.debug("Copilot Commet ID: %s", comment_id) return comment_id def _process_summary(self, body) -> None: @@ -548,7 +531,7 @@ def _process_summary(self, body) -> None: Args: body (str): The Service Bus payload. """ - logging.info("Copilot Summary Alert Triggered") + logging.debug("Copilot Summary Alert Triggered") payload = json.loads(body) pr_id = self._get_pr_id(payload) @@ -558,11 +541,11 @@ def _process_summary(self, body) -> None: if "comment" in payload["resource"]: self._post_summary(payload, pr_id, link) else: - logging.info("Copilot Update from Updated PR") + logging.debug("Copilot Update from Updated PR") def _get_link(self, pr_id) -> str: link = f"https://{self.org}.visualstudio.com/{self.project}/_git/{self.repository_id}/pullrequest/{pr_id}" - logging.info("Copilot Link: %s", link) + logging.debug("Copilot Link: %s", link) return link def _get_pr_id(self, payload) -> int: @@ -579,7 +562,7 @@ def _get_pr_id(self, payload) -> int: pr_id = payload["resource"]["pullRequestId"] else: pr_id = payload["resource"]["pullRequest"]["pullRequestId"] - logging.info("Copilot PR ID: %s", pr_id) + logging.debug("Copilot PR ID: %s", pr_id) return pr_id def _post_summary(self, payload, pr_id, link) -> None: @@ -592,17 +575,11 @@ def _post_summary(self, payload, pr_id, link) -> None: link (str): The link to the PR. 
""" comment_id = payload["resource"]["comment"]["_links"]["threads"]["href"].split("/")[-1] - logging.info("Copilot Commet ID: %s", comment_id) - - os.putenv("RISK_SUMMARY", "false") - os.putenv("FILE_SUMMARY_FULL", "false") - os.putenv("TEST_SUMMARY", "false") - os.putenv("BUG_SUMMARY", "false") - os.putenv("SUMMARY_SUGGEST", "false") + logging.debug("Copilot Commet ID: %s", comment_id) diff = self.get_patch(pull_request_event=payload["resource"], pull_request_id=pr_id, comment_id=comment_id) diff = "\n".join(diff) - logging.info("Copilot diff: %s", diff) + logging.debug("Copilot diff: %s", diff) self.post_pr_summary(diff, link=link) From 68de50caaba02d84fbcf57f9cfe596f7abe025e3 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Thu, 18 May 2023 05:01:08 +0000 Subject: [PATCH 41/72] Add check_if_exists option and refactor patch creation in DevOpsClient --- src/gpt_review/repositories/devops.py | 158 ++++++++------------------ tests/test_devops.py | 4 +- 2 files changed, 48 insertions(+), 114 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 9d7d07ea..d4dfac88 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -109,6 +109,7 @@ def read_all_text( self, path: str, commit_id: str = None, + check_if_exists=True, **kwargs, ) -> str: """ @@ -127,6 +128,7 @@ def read_all_text( path=path, project=self.project, version_descriptor=GitVersionDescriptor(commit_id, version_type="commit") if commit_id else None, + check_if_exists=check_if_exists, **kwargs, ) return "".join(byte.decode("utf-8") for byte in byte_iterator) @@ -152,6 +154,7 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str pull_request_event (dict): The pull request event. pull_request_id (str): The Azure DevOps pull request ID. comment_id (str): The Azure DevOps comment ID. + condensed (bool): Whether to condense the diff. Returns: List[str]: The diff of the pull request. 
@@ -163,9 +166,37 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str project=self.project, ).thread_context - left_selection, right_selection = self._calculate_selection(thread_context, pull_request_event) + left, right = self._calculate_selection(thread_context, pull_request_event) - return self._create_patch(left_selection, right_selection, thread_context.file_path) + return self._create_patch(left, right, thread_context.file_path) + + def _create_patch(self, left, right, file_path): + changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] + + for i, j in itertools.product(range(len(left)), range(len(right))): + changes[i + 1][j + 1] = ( + changes[i][j] if left[i] == right[j] else 1 + min(changes[i][j + 1], changes[i + 1][j], changes[i][j]) + ) + + line, row = 1, 1 + patch = [file_path] + + while line < len(left) and row < len(right): + if changes[line][row] == changes[line - 1][row - 1]: + patch.append(left[line - 1]) + line += 1 + row += 1 + elif changes[line - 1][row] < changes[line][row - 1]: + patch.append(f"- {left[line - 1]}") + line += 1 + else: + patch.append(f"+ {right[row - 1]}") + row += 1 + + patch.extend(f"- {left[i - 1]}" for i in range(line, len(left) + 1)) + patch.extend(f"+ {right[j - 1]}" for j in range(row, len(right) + 1)) + + return patch def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: """ @@ -180,22 +211,18 @@ def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: Tuple[List[str], List[str]]: The left and right selections. 
""" - original_content = self.read_all_text(path=thread_context.file_path, check_if_exists=True) + original_content = self.read_all_text(path=thread_context.file_path) changed_content = self.read_all_text( path=thread_context.file_path, commit_id=pull_request["pullRequest"]["lastMergeSourceCommit"]["commitId"], - check_if_exists=True, ) - if not original_content and not changed_content: - raise ValueError("Both left and right selection cannot be None") - left_selection = ( self._get_selection( original_content, thread_context.left_file_start.line, thread_context.left_file_end.line ) if original_content and thread_context.left_file_start and thread_context.left_file_end - else None + else [] ) right_selection = ( @@ -203,7 +230,7 @@ def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line ) if changed_content and thread_context.right_file_start and thread_context.right_file_end - else None + else [] ) return left_selection, right_selection @@ -211,27 +238,14 @@ def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> str: lines = file_contents.splitlines() - if line_end - line_start < MIN_CONTEXT_LINES: - return lines - - if line_start < 1 or line_start > len(lines) or line_end < 1 or line_end > len(lines): - raise ValueError( - f"Selection region lineStart = {line_start}, lineEnd = {line_end}, lines length = {len(lines)}" - ) - - if line_start == line_end: - return [lines[line_start - 1]] - - selection = lines[line_start - 1 : line_end] - return "\n".join(selection) + return lines[line_start - 1 : line_end] if line_end - line_start > MIN_CONTEXT_LINES else lines - def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str]]: + def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ Get the patches for a given pull 
request event. Args: pull_request_event (Any): The pull request event to retrieve patches for. - condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. Returns: Iterable[List[str]]: An iterable of lists containing the patches for the pull request event. @@ -245,7 +259,6 @@ def get_patches(self, pull_request_event, condensed=False) -> Iterable[List[str] self._get_change( git_change, pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], - condensed, ) for git_change in git_changes ] @@ -285,93 +298,17 @@ def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: return changed_paths - def _get_change(self, git_change, source_commit_head, condensed=False) -> List[str]: + def _get_change(self, git_change, source_commit_head) -> List[str]: file_path = git_change["item"]["path"] try: - original_content = self.read_all_text(file_path, check_if_exists=True) + original_content = self.read_all_text(file_path) except AzureDevOpsServiceError: # File Not Found original_content = "" - changed_content = self.read_all_text(file_path, commit_id=source_commit_head, check_if_exists=True) - patch = self._create_patch(original_content, changed_content, file_path, condensed) + changed_content = self.read_all_text(file_path, commit_id=source_commit_head) + patch = self._create_patch(original_content, changed_content, file_path) return "\n".join(patch) - def _create_patch( - self, original_content: Optional[str], changed_content: Optional[str], file_path: str, condensed=False - ) -> List[str]: - """ - Create a patch for a given file. - - Args: - original_content (Optional[str]): The original content. - changed_content (Optional[str]): The changed content. - file_path (str): The file path. - condensed (bool, optional): If True, returns a condensed version of the patch. Defaults to False. - - Returns: - List[str]: The patch. 
- """ - left = original_content.splitlines() if original_content else [] - right = changed_content.splitlines() if changed_content else [] - - needed_changes = self._calculate_minimum_change_needed(left, right) - line, row = 1, 1 - patch = [] - - while line < len(left) and row < len(right): - if needed_changes[line][row] == needed_changes[line - 1][row - 1]: - patch.append(left[line - 1]) - line += 1 - row += 1 - elif needed_changes[line - 1][row] < needed_changes[line][row - 1]: - patch.append(f"- {left[line - 1]}") - line += 1 - else: - patch.append(f"+ {right[row - 1]}") - row += 1 - - while line <= len(left): - patch.append(f"- {left[line - 1]}") - line += 1 - - while row <= len(right): - patch.append(f"+ {right[row - 1]}") - row += 1 - - if condensed: - patch = self._get_condensed_patch(patch) - - patch.insert(0, file_path) - return patch - - def _get_condensed_patch(self, patch: List[str]) -> List[str]: - """ - Get a condensed version of the patch. - - Args: - patch (List[str]): The patch. - - Returns: - List[str]: The condensed patch. - """ - buffer = [] - result = [] - trailing_context = 0 - - for line in patch: - if line.startswith("+") or line.startswith("-"): - result.extend(buffer[-SURROUNDING_CONTEXT:]) - buffer.clear() - result.append(line) - trailing_context = SURROUNDING_CONTEXT - elif trailing_context > 0: - result.append(line) - trailing_context -= 1 - else: - buffer.append(line) - - return result - def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> List[List[int]]: """ Calculate the minimum change needed to transform the left side to the right side. 
@@ -385,13 +322,10 @@ def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> """ changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] - for i, j in itertools.product(range(len(left) + 1), range(len(right) + 1)): - if i == 0 or j == 0: - changes[i][j] = 0 - elif left[i - 1] == right[j - 1]: - changes[i][j] = changes[i - 1][j - 1] - else: - changes[i][j] = 1 + min(changes[i - 1][j], changes[i][j - 1], changes[i - 1][j - 1]) + for i, j in itertools.product(range(len(left)), range(len(right))): + changes[i + 1][j + 1] = ( + changes[i][j] if left[i] == right[j] else 1 + min(changes[i][j + 1], changes[i + 1][j], changes[i][j]) + ) return changes diff --git a/tests/test_devops.py b/tests/test_devops.py index a76edb3c..fc7c42f2 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -426,7 +426,7 @@ def test_int_get_patch(devops_client: DevOpsClient) -> None: def get_patch_pr_comment_test(devops_function: DevOpsFunction, expected_len: int) -> None: patch = devops_function.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) patch = "\n".join(patch) - assert len(patch) == expected_len + assert len(patch.splitlines()) == expected_len def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_function: DevOpsFunction) -> None: @@ -435,4 +435,4 @@ def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_f @pytest.mark.integration def test_int_get_patch_pr_comment(devops_function: DevOpsFunction) -> None: - get_patch_pr_comment_test(devops_function, 3348) + get_patch_pr_comment_test(devops_function, 3079) From 3a916c3b7060d783377f4e260968326b84f52d18 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Thu, 18 May 2023 06:11:28 +0000 Subject: [PATCH 42/72] Add ask prompt YAML and update prompt templates, fix DevOps client methods --- src/gpt_review/constants.py | 1 + src/gpt_review/prompts/_prompt.py | 6 + src/gpt_review/prompts/prompt_ask.yaml | 7 + 
src/gpt_review/prompts/prompt_bug.yaml | 6 +- src/gpt_review/prompts/prompt_coverage.yaml | 8 +- src/gpt_review/prompts/prompt_summary.yaml | 8 +- src/gpt_review/repositories/devops.py | 150 ++++++++------------ tests/test_devops.py | 4 +- 8 files changed, 88 insertions(+), 102 deletions(-) create mode 100644 src/gpt_review/prompts/prompt_ask.yaml diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index 08ff0f73..bd6c9b49 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -33,6 +33,7 @@ AZURE_EMBEDDING_MODEL = "text-embedding-ada-002" AZURE_KEY_VAULT = "https://dciborow-openai.vault.azure.net/" +ASK_PROMPT_YAML = "prompt_ask.yaml" BUG_PROMPT_YAML = "prompt_bug.yaml" COVERAGE_PROMPT_YAML = "prompt_coverage.yaml" SUMMARY_PROMPT_YAML = "prompt_summary.yaml" diff --git a/src/gpt_review/prompts/_prompt.py b/src/gpt_review/prompts/_prompt.py index 12fe4fae..c18f32d6 100644 --- a/src/gpt_review/prompts/_prompt.py +++ b/src/gpt_review/prompts/_prompt.py @@ -42,3 +42,9 @@ def load_summary_yaml() -> LangChainPrompt: """Load the summary yaml.""" yaml_path = os.getenv("PROMPT_SUMMARY", str(Path(__file__).parents[0].joinpath(C.SUMMARY_PROMPT_YAML))) return LangChainPrompt.load(yaml_path) + + +def load_ask_yaml() -> LangChainPrompt: + """Load the summary yaml.""" + yaml_path = os.getenv("PROMPT_ASK", str(Path(__file__).parents[0].joinpath(C.ASK_PROMPT_YAML))) + return LangChainPrompt.load(yaml_path) diff --git a/src/gpt_review/prompts/prompt_ask.yaml b/src/gpt_review/prompts/prompt_ask.yaml new file mode 100644 index 00000000..7697e670 --- /dev/null +++ b/src/gpt_review/prompts/prompt_ask.yaml @@ -0,0 +1,7 @@ +_type: prompt +input_variables: + ["diff", "ask"] +template: | + {diff} + + {ask} diff --git a/src/gpt_review/prompts/prompt_bug.yaml b/src/gpt_review/prompts/prompt_bug.yaml index c6451919..530f1308 100644 --- a/src/gpt_review/prompts/prompt_bug.yaml +++ b/src/gpt_review/prompts/prompt_bug.yaml @@ -2,8 +2,8 @@ _type: prompt 
input_variables: ["diff"] template: | - Summarize the following file changed in a pull request submitted by a developer on GitHub, - focusing on major modifications, additions, deletions, and any significant updates within the files. - Do not include the file name in the summary and list the summary with bullet points. + Provide a concise summary of the bug found in the code, describing its characteristics, + location, and potential effects on the overall functionality and performance of the application. + Present the potential issues and errors first, following by the most important findings, in your summary {diff} diff --git a/src/gpt_review/prompts/prompt_coverage.yaml b/src/gpt_review/prompts/prompt_coverage.yaml index 530f1308..8dae596c 100644 --- a/src/gpt_review/prompts/prompt_coverage.yaml +++ b/src/gpt_review/prompts/prompt_coverage.yaml @@ -2,8 +2,10 @@ _type: prompt input_variables: ["diff"] template: | - Provide a concise summary of the bug found in the code, describing its characteristics, - location, and potential effects on the overall functionality and performance of the application. - Present the potential issues and errors first, following by the most important findings, in your summary + You are an experienced software developer. + + Generate unit test cases for the code submitted + in the pull request, ensuring comprehensive coverage of all functions, methods, + and scenarios to validate the correctness and reliability of the implementation. {diff} diff --git a/src/gpt_review/prompts/prompt_summary.yaml b/src/gpt_review/prompts/prompt_summary.yaml index 8dae596c..c6451919 100644 --- a/src/gpt_review/prompts/prompt_summary.yaml +++ b/src/gpt_review/prompts/prompt_summary.yaml @@ -2,10 +2,8 @@ _type: prompt input_variables: ["diff"] template: | - You are an experienced software developer. 
- - Generate unit test cases for the code submitted - in the pull request, ensuring comprehensive coverage of all functions, methods, - and scenarios to validate the correctness and reliability of the implementation. + Summarize the following file changed in a pull request submitted by a developer on GitHub, + focusing on major modifications, additions, deletions, and any significant updates within the files. + Do not include the file name in the summary and list the summary with bullet points. {diff} diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index d4dfac88..39cfdb67 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -26,6 +26,7 @@ from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup from gpt_review._review import _summarize_files +from gpt_review.prompts._prompt import load_ask_yaml from gpt_review.repositories._repository import _RepositoryClient MIN_CONTEXT_LINES = 5 @@ -79,9 +80,13 @@ def create_comment(self, pull_request_id: int, comment_id: int, text: str, **kwa Returns: Comment: The response from the API. """ - new_comment = Comment(content=text) return self.client.create_comment( - new_comment, self.repository_id, pull_request_id, comment_id, project=self.project, **kwargs + comment=Comment(content=text), + repository_id=self.repository_id, + pull_request_id=pull_request_id, + thread_id=comment_id, + project=self.project, + **kwargs, ) def update_pr(self, pull_request_id, title=None, description=None, **kwargs) -> GitPullRequest: @@ -89,7 +94,7 @@ def update_pr(self, pull_request_id, title=None, description=None, **kwargs) -> Update a pull request. Args: - pull_request_id (str): The Azure DevOps pull request ID. + pull_request_id (int): The Azure DevOps pull request ID. title (str): The title of the pull request. description (str): The description of the pull request. **kwargs: Any additional keyword arguments. 
@@ -118,20 +123,25 @@ def read_all_text( Args: path (str): The path to the file. commit_id (str): The commit ID. + check_if_exists (bool): Whether to check if the file exists. **kwargs: Any additional keyword arguments. Returns: str: The text of the file. """ - byte_iterator = self.client.get_item_content( - repository_id=self.repository_id, - path=path, - project=self.project, - version_descriptor=GitVersionDescriptor(commit_id, version_type="commit") if commit_id else None, - check_if_exists=check_if_exists, - **kwargs, - ) - return "".join(byte.decode("utf-8") for byte in byte_iterator) + try: + byte_iterator = self.client.get_item_content( + repository_id=self.repository_id, + path=path, + project=self.project, + version_descriptor=GitVersionDescriptor(commit_id, version_type="commit") if commit_id else None, + check_if_exists=check_if_exists, + **kwargs, + ) + return "".join(byte.decode("utf-8") for byte in byte_iterator).splitlines() + except AzureDevOpsServiceError: + # File Not Found + return "" @staticmethod def process_comment_payload(payload: str) -> str: @@ -154,7 +164,6 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str pull_request_event (dict): The pull request event. pull_request_id (str): The Azure DevOps pull request ID. comment_id (str): The Azure DevOps comment ID. - condensed (bool): Whether to condense the diff. Returns: List[str]: The diff of the pull request. 
@@ -166,11 +175,23 @@ def get_patch(self, pull_request_event, pull_request_id, comment_id) -> List[str project=self.project, ).thread_context - left, right = self._calculate_selection(thread_context, pull_request_event) + commit_id = pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"] + left, right = self._calculate_selection(thread_context, commit_id) return self._create_patch(left, right, thread_context.file_path) - def _create_patch(self, left, right, file_path): + def _create_patch(self, left, right, file_path) -> List: + """ + Create a patch. + + Args: + left (List[str]): The left side of the diff. + right (List[str]): The right side of the diff. + file_path (str): The file path. + + Returns: + List: The patch. + """ changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] for i, j in itertools.product(range(len(left)), range(len(right))): @@ -198,47 +219,37 @@ def _create_patch(self, left, right, file_path): return patch - def _calculate_selection(self, thread_context, pull_request) -> Tuple[str, str]: + def _calculate_selection(self, thread, commit_id) -> Tuple[str, str]: """ Calculate the selection for a given thread context. Args: - thread_context (CommentThreadContext): The thread context. - original_content (str): The original content. - changed_content (str): The changed content. + thread (GitPullRequestCommentThread): The thread context. + commit_id (str): The commit ID. Returns: - Tuple[List[str], List[str]]: The left and right selections. + Tuple[str, str]: The left and right selection. 
""" - original_content = self.read_all_text(path=thread_context.file_path) - changed_content = self.read_all_text( - path=thread_context.file_path, - commit_id=pull_request["pullRequest"]["lastMergeSourceCommit"]["commitId"], - ) + original_content = self.read_all_text(path=thread.file_path) + changed_content = self.read_all_text(path=thread.file_path, commit_id=commit_id) left_selection = ( - self._get_selection( - original_content, thread_context.left_file_start.line, thread_context.left_file_end.line - ) - if original_content and thread_context.left_file_start and thread_context.left_file_end + self._get_selection(original_content, thread.left_file_start.line, thread.left_file_end.line) + if thread.left_file_start and thread.left_file_end else [] ) right_selection = ( - self._get_selection( - changed_content, thread_context.right_file_start.line, thread_context.right_file_end.line - ) - if changed_content and thread_context.right_file_start and thread_context.right_file_end + self._get_selection(changed_content, thread.right_file_start.line, thread.right_file_end.line) + if thread.right_file_start and thread.right_file_end else [] ) return left_selection, right_selection - def _get_selection(self, file_contents: str, line_start: int, line_end: int) -> str: - lines = file_contents.splitlines() - - return lines[line_start - 1 : line_end] if line_end - line_start > MIN_CONTEXT_LINES else lines + def _get_selection(self, lines: str, line_start: int, line_end: int) -> str: + return lines[line_start - 1 : line_end] if line_end - line_start >= MIN_CONTEXT_LINES else lines def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ @@ -250,16 +261,11 @@ def get_patches(self, pull_request_event) -> Iterable[List[str]]: Returns: Iterable[List[str]]: An iterable of lists containing the patches for the pull request event. 
""" - pull_request_id = pull_request_event["pullRequest"]["pullRequestId"] - if not pull_request_id: - raise ValueError("pull_request_event.pullRequest is required") + pull_request = pull_request_event["pullRequest"] - git_changes = self.get_changed_blobs(pull_request_event["pullRequest"]) + git_changes = self.get_changed_blobs(pull_request) return [ - self._get_change( - git_change, - pull_request_event["pullRequest"]["lastMergeSourceCommit"]["commitId"], - ) + self._get_change(git_change, pull_request["lastMergeSourceCommit"]["commitId"]) for git_change in git_changes ] @@ -274,11 +280,11 @@ def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: List[Dict[str, str]]: The changed blobs. """ changed_paths = [] - commit_diff_within_pr = None + pr_commits = None skip = 0 while True: - commit_diff_within_pr = self.client.get_commit_diffs( + pr_commits = self.client.get_commit_diffs( repository_id=self.repository_id, project=self.project, diff_common_commit=False, @@ -289,46 +295,22 @@ def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" ), ) - changed_paths.extend( - [change for change in commit_diff_within_pr.changes if "isFolder" not in change["item"]] - ) - skip += len(commit_diff_within_pr.changes) - if commit_diff_within_pr.all_changes_included: + changed_paths.extend([change for change in pr_commits.changes if "isFolder" not in change["item"]]) + skip += len(pr_commits.changes) + if pr_commits.all_changes_included: break return changed_paths def _get_change(self, git_change, source_commit_head) -> List[str]: file_path = git_change["item"]["path"] - try: - original_content = self.read_all_text(file_path) - except AzureDevOpsServiceError: - # File Not Found - original_content = "" + + original_content = self.read_all_text(file_path) changed_content = self.read_all_text(file_path, commit_id=source_commit_head) + patch = 
self._create_patch(original_content, changed_content, file_path) return "\n".join(patch) - def _calculate_minimum_change_needed(self, left: List[str], right: List[str]) -> List[List[int]]: - """ - Calculate the minimum change needed to transform the left side to the right side. - - Args: - left (List[str]): The left side of the patch. - right (List[str]): The right side of the patch. - - Returns: - List[List[int]]: The minimum change needed. - """ - changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] - - for i, j in itertools.product(range(len(left)), range(len(right))): - changes[i + 1][j + 1] = ( - changes[i][j] if left[i] == right[j] else 1 + min(changes[i][j + 1], changes[i + 1][j], changes[i][j]) - ) - - return changes - class DevOpsClient(_DevOpsClient): """Azure DevOps client Wrapper for working with.""" @@ -421,7 +403,6 @@ def _process_comment(self, body) -> None: payload = json.loads(body) pr_id = self._get_pr_id(payload) - comment_id = self._get_comment_id(payload) try: @@ -432,16 +413,10 @@ def _process_comment(self, body) -> None: logging.debug("Copilot diff: %s", diff) diff = "\n".join(diff) - question = f""" - {diff} + question = load_ask_yaml().format(diff=diff, ask=_DevOpsClient.process_comment_payload(body)) - {_DevOpsClient.process_comment_payload(body)} - """ + response = _ask(question=question, max_tokens=1000) - response = _ask( - question=question, - max_tokens=1000, - ) self.create_comment(pull_request_id=pr_id, comment_id=comment_id, text=response["response"]) def _get_comment_id(self, payload) -> int: @@ -469,13 +444,10 @@ def _process_summary(self, body) -> None: payload = json.loads(body) pr_id = self._get_pr_id(payload) - link = self._get_link(pr_id) if "comment" in payload["resource"]: self._post_summary(payload, pr_id, link) - else: - logging.debug("Copilot Update from Updated PR") def _get_link(self, pr_id) -> str: link = f"https://{self.org}.visualstudio.com/{self.project}/_git/{self.repository_id}/pullrequest/{pr_id}" 
diff --git a/tests/test_devops.py b/tests/test_devops.py index fc7c42f2..a76edb3c 100644 --- a/tests/test_devops.py +++ b/tests/test_devops.py @@ -426,7 +426,7 @@ def test_int_get_patch(devops_client: DevOpsClient) -> None: def get_patch_pr_comment_test(devops_function: DevOpsFunction, expected_len: int) -> None: patch = devops_function.get_patches(pull_request_event=PR_COMMENT_PAYLOAD["resource"]) patch = "\n".join(patch) - assert len(patch.splitlines()) == expected_len + assert len(patch) == expected_len def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_function: DevOpsFunction) -> None: @@ -435,4 +435,4 @@ def test_get_patch_pr_comment(mock_openai: None, mock_ado_client: None, devops_f @pytest.mark.integration def test_int_get_patch_pr_comment(devops_function: DevOpsFunction) -> None: - get_patch_pr_comment_test(devops_function, 3079) + get_patch_pr_comment_test(devops_function, 3348) From 4b0afa95db249169d6f5ed4a51d2426962950a34 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Fri, 19 May 2023 00:51:52 +0000 Subject: [PATCH 43/72] Refactor DevOpsClient: add type hints, extract methods, and improve readability --- src/gpt_review/repositories/devops.py | 74 +++++++++++++++++---------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 39cfdb67..c296d420 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -16,6 +16,7 @@ GitPullRequest, GitTargetVersionDescriptor, GitVersionDescriptor, + GitPullRequestCommentThread, ) from knack import CLICommandsLoader from knack.arguments import ArgumentsContext @@ -219,7 +220,7 @@ def _create_patch(self, left, right, file_path) -> List: return patch - def _calculate_selection(self, thread, commit_id) -> Tuple[str, str]: + def _calculate_selection(self, thread: GitPullRequestCommentThread, commit_id: str) -> Tuple[str, str]: """ Calculate the selection for 
a given thread context. @@ -231,26 +232,25 @@ def _calculate_selection(self, thread, commit_id) -> Tuple[str, str]: Tuple[str, str]: The left and right selection. """ - original_content = self.read_all_text(path=thread.file_path) - changed_content = self.read_all_text(path=thread.file_path, commit_id=commit_id) + original_content, changed_content = self._load_content(file_path=thread.file_path, commit_id=commit_id) + + def get_selection(lines: str, line_start: int, line_end: int) -> str: + return lines[line_start - 1 : line_end] if line_end - line_start >= MIN_CONTEXT_LINES else lines left_selection = ( - self._get_selection(original_content, thread.left_file_start.line, thread.left_file_end.line) + get_selection(original_content, thread.left_file_start.line, thread.left_file_end.line) if thread.left_file_start and thread.left_file_end else [] ) right_selection = ( - self._get_selection(changed_content, thread.right_file_start.line, thread.right_file_end.line) + get_selection(changed_content, thread.right_file_start.line, thread.right_file_end.line) if thread.right_file_start and thread.right_file_end else [] ) return left_selection, right_selection - def _get_selection(self, lines: str, line_start: int, line_end: int) -> str: - return lines[line_start - 1 : line_end] if line_end - line_start >= MIN_CONTEXT_LINES else lines - def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ Get the patches for a given pull request event. 
@@ -263,13 +263,13 @@ def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ pull_request = pull_request_event["pullRequest"] - git_changes = self.get_changed_blobs(pull_request) + git_changes = self._get_changed_blobs(pull_request) return [ self._get_change(git_change, pull_request["lastMergeSourceCommit"]["commitId"]) for git_change in git_changes ] - def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: + def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: """ Get the changed blobs in a pull request. @@ -302,15 +302,17 @@ def get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: return changed_paths - def _get_change(self, git_change, source_commit_head) -> List[str]: + def _get_change(self, git_change, commit_id) -> List[str]: file_path = git_change["item"]["path"] - original_content = self.read_all_text(file_path) - changed_content = self.read_all_text(file_path, commit_id=source_commit_head) + original_content, changed_content = self._load_content(file_path, commit_id) patch = self._create_patch(original_content, changed_content, file_path) return "\n".join(patch) + def _load_content(self, file_path, commit_id): + return self.read_all_text(file_path), self.read_all_text(file_path, commit_id=commit_id) + class DevOpsClient(_DevOpsClient): """Azure DevOps client Wrapper for working with.""" @@ -338,18 +340,7 @@ def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: if link and access_token: review = _summarize_files(diff) - parsed_url = urlparse(link) - - if "dev.azure.com" in parsed_url.netloc: - org = link.split("/")[3] - project = link.split("/")[4] - repo = link.split("/")[6] - pr_id = link.split("/")[8] - else: - org = link.split("/")[2].split(".")[0] - project = link.split("/")[3] - repo = link.split("/")[5] - pr_id = link.split("/")[7] + org, project, repo, pr_id = DevOpsClient._parse_url(link) DevOpsClient(pat=access_token, org=org, project=project, 
repository_id=repo).update_pr( pull_request_id=pr_id, @@ -360,6 +351,22 @@ def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: logging.warning("No PR to post too") return {"response": "No PR to post too"} + @staticmethod + def _parse_url(link): + parsed_url = urlparse(link) + + if "dev.azure.com" in parsed_url.netloc: + org = link.split("/")[3] + project = link.split("/")[4] + repo = link.split("/")[6] + pr_id = link.split("/")[8] + else: + org = link.split("/")[2].split(".")[0] + project = link.split("/")[3] + repo = link.split("/")[5] + pr_id = link.split("/")[7] + return org, project, repo, pr_id + @staticmethod def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: """ @@ -373,6 +380,21 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Returns: str: The diff of the PR. """ + link = os.getenv("LINK") + access_token = os.getenv("ADO_TOKEN", access_token) + + if link and access_token: + org, project, repo, pr_id = DevOpsClient._parse_url(link) + + client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) + + diff = client.get_patches(pull_request_event=payload["resource"]) + diff = "\n".join(diff) + + return {"response": "PR posted"} + + logging.warning("No PR to post too") + return {"response": "No PR to post too"} class DevOpsFunction(DevOpsClient): @@ -500,7 +522,7 @@ def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str] Returns: Dict[str, str]: The response. """ - # diff = _DevOpsClient.get_pr_diff(repository, pull_request, access_token) + diff = DevOpsClient.get_pr_diff(repository, pull_request, access_token) with open(diff, "r", encoding="utf8") as file: diff_contents = file.read() From a06e25f74b16593fcd8ba4fd202588c3b10662f9 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 24 May 2023 18:14:24 +0000 Subject: [PATCH 44/72] Update dependencies and fix pull request diff retrieval in DevOpsClient. 
--- pyproject.toml | 4 ++-- src/gpt_review/repositories/devops.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1e90e0d1..514f8b87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ requires-python = ">=3.8.1" dynamic = ["version"] dependencies = [ 'azure-devops', - 'azure-functions', + 'azure-functions; python_version <= "3.10"', 'azure-identity', 'azure-keyvault', 'azure-keyvault-secrets', @@ -146,7 +146,7 @@ executionEnvironments = [ ] [tool.pytest.ini_options] -addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append -m unit" +addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append" pythonpath = [ "src" ] diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index c296d420..12939dd3 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -387,8 +387,8 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: org, project, repo, pr_id = DevOpsClient._parse_url(link) client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) - - diff = client.get_patches(pull_request_event=payload["resource"]) + pull_request = client.client.get_pull_request_by_id(pull_request_id=pr_id) + diff = client.get_patches(pull_request_event=pull_request) diff = "\n".join(diff) return {"response": "PR posted"} From 31cd10ee8c36f5713dbef461ebfbfcee6bedeaca Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 24 May 2023 18:46:37 +0000 Subject: [PATCH 45/72] fix last tests --- src/gpt_review/repositories/devops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 12939dd3..84637009 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -512,7 +512,7 @@ def _post_summary(self, 
payload, pr_id, link) -> None: self.post_pr_summary(diff, link=link) -def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: +def _review(repository=None, pull_request=None, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: """Review Azure DevOps PR with Open AI, and post response as a comment. Args: @@ -522,9 +522,11 @@ def _review(diff: str = ".diff", link=None, access_token=None) -> Dict[str, str] Returns: Dict[str, str]: The response. """ - diff = DevOpsClient.get_pr_diff(repository, pull_request, access_token) - with open(diff, "r", encoding="utf8") as file: - diff_contents = file.read() + if repository and pull_request: + diff_contents = DevOpsClient.get_pr_diff(repository, pull_request, access_token) + else: + with open(diff, "r", encoding="utf8") as file: + diff_contents = file.read() DevOpsClient.post_pr_summary(diff_contents, link, access_token) return {"response": "Review posted as a comment."} From 6d9283d0ce6a57e99e91d1c926596852bd37fc33 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 24 May 2023 19:13:13 +0000 Subject: [PATCH 46/72] Add default link for PR if not provided in environment variables. --- src/gpt_review/repositories/devops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 84637009..cf988d8e 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -380,7 +380,7 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Returns: str: The diff of the PR. 
""" - link = os.getenv("LINK") + link = os.getenv("LINK", f"https://{patch_repo.split("/")[0]}.visualstudio.com/{patch_repo.split("/")[1]}/_git/{patch_repo.split("/")[2]}/pullrequest/{patch_pr}") access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: From 9bf097556e34fbf4f3ad689c721bb1ff68e4495e Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 24 May 2023 19:13:20 +0000 Subject: [PATCH 47/72] Fix link formatting in DevOpsClient method. --- src/gpt_review/repositories/devops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index cf988d8e..0f0a211d 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -380,7 +380,7 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Returns: str: The diff of the PR. """ - link = os.getenv("LINK", f"https://{patch_repo.split("/")[0]}.visualstudio.com/{patch_repo.split("/")[1]}/_git/{patch_repo.split("/")[2]}/pullrequest/{patch_pr}") + link = os.getenv("LINK", f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}") access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: From b2aa3ee905b0ca796a883a4671422d0e0b35a0b2 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Wed, 24 May 2023 19:14:56 +0000 Subject: [PATCH 48/72] Refactor DevOpsClient method to handle ADO pointers in diff PR. --- src/gpt_review/repositories/devops.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 0f0a211d..e7e66db9 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -373,14 +373,17 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Get the diff of a PR. 
Args: - patch_repo (str): The repo. + patch_repo (str): The pointer to ADO in the format, org/project/repo patch_pr (str): The PR. access_token (str): The GitHub access token. Returns: str: The diff of the PR. """ - link = os.getenv("LINK", f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}") + link = os.getenv( + "LINK", + f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + ) access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: From 3a0e9ac691029cc5322209f329ecb318233e4155 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Wed, 24 May 2023 19:05:39 +0000 Subject: [PATCH 49/72] Add PR diff summarization and update .gitignore, pyproject.toml --- .gitignore | 4 +++ src/gpt_review/repositories/devops.py | 28 ++++++++++++++++++ src/summarizations/_summarizations.py | 42 +++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 src/summarizations/_summarizations.py diff --git a/.gitignore b/.gitignore index 61684247..17250301 100644 --- a/.gitignore +++ b/.gitignore @@ -145,3 +145,7 @@ __blobstorage__ __queuestorage__ __azurite_db*__.json .python_packages + +#summarization development +src/summarizations/pull_request_list.csv +src/summarizations/constants.py diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index e7e66db9..8d49a066 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -399,6 +399,34 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: logging.warning("No PR to post too") return {"response": "No PR to post too"} + @staticmethod + def get_pr_diff_link_parameter(pull_request_link=None, access_token=None) -> str: + """ + Get the diff of a PR. + + Args: + patch_repo (str): The repo. 
+ patch_pr (str): The PR. + access_token (str): The GitHub access token. + + Returns: + str: The diff of the PR. + """ + access_token = os.getenv("ADO_TOKEN", access_token) + + if pull_request_link and access_token: + org, project, repo, pr_id = DevOpsClient._parse_url(pull_request_link) + + client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) + + diff = client.get_patches(pull_request_event=payload["resource"]) + diff = "\n".join(diff) + + return {"response": "PR posted"} + + logging.warning("No PR to post too") + return {"response": "No PR to post too"} + class DevOpsFunction(DevOpsClient): """Azure Function for process Service Messages from Azure DevOps.""" diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py new file mode 100644 index 00000000..9fc4ea46 --- /dev/null +++ b/src/summarizations/_summarizations.py @@ -0,0 +1,42 @@ +"""Summarize the changes in a release.""" +from curses.ascii import isdigit +import csv +import os +from gpt_review._review import _summarize_files +from gpt_review.repositories.devops import DevOpsClient +import summarizations.constants as C + +access_token = os.getenv("MSDATA_ADO_TOKEN") + +# The repository and pull_request parameters are not being used in the function +# This might be problematic for the for loop +# pr_id is not being used in the function, why is that? 
+# diff = DevOpsClient.get_pr_diff(repository, pull_request, access_token) +# diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) +# diff_summarization = _summarize_files(diff) +# print(diff_summarization) + +pull_request_ids = [] +with open("/workspaces/gpt-review/src/summarizations/pull_request_list.csv", "r") as f: + csv_file = csv.reader(f) + for line in csv_file: + if line[0].isdigit(): + pull_request_ids.append(line[0]) + +# do 10 summaries at a time +lower_number = 0 +upper_number = 10 +pull_request_ids_length = len(pull_request_ids) +remainder = len(pull_request_ids) % upper_number +summary_group = [] +summaries = [] + +while lower_number < upper_number: + for pr_id in pull_request_ids[lower_number:upper_number]: + pull_request_link = C.PRROOT + pr_id + diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) + diff_summarization = _summarize_files(diff) + summary_group.append(diff_summarization) + lower_number += 9 if lower_number == 0 else 10 + upper_number += 10 + summaries.append(_summarize_files(summary_group)) From 7afc10deecac875da960fdc82f5ffb47a1e5af2f Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Wed, 24 May 2023 19:26:50 +0000 Subject: [PATCH 50/72] Added post_summary parameter to post PR summary or return it as response. 
--- src/gpt_review/repositories/devops.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 8d49a066..bd14483f 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -18,6 +18,7 @@ GitVersionDescriptor, GitPullRequestCommentThread, ) +from httpx import post from knack import CLICommandsLoader from knack.arguments import ArgumentsContext from knack.commands import CommandGroup @@ -318,7 +319,7 @@ class DevOpsClient(_DevOpsClient): """Azure DevOps client Wrapper for working with.""" @staticmethod - def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: + def post_pr_summary(diff, link=None, access_token=None, post_summary=False) -> Dict[str, str]: """ Get a review of a PR. @@ -340,13 +341,15 @@ def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: if link and access_token: review = _summarize_files(diff) - org, project, repo, pr_id = DevOpsClient._parse_url(link) + if post_summary: + org, project, repo, pr_id = DevOpsClient._parse_url(link) - DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( - pull_request_id=pr_id, - description=review, - ) - return {"response": "PR posted"} + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( + pull_request_id=pr_id, + description=review, + ) + return {"response": "PR posted"} + return {"response": review} logging.warning("No PR to post too") return {"response": "No PR to post too"} @@ -543,7 +546,9 @@ def _post_summary(self, payload, pr_id, link) -> None: self.post_pr_summary(diff, link=link) -def _review(repository=None, pull_request=None, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: +def _review( + repository=None, pull_request=None, diff: str = ".diff", link=None, access_token=None, post_comment=False +) -> Dict[str, str]: 
"""Review Azure DevOps PR with Open AI, and post response as a comment. Args: @@ -559,8 +564,7 @@ def _review(repository=None, pull_request=None, diff: str = ".diff", link=None, with open(diff, "r", encoding="utf8") as file: diff_contents = file.read() - DevOpsClient.post_pr_summary(diff_contents, link, access_token) - return {"response": "Review posted as a comment."} + return DevOpsClient.post_pr_summary(diff_contents, link, access_token, post_summary=post_comment) def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: From 303610186acdc579d3cbf3a18ac52cbcb4be2d0e Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Wed, 24 May 2023 20:25:58 +0000 Subject: [PATCH 51/72] Refactor pull request summarization code into functions. --- src/summarizations/_summarizations.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 9fc4ea46..47bb59ae 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -16,6 +16,7 @@ # diff_summarization = _summarize_files(diff) # print(diff_summarization) +# todo make this a function load_pull_request_ids() pull_request_ids = [] with open("/workspaces/gpt-review/src/summarizations/pull_request_list.csv", "r") as f: csv_file = csv.reader(f) @@ -23,20 +24,11 @@ if line[0].isdigit(): pull_request_ids.append(line[0]) -# do 10 summaries at a time -lower_number = 0 -upper_number = 10 -pull_request_ids_length = len(pull_request_ids) -remainder = len(pull_request_ids) % upper_number -summary_group = [] +# todo make this into a function summarize_pull_requests() summaries = [] - -while lower_number < upper_number: - for pr_id in pull_request_ids[lower_number:upper_number]: - pull_request_link = C.PRROOT + pr_id - diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) 
- diff_summarization = _summarize_files(diff) - summary_group.append(diff_summarization) - lower_number += 9 if lower_number == 0 else 10 - upper_number += 10 - summaries.append(_summarize_files(summary_group)) +# todo do all summaries first and then summarize the summaries 10 at a time +for pr_id in pull_request_ids: + pull_request_link = C.PRROOT + pr_id + diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) + diff_summarization = _summarize_files(diff) + summaries.append(diff_summarization) From 256e215d6388808511c9b00f0373cfd7886552d4 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 25 May 2023 17:52:15 +0000 Subject: [PATCH 52/72] Fix handling of pull_request_event and improve URL parsing in DevOpsClient --- src/gpt_review/repositories/devops.py | 32 +++++++++++++-------- src/summarizations/_summarizations.py | 41 ++++++++++++++++----------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index bd14483f..167965ce 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -4,7 +4,8 @@ import json import logging import os -from typing import Dict, Iterable, List, Optional, Tuple +import urllib.parse +from typing import Dict, Iterable, List, Tuple from urllib.parse import urlparse from azure.devops.connection import Connection @@ -262,12 +263,15 @@ def get_patches(self, pull_request_event) -> Iterable[List[str]]: Returns: Iterable[List[str]]: An iterable of lists containing the patches for the pull request event. 
""" - pull_request = pull_request_event["pullRequest"] + + if isinstance(pull_request_event, dict): + pull_request = GitPullRequest(pull_request_event["pullRequest"]) + else: + pull_request = pull_request_event git_changes = self._get_changed_blobs(pull_request) return [ - self._get_change(git_change, pull_request["lastMergeSourceCommit"]["commitId"]) - for git_change in git_changes + self._get_change(git_change, pull_request.last_merge_source_commit.commit_id) for git_change in git_changes ] def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: @@ -290,10 +294,10 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: project=self.project, diff_common_commit=False, base_version_descriptor=GitBaseVersionDescriptor( - base_version=pull_request["lastMergeSourceCommit"]["commitId"], base_version_type="commit" + base_version=pull_request.last_merge_source_commit.commit_id, base_version_type="commit" ), target_version_descriptor=GitTargetVersionDescriptor( - target_version=pull_request["lastMergeTargetCommit"]["commitId"], target_version_type="commit" + target_version=pull_request.last_merge_target_commit.commit_id, target_version_type="commit" ), ) changed_paths.extend([change for change in pr_commits.changes if "isFolder" not in change["item"]]) @@ -383,9 +387,11 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Returns: str: The diff of the PR. """ - link = os.getenv( - "LINK", - f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + link = urllib.parse.unquote( + os.getenv( + "LINK", + f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + ) ) access_token = os.getenv("ADO_TOKEN", access_token) @@ -415,14 +421,16 @@ def get_pr_diff_link_parameter(pull_request_link=None, access_token=None) -> str Returns: str: The diff of the PR. 
""" - access_token = os.getenv("ADO_TOKEN", access_token) + # TODO uncomment this later + # access_token = os.getenv("ADO_TOKEN", access_token) if pull_request_link and access_token: - org, project, repo, pr_id = DevOpsClient._parse_url(pull_request_link) + org, project, repo, pr_id = DevOpsClient._parse_url(urllib.parse.unquote(pull_request_link)) client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) - diff = client.get_patches(pull_request_event=payload["resource"]) + pull_request = client.client.get_pull_request_by_id(pull_request_id=pr_id) + diff = client.get_patches(pull_request_event=pull_request) diff = "\n".join(diff) return {"response": "PR posted"} diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 47bb59ae..607aae51 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,12 +1,13 @@ """Summarize the changes in a release.""" -from curses.ascii import isdigit -import csv +# from curses.ascii import isdigit +# import csv import os from gpt_review._review import _summarize_files from gpt_review.repositories.devops import DevOpsClient import summarizations.constants as C -access_token = os.getenv("MSDATA_ADO_TOKEN") +# access_token = os.getenv("MSDATA_ADO_TOKEN") +access_token = os.getenv("ADO_TOKEN") # The repository and pull_request parameters are not being used in the function # This might be problematic for the for loop @@ -17,18 +18,24 @@ # print(diff_summarization) # todo make this a function load_pull_request_ids() -pull_request_ids = [] -with open("/workspaces/gpt-review/src/summarizations/pull_request_list.csv", "r") as f: - csv_file = csv.reader(f) - for line in csv_file: - if line[0].isdigit(): - pull_request_ids.append(line[0]) +# pull_request_ids = [] +# with open("/workspaces/gpt-review/src/summarizations/pull_request_list.csv", "r") as f: +# csv_file = csv.reader(f) +# for line in csv_file: +# if line[0].isdigit(): +# 
pull_request_ids.append(line[0]) -# todo make this into a function summarize_pull_requests() -summaries = [] -# todo do all summaries first and then summarize the summaries 10 at a time -for pr_id in pull_request_ids: - pull_request_link = C.PRROOT + pr_id - diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) - diff_summarization = _summarize_files(diff) - summaries.append(diff_summarization) +# # todo make this into a function summarize_pull_requests() +# summaries = [] +# # todo do all summaries first and then summarize the summaries 10 at a time +# for pr_id in pull_request_ids: +# pull_request_link = C.PRROOT + pr_id +# diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) +# diff_summarization = _summarize_files(diff) +# summaries.append(diff_summarization) + +# DevOpsFunction._post_summary() + +pull_request_link = C.PRLINK +diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) +diff_summarization = _summarize_files(diff) From 5d7e3ba15d7055959e8147f919ce3a7b52d10cf9 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 25 May 2023 22:48:41 +0000 Subject: [PATCH 53/72] Update gitignore, refactor DevOpsClient, and improve summarization logic --- .gitignore | 3 +- src/gpt_review/repositories/devops.py | 49 +++++++-------------------- src/summarizations/_summarizations.py | 17 ++++++---- 3 files changed, 25 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 17250301..539169fd 100644 --- a/.gitignore +++ b/.gitignore @@ -147,5 +147,6 @@ __azurite_db*__.json .python_packages #summarization development -src/summarizations/pull_request_list.csv +src/summarizations/msdata_pull_request_list.csv +src/summarizations/msazure_pull_request_list.csv src/summarizations/constants.py diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 167965ce..a4e9d2c6 100644 --- 
a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -352,7 +352,7 @@ def post_pr_summary(diff, link=None, access_token=None, post_summary=False) -> D pull_request_id=pr_id, description=review, ) - return {"response": "PR posted"} + return {"response": "PR summary posted"} return {"response": review} logging.warning("No PR to post too") @@ -381,18 +381,23 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Args: patch_repo (str): The pointer to ADO in the format, org/project/repo - patch_pr (str): The PR. + patch_pr (str): The PR id. access_token (str): The GitHub access token. Returns: str: The diff of the PR. """ link = urllib.parse.unquote( - os.getenv( - "LINK", - f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", - ) + f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", ) + + # TODO uncomment this later + # link = urllib.parse.unquote( + # os.getenv( + # "LINK", + # f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + # ) + # ) access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: @@ -403,37 +408,7 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: diff = client.get_patches(pull_request_event=pull_request) diff = "\n".join(diff) - return {"response": "PR posted"} - - logging.warning("No PR to post too") - return {"response": "No PR to post too"} - - @staticmethod - def get_pr_diff_link_parameter(pull_request_link=None, access_token=None) -> str: - """ - Get the diff of a PR. - - Args: - patch_repo (str): The repo. - patch_pr (str): The PR. - access_token (str): The GitHub access token. - - Returns: - str: The diff of the PR. 
- """ - # TODO uncomment this later - # access_token = os.getenv("ADO_TOKEN", access_token) - - if pull_request_link and access_token: - org, project, repo, pr_id = DevOpsClient._parse_url(urllib.parse.unquote(pull_request_link)) - - client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) - - pull_request = client.client.get_pull_request_by_id(pull_request_id=pr_id) - diff = client.get_patches(pull_request_event=pull_request) - diff = "\n".join(diff) - - return {"response": "PR posted"} + return diff logging.warning("No PR to post too") return {"response": "No PR to post too"} diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 607aae51..3fdd1bda 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -2,7 +2,6 @@ # from curses.ascii import isdigit # import csv import os -from gpt_review._review import _summarize_files from gpt_review.repositories.devops import DevOpsClient import summarizations.constants as C @@ -14,7 +13,7 @@ # pr_id is not being used in the function, why is that? 
# diff = DevOpsClient.get_pr_diff(repository, pull_request, access_token) # diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) -# diff_summarization = _summarize_files(diff) +# diff_summarization = DevOpsClient.post_pr_summary(diff) # print(diff_summarization) # todo make this a function load_pull_request_ids() @@ -31,11 +30,17 @@ # for pr_id in pull_request_ids: # pull_request_link = C.PRROOT + pr_id # diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) -# diff_summarization = _summarize_files(diff) +# diff_summarization = DevOpsClient.post_pr_summary(diff) # summaries.append(diff_summarization) # DevOpsFunction._post_summary() -pull_request_link = C.PRLINK -diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) -diff_summarization = _summarize_files(diff) +# MSAZURE +# diff = DevOpsClient.get_pr_diff(C.MSAZURE_PATCHREPO, C.MSAZURE_PRID, access_token) +# summary = DevOpsClient.post_pr_summary(diff=diff, link=C.MSAZURE_PATCHREPO + C.MSAZURE_PRID, access_token=access_token) +# print(summary) + +# MSDATA +# diff = DevOpsClient.get_pr_diff(C.MSDATA_PATCHREPO, C.MSDATA_PRID, access_token) +# summary = DevOpsClient.post_pr_summary(diff=diff, link=C.MSDATA_PATCHREPO + C.MSDATA_PRID, access_token=access_token) +# print(summary) From a2897d5fcb8460fe37f4758fd12f24bf3b6cb2d7 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Fri, 26 May 2023 03:03:36 +0000 Subject: [PATCH 54/72] Refactor code and add timing for summarization in DevOpsClient --- src/gpt_review/repositories/devops.py | 11 +++--- src/summarizations/_summarizations.py | 49 ++++++++++++++++----------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index a4e9d2c6..252b0a09 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -8,6 +8,11 @@ from typing import 
Dict, Iterable, List, Tuple from urllib.parse import urlparse +from knack import CLICommandsLoader +from knack.arguments import ArgumentsContext +from knack.commands import CommandGroup +from msrest.authentication import BasicAuthentication + from azure.devops.connection import Connection from azure.devops.exceptions import AzureDevOpsServiceError from azure.devops.v7_1.git.git_client import GitClient @@ -19,12 +24,6 @@ GitVersionDescriptor, GitPullRequestCommentThread, ) -from httpx import post -from knack import CLICommandsLoader -from knack.arguments import ArgumentsContext -from knack.commands import CommandGroup -from msrest.authentication import BasicAuthentication - from gpt_review._ask import _ask from gpt_review._command import GPTCommandGroup diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 3fdd1bda..3812fee6 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,10 +1,13 @@ """Summarize the changes in a release.""" -# from curses.ascii import isdigit -# import csv +from curses.ascii import isdigit +import csv import os +import time from gpt_review.repositories.devops import DevOpsClient import summarizations.constants as C +start = time.process_time() + # access_token = os.getenv("MSDATA_ADO_TOKEN") access_token = os.getenv("ADO_TOKEN") @@ -17,23 +20,31 @@ # print(diff_summarization) # todo make this a function load_pull_request_ids() -# pull_request_ids = [] -# with open("/workspaces/gpt-review/src/summarizations/pull_request_list.csv", "r") as f: -# csv_file = csv.reader(f) -# for line in csv_file: -# if line[0].isdigit(): -# pull_request_ids.append(line[0]) - -# # todo make this into a function summarize_pull_requests() -# summaries = [] -# # todo do all summaries first and then summarize the summaries 10 at a time -# for pr_id in pull_request_ids: -# pull_request_link = C.PRROOT + pr_id -# diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, 
access_token) -# diff_summarization = DevOpsClient.post_pr_summary(diff) -# summaries.append(diff_summarization) - -# DevOpsFunction._post_summary() +pull_request_ids = [] +with open("/workspaces/gpt-review/src/summarizations/msazure_pull_request_list.csv", "r") as f: + csv_file = csv.reader(f) + for line in csv_file: + if line[0].isdigit(): + pull_request_ids.append(line[0]) + +# todo make this into a function summarize_pull_requests() +summaries = [] +diffs = [] +# todo do all summaries first and then summarize the summaries 10 at a time +# TODO PRIORITIZE THIS, WHY ARE THE DIFFS EQUAL?? diffs[0] == diffs[1] == diffs[2] -> True +# it's catching the diff? and providing summaries on the same diff? investigate further +for pr_id in pull_request_ids: + pull_request_link = C.MSAZURE_PATCHREPO + pr_id + diff = DevOpsClient.get_pr_diff(C.MSAZURE_PATCHREPO, C.MSAZURE_PRID, access_token) + diffs.append(diff) + summary = DevOpsClient.post_pr_summary( + diff=diff, link=C.MSAZURE_PATCHREPO + C.MSAZURE_PRID, access_token=access_token + ) + print(time.process_time() - start) + summaries.append(summary) + + +print(summaries) # MSAZURE # diff = DevOpsClient.get_pr_diff(C.MSAZURE_PATCHREPO, C.MSAZURE_PRID, access_token) From 792db498f6e548c87e0006cff08e6913f784cab5 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Mon, 29 May 2023 23:42:22 +0000 Subject: [PATCH 55/72] Add error handling and improve summarization functions in DevOpsClient --- src/gpt_review/repositories/devops.py | 17 ++-- src/summarizations/_summarizations.py | 112 +++++++++++++++----------- 2 files changed, 73 insertions(+), 56 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 252b0a09..817a6143 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -288,6 +288,8 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: skip = 0 while True: + # TODO error 
handling? + # TF401029: Couldn't find Git commit with ID ee3ca002f2e07b3a33321eeb2614a22d7a324bef. pr_commits = self.client.get_commit_diffs( repository_id=self.repository_id, project=self.project, @@ -299,6 +301,7 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: target_version=pull_request.last_merge_target_commit.commit_id, target_version_type="commit" ), ) + # TODO investigate further - "if that pr_commits.all_changes_included doesn't go true it go go for infinity" changed_paths.extend([change for change in pr_commits.changes if "isFolder" not in change["item"]]) skip += len(pr_commits.changes) if pr_commits.all_changes_included: @@ -386,18 +389,16 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: Returns: str: The diff of the PR. """ + link = urllib.parse.unquote( - f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + os.getenv( + "LINK", + f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", + ) ) # TODO uncomment this later - # link = urllib.parse.unquote( - # os.getenv( - # "LINK", - # f"https://{patch_repo.split('/')[0]}.visualstudio.com/{patch_repo.split('/')[1]}/_git/{patch_repo.split('/')[2]}/pullrequest/{patch_pr}", - # ) - # ) - access_token = os.getenv("ADO_TOKEN", access_token) + # access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: org, project, repo, pr_id = DevOpsClient._parse_url(link) diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 3812fee6..c5ce82f2 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,57 +1,73 @@ """Summarize the changes in a release.""" -from curses.ascii import isdigit import csv import os import time + from gpt_review.repositories.devops import DevOpsClient import 
summarizations.constants as C start = time.process_time() -# access_token = os.getenv("MSDATA_ADO_TOKEN") -access_token = os.getenv("ADO_TOKEN") - -# The repository and pull_request parameters are not being used in the function -# This might be problematic for the for loop -# pr_id is not being used in the function, why is that? -# diff = DevOpsClient.get_pr_diff(repository, pull_request, access_token) -# diff = DevOpsClient.get_pr_diff_link_parameter(pull_request_link, access_token) -# diff_summarization = DevOpsClient.post_pr_summary(diff) -# print(diff_summarization) - -# todo make this a function load_pull_request_ids() -pull_request_ids = [] -with open("/workspaces/gpt-review/src/summarizations/msazure_pull_request_list.csv", "r") as f: - csv_file = csv.reader(f) - for line in csv_file: - if line[0].isdigit(): - pull_request_ids.append(line[0]) - -# todo make this into a function summarize_pull_requests() -summaries = [] -diffs = [] -# todo do all summaries first and then summarize the summaries 10 at a time -# TODO PRIORITIZE THIS, WHY ARE THE DIFFS EQUAL?? diffs[0] == diffs[1] == diffs[2] -> True -# it's catching the diff? and providing summaries on the same diff? 
investigate further -for pr_id in pull_request_ids: - pull_request_link = C.MSAZURE_PATCHREPO + pr_id - diff = DevOpsClient.get_pr_diff(C.MSAZURE_PATCHREPO, C.MSAZURE_PRID, access_token) - diffs.append(diff) - summary = DevOpsClient.post_pr_summary( - diff=diff, link=C.MSAZURE_PATCHREPO + C.MSAZURE_PRID, access_token=access_token - ) - print(time.process_time() - start) - summaries.append(summary) - - -print(summaries) - -# MSAZURE -# diff = DevOpsClient.get_pr_diff(C.MSAZURE_PATCHREPO, C.MSAZURE_PRID, access_token) -# summary = DevOpsClient.post_pr_summary(diff=diff, link=C.MSAZURE_PATCHREPO + C.MSAZURE_PRID, access_token=access_token) -# print(summary) - -# MSDATA -# diff = DevOpsClient.get_pr_diff(C.MSDATA_PATCHREPO, C.MSDATA_PRID, access_token) -# summary = DevOpsClient.post_pr_summary(diff=diff, link=C.MSDATA_PATCHREPO + C.MSDATA_PRID, access_token=access_token) -# print(summary) +access_token = os.getenv("MSDATA_ADO_TOKEN") +# access_token = os.getenv("ADO_TOKEN") + + +def load_pull_request_ids(file_path: str) -> list: + """Load pull request ids from a csv file. + + Args: + file_path (str): The path to the csv file. + + Returns: + list: The list of pull request ids. + """ + pull_request_ids_list = [] + with open(file_path, "r", encoding="utf-8") as file: + csv_file = csv.reader(file) + for line in csv_file: + if line[0].isdigit(): + pull_request_ids_list.append(line[0]) + return pull_request_ids_list + + +def summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: + """Summarize pull requests. + + Args: + pull_request_ids_list (list): The list of pull request ids. + patch_repo (str): The pointer to ADO in the format, org/project/repo. + + Returns: + list: The list of summaries. 
+ """ + summaries_list = [] + for pr_id in pull_request_ids_list: + pr_link = patch_repo + pr_id + diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) + summary = DevOpsClient.post_pr_summary(diff=diff, link=pr_link, access_token=access_token) + print(time.process_time() - start) + summaries_list.append(summary) + return summaries_list + + +# TODO make a function summarize_summaries() +# TODO do all summaries first and then summarize the summaries 10 at a time +# codebycopilot +def summarize_summaries(summaries_list: list) -> str: + """Summarize summaries. + + Args: + summaries_list (list): The list of summaries. + + Returns: + str: The summary of summaries. + """ + summary_of_summaries = "" + for summary in summaries_list: + summary_of_summaries += summary + return summary_of_summaries + + +pull_request_ids = load_pull_request_ids(C.MSDATA_PULL_REQUEST_LIST) +summaries = summarize_pull_requests(pull_request_ids, C.MSDATA_PATCHREPO) +# final_summary = summarize_summaries(summaries) From e25e48068afb40543bf3ec4c1ae4894c02154c73 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Tue, 30 May 2023 23:35:37 +0000 Subject: [PATCH 56/72] Refactor DevOpsClient methods and improve summary generation --- src/gpt_review/repositories/devops.py | 9 ++- src/summarizations/_summarizations.py | 80 +++++++++++++++++++------- src/summarizations/prompt_summary.yaml | 9 +++ 3 files changed, 74 insertions(+), 24 deletions(-) create mode 100644 src/summarizations/prompt_summary.yaml diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 817a6143..d0de792f 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -325,7 +325,7 @@ class DevOpsClient(_DevOpsClient): """Azure DevOps client Wrapper for working with.""" @staticmethod - def post_pr_summary(diff, link=None, access_token=None, post_summary=False) -> Dict[str, str]: + def generate_pr_summary(diff, 
link=None, access_token=None, post_summary=False) -> Dict[str, str]: """ Get a review of a PR. @@ -337,6 +337,9 @@ def post_pr_summary(diff, link=None, access_token=None, post_summary=False) -> D Args: diff (str): The patch of the PR. + link (str, optional): The link to the PR. Defaults to None. + access_token (str, optional): The GitHub access token. Defaults to None. + post_summary (bool, optional): Whether to post the summary to the PR. Defaults to False. Returns: Dict[str, str]: The review. @@ -526,7 +529,7 @@ def _post_summary(self, payload, pr_id, link) -> None: diff = "\n".join(diff) logging.debug("Copilot diff: %s", diff) - self.post_pr_summary(diff, link=link) + self.generate_pr_summary(diff, link=link) def _review( @@ -547,7 +550,7 @@ def _review( with open(diff, "r", encoding="utf8") as file: diff_contents = file.read() - return DevOpsClient.post_pr_summary(diff_contents, link, access_token, post_summary=post_comment) + return DevOpsClient.generate_pr_summary(diff_contents, link, access_token, post_summary=post_comment) def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index c5ce82f2..bdf65d5c 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,18 +1,16 @@ """Summarize the changes in a release.""" import csv -import os import time +import os +from pathlib import Path from gpt_review.repositories.devops import DevOpsClient +from gpt_review._review import _ask +from gpt_review.prompts._prompt import LangChainPrompt import summarizations.constants as C -start = time.process_time() - -access_token = os.getenv("MSDATA_ADO_TOKEN") -# access_token = os.getenv("ADO_TOKEN") - -def load_pull_request_ids(file_path: str) -> list: +def _load_pull_request_ids(file_path: str) -> list: """Load pull request ids from a csv file. 
Args: @@ -30,7 +28,7 @@ def load_pull_request_ids(file_path: str) -> list: return pull_request_ids_list -def summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: +def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: """Summarize pull requests. Args: @@ -42,18 +40,39 @@ def summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> lis """ summaries_list = [] for pr_id in pull_request_ids_list: - pr_link = patch_repo + pr_id + start = time.process_time() + pr_link = ( + patch_repo + pr_id + ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) - summary = DevOpsClient.post_pr_summary(diff=diff, link=pr_link, access_token=access_token) + summary = DevOpsClient.generate_pr_summary(diff=diff, link=pr_link, access_token=access_token) print(time.process_time() - start) summaries_list.append(summary) return summaries_list -# TODO make a function summarize_summaries() -# TODO do all summaries first and then summarize the summaries 10 at a time -# codebycopilot -def summarize_summaries(summaries_list: list) -> str: +def load_summary_yaml() -> LangChainPrompt: + """Load the summary yaml.""" + yaml_path = os.getenv("PROMPT_SUMMARY", str(Path(__file__).parents[0].joinpath(C.SUMMARY_PROMPT_YAML))) + return LangChainPrompt.load(yaml_path) + + +def _summarize_summary(summary_group) -> str: + """Summarize a list of summaries. + + Args: + summary (str): The summary to summarize. + + Returns: + str: The summary of the summary. + """ + + question = load_summary_yaml().format(summaries=summary_group) + response = _ask(question=[question], temperature=0.0) + return response + + +def _summarize_summaries(summaries_list: list) -> list: """Summarize summaries. Args: @@ -62,12 +81,31 @@ def summarize_summaries(summaries_list: list) -> str: Returns: str: The summary of summaries. 
""" - summary_of_summaries = "" - for summary in summaries_list: - summary_of_summaries += summary - return summary_of_summaries + + summarized_summaries_list = [] + for i in range(0, len(summaries_list), 10): + summary_group = summaries_list[i : i + 10] + summarized_summaries_list.append(_summarize_summary(summary_group)) + return summarized_summaries_list + + +def _get_summary(summaries_list: list) -> str: + """Get the final summary. + + Args: + summarized_summaries_list (list): The list of the summaries of the PRs. + + Returns: + str: The final summary. + """ + summarized_summaries = _summarize_summaries(summaries_list) + while len(summarized_summaries) > 1: + summarized_summaries = _summarize_summaries(summarized_summaries) + return summarized_summaries[0]["response"] -pull_request_ids = load_pull_request_ids(C.MSDATA_PULL_REQUEST_LIST) -summaries = summarize_pull_requests(pull_request_ids, C.MSDATA_PATCHREPO) -# final_summary = summarize_summaries(summaries) +access_token = C.MSAZURE_ADO_TOKEN +pull_request_ids = _load_pull_request_ids(C.MSAZURE_PULL_REQUEST_LIST) +summaries = _summarize_pull_requests(pull_request_ids, C.MSAZURE_PATCHREPO) +final_summary = _get_summary(summaries) +print(final_summary) diff --git a/src/summarizations/prompt_summary.yaml b/src/summarizations/prompt_summary.yaml new file mode 100644 index 00000000..1cfcf2b7 --- /dev/null +++ b/src/summarizations/prompt_summary.yaml @@ -0,0 +1,9 @@ +_type: prompt +input_variables: + ["summaries"] +template: | + Summarize the following list of summaries into a single summary, + focusing on major modifications, additions, deletions, and any significant updates within the files. + Do not include the file name in the summary and list the summary with bullet points. 
+ + {summaries} From fbbfde64ec0078f9b6116fdc59fac22e508ee04c Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Wed, 31 May 2023 01:43:50 +0000 Subject: [PATCH 57/72] Add GitCommitRef creation, handle abandoned PRs, and empty summary cases --- src/gpt_review/repositories/devops.py | 56 +++++++++++++++++++++++++-- src/summarizations/_summarizations.py | 19 +++++---- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index d0de792f..8b56174c 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -20,6 +20,7 @@ Comment, GitBaseVersionDescriptor, GitPullRequest, + GitCommitRef, GitTargetVersionDescriptor, GitVersionDescriptor, GitPullRequestCommentThread, @@ -252,6 +253,55 @@ def get_selection(lines: str, line_start: int, line_end: int) -> str: return left_selection, right_selection + def create_last_merge_source_commit_from_dict(self, commit_dict: Dict) -> GitCommitRef: + """Create a GitCommitRef object from a dictionary. + + Args: + commit_dict (Dict): The dictionary to create the GitCommitRef object from. + + Returns: + GitCommitRef: The GitCommitRef object. + """ + return GitCommitRef(commit_id=commit_dict["commitId"], url=commit_dict["url"]) + + # TODO might be able to improve with something like https://stackoverflow.com/questions/59250557/how-to-convert-a-python-dict-to-a-class-object + # or creating our own model class inheriting msrest.serialization.Model + def create_git_pull_request_from_dict(self, pr_dict: Dict) -> GitPullRequest: + """Create a GitPullRequest object from a dictionary. + + Args: + pr_dict (Dict): The dictionary to create the GitPullRequest object from. + + Returns: + GitPullRequest: The GitPullRequest object. 
+ """ + pull_request = GitPullRequest( + title=pr_dict["title"], + description=pr_dict["description"], + source_ref_name=pr_dict["sourceRefName"], + target_ref_name=pr_dict["targetRefName"], + is_draft=pr_dict["isDraft"], + reviewers=pr_dict["reviewers"], + supports_iterations=pr_dict["supportsIterations"], + artifact_id=pr_dict["artifactId"], + status=pr_dict["status"], + created_by=pr_dict["createdBy"], + creation_date=pr_dict["creationDate"], + last_merge_source_commit=pr_dict["lastMergeSourceCommit"], + last_merge_target_commit=pr_dict["lastMergeTargetCommit"], + last_merge_commit=pr_dict["lastMergeCommit"], + url=pr_dict["url"], + repository=pr_dict["repository"], + merge_id=pr_dict["mergeId"], + ) + pull_request.last_merge_source_commit = self.create_last_merge_source_commit_from_dict( + pull_request.last_merge_source_commit + ) + pull_request.last_merge_target_commit = self.create_last_merge_source_commit_from_dict( + pull_request.last_merge_target_commit + ) + return pull_request + def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ Get the patches for a given pull request event. @@ -264,7 +314,7 @@ def get_patches(self, pull_request_event) -> Iterable[List[str]]: """ if isinstance(pull_request_event, dict): - pull_request = GitPullRequest(pull_request_event["pullRequest"]) + pull_request = self.create_git_pull_request_from_dict(pull_request_event["pullRequest"]) else: pull_request = pull_request_event @@ -287,9 +337,7 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: pr_commits = None skip = 0 - while True: - # TODO error handling? - # TF401029: Couldn't find Git commit with ID ee3ca002f2e07b3a33321eeb2614a22d7a324bef. 
+ while True and pull_request.status != "abandoned": pr_commits = self.client.get_commit_diffs( repository_id=self.repository_id, project=self.project, diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index bdf65d5c..f8b22d3f 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -45,9 +45,10 @@ def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> li patch_repo + pr_id ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) - summary = DevOpsClient.generate_pr_summary(diff=diff, link=pr_link, access_token=access_token) - print(time.process_time() - start) - summaries_list.append(summary) + if diff: + summary = DevOpsClient.generate_pr_summary(diff=diff, link=pr_link, access_token=access_token) + print(time.process_time() - start) + summaries_list.append(summary) return summaries_list @@ -98,10 +99,14 @@ def _get_summary(summaries_list: list) -> str: Returns: str: The final summary. """ - summarized_summaries = _summarize_summaries(summaries_list) - while len(summarized_summaries) > 1: - summarized_summaries = _summarize_summaries(summarized_summaries) - return summarized_summaries[0]["response"] + + if summaries_list: + summarized_summaries = _summarize_summaries(summaries_list) + while len(summarized_summaries) > 1: + summarized_summaries = _summarize_summaries(summarized_summaries) + return summarized_summaries[0]["response"] + else: + return "No summaries to summarize." 
access_token = C.MSAZURE_ADO_TOKEN From 8735aa2d71390144498ae174b34ad715facb9b27 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Fri, 2 Jun 2023 02:59:09 +0000 Subject: [PATCH 58/72] Refactor code, add PR summary prompt, and fix minor issues in gpt_review --- src/gpt_review/_review.py | 2 + src/gpt_review/constants.py | 2 + src/gpt_review/prompts/_prompt_pr_summary.py | 11 +++++ .../prompts/prompt_pr_summary.yaml} | 2 +- src/gpt_review/repositories/devops.py | 13 ++--- .../repositories/devops_constants.py | 3 ++ src/summarizations/_summarizations.py | 48 +++++++++++-------- 7 files changed, 51 insertions(+), 30 deletions(-) create mode 100644 src/gpt_review/prompts/_prompt_pr_summary.py rename src/{summarizations/prompt_summary.yaml => gpt_review/prompts/prompt_pr_summary.yaml} (76%) create mode 100644 src/gpt_review/repositories/devops_constants.py diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index 926ec11c..17c7e9fc 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -142,6 +142,7 @@ def _split_diff(git_diff): """ diff = "diff" git = "--git a/" + # TODO doesn't seem to be working as expected for one file changes return git_diff.split(f"{diff} {git}")[1:] # Use formated string to prevent splitting @@ -203,6 +204,7 @@ def _summarize_files(git_diff) -> str: if os.getenv("FILE_SUMMARY_FULL", "true").lower() == "true": summary += file_summary + # TODO the summary of file changes cannot be provided as there are no specific files or details mentioned to analyze and summarize the files, another todo for this added in line 145 summary += f""" ### Summary of File Changes {_request_goal(file_summary, goal="Summarize the changes to the files.")} diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index ca70cbec..0ae66a34 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -38,3 +38,5 @@ BUG_PROMPT_YAML = "prompt_bug.yaml" COVERAGE_PROMPT_YAML = 
"prompt_coverage.yaml" SUMMARY_PROMPT_YAML = "prompt_summary.yaml" + +PR_SUMMARY_PROMPT_YAML = "prompt_pr_summary.yaml" diff --git a/src/gpt_review/prompts/_prompt_pr_summary.py b/src/gpt_review/prompts/_prompt_pr_summary.py new file mode 100644 index 00000000..a42603f2 --- /dev/null +++ b/src/gpt_review/prompts/_prompt_pr_summary.py @@ -0,0 +1,11 @@ +"""Prompt for PR summarization.""" +import os +from pathlib import Path +from gpt_review.prompts._prompt import LangChainPrompt +import gpt_review.constants as C + + +def load_pr_summary_yaml() -> LangChainPrompt: + """Load the summary yaml.""" + yaml_path = os.getenv("PROMPT_PR_SUMMARY", str(Path(__file__).parents[0].joinpath(C.PR_SUMMARY_PROMPT_YAML))) + return LangChainPrompt.load(yaml_path) diff --git a/src/summarizations/prompt_summary.yaml b/src/gpt_review/prompts/prompt_pr_summary.yaml similarity index 76% rename from src/summarizations/prompt_summary.yaml rename to src/gpt_review/prompts/prompt_pr_summary.yaml index 1cfcf2b7..29273a6a 100644 --- a/src/summarizations/prompt_summary.yaml +++ b/src/gpt_review/prompts/prompt_pr_summary.yaml @@ -2,7 +2,7 @@ _type: prompt input_variables: ["summaries"] template: | - Summarize the following list of summaries into a single summary, + Summarize the following list of pull request summaries into a single summary, focusing on major modifications, additions, deletions, and any significant updates within the files. Do not include the file name in the summary and list the summary with bullet points. 
diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 8b56174c..3f846152 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -31,9 +31,7 @@ from gpt_review._review import _summarize_files from gpt_review.prompts._prompt import load_ask_yaml from gpt_review.repositories._repository import _RepositoryClient - -MIN_CONTEXT_LINES = 5 -SURROUNDING_CONTEXT = 5 +import gpt_review.repositories.devops_constants as C class _DevOpsClient(_RepositoryClient, abc.ABC): @@ -237,7 +235,7 @@ def _calculate_selection(self, thread: GitPullRequestCommentThread, commit_id: s original_content, changed_content = self._load_content(file_path=thread.file_path, commit_id=commit_id) def get_selection(lines: str, line_start: int, line_end: int) -> str: - return lines[line_start - 1 : line_end] if line_end - line_start >= MIN_CONTEXT_LINES else lines + return lines[line_start - 1 : line_end] if line_end - line_start >= C.MIN_CONTEXT_LINES else lines left_selection = ( get_selection(original_content, thread.left_file_start.line, thread.left_file_end.line) @@ -264,8 +262,6 @@ def create_last_merge_source_commit_from_dict(self, commit_dict: Dict) -> GitCom """ return GitCommitRef(commit_id=commit_dict["commitId"], url=commit_dict["url"]) - # TODO might be able to improve with something like https://stackoverflow.com/questions/59250557/how-to-convert-a-python-dict-to-a-class-object - # or creating our own model class inheriting msrest.serialization.Model def create_git_pull_request_from_dict(self, pr_dict: Dict) -> GitPullRequest: """Create a GitPullRequest object from a dictionary. 
@@ -337,19 +333,18 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: pr_commits = None skip = 0 - while True and pull_request.status != "abandoned": + while pull_request.status != C.PR_TYPE_ABANDONED: pr_commits = self.client.get_commit_diffs( repository_id=self.repository_id, project=self.project, diff_common_commit=False, base_version_descriptor=GitBaseVersionDescriptor( - base_version=pull_request.last_merge_source_commit.commit_id, base_version_type="commit" + base_version=pull_request.last_merge_commit.commit_id, base_version_type="commit" ), target_version_descriptor=GitTargetVersionDescriptor( target_version=pull_request.last_merge_target_commit.commit_id, target_version_type="commit" ), ) - # TODO investigate further - "if that pr_commits.all_changes_included doesn't go true it go go for infinity" changed_paths.extend([change for change in pr_commits.changes if "isFolder" not in change["item"]]) skip += len(pr_commits.changes) if pr_commits.all_changes_included: diff --git a/src/gpt_review/repositories/devops_constants.py b/src/gpt_review/repositories/devops_constants.py new file mode 100644 index 00000000..e7ed1c21 --- /dev/null +++ b/src/gpt_review/repositories/devops_constants.py @@ -0,0 +1,3 @@ +MIN_CONTEXT_LINES = 5 +SURROUNDING_CONTEXT = 5 +PR_TYPE_ABANDONED = "abandoned" diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index f8b22d3f..2506a891 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,12 +1,12 @@ """Summarize the changes in a release.""" import csv import time -import os -from pathlib import Path +from typing import Dict from gpt_review.repositories.devops import DevOpsClient -from gpt_review._review import _ask -from gpt_review.prompts._prompt import LangChainPrompt +from gpt_review.prompts._prompt_pr_summary import load_pr_summary_yaml +from gpt_review._review import _ask, _summarize_files + import summarizations.constants 
as C @@ -28,6 +28,20 @@ def _load_pull_request_ids(file_path: str) -> list: return pull_request_ids_list +def generate_pr_review(diff) -> Dict[str, str]: + """Generate a pull request review. + + Args: + diff (str): The diff to review. + + Returns: + Dict[str, str]: The response from GPT-4. + """ + + review = _summarize_files(diff) + return {"response": review} + + def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: """Summarize pull requests. @@ -41,23 +55,17 @@ def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> li summaries_list = [] for pr_id in pull_request_ids_list: start = time.process_time() - pr_link = ( - patch_repo + pr_id - ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here + # pr_link = ( + # patch_repo + pr_id + # ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) if diff: - summary = DevOpsClient.generate_pr_summary(diff=diff, link=pr_link, access_token=access_token) + summary = generate_pr_review(diff=diff) print(time.process_time() - start) summaries_list.append(summary) return summaries_list -def load_summary_yaml() -> LangChainPrompt: - """Load the summary yaml.""" - yaml_path = os.getenv("PROMPT_SUMMARY", str(Path(__file__).parents[0].joinpath(C.SUMMARY_PROMPT_YAML))) - return LangChainPrompt.load(yaml_path) - - def _summarize_summary(summary_group) -> str: """Summarize a list of summaries. @@ -68,7 +76,7 @@ def _summarize_summary(summary_group) -> str: str: The summary of the summary. 
""" - question = load_summary_yaml().format(summaries=summary_group) + question = load_pr_summary_yaml().format(summaries=summary_group) response = _ask(question=[question], temperature=0.0) return response @@ -90,7 +98,7 @@ def _summarize_summaries(summaries_list: list) -> list: return summarized_summaries_list -def _get_summary(summaries_list: list) -> str: +def _get_final_summary(summaries_list: list) -> str: """Get the final summary. Args: @@ -109,8 +117,8 @@ def _get_summary(summaries_list: list) -> str: return "No summaries to summarize." -access_token = C.MSAZURE_ADO_TOKEN -pull_request_ids = _load_pull_request_ids(C.MSAZURE_PULL_REQUEST_LIST) -summaries = _summarize_pull_requests(pull_request_ids, C.MSAZURE_PATCHREPO) -final_summary = _get_summary(summaries) +access_token = C.MSDATA_ADO_TOKEN +pull_request_ids = _load_pull_request_ids(C.MSDATA_PULL_REQUEST_LIST) +summaries = _summarize_pull_requests(pull_request_ids, C.MSDATA_PATCHREPO) +final_summary = _get_final_summary(summaries) print(final_summary) From b30d6413757ec3472aeb7fb4c067b2150a3229ce Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:11:32 +0000 Subject: [PATCH 59/72] Refactor code to use Azure DevOps instead of MSDATA. 
--- src/gpt_review/_review.py | 3 ++- src/summarizations/_summarizations.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index 17c7e9fc..a775d22e 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -204,7 +204,8 @@ def _summarize_files(git_diff) -> str: if os.getenv("FILE_SUMMARY_FULL", "true").lower() == "true": summary += file_summary - # TODO the summary of file changes cannot be provided as there are no specific files or details mentioned to analyze and summarize the files, another todo for this added in line 145 + # TODO "the summary of file changes cannot be provided as there are no specific files or + # details mentioned to analyze and summarize the files" summary += f""" ### Summary of File Changes {_request_goal(file_summary, goal="Summarize the changes to the files.")} diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 2506a891..13a4aa68 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -117,8 +117,8 @@ def _get_final_summary(summaries_list: list) -> str: return "No summaries to summarize." 
-access_token = C.MSDATA_ADO_TOKEN -pull_request_ids = _load_pull_request_ids(C.MSDATA_PULL_REQUEST_LIST) -summaries = _summarize_pull_requests(pull_request_ids, C.MSDATA_PATCHREPO) +access_token = C.MSAZURE_ADO_TOKEN +pull_request_ids = _load_pull_request_ids(C.MSAZURE_PULL_REQUEST_LIST) +summaries = _summarize_pull_requests(pull_request_ids, C.MSAZURE_PATCHREPO) final_summary = _get_final_summary(summaries) print(final_summary) From 9e5ec7696c5f7a4f3f2d3d813c40aec0d0b1a63a Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Wed, 7 Jun 2023 19:59:24 +0000 Subject: [PATCH 60/72] Add support for summarizing PR diffs, batch summaries, and deployment nature --- .gitignore | 5 + src/gpt_review/_review.py | 3 - src/gpt_review/constants.py | 3 + src/gpt_review/prompts/_prompt_pr_summary.py | 22 ++- src/gpt_review/prompts/prompt_pr_summary.yaml | 6 +- src/gpt_review/repositories/devops.py | 14 +- src/summarizations/_summarizations.py | 167 +++++++++++++++--- 7 files changed, 185 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 539169fd..d6021ff4 100644 --- a/.gitignore +++ b/.gitignore @@ -150,3 +150,8 @@ __azurite_db*__.json src/summarizations/msdata_pull_request_list.csv src/summarizations/msazure_pull_request_list.csv src/summarizations/constants.py +src/gpt_review/prompts/prompt_pr_review.yaml +src/gpt_review/prompts/prompt_pr_batch_summary.yaml +src/gpt_review/prompts/prompt_nature.yaml +src/gpt_review/prompts/prompt_pr_summary.yaml +src/summarizations/summaries/ diff --git a/src/gpt_review/_review.py b/src/gpt_review/_review.py index a775d22e..926ec11c 100644 --- a/src/gpt_review/_review.py +++ b/src/gpt_review/_review.py @@ -142,7 +142,6 @@ def _split_diff(git_diff): """ diff = "diff" git = "--git a/" - # TODO doesn't seem to be working as expected for one file changes return git_diff.split(f"{diff} {git}")[1:] # Use formated string to prevent splitting @@ -204,8 +203,6 @@ def _summarize_files(git_diff) 
-> str: if os.getenv("FILE_SUMMARY_FULL", "true").lower() == "true": summary += file_summary - # TODO "the summary of file changes cannot be provided as there are no specific files or - # details mentioned to analyze and summarize the files" summary += f""" ### Summary of File Changes {_request_goal(file_summary, goal="Summarize the changes to the files.")} diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index 0ae66a34..06528d41 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -40,3 +40,6 @@ SUMMARY_PROMPT_YAML = "prompt_summary.yaml" PR_SUMMARY_PROMPT_YAML = "prompt_pr_summary.yaml" +PR_REVIEW_PROMPT_YAML = "prompt_pr_review.yaml" +PR_BATCH_SUMMARY_PROMPT_YAML = "prompt_pr_batch_summary.yaml" +PROMPT_NATURE_YAML = "prompt_nature.yaml" diff --git a/src/gpt_review/prompts/_prompt_pr_summary.py b/src/gpt_review/prompts/_prompt_pr_summary.py index a42603f2..a93d78bb 100644 --- a/src/gpt_review/prompts/_prompt_pr_summary.py +++ b/src/gpt_review/prompts/_prompt_pr_summary.py @@ -6,6 +6,26 @@ def load_pr_summary_yaml() -> LangChainPrompt: - """Load the summary yaml.""" + """Load the PR summary yaml.""" yaml_path = os.getenv("PROMPT_PR_SUMMARY", str(Path(__file__).parents[0].joinpath(C.PR_SUMMARY_PROMPT_YAML))) return LangChainPrompt.load(yaml_path) + + +def load_pr_review_yaml() -> LangChainPrompt: + """Load the PR review yaml.""" + yaml_path = os.getenv("PROMPT_PR_REVIEW", str(Path(__file__).parents[0].joinpath(C.PR_REVIEW_PROMPT_YAML))) + return LangChainPrompt.load(yaml_path) + + +def load_batch_pr_summary_yaml() -> LangChainPrompt: + """Load the PR summary yaml.""" + yaml_path = os.getenv( + "PROMPT_PR_BATCH_SUMMARY", str(Path(__file__).parents[0].joinpath(C.PR_BATCH_SUMMARY_PROMPT_YAML)) + ) + return LangChainPrompt.load(yaml_path) + + +def load_nature_yaml() -> LangChainPrompt: + """Load the nature yaml.""" + yaml_path = os.getenv("PROMPT_NATURE", str(Path(__file__).parents[0].joinpath(C.PROMPT_NATURE_YAML))) + 
return LangChainPrompt.load(yaml_path) diff --git a/src/gpt_review/prompts/prompt_pr_summary.yaml b/src/gpt_review/prompts/prompt_pr_summary.yaml index 29273a6a..8bc79754 100644 --- a/src/gpt_review/prompts/prompt_pr_summary.yaml +++ b/src/gpt_review/prompts/prompt_pr_summary.yaml @@ -1,9 +1,9 @@ _type: prompt input_variables: - ["summaries"] + ["diff"] template: | - Summarize the following list of pull request summaries into a single summary, + Summarize the following list of file diffs, focusing on major modifications, additions, deletions, and any significant updates within the files. Do not include the file name in the summary and list the summary with bullet points. - {summaries} + {diff} diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 3f846152..b5365710 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -205,7 +205,7 @@ def _create_patch(self, left, right, file_path) -> List: while line < len(left) and row < len(right): if changes[line][row] == changes[line - 1][row - 1]: - patch.append(left[line - 1]) + # patch.append(left[line - 1]) line += 1 row += 1 elif changes[line - 1][row] < changes[line][row - 1]: @@ -355,13 +355,17 @@ def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: def _get_change(self, git_change, commit_id) -> List[str]: file_path = git_change["item"]["path"] - original_content, changed_content = self._load_content(file_path, commit_id) + original_content, changed_content = self._load_content( + file_path, commit_id_original=git_change["item"]["commitId"], commit_id_changed=commit_id + ) - patch = self._create_patch(original_content, changed_content, file_path) + patch = self._create_patch(original_content, changed_content, file_path) # TODO - doesn't work for msdata ;-; return "\n".join(patch) - def _load_content(self, file_path, commit_id): - return self.read_all_text(file_path), self.read_all_text(file_path, 
commit_id=commit_id) + def _load_content(self, file_path, commit_id_original, commit_id_changed): + return self.read_all_text(file_path, commit_id=commit_id_original), self.read_all_text( + file_path, commit_id=commit_id_changed + ) class DevOpsClient(_DevOpsClient): diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index 13a4aa68..e30fce2f 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,14 +1,41 @@ """Summarize the changes in a release.""" import csv import time -from typing import Dict +import os +from dataclasses import dataclass from gpt_review.repositories.devops import DevOpsClient -from gpt_review.prompts._prompt_pr_summary import load_pr_summary_yaml -from gpt_review._review import _ask, _summarize_files +from gpt_review.prompts._prompt_pr_summary import load_pr_summary_yaml, load_batch_pr_summary_yaml, load_nature_yaml +from gpt_review.prompts._prompt import load_summary_yaml +from gpt_review._review import _ask import summarizations.constants as C +FILE_SUMMARY_NAME = ( + "/workspaces/gpt-review/src/summarizations/summaries/file_summary-" + + str(time.strftime("%b-%d-%Y %H:%M:%S")) + + ".csv" +) + + +@dataclass +class GitFile: + """A git file with its diff contents.""" + + file_name: str + diff: str + + +def _print_to_file(file_path: str, text: str) -> None: + """Print text to a file. + + Args: + file_path (str): The path to the file. + text (str): The text to print. + """ + with open(file_path, "a+", encoding="utf-8") as file: + file.write(text) + def _load_pull_request_ids(file_path: str) -> list: """Load pull request ids from a csv file. @@ -28,18 +55,93 @@ def _load_pull_request_ids(file_path: str) -> list: return pull_request_ids_list -def generate_pr_review(diff) -> Dict[str, str]: - """Generate a pull request review. +def _summarize_file(diff) -> str: + """Summarize a file in a git diff. Args: - diff (str): The diff to review. 
+ diff (str): The file to summarize. Returns: - Dict[str, str]: The response from GPT-4. + str: The summary of the file. """ + question = load_summary_yaml().format(diff=diff) + + response = _ask(question=[question], temperature=0.0) + return response["response"] - review = _summarize_files(diff) - return {"response": review} + +def _request_goal(git_diff, goal, fast: bool = False, large: bool = False, temperature: float = 0) -> str: + """ + Request a goal from GPT-4. + + Args: + git_diff (str): The git diff to split. + goal (str): The goal to request from GPT-4. + fast (bool, optional): Whether to use the fast model. Defaults to False. + large (bool, optional): Whether to use the large model. Defaults to False. + temperature (float, optional): The temperature to use. Defaults to 0. + + Returns: + response (str): The response from GPT-4. + """ + prompt = f""" +{goal} + +{git_diff} +""" + + return _ask([prompt], max_tokens=1500, fast=fast, large=large, temperature=temperature)["response"] + + +def _split_diff(git_diff): + """Split a git diff into a list of files and their diff contents. + + Args: + git_diff (str): The git diff to split. + + Returns: + list: A list of tuples containing the file name and diff contents. + """ + diff = "diff" + git = "--git a/" + return ( + git_diff.split(f"{diff} {git}")[1:] if git_diff.split(f"{diff} {git}")[1:] else git_diff.split(f"{diff} {git}") + ) # Use formated string to prevent splitting + + +def _summarize_pr_diff(diff) -> str: + """Summarize a pull request diff. + + Args: + diff (str): The diff to summarize. + + Returns: + str: The summary. + """ + summary = "" + file_summary = "" + file_summary += "".join(_summarize_file(single_diff) for single_diff in _split_diff(diff)) + summary += _request_goal(file_summary, goal="Summarize the changes to the files.") + + return summary + + +# TODO finalize this function +# def _review_pr_diff(diff) -> str: +# """Review a pull request diff. + +# Args: +# diff (str): The diff to review. 
+ +# Returns: +# str: The review. +# """ +# review = "Review of File Changes" +# file_review = "" +# file_review += "".join(_review_file(single_diff) for single_diff in _split_diff(diff)) +# review += _request_goal(file_review, goal="Review the changes to the files.") + +# return review def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: @@ -58,15 +160,19 @@ def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> li # pr_link = ( # patch_repo + pr_id # ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here - diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) + diff = DevOpsClient.get_pr_diff( + patch_repo, pr_id, access_token + ) # TODO pr_diff is not acurrately being calculated if diff: - summary = generate_pr_review(diff=diff) + summary = _summarize_pr_diff(diff=diff) print(time.process_time() - start) summaries_list.append(summary) + summary_to_print = f"{pr_id}, {summary}\n" + _print_to_file(FILE_SUMMARY_NAME, summary_to_print) return summaries_list -def _summarize_summary(summary_group) -> str: +def _summarize_summary_batch(summary_batch: list) -> str: """Summarize a list of summaries. Args: @@ -76,7 +182,7 @@ def _summarize_summary(summary_group) -> str: str: The summary of the summary. 
""" - question = load_pr_summary_yaml().format(summaries=summary_group) + question = load_batch_pr_summary_yaml().format(summaries=summary_batch) response = _ask(question=[question], temperature=0.0) return response @@ -93,8 +199,8 @@ def _summarize_summaries(summaries_list: list) -> list: summarized_summaries_list = [] for i in range(0, len(summaries_list), 10): - summary_group = summaries_list[i : i + 10] - summarized_summaries_list.append(_summarize_summary(summary_group)) + summary_batch = summaries_list[i : i + 10] + summarized_summaries_list.append(_summarize_summary_batch(summary_batch)) return summarized_summaries_list @@ -108,17 +214,32 @@ def _get_final_summary(summaries_list: list) -> str: str: The final summary. """ - if summaries_list: - summarized_summaries = _summarize_summaries(summaries_list) - while len(summarized_summaries) > 1: - summarized_summaries = _summarize_summaries(summarized_summaries) - return summarized_summaries[0]["response"] - else: - return "No summaries to summarize." + summarized_summaries = _summarize_summaries(summaries_list) + while len(summarized_summaries) > 1: + summarized_summaries = _summarize_summaries(summarized_summaries) + summaries_to_print = f"{len(summarized_summaries)}, {summarized_summaries}\n" + _print_to_file(FILE_SUMMARY_NAME, summaries_to_print) + return summarized_summaries[0]["response"] + + +def _get_deployment_nature(summary) -> str: + """Get the nature of the deployment. + + Args: + summary (str): The summary of the PRs in a deployment. + + Returns: + str: The nature of the deployment. 
+ """ + question = load_nature_yaml().format(summary=summary) + response = _ask(question=[question], temperature=0.0) + return response["response"] access_token = C.MSAZURE_ADO_TOKEN pull_request_ids = _load_pull_request_ids(C.MSAZURE_PULL_REQUEST_LIST) summaries = _summarize_pull_requests(pull_request_ids, C.MSAZURE_PATCHREPO) final_summary = _get_final_summary(summaries) -print(final_summary) +_print_to_file(FILE_SUMMARY_NAME, "\nThe final summary is:\n" + final_summary) + +_print_to_file(FILE_SUMMARY_NAME, "\nThe nature of this deployment is: " + _get_deployment_nature(final_summary)) From 26f542424b0363c28e21fe2a8f8bf12cb5ea7668 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Tue, 13 Jun 2023 02:25:46 +0000 Subject: [PATCH 61/72] Improve diff algorithm and fix patch generation in DevOpsClient --- src/gpt_review/repositories/devops.py | 64 ++++++++++++++++++++------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index b5365710..7aff7a1b 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -193,6 +193,7 @@ def _create_patch(self, left, right, file_path) -> List: Returns: List: The patch. 
""" + changes = [[0] * (len(right) + 1) for _ in range(len(left) + 1)] for i, j in itertools.product(range(len(left)), range(len(right))): @@ -200,25 +201,58 @@ def _create_patch(self, left, right, file_path) -> List: changes[i][j] if left[i] == right[j] else 1 + min(changes[i][j + 1], changes[i + 1][j], changes[i][j]) ) - line, row = 1, 1 - patch = [file_path] - - while line < len(left) and row < len(right): - if changes[line][row] == changes[line - 1][row - 1]: - # patch.append(left[line - 1]) - line += 1 - row += 1 + # Attempt at reverting original C# code + # patch = [file_path] + # line, row = 1, 1 + + # while line < len(left) and row < len(right): + # if changes[line][row] <= changes[line - 1][row] and changes[line][row] <= changes[line][row - 1]: + # if left[line - 1] != right[row - 1]: + # patch.append(f"- {left[line - 1]}") + # patch.append(f"+ {right[row - 1]}") + # line += 1 + # row += 1 + # elif changes[line - 1][row] < changes[line][row - 1]: + # patch.append(f"- {left[line - 1]}") + # line += 1 + # else: + # patch.append(f"+ {right[row - 1]}") + # row += 1 + + # while line < len(left): + # patch.append(f"- {left[line - 1]}") + # line += 1 + + # while row < len(right): + # patch.append(f"+ {right[row - 1]}") + # row += 1 + + original_patch = [file_path] + # original C code, it works but not really because it adds the diff backwards + line, row = len(left), len(right) + while line > 0 and row > 0: + if changes[line][row] <= changes[line - 1][row] and changes[line][row] <= changes[line][row - 1]: + if left[line - 1] != right[row - 1]: + original_patch.append(f"- {left[line - 1]}") + original_patch.append(f"+ {right[row - 1]}") + line -= 1 + row -= 1 elif changes[line - 1][row] < changes[line][row - 1]: - patch.append(f"- {left[line - 1]}") - line += 1 + original_patch.append(f"- {left[line - 1]}") + line -= 1 else: - patch.append(f"+ {right[row - 1]}") - row += 1 + original_patch.append(f"+ {right[row - 1]}") + row -= 1 + + while line > 0: + 
original_patch.append(f"- {left[line - 1]}") + line -= 1 - patch.extend(f"- {left[i - 1]}" for i in range(line, len(left) + 1)) - patch.extend(f"+ {right[j - 1]}" for j in range(row, len(right) + 1)) + while row > 0: + original_patch.append(f"+ {right[row - 1]}") + row -= 1 - return patch + return original_patch def _calculate_selection(self, thread: GitPullRequestCommentThread, commit_id: str) -> Tuple[str, str]: """ From e0e7cc2f684fae4fac77a36640462823b1b22913 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Tue, 13 Jun 2023 19:06:22 +0000 Subject: [PATCH 62/72] Refactor diff generation and improve PR summary in DevOpsClient --- src/gpt_review/repositories/devops.py | 61 +++++++++------------------ src/summarizations/_summarizations.py | 32 +++----------- 2 files changed, 25 insertions(+), 68 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 7aff7a1b..aabb628a 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -201,58 +201,34 @@ def _create_patch(self, left, right, file_path) -> List: changes[i][j] if left[i] == right[j] else 1 + min(changes[i][j + 1], changes[i + 1][j], changes[i][j]) ) - # Attempt at reverting original C# code - # patch = [file_path] - # line, row = 1, 1 - - # while line < len(left) and row < len(right): - # if changes[line][row] <= changes[line - 1][row] and changes[line][row] <= changes[line][row - 1]: - # if left[line - 1] != right[row - 1]: - # patch.append(f"- {left[line - 1]}") - # patch.append(f"+ {right[row - 1]}") - # line += 1 - # row += 1 - # elif changes[line - 1][row] < changes[line][row - 1]: - # patch.append(f"- {left[line - 1]}") - # line += 1 - # else: - # patch.append(f"+ {right[row - 1]}") - # row += 1 - - # while line < len(left): - # patch.append(f"- {left[line - 1]}") - # line += 1 - - # while row < len(right): - # patch.append(f"+ {right[row - 1]}") - # row += 1 - - 
original_patch = [file_path] - # original C code, it works but not really because it adds the diff backwards + patch = [] line, row = len(left), len(right) while line > 0 and row > 0: if changes[line][row] <= changes[line - 1][row] and changes[line][row] <= changes[line][row - 1]: if left[line - 1] != right[row - 1]: - original_patch.append(f"- {left[line - 1]}") - original_patch.append(f"+ {right[row - 1]}") + patch.append(f"+ {right[row - 1]}") + patch.append(f"- {left[line - 1]}") line -= 1 row -= 1 elif changes[line - 1][row] < changes[line][row - 1]: - original_patch.append(f"- {left[line - 1]}") + patch.append(f"- {left[line - 1]}") line -= 1 else: - original_patch.append(f"+ {right[row - 1]}") + patch.append(f"+ {right[row - 1]}") row -= 1 while line > 0: - original_patch.append(f"- {left[line - 1]}") + patch.append(f"- {left[line - 1]}") line -= 1 while row > 0: - original_patch.append(f"+ {right[row - 1]}") + patch.append(f"+ {right[row - 1]}") row -= 1 - return original_patch + patch.append(file_path) + patch.reverse() + + return patch def _calculate_selection(self, thread: GitPullRequestCommentThread, commit_id: str) -> Tuple[str, str]: """ @@ -266,7 +242,7 @@ def _calculate_selection(self, thread: GitPullRequestCommentThread, commit_id: s Tuple[str, str]: The left and right selection. 
""" - original_content, changed_content = self._load_content(file_path=thread.file_path, commit_id=commit_id) + original_content, changed_content = self._load_content(file_path=thread.file_path, commit_id_changed=commit_id) def get_selection(lines: str, line_start: int, line_end: int) -> str: return lines[line_start - 1 : line_end] if line_end - line_start >= C.MIN_CONTEXT_LINES else lines @@ -396,7 +372,12 @@ def _get_change(self, git_change, commit_id) -> List[str]: patch = self._create_patch(original_content, changed_content, file_path) # TODO - doesn't work for msdata ;-; return "\n".join(patch) - def _load_content(self, file_path, commit_id_original, commit_id_changed): + def _load_content( + self, + file_path, + commit_id_original: str = None, + commit_id_changed: str = None, + ): return self.read_all_text(file_path, commit_id=commit_id_original), self.read_all_text( file_path, commit_id=commit_id_changed ) @@ -461,7 +442,7 @@ def _parse_url(link): return org, project, repo, pr_id @staticmethod - def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: + def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None): """ Get the diff of a PR. @@ -469,9 +450,6 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: patch_repo (str): The pointer to ADO in the format, org/project/repo patch_pr (str): The PR id. access_token (str): The GitHub access token. - - Returns: - str: The diff of the PR. 
""" link = urllib.parse.unquote( @@ -490,7 +468,6 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) pull_request = client.client.get_pull_request_by_id(pull_request_id=pr_id) diff = client.get_patches(pull_request_event=pull_request) - diff = "\n".join(diff) return diff diff --git a/src/summarizations/_summarizations.py b/src/summarizations/_summarizations.py index e30fce2f..001e68a6 100644 --- a/src/summarizations/_summarizations.py +++ b/src/summarizations/_summarizations.py @@ -1,11 +1,10 @@ """Summarize the changes in a release.""" import csv import time -import os from dataclasses import dataclass from gpt_review.repositories.devops import DevOpsClient -from gpt_review.prompts._prompt_pr_summary import load_pr_summary_yaml, load_batch_pr_summary_yaml, load_nature_yaml +from gpt_review.prompts._prompt_pr_summary import load_batch_pr_summary_yaml, load_nature_yaml from gpt_review.prompts._prompt import load_summary_yaml from gpt_review._review import _ask @@ -93,22 +92,6 @@ def _request_goal(git_diff, goal, fast: bool = False, large: bool = False, tempe return _ask([prompt], max_tokens=1500, fast=fast, large=large, temperature=temperature)["response"] -def _split_diff(git_diff): - """Split a git diff into a list of files and their diff contents. - - Args: - git_diff (str): The git diff to split. - - Returns: - list: A list of tuples containing the file name and diff contents. - """ - diff = "diff" - git = "--git a/" - return ( - git_diff.split(f"{diff} {git}")[1:] if git_diff.split(f"{diff} {git}")[1:] else git_diff.split(f"{diff} {git}") - ) # Use formated string to prevent splitting - - def _summarize_pr_diff(diff) -> str: """Summarize a pull request diff. 
@@ -120,7 +103,7 @@ def _summarize_pr_diff(diff) -> str: """ summary = "" file_summary = "" - file_summary += "".join(_summarize_file(single_diff) for single_diff in _split_diff(diff)) + file_summary += "".join(_summarize_file(file_diff) for file_diff in diff) summary += _request_goal(file_summary, goal="Summarize the changes to the files.") return summary @@ -157,12 +140,7 @@ def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> li summaries_list = [] for pr_id in pull_request_ids_list: start = time.process_time() - # pr_link = ( - # patch_repo + pr_id - # ) # This is not a real link to a PR, but the link is needed to post the summary and this is not being done here - diff = DevOpsClient.get_pr_diff( - patch_repo, pr_id, access_token - ) # TODO pr_diff is not acurrately being calculated + diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) if diff: summary = _summarize_pr_diff(diff=diff) print(time.process_time() - start) @@ -219,7 +197,9 @@ def _get_final_summary(summaries_list: list) -> str: summarized_summaries = _summarize_summaries(summarized_summaries) summaries_to_print = f"{len(summarized_summaries)}, {summarized_summaries}\n" _print_to_file(FILE_SUMMARY_NAME, summaries_to_print) - return summarized_summaries[0]["response"] + if summarized_summaries: + return summarized_summaries[0]["response"] + return "No summaries were provided." def _get_deployment_nature(summary) -> str: From ce6ec7a5326f5ff064fbffd87470701b2924fe81 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Tue, 13 Jun 2023 20:56:04 +0000 Subject: [PATCH 63/72] Refactor _get_changed_blobs method in devops.py for better readability. 
--- src/gpt_review/repositories/devops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index aabb628a..4ea91fbc 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -325,9 +325,7 @@ def get_patches(self, pull_request_event) -> Iterable[List[str]]: pull_request = pull_request_event git_changes = self._get_changed_blobs(pull_request) - return [ - self._get_change(git_change, pull_request.last_merge_source_commit.commit_id) for git_change in git_changes - ] + return [self._get_change(git_change, pull_request.last_merge_commit.commit_id) for git_change in git_changes] def _get_changed_blobs(self, pull_request: GitPullRequest) -> List[str]: """ From 6900c656b5bce4b4a66dde3675fcbf1d134a317e Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 18:06:12 +0000 Subject: [PATCH 64/72] moved summarizations to scripts folder --- .gitignore | 10 ++++++---- {src => scripts}/summarizations/_summarizations.py | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) rename {src => scripts}/summarizations/_summarizations.py (98%) diff --git a/.gitignore b/.gitignore index d6021ff4..09f25e40 100644 --- a/.gitignore +++ b/.gitignore @@ -147,11 +147,13 @@ __azurite_db*__.json .python_packages #summarization development -src/summarizations/msdata_pull_request_list.csv -src/summarizations/msazure_pull_request_list.csv -src/summarizations/constants.py +scripts/summarizations/msdata_pull_request_list.csv +scripts/summarizations/msazure_pull_request_list.csv +scripts/summarizations/constants.py +scripts/summarizations/summaries/ src/gpt_review/prompts/prompt_pr_review.yaml src/gpt_review/prompts/prompt_pr_batch_summary.yaml src/gpt_review/prompts/prompt_nature.yaml src/gpt_review/prompts/prompt_pr_summary.yaml -src/summarizations/summaries/ + + diff --git 
a/src/summarizations/_summarizations.py b/scripts/summarizations/_summarizations.py similarity index 98% rename from src/summarizations/_summarizations.py rename to scripts/summarizations/_summarizations.py index 001e68a6..6af7f5c7 100644 --- a/src/summarizations/_summarizations.py +++ b/scripts/summarizations/_summarizations.py @@ -3,17 +3,17 @@ import time from dataclasses import dataclass +import constants as C + from gpt_review.repositories.devops import DevOpsClient from gpt_review.prompts._prompt_pr_summary import load_batch_pr_summary_yaml, load_nature_yaml from gpt_review.prompts._prompt import load_summary_yaml from gpt_review._review import _ask -import summarizations.constants as C - FILE_SUMMARY_NAME = ( - "/workspaces/gpt-review/src/summarizations/summaries/file_summary-" + "/workspaces/gpt-review/scripts/summarizations/summaries/file_summary-" + str(time.strftime("%b-%d-%Y %H:%M:%S")) - + ".csv" + + ".txt" ) From 9707bc43238e8f07bb38e1843d8ce7e75e2b9d6d Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 20:12:13 +0000 Subject: [PATCH 65/72] added config file --- .gitignore | 6 ++-- scripts/summarizations/_summarizations.py | 37 +++++++++-------------- scripts/summarizations/ado.yaml.template | 3 ++ src/gpt_review/constants.py | 2 ++ src/gpt_review/repositories/devops.py | 5 ++- 5 files changed, 24 insertions(+), 29 deletions(-) create mode 100644 scripts/summarizations/ado.yaml.template diff --git a/.gitignore b/.gitignore index 09f25e40..66ea1661 100644 --- a/.gitignore +++ b/.gitignore @@ -147,11 +147,11 @@ __azurite_db*__.json .python_packages #summarization development -scripts/summarizations/msdata_pull_request_list.csv +scripts/summarizations/msazure.yaml +scripts/summarizations/msdata.yaml scripts/summarizations/msazure_pull_request_list.csv -scripts/summarizations/constants.py +scripts/summarizations/msdata_pull_request_list.csv scripts/summarizations/summaries/ 
-src/gpt_review/prompts/prompt_pr_review.yaml src/gpt_review/prompts/prompt_pr_batch_summary.yaml src/gpt_review/prompts/prompt_nature.yaml src/gpt_review/prompts/prompt_pr_summary.yaml diff --git a/scripts/summarizations/_summarizations.py b/scripts/summarizations/_summarizations.py index 6af7f5c7..7df45645 100644 --- a/scripts/summarizations/_summarizations.py +++ b/scripts/summarizations/_summarizations.py @@ -1,14 +1,15 @@ """Summarize the changes in a release.""" import csv import time +import os from dataclasses import dataclass - -import constants as C +import yaml from gpt_review.repositories.devops import DevOpsClient from gpt_review.prompts._prompt_pr_summary import load_batch_pr_summary_yaml, load_nature_yaml from gpt_review.prompts._prompt import load_summary_yaml from gpt_review._review import _ask +import gpt_review.constants as C FILE_SUMMARY_NAME = ( "/workspaces/gpt-review/scripts/summarizations/summaries/file_summary-" @@ -109,24 +110,6 @@ def _summarize_pr_diff(diff) -> str: return summary -# TODO finalize this function -# def _review_pr_diff(diff) -> str: -# """Review a pull request diff. - -# Args: -# diff (str): The diff to review. - -# Returns: -# str: The review. -# """ -# review = "Review of File Changes" -# file_review = "" -# file_review += "".join(_review_file(single_diff) for single_diff in _split_diff(diff)) -# review += _request_goal(file_review, goal="Review the changes to the files.") - -# return review - - def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: """Summarize pull requests. 
@@ -216,9 +199,17 @@ def _get_deployment_nature(summary) -> str: return response["response"] -access_token = C.MSAZURE_ADO_TOKEN -pull_request_ids = _load_pull_request_ids(C.MSAZURE_PULL_REQUEST_LIST) -summaries = _summarize_pull_requests(pull_request_ids, C.MSAZURE_PATCHREPO) +def _load_config_file(): + """Import from yaml file and return the config.""" + config_file = "scripts/summarizations/" + os.getenv("CONFIG_FILE", C.ADO_CONFIG_FILE) + with open(config_file, "r", encoding="utf8") as file: + return yaml.load(file, Loader=yaml.SafeLoader) + + +config = _load_config_file() +access_token = os.getenv(config.get("ado_token")) +pull_request_ids = _load_pull_request_ids(config.get("pull_request_list")) +summaries = _summarize_pull_requests(pull_request_ids, config.get("patch_repo")) final_summary = _get_final_summary(summaries) _print_to_file(FILE_SUMMARY_NAME, "\nThe final summary is:\n" + final_summary) diff --git a/scripts/summarizations/ado.yaml.template b/scripts/summarizations/ado.yaml.template new file mode 100644 index 00000000..148a11e8 --- /dev/null +++ b/scripts/summarizations/ado.yaml.template @@ -0,0 +1,3 @@ +ado_token : ADO_TOKEN +pull_request_list : "path/to/pull/request/list/file" +patch_repo : "org/project/repo" \ No newline at end of file diff --git a/src/gpt_review/constants.py b/src/gpt_review/constants.py index 06528d41..48cba889 100644 --- a/src/gpt_review/constants.py +++ b/src/gpt_review/constants.py @@ -43,3 +43,5 @@ PR_REVIEW_PROMPT_YAML = "prompt_pr_review.yaml" PR_BATCH_SUMMARY_PROMPT_YAML = "prompt_pr_batch_summary.yaml" PROMPT_NATURE_YAML = "prompt_nature.yaml" + +ADO_CONFIG_FILE = "ado.yaml" diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 4ea91fbc..a4569e62 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -367,7 +367,7 @@ def _get_change(self, git_change, commit_id) -> List[str]: file_path, 
commit_id_original=git_change["item"]["commitId"], commit_id_changed=commit_id ) - patch = self._create_patch(original_content, changed_content, file_path) # TODO - doesn't work for msdata ;-; + patch = self._create_patch(original_content, changed_content, file_path) return "\n".join(patch) def _load_content( @@ -457,8 +457,7 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None): ) ) - # TODO uncomment this later - # access_token = os.getenv("ADO_TOKEN", access_token) + access_token = os.getenv("ADO_TOKEN", access_token) if link and access_token: org, project, repo, pr_id = DevOpsClient._parse_url(link) From c8661eeb1f47e6cb2f567a416e15a2370fb1696c Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:13:21 -0400 Subject: [PATCH 66/72] Delete __init__.py --- src/gpt_review/__init__.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 src/gpt_review/__init__.py diff --git a/src/gpt_review/__init__.py b/src/gpt_review/__init__.py deleted file mode 100644 index 3a71f018..00000000 --- a/src/gpt_review/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# ------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE in project root for information. 
-# ------------------------------------------------------------- -"""Easy GPT CLI""" -from __future__ import annotations - -__version__ = "0.9.5" From c6b6ed3daf0630b7b995141ceb4d7de74939cbda Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:13:38 -0400 Subject: [PATCH 67/72] Delete pyproject.toml --- pyproject.toml | 402 ------------------------------------------------- 1 file changed, 402 deletions(-) delete mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 4bcae5e4..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,402 +0,0 @@ -[build-system] -requires = ["flit"] -build-backend = "flit.buildapi" - -[project] -name = "gpt-review" -authors = [ - {name = "Daniel Ciborowski", email = "dciborow@microsoft.com"}, -] -description = "Python Project for reviewing GitHub PRs with Open AI and Chat-GPT." -readme = "README.md" -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11" -] -requires-python = ">=3.8.1" -dynamic = ["version"] -dependencies = [ - 'azure-devops', - 'azure-functions; python_version <= "3.10"', - 'azure-identity', - 'azure-keyvault', - 'azure-keyvault-secrets', - 'llama-index>=0.6.0,<=0.6.14', - 'httpx', - 'GitPython', - 'knack', - 'openai', - 'requests', - 'pyyaml', - 'typing_extensions; python_version <= "3.10"', - 'transformers; python_version <= "3.8"' -] - -[project.optional-dependencies] -test = [ - "bandit[toml]==1.7.5", - "black==23.3.0", - "cattrs", - "docx2txt", - "check-manifest==0.49", - "flake8-bugbear==23.5.9", - "flake8-docstrings", - "flake8-formatter_junit_xml", - "flake8", - "flake8-pyproject", - "pre-commit==3.3.2", - 
"pylint==2.17.4", - "pylint_junit", - "pytest-cov>=3.0.0", - "pytest-mock", - "pytest-runner", - "pytest-xdist", - "pytest>=7.2.2", - "pytest-github-actions-annotate-failures", - "shellcheck-py==0.9.0.2", - "requests_mock" -] - -[project.scripts] -gpt = "gpt_review.main:__main__" - -[project.urls] -Documentation = "https://github.com/dciborow/action-gpt/tree/main#readme" -Source = "https://github.com/dciborow/action-gpt" -Tracker = "https://github.com/dciborow/action-gpt/issues" - -[tool.flit.module] -name = "gpt_review" - -[tool.bandit] -exclude_dirs = ["build","dist","tests","scripts"] -number = 4 -recursive = true -targets = "src" - -[tool.black] -line-length = 120 -fast = true -experimental-string-processing = true - -[tool.coverage.run] -branch = true -omit = [ - # Omitting files that can not be covered by tests - "src/gpt/__main__.py", - "src/gpt_review/__main__.py", - "src/gpt_review/main.py" -] - -[tool.coverage.report] -fail_under = 100 - -[tool.flake8] -max-line-length = 120 -select = "F,E,W,B,B901,B902,B903" -exclude = [ - ".eggs", - ".git", - ".tox", - "nssm", - "obj", - "out", - "packages", - "pywin32", - "tests", - "swagger_client" -] -ignore = [ - "E722", - "B001", - "W503", - "E203", - # Covered by Ruff - "F401", - "F501", - "F821", - "W391", # Covered by Pylint trailing-newlines -] - -[tool.isort] -profile = "black" -src_paths = ["src", "tests", "azure"] - -[tool.pyright] -include = ["src"] -exclude = [ - "**/node_modules", - "**/__pycache__", -] -venv = "env37" - -reportMissingImports = true -reportMissingTypeStubs = false - -pythonVersion = "3.7" -pythonPlatform = "Linux" - -executionEnvironments = [ - { root = "src" } -] - -[tool.pytest.ini_options] -addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append" -pythonpath = [ - "src" -] -testpaths = "tests" -# junit_family = "xunit2" -markers = [ - "integration: marks as integration test", - "notebooks: marks as notebook test", - "gpu: marks as gpu test", - "spark: marks 
tests which need Spark", - "slow: marks tests as slow", - "unit: fast offline tests", - "cli: test installed CLI", -] - -[tool.tox] -legacy_tox_ini = """ -[tox] -envlist = py, integration, spark, all - -[testenv] -commands = - pytest -m "not integration and not spark" {posargs} - -[testenv:integration] -commands = - pytest -m "integration" {posargs} - -[testenv:spark] -extras = spark -setenv = - PYSPARK_DRIVER_PYTHON = {envpython} - PYSPARK_PYTHON = {envpython} -commands = - pytest -m "spark" {posargs} - -[testenv:all] -extras = all -setenv = - PYSPARK_DRIVER_PYTHON = {envpython} - PYSPARK_PYTHON = {envpython} -commands = - pytest {posargs} -""" - -[tool.pylint] -extension-pkg-whitelist= [ - "numpy", - "torch", - "cv2", - "pyodbc", - "pydantic", - "ciso8601", - "netcdf4", - "scipy" -] -ignore="CVS" -ignore-patterns="test.*?py,conftest.py" -init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())' -jobs=0 -limit-inference-results=100 -persistent="yes" -suggestion-mode="yes" -unsafe-load-any-extension="no" - -[tool.pylint.'MESSAGES CONTROL'] -enable="c-extension-no-member" -disable = [ - "unused-import", # Covered by Ruff F401 - "undefined-variable", # Covered by Ruff F821 - "line-too-long", # Covered by Ruff E501 -] -[tool.pylint.'REPORTS'] -evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)" -output-format="text" -reports="no" -score="yes" - -[tool.pylint.'REFACTORING'] -max-nested-blocks=5 -never-returning-functions="sys.exit" - -[tool.pylint.'BASIC'] -argument-naming-style="snake_case" -attr-naming-style="snake_case" -bad-names= [ - "foo", - "bar" -] -class-attribute-naming-style="any" -class-naming-style="PascalCase" -const-naming-style="UPPER_CASE" -docstring-min-length=-1 -function-naming-style="snake_case" -good-names= [ - "i", - "j", - "k", - "ex", - "Run", - "_" -] -include-naming-hint="yes" -inlinevar-naming-style="any" -method-naming-style="snake_case" -module-naming-style="any" 
-no-docstring-rgx="^_" -property-classes="abc.abstractproperty" -variable-naming-style="snake_case" - -[tool.pylint.'FORMAT'] -ignore-long-lines="^\\s*(# )?.*['\"]??" -indent-after-paren=4 -indent-string=' ' -max-line-length=120 -max-module-lines=1000 -single-line-class-stmt="no" -single-line-if-stmt="no" - -[tool.pylint.'LOGGING'] -logging-format-style="old" -logging-modules="logging" - -[tool.pylint.'MISCELLANEOUS'] -notes= [ - "FIXME", - "XXX", - "TODO" -] - -[tool.pylint.'SIMILARITIES'] -ignore-comments="yes" -ignore-docstrings="yes" -ignore-imports="yes" -min-similarity-lines=7 - -[tool.pylint.'SPELLING'] -max-spelling-suggestions=4 -spelling-store-unknown-words="no" - -[tool.pylint.'STRING'] -check-str-concat-over-line-jumps="no" - -[tool.pylint.'TYPECHECK'] -contextmanager-decorators="contextlib.contextmanager" -generated-members="numpy.*,np.*,pyspark.sql.functions,collect_list" -ignore-mixin-members="yes" -ignore-none="yes" -ignore-on-opaque-inference="yes" -ignored-classes="optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client" -ignored-modules="numpy,torch,swagger_client,netCDF4,scipy" -missing-member-hint="yes" -missing-member-hint-distance=1 -missing-member-max-choices=1 - -[tool.pylint.'VARIABLES'] -additional-builtins="dbutils" -allow-global-unused-variables="yes" -callbacks= [ - "cb_", - "_cb" -] -dummy-variables-rgx="_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_" -ignored-argument-names="_.*|^ignored_|^unused_" -init-import="no" -redefining-builtins-modules="six.moves,past.builtins,future.builtins,builtins,io" - -[tool.pylint.'CLASSES'] -defining-attr-methods= [ - "__init__", - "__new__", - "setUp", - "__post_init__" -] -exclude-protected= [ - "_asdict", - "_fields", - "_replace", - "_source", - "_make" -] -valid-classmethod-first-arg="cls" -valid-metaclass-classmethod-first-arg="cls" - -[tool.pylint.'DESIGN'] -max-args=5 -max-attributes=7 -max-bool-expr=5 -max-branches=12 -max-locals=15 -max-parents=7 
-max-public-methods=20 -max-returns=6 -max-statements=50 -min-public-methods=2 - -[tool.pylint.'IMPORTS'] -allow-wildcard-with-all="no" -analyse-fallback-blocks="no" -deprecated-modules="optparse,tkinter.tix" - -[tool.pylint.'EXCEPTIONS'] -overgeneral-exceptions= [ - "BaseException", - "Exception" -] - -[tool.ruff] -# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. -select = ["E", "F"] -ignore = [] - -# Allow autofix for all enabled rules (when `--fix`) is provided. -fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] -unfixable = [] - -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", -] - -# Same as Black. -line-length = 120 - -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -# Assume Python 3.10. -target-version = "py311" - -[tool.ruff.mccabe] -# Unlike Flake8, default to a complexity level of 10. 
-max-complexity = 10 From d25dde27f142ffe8203adb9957d679b6b78f8b14 Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 20:19:30 +0000 Subject: [PATCH 68/72] cleanup, and re-add removed files --- pyproject.toml | 402 ++++++++++++++++++ src/gpt_review/__init__.py | 8 + .../repositories/devops_constants.py | 2 +- 3 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml create mode 100644 src/gpt_review/__init__.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..ae9d2c73 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,402 @@ +[build-system] +requires = ["flit"] +build-backend = "flit.buildapi" + +[project] +name = "gpt-review" +authors = [ + {name = "Daniel Ciborowski", email = "dciborow@microsoft.com"}, +] +description = "Python Project for reviewing GitHub PRs with Open AI and Chat-GPT." +readme = "README.md" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11" +] +requires-python = ">=3.8.1" +dynamic = ["version"] +dependencies = [ + 'azure-devops', + 'azure-functions; python_version <= "3.10"', + 'azure-identity', + 'azure-keyvault', + 'azure-keyvault-secrets', + 'llama-index>=0.6.0,<=0.6.9', + 'httpx', + 'GitPython', + 'knack', + 'openai', + 'requests', + 'pyyaml', + 'typing_extensions; python_version <= "3.10"', + 'transformers; python_version <= "3.8"' +] + +[project.optional-dependencies] +test = [ + "bandit[toml]==1.7.5", + "black==23.3.0", + "cattrs", + "docx2txt", + "check-manifest==0.49", + "flake8-bugbear==23.5.9", + "flake8-docstrings", + "flake8-formatter_junit_xml", + "flake8", + "flake8-pyproject", + "pre-commit==3.3.2", + "pylint==2.17.4", + 
"pylint_junit", + "pytest-cov>=3.0.0", + "pytest-mock", + "pytest-runner", + "pytest-xdist", + "pytest>=7.2.2", + "pytest-github-actions-annotate-failures", + "shellcheck-py==0.9.0.2", + "requests_mock" +] + +[project.scripts] +gpt = "gpt_review.main:__main__" + +[project.urls] +Documentation = "https://github.com/dciborow/action-gpt/tree/main#readme" +Source = "https://github.com/dciborow/action-gpt" +Tracker = "https://github.com/dciborow/action-gpt/issues" + +[tool.flit.module] +name = "gpt_review" + +[tool.bandit] +exclude_dirs = ["build","dist","tests","scripts"] +number = 4 +recursive = true +targets = "src" + +[tool.black] +line-length = 120 +fast = true +experimental-string-processing = true + +[tool.coverage.run] +branch = true +omit = [ + # Omitting files that can not be covered by tests + "src/gpt/__main__.py", + "src/gpt_review/__main__.py", + "src/gpt_review/main.py" +] + +[tool.coverage.report] +fail_under = 100 + +[tool.flake8] +max-line-length = 120 +select = "F,E,W,B,B901,B902,B903" +exclude = [ + ".eggs", + ".git", + ".tox", + "nssm", + "obj", + "out", + "packages", + "pywin32", + "tests", + "swagger_client" +] +ignore = [ + "E722", + "B001", + "W503", + "E203", + # Covered by Ruff + "F401", + "F501", + "F821", + "W391", # Covered by Pylint trailing-newlines +] + +[tool.isort] +profile = "black" +src_paths = ["src", "tests", "azure"] + +[tool.pyright] +include = ["src"] +exclude = [ + "**/node_modules", + "**/__pycache__", +] +venv = "env37" + +reportMissingImports = true +reportMissingTypeStubs = false + +pythonVersion = "3.7" +pythonPlatform = "Linux" + +executionEnvironments = [ + { root = "src" } +] + +[tool.pytest.ini_options] +addopts = "--cov-report xml:coverage.xml --cov src --cov-fail-under 0 --cov-append" +pythonpath = [ + "src" +] +testpaths = "tests" +# junit_family = "xunit2" +markers = [ + "integration: marks as integration test", + "notebooks: marks as notebook test", + "gpu: marks as gpu test", + "spark: marks tests which need 
Spark", + "slow: marks tests as slow", + "unit: fast offline tests", + "cli: test installed CLI", +] + +[tool.tox] +legacy_tox_ini = """ +[tox] +envlist = py, integration, spark, all + +[testenv] +commands = + pytest -m "not integration and not spark" {posargs} + +[testenv:integration] +commands = + pytest -m "integration" {posargs} + +[testenv:spark] +extras = spark +setenv = + PYSPARK_DRIVER_PYTHON = {envpython} + PYSPARK_PYTHON = {envpython} +commands = + pytest -m "spark" {posargs} + +[testenv:all] +extras = all +setenv = + PYSPARK_DRIVER_PYTHON = {envpython} + PYSPARK_PYTHON = {envpython} +commands = + pytest {posargs} +""" + +[tool.pylint] +extension-pkg-whitelist= [ + "numpy", + "torch", + "cv2", + "pyodbc", + "pydantic", + "ciso8601", + "netcdf4", + "scipy" +] +ignore="CVS" +ignore-patterns="test.*?py,conftest.py" +init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())' +jobs=0 +limit-inference-results=100 +persistent="yes" +suggestion-mode="yes" +unsafe-load-any-extension="no" + +[tool.pylint.'MESSAGES CONTROL'] +enable="c-extension-no-member" +disable = [ + "unused-import", # Covered by Ruff F401 + "undefined-variable", # Covered by Ruff F821 + "line-too-long", # Covered by Ruff E501 +] +[tool.pylint.'REPORTS'] +evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)" +output-format="text" +reports="no" +score="yes" + +[tool.pylint.'REFACTORING'] +max-nested-blocks=5 +never-returning-functions="sys.exit" + +[tool.pylint.'BASIC'] +argument-naming-style="snake_case" +attr-naming-style="snake_case" +bad-names= [ + "foo", + "bar" +] +class-attribute-naming-style="any" +class-naming-style="PascalCase" +const-naming-style="UPPER_CASE" +docstring-min-length=-1 +function-naming-style="snake_case" +good-names= [ + "i", + "j", + "k", + "ex", + "Run", + "_" +] +include-naming-hint="yes" +inlinevar-naming-style="any" +method-naming-style="snake_case" +module-naming-style="any" +no-docstring-rgx="^_" 
+property-classes="abc.abstractproperty" +variable-naming-style="snake_case" + +[tool.pylint.'FORMAT'] +ignore-long-lines="^\\s*(# )?.*['\"]??" +indent-after-paren=4 +indent-string=' ' +max-line-length=120 +max-module-lines=1000 +single-line-class-stmt="no" +single-line-if-stmt="no" + +[tool.pylint.'LOGGING'] +logging-format-style="old" +logging-modules="logging" + +[tool.pylint.'MISCELLANEOUS'] +notes= [ + "FIXME", + "XXX", + "TODO" +] + +[tool.pylint.'SIMILARITIES'] +ignore-comments="yes" +ignore-docstrings="yes" +ignore-imports="yes" +min-similarity-lines=7 + +[tool.pylint.'SPELLING'] +max-spelling-suggestions=4 +spelling-store-unknown-words="no" + +[tool.pylint.'STRING'] +check-str-concat-over-line-jumps="no" + +[tool.pylint.'TYPECHECK'] +contextmanager-decorators="contextlib.contextmanager" +generated-members="numpy.*,np.*,pyspark.sql.functions,collect_list" +ignore-mixin-members="yes" +ignore-none="yes" +ignore-on-opaque-inference="yes" +ignored-classes="optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client" +ignored-modules="numpy,torch,swagger_client,netCDF4,scipy" +missing-member-hint="yes" +missing-member-hint-distance=1 +missing-member-max-choices=1 + +[tool.pylint.'VARIABLES'] +additional-builtins="dbutils" +allow-global-unused-variables="yes" +callbacks= [ + "cb_", + "_cb" +] +dummy-variables-rgx="_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_" +ignored-argument-names="_.*|^ignored_|^unused_" +init-import="no" +redefining-builtins-modules="six.moves,past.builtins,future.builtins,builtins,io" + +[tool.pylint.'CLASSES'] +defining-attr-methods= [ + "__init__", + "__new__", + "setUp", + "__post_init__" +] +exclude-protected= [ + "_asdict", + "_fields", + "_replace", + "_source", + "_make" +] +valid-classmethod-first-arg="cls" +valid-metaclass-classmethod-first-arg="cls" + +[tool.pylint.'DESIGN'] +max-args=5 +max-attributes=7 +max-bool-expr=5 +max-branches=12 +max-locals=15 +max-parents=7 +max-public-methods=20 +max-returns=6 
+max-statements=50 +min-public-methods=2 + +[tool.pylint.'IMPORTS'] +allow-wildcard-with-all="no" +analyse-fallback-blocks="no" +deprecated-modules="optparse,tkinter.tix" + +[tool.pylint.'EXCEPTIONS'] +overgeneral-exceptions= [ + "BaseException", + "Exception" +] + +[tool.ruff] +# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. +select = ["E", "F"] +ignore = [] + +# Allow autofix for all enabled rules (when `--fix`) is provided. +fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] +unfixable = [] + +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] + +# Same as Black. +line-length = 120 + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +# Assume Python 3.10. +target-version = "py311" + +[tool.ruff.mccabe] +# Unlike Flake8, default to a complexity level of 10. +max-complexity = 10 \ No newline at end of file diff --git a/src/gpt_review/__init__.py b/src/gpt_review/__init__.py new file mode 100644 index 00000000..81b9dc64 --- /dev/null +++ b/src/gpt_review/__init__.py @@ -0,0 +1,8 @@ +# ------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See LICENSE in project root for information. 
+# ------------------------------------------------------------- +"""Easy GPT CLI""" +from __future__ import annotations + +__version__ = "0.9.4" diff --git a/src/gpt_review/repositories/devops_constants.py b/src/gpt_review/repositories/devops_constants.py index e7ed1c21..fd526993 100644 --- a/src/gpt_review/repositories/devops_constants.py +++ b/src/gpt_review/repositories/devops_constants.py @@ -1,3 +1,3 @@ +""" Constants for the devops functionality. """ MIN_CONTEXT_LINES = 5 -SURROUNDING_CONTEXT = 5 PR_TYPE_ABANDONED = "abandoned" From cf6b11d921835c964c723da8745823036ede6d3f Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 20:53:08 +0000 Subject: [PATCH 69/72] additional cleanup --- scripts/summarizations/_summarizations.py | 32 ++++++++++++++++++++--- src/gpt_review/repositories/devops.py | 20 ++++++++------ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/scripts/summarizations/_summarizations.py b/scripts/summarizations/_summarizations.py index 7df45645..65c68323 100644 --- a/scripts/summarizations/_summarizations.py +++ b/scripts/summarizations/_summarizations.py @@ -3,6 +3,7 @@ import time import os from dataclasses import dataclass + import yaml from gpt_review.repositories.devops import DevOpsClient @@ -110,7 +111,32 @@ def _summarize_pr_diff(diff) -> str: return summary -def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> list: +def get_pr_diff(patch_repo=None, patch_pr=None, pat=None) -> list: + """ + Get the diff of a PR. + + Args: + patch_repo (str): The pointer to ADO in the format, org/project/repo + patch_pr (str): The PR id. + pat (str): The ADO access token. 
+ """ + + org = patch_repo.split("/")[0] + project = patch_repo.split("/")[1] + repo = patch_repo.split("/")[2] + diff = [] + + if patch_pr and pat: + client = DevOpsClient(pat=pat, org=org, project=project, repository_id=repo) + pull_request = client.client.get_pull_request_by_id(pull_request_id=patch_pr) + diff = client.get_patches(pull_request_event=pull_request) + + return diff + + return diff + + +def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str, pat: str) -> list: """Summarize pull requests. Args: @@ -123,7 +149,7 @@ def _summarize_pull_requests(pull_request_ids_list: list, patch_repo: str) -> li summaries_list = [] for pr_id in pull_request_ids_list: start = time.process_time() - diff = DevOpsClient.get_pr_diff(patch_repo, pr_id, access_token) + diff = get_pr_diff(patch_repo, pr_id, pat) if diff: summary = _summarize_pr_diff(diff=diff) print(time.process_time() - start) @@ -209,7 +235,7 @@ def _load_config_file(): config = _load_config_file() access_token = os.getenv(config.get("ado_token")) pull_request_ids = _load_pull_request_ids(config.get("pull_request_list")) -summaries = _summarize_pull_requests(pull_request_ids, config.get("patch_repo")) +summaries = _summarize_pull_requests(pull_request_ids, config.get("patch_repo"), access_token) final_summary = _get_final_summary(summaries) _print_to_file(FILE_SUMMARY_NAME, "\nThe final summary is:\n" + final_summary) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index a4569e62..4d542e6d 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -217,13 +217,16 @@ def _create_patch(self, left, right, file_path) -> List: patch.append(f"+ {right[row - 1]}") row -= 1 - while line > 0: - patch.append(f"- {left[line - 1]}") - line -= 1 + # while line > 0: + # patch.append(f"- {left[line - 1]}") + # line -= 1 - while row > 0: - patch.append(f"+ {right[row - 1]}") - row -= 1 + # while row > 0: + # patch.append(f"+ 
{right[row - 1]}") + # row -= 1 + + patch.extend(f"- {left[i - 1]}" for i in range(0, line)) + patch.extend(f"+ {right[j - 1]}" for j in range(0, row)) patch.append(file_path) patch.reverse() @@ -447,7 +450,7 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None): Args: patch_repo (str): The pointer to ADO in the format, org/project/repo patch_pr (str): The PR id. - access_token (str): The GitHub access token. + access_token (str): The ADO access token. """ link = urllib.parse.unquote( @@ -465,8 +468,9 @@ def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None): client = DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo) pull_request = client.client.get_pull_request_by_id(pull_request_id=pr_id) diff = client.get_patches(pull_request_event=pull_request) + diff = "\n".join(diff) - return diff + return {"response": "PR posted"} logging.warning("No PR to post too") return {"response": "No PR to post too"} From 930e4100501feaac7aa424ab6e39d7a04d73f18f Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 21:02:50 +0000 Subject: [PATCH 70/72] additional code cleanup --- src/gpt_review/repositories/devops.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 4d542e6d..a0d29239 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -388,7 +388,7 @@ class DevOpsClient(_DevOpsClient): """Azure DevOps client Wrapper for working with.""" @staticmethod - def generate_pr_summary(diff, link=None, access_token=None, post_summary=False) -> Dict[str, str]: + def post_pr_summary(diff, link=None, access_token=None) -> Dict[str, str]: """ Get a review of a PR. @@ -402,7 +402,6 @@ def generate_pr_summary(diff, link=None, access_token=None, post_summary=False) diff (str): The patch of the PR. 
link (str, optional): The link to the PR. Defaults to None. access_token (str, optional): The GitHub access token. Defaults to None. - post_summary (bool, optional): Whether to post the summary to the PR. Defaults to False. Returns: Dict[str, str]: The review. @@ -413,15 +412,13 @@ def generate_pr_summary(diff, link=None, access_token=None, post_summary=False) if link and access_token: review = _summarize_files(diff) - if post_summary: - org, project, repo, pr_id = DevOpsClient._parse_url(link) + org, project, repo, pr_id = DevOpsClient._parse_url(link) - DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( - pull_request_id=pr_id, - description=review, - ) - return {"response": "PR summary posted"} - return {"response": review} + DevOpsClient(pat=access_token, org=org, project=project, repository_id=repo).update_pr( + pull_request_id=pr_id, + description=review, + ) + return {"response": "PR posted"} logging.warning("No PR to post too") return {"response": "No PR to post too"} @@ -443,7 +440,7 @@ def _parse_url(link): return org, project, repo, pr_id @staticmethod - def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None): + def get_pr_diff(patch_repo=None, patch_pr=None, access_token=None) -> str: """ Get the diff of a PR. @@ -588,12 +585,10 @@ def _post_summary(self, payload, pr_id, link) -> None: diff = "\n".join(diff) logging.debug("Copilot diff: %s", diff) - self.generate_pr_summary(diff, link=link) + self.post_pr_summary(diff, link=link) -def _review( - repository=None, pull_request=None, diff: str = ".diff", link=None, access_token=None, post_comment=False -) -> Dict[str, str]: +def _review(repository=None, pull_request=None, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: """Review Azure DevOps PR with Open AI, and post response as a comment. 
Args: @@ -609,7 +604,7 @@ def _review( with open(diff, "r", encoding="utf8") as file: diff_contents = file.read() - return DevOpsClient.generate_pr_summary(diff_contents, link, access_token, post_summary=post_comment) + return DevOpsClient.post_pr_summary(diff_contents, link, access_token) def _comment(question: str, comment_id: int, diff: str = ".diff", link=None, access_token=None) -> Dict[str, str]: From af2475a8f35abedfd480fc4e360925af01292aaa Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 21:29:56 +0000 Subject: [PATCH 71/72] final refactorings --- src/gpt_review/repositories/devops.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index a0d29239..722da1d4 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -217,17 +217,8 @@ def _create_patch(self, left, right, file_path) -> List: patch.append(f"+ {right[row - 1]}") row -= 1 - # while line > 0: - # patch.append(f"- {left[line - 1]}") - # line -= 1 - - # while row > 0: - # patch.append(f"+ {right[row - 1]}") - # row -= 1 - patch.extend(f"- {left[i - 1]}" for i in range(0, line)) patch.extend(f"+ {right[j - 1]}" for j in range(0, row)) - patch.append(file_path) patch.reverse() From 36182227d30a1e5e725671cb70cc2eae5552d2cb Mon Sep 17 00:00:00 2001 From: Danay <60160912+danay1999@users.noreply.github.com> Date: Thu, 15 Jun 2023 22:36:37 +0000 Subject: [PATCH 72/72] fixed create_git_pull_request_from_dict --- src/gpt_review/repositories/devops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gpt_review/repositories/devops.py b/src/gpt_review/repositories/devops.py index 722da1d4..8fa1daf1 100644 --- a/src/gpt_review/repositories/devops.py +++ b/src/gpt_review/repositories/devops.py @@ -255,7 +255,7 @@ def get_selection(lines: str, line_start: int, line_end: int) -> str: return left_selection, 
right_selection - def create_last_merge_source_commit_from_dict(self, commit_dict: Dict) -> GitCommitRef: + def create_git_commit_ref_from_dict(self, commit_dict: Dict) -> GitCommitRef: """Create a GitCommitRef object from a dictionary. Args: @@ -294,12 +294,13 @@ def create_git_pull_request_from_dict(self, pr_dict: Dict) -> GitPullRequest: repository=pr_dict["repository"], merge_id=pr_dict["mergeId"], ) - pull_request.last_merge_source_commit = self.create_last_merge_source_commit_from_dict( + pull_request.last_merge_source_commit = self.create_git_commit_ref_from_dict( pull_request.last_merge_source_commit ) - pull_request.last_merge_target_commit = self.create_last_merge_source_commit_from_dict( + pull_request.last_merge_target_commit = self.create_git_commit_ref_from_dict( pull_request.last_merge_target_commit ) + pull_request.last_merge_commit = self.create_git_commit_ref_from_dict(pull_request.last_merge_commit) return pull_request def get_patches(self, pull_request_event) -> Iterable[List[str]]: