From 205e146938d1db6ad66719406f69b12a86caef7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BC=A8=E7=BC=A8?=
Date: Mon, 20 Jan 2025 17:02:43 +0800
Subject: [PATCH] feat: add API for data insights on PRs, issues, and code changes. (#700)

* feat: init the insight about issue

* feat: add the api for pr&issue&code insight

* chore: add tests for the insight utils
---
Review notes (this area is ignored when the patch is applied):
* Line breaks and indentation were restored from a whitespace-collapsed copy.
  Indentation of the context lines in server/main.py is reconstructed; the
  "index" blob ids still describe the pre-review file contents.
* server/insight/router.py: the code change route was declared as
  "code_change"; joined to the "/api/insight" prefix that registers
  "/api/insightcode_change". It is now "/code_change".
* server/insight/router.py: error branches returned json.dumps(...), a str,
  which is JSON-encoded a second time in the response. They now return the
  dict directly, like the success branches. Imports are left untouched.
* server/utils/insight.py: requests.get() had no timeout; each request is now
  bounded by REQUEST_TIMEOUT. Docstring translated to English and expanded.
* server/insight/service/*.py: docstrings added; get_issue_data returns
  get_data(...) directly, like the helpers in pr.py.
* server/tests/utils/test_insight.py: comment translated to English.

 server/insight/router.py           | 58 +++++++++++++++++++++++
 server/insight/service/issue.py    | 14 ++++++
 server/insight/service/pr.py       | 23 +++++++++
 server/main.py                     |  8 +++-
 server/tests/utils/test_insight.py | 74 ++++++++++++++++++++++++++++++
 server/utils/insight.py            | 73 +++++++++++++++++++++++++++++
 6 files changed, 248 insertions(+), 2 deletions(-)
 create mode 100644 server/insight/router.py
 create mode 100644 server/insight/service/issue.py
 create mode 100644 server/insight/service/pr.py
 create mode 100644 server/tests/utils/test_insight.py
 create mode 100644 server/utils/insight.py

diff --git a/server/insight/router.py b/server/insight/router.py
new file mode 100644
index 00000000..82ff1849
--- /dev/null
+++ b/server/insight/router.py
@@ -0,0 +1,58 @@
+import json
+from typing import Optional
+from fastapi import APIRouter, Depends
+from insight.service.issue import get_issue_data
+from insight.service.pr import get_code_changes, get_pr_data
+
+
+router = APIRouter(
+    prefix="/api/insight",
+    tags=["insight"],
+    responses={404: {"description": "Not found"}},
+)
+
+
+@router.get("/issue")
+def get_issue_insight(repo_name: str):
+    """Return aggregated issue metrics for ``repo_name``."""
+    try:
+        result = get_issue_data(repo_name)
+        return {
+            "success": True,
+            "data": result,
+        }
+
+    except Exception as e:
+        # Return a dict, like the success branch, so the error payload is
+        # not encoded twice (a json.dumps string is sent as a JSON string).
+        return {"success": False, "message": str(e)}
+
+
+@router.get("/pr")
+def get_pr_insight(repo_name: str):
+    """Return aggregated pull request metrics for ``repo_name``."""
+    try:
+        result = get_pr_data(repo_name)
+        return {
+            "success": True,
+            "data": result,
+        }
+
+    except Exception as e:
+        # Dict, not a json.dumps string, to match the success branch.
+        return {"success": False, "message": str(e)}
+
+
+@router.get("/code_change")
+def get_code_change_insight(repo_name: str):
+    """Return aggregated code change metrics for ``repo_name``."""
+    try:
+        result = get_code_changes(repo_name)
+        return {
+            "success": True,
+            "data": result,
+        }
+
+    except Exception as e:
+        # Dict, not a json.dumps string, to match the success branch.
+        return {"success": False, "message": str(e)}
diff --git a/server/insight/service/issue.py b/server/insight/service/issue.py
new file mode 100644
index 00000000..f41d2135
--- /dev/null
+++ b/server/insight/service/issue.py
@@ -0,0 +1,14 @@
+import requests
+from collections import defaultdict
+
+from utils.insight import get_data
+
+
+def get_issue_data(repo_name):
+    """Aggregate issue metrics (labelled open/close/comment) by period."""
+    metrics_mapping = {
+        "issues_new": "open",
+        "issues_closed": "close",
+        "issue_comments": "comment",
+    }
+    return get_data(repo_name, metrics_mapping)
diff --git a/server/insight/service/pr.py b/server/insight/service/pr.py
new file mode 100644
index 00000000..290fb845
--- /dev/null
+++ b/server/insight/service/pr.py
@@ -0,0 +1,23 @@
+import requests
+from collections import defaultdict
+
+from utils.insight import get_data
+
+
+def get_pr_data(repo_name):
+    """Aggregate pull request metrics (labelled open/merge/reviews) by period."""
+    metrics_mapping = {
+        "change_requests": "open",
+        "change_requests_accepted": "merge",
+        "change_requests_reviews": "reviews",
+    }
+    return get_data(repo_name, metrics_mapping)
+
+
+def get_code_changes(repo_name):
+    """Aggregate code change line metrics (labelled add/remove) by period."""
+    metrics_mapping = {
+        "code_change_lines_add": "add",
+        "code_change_lines_remove": "remove",
+    }
+    return get_data(repo_name, metrics_mapping)
diff --git a/server/main.py b/server/main.py
index 6089da19..4f56bcb2 100644
--- a/server/main.py
+++ b/server/main.py
@@ -20,6 +20,7 @@
 from rag import router as rag_router
 from task import router as task_router
 from user import router as user_router
+from insight import router as insight_router
 
 AUTH0_DOMAIN = get_env_variable("AUTH0_DOMAIN")
 API_AUDIENCE = get_env_variable("API_IDENTIFIER")
@@ -62,6 +63,7 @@
 app.include_router(github_app_router.router)
 app.include_router(aws_router.router)
 app.include_router(user_router.router)
+app.include_router(insight_router.router)
 
 
 @app.get("/")
@@ -75,7 +77,7 @@ def health_checker():
         "ENVIRONMENT": ENVIRONMENT,
         "API_URL": API_URL,
         "WEB_URL": WEB_URL,
-        "CALLBACK_URL": CALLBACK_URL
+        "CALLBACK_URL": CALLBACK_URL,
     }
 
 
@@ -88,4 +90,6 @@ def health_checker():
             reload=True,
         )
     else:
-        uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PETERCAT_PORT", "8080")))
+        uvicorn.run(
+            app, host="0.0.0.0", port=int(os.environ.get("PETERCAT_PORT", "8080"))
+        )
diff --git a/server/tests/utils/test_insight.py b/server/tests/utils/test_insight.py
new file mode 100644
index 00000000..acbef380
--- /dev/null
+++ b/server/tests/utils/test_insight.py
@@ -0,0 +1,74 @@
+import unittest
+from unittest.mock import patch, Mock
+from collections import defaultdict
+
+from utils.insight import get_data
+
+
+class TestGetData(unittest.TestCase):
+
+    @patch("requests.get")
+    def test_get_data_success(self, mock_get):
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"2023-01": 10, "2023-02": 20, "2023-03": 30}
+        mock_get.return_value = mock_response
+
+        repo_name = "test-repo"
+        metrics_mapping = {"metric1": "sum", "metric2": "average"}
+
+        expected_result = {
+            "year": [
+                {"type": "sum", "date": "2023", "value": 60},
+                {"type": "average", "date": "2023", "value": 60},
+            ],
+            "quarter": [
+                {"type": "sum", "date": "2023Q1", "value": 60},
+                {"type": "average", "date": "2023Q1", "value": 60},
+            ],
+            "month": [
+                {"type": "sum", "date": "2023-01", "value": 10},
+                {"type": "average", "date": "2023-01", "value": 10},
+                {"type": "sum", "date": "2023-02", "value": 20},
+                {"type": "average", "date": "2023-02", "value": 20},
+                {"type": "sum", "date": "2023-03", "value": 30},
+                {"type": "average", "date": "2023-03", "value": 30},
+            ],
+        }
+
+        result = get_data(repo_name, metrics_mapping)
+        self.assertEqual(result, expected_result)
+
+    @patch("requests.get")
+    def test_get_data_failure(self, mock_get):
+        mock_response = Mock()
+        mock_response.status_code = 500
+        mock_get.return_value = mock_response
+
+        repo_name = "test-repo"
+        metrics_mapping = {"metric1": "sum"}
+
+        expected_result = {
+            "year": [],
+            "quarter": [],
+            "month": [],
+        }
+
+        result = get_data(repo_name, metrics_mapping)
+        self.assertEqual(result, expected_result)
+
+    @patch("requests.get")
+    def test_get_data_empty_response(self, mock_get):
+        # Simulate a successful response that carries no data
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {}
+        mock_get.return_value = mock_response
+
+        repo_name = "test-repo"
+        metrics_mapping = {"metric1": "sum"}
+
+        expected_result = {"year": [], "quarter": [], "month": []}
+
+        result = get_data(repo_name, metrics_mapping)
+        self.assertEqual(result, expected_result)
diff --git a/server/utils/insight.py b/server/utils/insight.py
new file mode 100644
index 00000000..aa73c71b
--- /dev/null
+++ b/server/utils/insight.py
@@ -0,0 +1,73 @@
+import requests
+from collections import defaultdict
+
+# Seconds to wait for each OpenDigger request; without a timeout a stalled
+# upstream would block the calling request handler indefinitely.
+REQUEST_TIMEOUT = 10
+
+
+def get_data(repo_name, metrics_mapping):
+    """Fetch OpenDigger metrics for a repository and aggregate them by period.
+
+    For each metric in ``metrics_mapping`` the document
+    ``https://oss.open-digger.cn/github/{repo_name}/{metric}.json`` is
+    downloaded. Keys that contain ``-`` are treated as ``YYYY-MM`` months and
+    their values are added to the year, quarter and month totals of the
+    metric's label. A non-200 response is printed and the metric is skipped.
+
+    :param repo_name: GitHub repository name, inserted into the URL path
+    :param metrics_mapping: dict mapping metric names to the label ("type")
+        under which their values are aggregated
+    :return: dict with "year", "quarter" and "month" lists of
+        ``{"type": ..., "date": ..., "value": ...}`` entries
+    :raises requests.RequestException: if a request fails or times out
+    """
+    base_url = f"https://oss.open-digger.cn/github/{repo_name}/"
+
+    aggregated_data = {
+        "year": defaultdict(
+            lambda: {metric_type: 0 for metric_type in metrics_mapping.values()}
+        ),
+        "quarter": defaultdict(
+            lambda: {metric_type: 0 for metric_type in metrics_mapping.values()}
+        ),
+        "month": defaultdict(
+            lambda: {metric_type: 0 for metric_type in metrics_mapping.values()}
+        ),
+    }
+
+    for metric, metric_type in metrics_mapping.items():
+        url = f"{base_url}{metric}.json"
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+
+        if response.status_code == 200:
+            data = response.json()
+            for date, value in data.items():
+                if "-" in date:
+                    # NOTE(review): a key with extra "-" segments keeps its full
+                    # text as the month bucket but still adds to the year and
+                    # quarter of its first two segments - confirm intended.
+                    year, month = date.split("-")[:2]
+                    quarter = f"{year}Q{(int(month) - 1) // 3 + 1}"
+
+                    # aggregate by year, quarter, and month
+                    aggregated_data["year"][year][metric_type] += value
+                    aggregated_data["quarter"][quarter][metric_type] += value
+                    aggregated_data["month"][date][metric_type] += value
+        else:
+            print(
+                f"Error fetching data from {url} (status code: {response.status_code})"
+            )
+
+    def format_result(data):
+        result = []
+        for date, counts in data.items():
+            for type_, value in counts.items():
+                result.append({"type": type_, "date": date, "value": value})
+        return result
+
+    return {
+        "year": format_result(aggregated_data["year"]),
+        "quarter": format_result(aggregated_data["quarter"]),
+        "month": format_result(aggregated_data["month"]),
+    }