diff --git a/.gitignore b/.gitignore index c4bfd86..e1e717f 100644 --- a/.gitignore +++ b/.gitignore @@ -126,3 +126,5 @@ dmypy.json # Pyright type checker .pyright/ + +.vscode/settings.json diff --git a/github_tracker_bot/process_commits.py b/github_tracker_bot/process_commits.py index 170c73e..1629c3b 100644 --- a/github_tracker_bot/process_commits.py +++ b/github_tracker_bot/process_commits.py @@ -1,13 +1,20 @@ import os import sys +import time import asyncio import aiohttp from datetime import datetime from dateutil import parser from github import Github from typing import List, Optional, Dict, Any - -from tenacity import retry, wait_fixed, stop_after_attempt, retry_if_exception_type +from tenacity import ( + retry, + wait_fixed, + stop_after_attempt, + retry_if_exception_type, + RetryError, +) +from asyncio import Semaphore sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) @@ -22,47 +29,54 @@ GITHUB_TOKEN = config.GITHUB_TOKEN g = Github(GITHUB_TOKEN) + +CONCURRENT_REQUESTS = 8 +semaphore = Semaphore(CONCURRENT_REQUESTS) + retry_conditions = ( - retry_if_exception_type(aiohttp.ClientError) + retry_if_exception_type( + aiohttp.ClientError, + ) | retry_if_exception_type(asyncio.TimeoutError) - | retry_if_exception_type(Exception) + | retry_if_exception_type(aiohttp.ClientConnectorError) ) -@retry(wait=wait_fixed(2), stop=stop_after_attempt(5), retry=retry_conditions) +@retry(wait=wait_fixed(5), stop=stop_after_attempt(8), retry=retry_conditions) async def fetch_diff(repo: str, sha: str) -> Optional[str]: url = f"https://api.github.com/repos/{repo}/commits/{sha}" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - try: - async with aiohttp.ClientSession() as session: - async with session.get(url, headers=headers) as response: - if response.status == 200: - commit_data = await response.json() - diff_url = commit_data["html_url"] + ".diff" - - async with session.get(diff_url, headers=headers) as diff_response: - if diff_response.status == 200: - return await diff_response.text() - else: - logger.error( - f"Failed to fetch diff: {await diff_response.text()}" - ) - return None - else: - logger.error( - f"Failed to fetch commit data: {await response.text()}" - ) - return None - except aiohttp.ClientError as e: - logger.error(f"Client error while fetching diff for repo {repo}: {e}") - raise - except asyncio.TimeoutError: - logger.error("Request timed out while fetching diff") - raise - except Exception as e: - logger.error(f"Unexpected error while fetching diff: {e}") - raise + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3.diff", + } + + async with semaphore: + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + diff = await response.text() + return diff + elif response.status == 403: + reset_time = response.headers.get("X-RateLimit-Reset") + sleep_time = ( + int(reset_time) - int(time.time()) + 1 if reset_time else 60 + ) + logger.warning( + f"Rate limit exceeded. Sleeping for {sleep_time} seconds." + ) + await asyncio.sleep(sleep_time) + raise aiohttp.ClientError("Rate limit exceeded, retrying...") + else: + error_text = await response.text() + logger.error( + f"Failed to fetch diff: {response.status}, {error_text}" + ) + return None + except Exception as e: + logger.error(f"Error while fetching diff for repo {repo}: {e}") + raise def concatenate_diff_to_commit_info( @@ -109,12 +123,15 @@ async def process_commits(commit_infos: List[Dict[str, Any]]): for commit_info in commit_infos ] - diffs = await asyncio.gather(*tasks) + diffs = await asyncio.gather(*tasks, return_exceptions=True) - processed_commits = [ - concatenate_diff_to_commit_info(commit_info, diff) - for commit_info, diff in zip(commit_infos, diffs) - ] + processed_commits = [] + for commit_info, diff in zip(commit_infos, diffs): + if isinstance(diff, Exception): + logger.error(f"Failed to fetch diff for {commit_info['sha']}: {diff}") + diff = None + processed_commit = concatenate_diff_to_commit_info(commit_info, diff) + processed_commits.append(processed_commit) grouped_commits = group_and_sort_commits(processed_commits) for daily_commit in grouped_commits.values(): @@ -126,9 +143,11 @@ async def process_commits(commit_infos: List[Dict[str, Any]]): if __name__ == "__main__": repo_name = "UmstadAI/zkAppUmstad" sha = "092c20a73859e0b4a4591f815efbdcab08df4df8" - diff = asyncio.run(fetch_diff(repo_name, sha)) - - if diff: - logger.info(f"Fetched diff successfully: {diff}") - else: - logger.error("Failed to fetch diff") + try: + diff = asyncio.run(fetch_diff(repo_name, sha)) + if diff: + logger.info(f"Fetched diff successfully.") + else: + logger.error("Failed to fetch diff") + except RetryError as e: + logger.error(f"Failed after retries: {e}") diff --git a/requirements.txt b/requirements.txt index 3ca876a..ba837b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -109,7 +109,6 @@ ujson==5.10.0 uritemplate==4.1.1 urllib3==2.2.2 uvicorn==0.30.3 -uvloop==0.19.0 vulture==2.11 watchfiles==0.22.0 websockets==12.0 diff --git a/setup.py b/setup.py index d4aa466..2f6bec9 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,6 @@ setup( name="pgt_leaderbot", - version="0.3.6", + version="0.3.7", packages=find_packages(), ) diff --git a/tasks.py b/tasks.py index 6c82350..a5fa308 100644 --- a/tasks.py +++ b/tasks.py @@ -26,6 +26,11 @@ def testmongo(ctx): ctx.run(f"python -m unittest tests/test_mongo_data_handler.py") +@task +def testfc(ctx): + ctx.run(f"python -m unittest tests/test_process_commits.py") + + @task def testmongoint(ctx): ctx.run(f"python -m unittest tests/test_mongo_integration.py") diff --git a/tests/test_commit_scraper.py b/tests/test_commit_scraper.py index 3cdbbef..181505d 100644 --- a/tests/test_commit_scraper.py +++ b/tests/test_commit_scraper.py @@ -3,7 +3,10 @@ import asyncio import aiohttp from github.GithubException import GithubException +import sys +import os +sys.path.append(os.path.abspath(os.path.dirname(__file__) + "/../")) from github_tracker_bot.commit_scraper import fetch_commits, get_user_commits_in_repo diff --git a/tests/test_process_commits.py b/tests/test_process_commits.py new file mode 100644 index 0000000..9ad371d --- /dev/null +++ b/tests/test_process_commits.py @@ -0,0 +1,139 @@ +import unittest +from unittest.mock import patch, AsyncMock, call +import asyncio +import aiohttp + +import time +import os +import sys + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from github_tracker_bot.process_commits import fetch_diff + + +class TestFetchDiff(unittest.IsolatedAsyncioTestCase): + + @patch("aiohttp.ClientSession.get") + async def test_successful_response(self, mock_get): + # Mock the response object + mock_response = AsyncMock() + mock_response.status = 200 + mock_response.text = AsyncMock(return_value="diff content") + mock_get.return_value.__aenter__.return_value = mock_response + + # Call the fetch_diff function + repo = "memreok/PGT_LeaderBot" + sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0" + diff = await fetch_diff(repo, sha) + + # Assert the result + self.assertEqual(diff, "diff content") + + @patch("aiohttp.ClientSession.get") + async def test_retry_on_client_error(self, mock_get): + first_attempt = AsyncMock() + first_attempt.__aenter__.side_effect = aiohttp.ClientError() + + # Mock a successful response on retry + mock_response_success = AsyncMock() + mock_response_success.status = 200 + mock_response_success.text = AsyncMock(return_value="diff content") + second_attempt = AsyncMock() + second_attempt.__aenter__.return_value = mock_response_success + + # Set side effects for consecutive calls + mock_get.side_effect = [first_attempt, second_attempt] + + repo = "memreok/PGT_LeaderBot" + sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0" + diff = await fetch_diff(repo, sha) + + # Assert the result after retry + self.assertEqual(mock_get.call_count, 2) + self.assertEqual(diff, "diff content") + + @patch("aiohttp.ClientSession.get") + async def test_retry_on_timeout_error(self, mock_get): + first_attempt = AsyncMock() + first_attempt.__aenter__.side_effect = asyncio.TimeoutError() + + # Mock a successful response on retry + mock_response_success = AsyncMock() + mock_response_success.status = 200 + mock_response_success.text = AsyncMock(return_value="diff content") + second_attempt = AsyncMock() + second_attempt.__aenter__.return_value = mock_response_success + + # Set side effects for consecutive calls + mock_get.side_effect = [first_attempt, second_attempt] + + repo = "memreok/PGT_LeaderBot" + sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0" + diff = await fetch_diff(repo, sha) + + # Assert the result after retry + self.assertEqual(mock_get.call_count, 2) + self.assertEqual(diff, "diff content") + + @patch("asyncio.sleep", new_callable=AsyncMock) + @patch("time.time", return_value=1000) + @patch("aiohttp.ClientSession.get") + async def test_handle_403_api_rate_limit(self, mock_get, mock_time, mock_sleep): + # Set a fixed current time + current_time = 1000 + mock_time.return_value = current_time + reset_time_in_future = current_time + 120 # 2 minutes in the future + + # Mock a 403 response with rate limit reset header + mock_response_403 = AsyncMock() + mock_response_403.status = 403 + mock_response_403.text = AsyncMock(return_value="API Rate Limit Exceeded") + mock_response_403.headers = {"X-RateLimit-Reset": str(reset_time_in_future)} + + # Mock a successful response on retry + mock_response_success = AsyncMock() + mock_response_success.status = 200 + mock_response_success.text = AsyncMock(return_value="diff content") + + # Set side effects for consecutive calls + first_attempt = AsyncMock() + first_attempt.__aenter__.return_value = mock_response_403 + second_attempt = AsyncMock() + second_attempt.__aenter__.return_value = mock_response_success + mock_get.side_effect = [first_attempt, second_attempt] + + repo = "memreok/PGT_LeaderBot" + sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0" + diff = await fetch_diff(repo, sha) + + expected_sleep_time = reset_time_in_future - current_time + 1 # Should be 121 + + # Assert that sleep was called twice: + # 1. Once for the rate limit (expected_sleep_time) + # 2. Once for the tenacity retry (fixed 2 seconds) + self.assertEqual(mock_sleep.call_count, 2) + mock_sleep.assert_has_calls([call(expected_sleep_time), call(2.0)]) + + # Ensure that the second call to `aiohttp.get` was successful + self.assertEqual(mock_get.call_count, 2) + self.assertEqual(diff, "diff content") + + @patch("aiohttp.ClientSession.get") + async def test_general_exception_handling(self, mock_get): + # Mock an unknown exception + mock_get.side_effect = Exception("Unknown Error") + + repo = "memreok/PGT_LeaderBot" + sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0" + + with self.assertRaises(Exception): + await fetch_diff(repo, sha) + + +def run_async_tests(): + loop = asyncio.get_event_loop() + loop.run_until_complete(unittest.main()) + + +if __name__ == "__main__": + run_async_tests()