From 51444f2d6ecacb24d06c1adfddfd0c8a0f10317b Mon Sep 17 00:00:00 2001 From: Giovanni M Guidini <99758426+giovanni-guidini@users.noreply.github.com> Date: Fri, 9 Feb 2024 17:00:51 -0300 Subject: [PATCH] fix: ignore insallation for owner some times (#263) * fix: ignore insallation for owner some times This is a problem with the 'sync repos' button in the UI. It only affects getting the token for a OWNER (if it's for a repo it's correct). Sadly this affects `sync_teams` and `sync_repos` tasks. If the owner decides to sync we should use the owner's token, even if there is an installation app for that owner. The extra logic helps us do that. * hotfix: don't override old integration completely We want to fallback to the old installation_id if the repo claims to be covered. This is because there's an uncovered edge case in the ghapp installations if the owner doesn't have ALL selected and adds a new one to it. --- services/bots.py | 63 +++++++++++++++++++++++++++---------- services/github.py | 5 +++ services/owner.py | 8 +++-- services/tests/test_bots.py | 3 +- tasks/sync_repos.py | 6 +++- tasks/sync_teams.py | 4 ++- 6 files changed, 67 insertions(+), 22 deletions(-) diff --git a/services/bots.py b/services/bots.py index 1e9ed2a4d..823f1b0cd 100644 --- a/services/bots.py +++ b/services/bots.py @@ -20,25 +20,50 @@ def get_owner_installation_id( *, repository: Optional[Repository] = None, installation_name: str = GITHUB_APP_INSTALLATION_DEFAULT_NAME, + # This is used for SyncTeams and SyncRepos if we are not + # using the installations to list these values + # We have this secondary value because `deprecated_using_integration` is... deprecated + # And might get out-of-sync soon + ignore_installation: bool = False ) -> Optional[int]: - default_app_installation_filter = filter( - lambda obj: obj.name == installation_name, owner.github_app_installations or [] - ) - # filter is an Iterator, so we need to scan matches - # in practice we only consider the 1st one - for app_installation in default_app_installation_filter: - if repository: - if app_installation.is_repo_covered_by_integration(repository): + + if not ignore_installation or deprecated_using_integration: + default_app_installation_filter = filter( + lambda obj: obj.name == installation_name, + owner.github_app_installations or [], + ) + # filter is an Iterator, so we need to scan matches + # in practice we only consider the 1st one + for app_installation in default_app_installation_filter: + if repository: + if app_installation.is_repo_covered_by_integration(repository): + return app_installation.installation_id + # The repo we want to get a token for is not covered by the installation + log.warning( + "owner has ghapp installation but repo is not covered", + extra=dict( + repoid=(repository.repoid if repository else "no_repo"), + ownerid=owner.ownerid, + ), + ) + # Not returning None here because we found situations where ghapp installations will mark the + # the repo as NOT covered but it is, in fact, covered. + # We need to backfill some things. + else: + # Getting owner installation - not tied to any particular repo return app_installation.installation_id - # The repo we want to get a token for is not covered by the installation - return None - else: - # Getting owner installation - not tied to any particular repo - return app_installation.installation_id # DEPRECATED FLOW - begin if owner.integration_id and deprecated_using_integration: return owner.integration_id # DEPRECATED FLOW - end + log.warning( + "(owner has no ghapp installation AND no integration_id) OR not using integration", + extra=dict( + repoid=(repository.repoid if repository else "no_repo"), + ownerid=owner.ownerid, + using_integration=deprecated_using_integration, + ), + ) return None @@ -47,7 +72,7 @@ def get_repo_appropriate_bot_token(repo: Repository) -> Tuple[Dict, Optional[Own return get_public_bot_token(repo) installation_id = get_owner_installation_id( - repo.owner, repo.using_integration, repository=repo + repo.owner, repo.using_integration, repository=repo, ignore_installation=False ) if installation_id: github_token = get_github_integration_token(repo.owner.service, installation_id) @@ -88,7 +113,7 @@ def get_token_type_mapping(repo: Repository): if repo.private: return None installation_id = get_owner_installation_id( - repo.owner, repo.using_integration, repository=repo + repo.owner, repo.using_integration, repository=repo, ignore_installation=False ) if installation_id: return None @@ -135,8 +160,12 @@ def _get_repo_appropriate_bot(repo: Repository) -> Owner: raise RepositoryWithoutValidBotError() -def get_owner_appropriate_bot_token(owner, using_integration) -> Dict: - installation_id = get_owner_installation_id(owner, using_integration) +def get_owner_appropriate_bot_token( + owner, using_integration, ignore_installation: bool = False +) -> Dict: + installation_id = get_owner_installation_id( + owner, using_integration, ignore_installation=ignore_installation + ) if installation_id: github_token = get_github_integration_token(owner.service, installation_id) return dict(key=github_token) diff --git a/services/github.py b/services/github.py index 8c7b6ef46..269064895 100644 --- a/services/github.py +++ b/services/github.py @@ -1,13 +1,18 @@ +import logging + from shared.github import InvalidInstallationError from shared.github import get_github_integration_token as _get_github_integration_token from helpers.cache import cache from helpers.exceptions import RepositoryWithoutValidBotError +log = logging.getLogger(__name__) + @cache.cache_function(ttl=480) def get_github_integration_token(service, integration_id=None): try: return _get_github_integration_token(service, integration_id=integration_id) except InvalidInstallationError: + log.warning("Failed to get installation token") raise RepositoryWithoutValidBotError() diff --git a/services/owner.py b/services/owner.py index 5f57bafae..cbb3ff348 100644 --- a/services/owner.py +++ b/services/owner.py @@ -9,13 +9,17 @@ log = logging.getLogger(__name__) -def get_owner_provider_service(owner, using_integration=False): +def get_owner_provider_service( + owner, using_integration=False, ignore_installation=False +): _timeouts = [ get_config("setup", "http", "timeouts", "connect", default=15), get_config("setup", "http", "timeouts", "receive", default=30), ] service = owner.service - token = get_owner_appropriate_bot_token(owner, using_integration) + token = get_owner_appropriate_bot_token( + owner, using_integration, ignore_installation=ignore_installation + ) adapter_params = dict( owner=dict( service_id=owner.service_id, ownerid=owner.ownerid, username=owner.username diff --git a/services/tests/test_bots.py b/services/tests/test_bots.py index c5a87bef2..c6c8d98e6 100644 --- a/services/tests/test_bots.py +++ b/services/tests/test_bots.py @@ -425,13 +425,14 @@ def test_get_owner_installation_id_yes_installation_yes_legacy_integration_speci == 123456 ) # Notice that the installation object overrides the `Repository.using_integration` column completely + # ^ Not true anymore. We decided against it because there are some edge cases in filling up the list assert ( get_owner_installation_id( owner, repo_not_covered_by_installation.using_integration, repository=repo_not_covered_by_installation, ) - is None + == 12341234 ) def test_get_token_type_mapping_public_repo_no_configuration_no_particular_bot( diff --git a/tasks/sync_repos.py b/tasks/sync_repos.py index 4d7340016..aa7240b73 100644 --- a/tasks/sync_repos.py +++ b/tasks/sync_repos.py @@ -72,7 +72,11 @@ async def run_async( timeout=max(300, self.hard_time_limit_task), blocking_timeout=5, ): - git = get_owner_provider_service(owner, using_integration) + git = get_owner_provider_service( + owner, + using_integration, + ignore_installation=(not using_integration), + ) synced_repoids = [] if using_integration: with metrics.timer(f"{metrics_scope}.sync_repos_using_integration"): diff --git a/tasks/sync_teams.py b/tasks/sync_teams.py index 6697d6f37..71f71bdca 100644 --- a/tasks/sync_teams.py +++ b/tasks/sync_teams.py @@ -24,7 +24,9 @@ async def run_async(self, db_session, ownerid, *, username=None, **kwargs): assert owner, "Owner not found" service = owner.service - git = get_owner_provider_service(owner, using_integration=False) + git = get_owner_provider_service( + owner, using_integration=False, ignore_installation=True + ) # get list of teams with username, name, email, id (service_id), etc teams = await git.list_teams()