From 89e5483b783ba57b355520886e7916226cbb5752 Mon Sep 17 00:00:00 2001 From: Romain Komorn <136473744+romainkomorndatadog@users.noreply.github.com> Date: Mon, 9 Sep 2024 11:30:12 +0100 Subject: [PATCH] fix(ci_visibility): properly strip .git from repo URL when getting repo name (#10556) Fixes issue #10554 where `rstrip` was being used to remove the `.git` suffix from repo URLs, causing repo names that end in one or more of `g`, `i`, or `t` to be incorrectly extracted. eg: `fastapi.git` -> `fastap` or `test-environment` -> `test-environmen` . ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) (cherry picked from commit aa04928bf0950707d1dde063023ea9baaf363ada) --- ddtrace/internal/ci_visibility/recorder.py | 14 ++++++++++---- ...trip_dotgit_from_repo_url-523a908075aea559.yaml | 5 +++++ tests/ci_visibility/test_ci_visibility.py | 10 ++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/ci_visibility-fix_properly_strip_dotgit_from_repo_url-523a908075aea559.yaml diff --git a/ddtrace/internal/ci_visibility/recorder.py b/ddtrace/internal/ci_visibility/recorder.py index f6410241a1c..0d55136742f 100644 --- a/ddtrace/internal/ci_visibility/recorder.py +++ b/ddtrace/internal/ci_visibility/recorder.py @@ -3,6 +3,7 @@ import json import os from pathlib import Path +import re import socket from typing import TYPE_CHECKING # noqa:F401 from typing import NamedTuple # noqa:F401 @@ -102,12 +103,17 @@ class CIVisibilityAuthenticationException(Exception): pass -def _extract_repository_name_from_url(repository_url): - # type: (str) -> str +def _extract_repository_name_from_url(repository_url: str) -> str: + _REPO_NAME_REGEX = r".*/(?P.*?)(\.git)?$" + try: - return parse.urlparse(repository_url).path.rstrip(".git").rpartition("/")[-1] + url_path = parse.urlparse(repository_url).path + matches = re.match(_REPO_NAME_REGEX, url_path, flags=re.IGNORECASE) + if matches: + return matches.group("repo_name") + log.warning("Cannot extract repository name from unexpected URL path: %s", url_path) + return repository_url except ValueError: - # In case of parsing error, default to repository url log.warning("Repository name cannot be parsed from repository_url: %s", repository_url) return repository_url diff --git a/releasenotes/notes/ci_visibility-fix_properly_strip_dotgit_from_repo_url-523a908075aea559.yaml b/releasenotes/notes/ci_visibility-fix_properly_strip_dotgit_from_repo_url-523a908075aea559.yaml new file mode 100644 index 00000000000..10cb457166d --- /dev/null +++ b/releasenotes/notes/ci_visibility-fix_properly_strip_dotgit_from_repo_url-523a908075aea559.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + CI Visibility: Fixes a bug where ``.git`` was incorrectly being stripped from repository URLs when extracting service names, + resulting in ``g``, ``i``, or ``t`` being removed (eg: ``test-environment.git`` incorrectly becoming ``test-environmen``) diff --git a/tests/ci_visibility/test_ci_visibility.py b/tests/ci_visibility/test_ci_visibility.py index c6fd79fb546..d6d95d414b9 100644 --- a/tests/ci_visibility/test_ci_visibility.py +++ b/tests/ci_visibility/test_ci_visibility.py @@ -292,6 +292,16 @@ def test_ci_visibility_service_disable(): ("git+git://github.com/org/repo-name.git", "repo-name"), ("git+ssh://github.com/org/repo-name.git", "repo-name"), ("git+https://github.com/org/repo-name.git", "repo-name"), + ("https://github.com/fastapi/fastapi.git", "fastapi"), + ("git@github.com:fastapi/fastapi.git", "fastapi"), + ("git@github.com:fastapi/fastapi.gitttttt", "fastapi.gitttttt"), + ("git@github.com:fastapi/fastapiiiititititi.git", "fastapiiiititititi"), + ("https://github.com/fastapi/fastapitttttt.git", "fastapitttttt"), + ("this is definitely not a valid git repo URL", "this is definitely not a valid git repo URL"), + ("git@github.com:fastapi/FastAPI.GiT", "FastAPI"), + ("git+https://github.com/org/REPO-NAME.GIT", "REPO-NAME"), + ("https://github.com/DataDog/DD-TRACE-py", "DD-TRACE-py"), + ("https://github.com/DataDog/dd-trace-py.GIT", "dd-trace-py"), ], ) def test_repository_name_extracted(repository_url, repository_name):