Merge branch 'main' into aiven-anton/fix-backup-consumer-logic-comment
aiven-anton authored Jun 12, 2024
2 parents 0122d2d + 38acabd commit f734880
Showing 109 changed files with 7,300 additions and 1,440 deletions.
4 changes: 4 additions & 0 deletions .coveragerc
@@ -0,0 +1,4 @@
[run]
branch = True
relative_files = True
source = karapace
8 changes: 4 additions & 4 deletions .github/workflows/lint.yml
@@ -19,8 +19,8 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
cache: pip
python-version: '3.11'
@@ -38,8 +38,8 @@ jobs:
type-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
cache: pip
python-version: '3.11'
4 changes: 2 additions & 2 deletions .github/workflows/schema.yml
@@ -9,11 +9,11 @@ jobs:
check-generate-schema:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
# Need fetch-depth 0 to fetch tags, see https://github.com/actions/checkout/issues/701
with:
fetch-depth: 0
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: pip
50 changes: 47 additions & 3 deletions .github/workflows/tests.yml
@@ -27,21 +27,65 @@ jobs:
--log-file=/tmp/ci-logs/pytest.log
--showlocals
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
cache: pip
python-version: ${{ matrix.python-version }}

- run: make install version
- run: make unit-tests
env:
COVERAGE_FILE: ".coverage.${{ matrix.python-version }}"
PYTEST_ARGS: "--cov=karapace --cov-append"
- run: make integration-tests
env:
COVERAGE_FILE: ".coverage.${{ matrix.python-version }}"
PYTEST_ARGS: "--cov=karapace --cov-append --random-order"

- name: Archive logs
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: ${{ always() }}
with:
name: karapace-integration-test-logs-${{ matrix.python-version }}
path: /tmp/ci-logs
- name: Archive coverage file
uses: actions/upload-artifact@v4
with:
name: "coverage-${{ matrix.python-version }}"
path: ".coverage.${{ matrix.python-version }}"

coverage:
name: Coverage report
runs-on: ubuntu-latest
needs: tests
permissions:
pull-requests: write
contents: write
steps:
- uses: actions/checkout@v4

- name: Download coverage
id: download_coverage
uses: actions/download-artifact@v4
with:
pattern: coverage-*
merge-multiple: true

- run: make karapace/version.py

- name: Post coverage comment
id: post_coverage_comment
uses: py-cov-action/python-coverage-comment-action@v3
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MERGE_COVERAGE_FILES: true

- name: Store PR comment to be posted
uses: actions/upload-artifact@v4
if: steps.post_coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
with:
name: python-coverage-comment-action
path: python-coverage-comment-action.txt
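
The ``coverage`` job above downloads every ``coverage-*`` artifact into a single workspace and lets the comment action merge them (``MERGE_COVERAGE_FILES: true``). Roughly the same merge can be reproduced locally with the coverage.py API (a sketch; it assumes coverage >= 5.5 for the ``keep=`` argument, and the ``.coverage.<version>`` file names are hypothetical)::

    from coverage import Coverage

    cov = Coverage()  # reads .coveragerc, so branch/relative_files/source apply
    # Merge the per-interpreter data files produced by the matrix jobs.
    cov.combine([".coverage.3.10", ".coverage.3.11"], keep=True)
    cov.save()
    total = cov.report()  # prints a summary table and returns the total percentage
    print(f"combined coverage: {total:.1f}%")
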
7 changes: 3 additions & 4 deletions GNUmakefile
@@ -18,7 +18,6 @@ MAKEFLAGS += --warn-undefined-variables
MAKEFLAGS += --no-builtin-rules
SHELL := bash
.SHELLFLAGS := -euxo pipefail -O globstar -c
.ONESHELL:
.SILENT:
.SUFFIXES:

@@ -86,9 +85,9 @@ cleanest: cleaner
requirements: export CUSTOM_COMPILE_COMMAND='make requirements'
requirements:
pip install --upgrade pip setuptools pip-tools
pip-compile --upgrade --resolver=backtracking requirements/requirements.in -o requirements/requirements.txt
pip-compile --upgrade --resolver=backtracking requirements/requirements-dev.in -o requirements/requirements-dev.txt
pip-compile --upgrade --resolver=backtracking requirements/requirements-typing.in -o requirements/requirements-typing.txt
cd requirements && pip-compile --upgrade --resolver=backtracking requirements.in -o requirements.txt
cd requirements && pip-compile --upgrade --resolver=backtracking requirements-dev.in -o requirements-dev.txt
cd requirements && pip-compile --upgrade --resolver=backtracking requirements-typing.in -o requirements-typing.txt

.PHONY: schema
schema: against := origin/main
33 changes: 30 additions & 3 deletions README.rst
@@ -9,8 +9,8 @@ of `Kafka REST <https://docs.confluent.io/platform/current/kafka-rest/index.html

|Tests| |Contributor Covenant|

.. |Tests| image:: https://github.com/aiven/karapace/actions/workflows/tests.yml/badge.svg?branch=main
:target: https://github.com/aiven/karapace/actions/workflows/tests.yml?query=branch%3Amain
.. |Tests| image:: https://github.com/Aiven-Open/karapace/actions/workflows/tests.yml/badge.svg?branch=main
:target: https://github.com/Aiven-Open/karapace/actions/workflows/tests.yml?query=branch%3Amain

.. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg
:target: CODE_OF_CONDUCT.md
@@ -461,7 +461,7 @@ Keys to take special care are the ones needed to configure Kafka and advertised_
- Runtime directory for the ``protoc`` protobuf schema parser and code generator
* - ``name_strategy``
- ``topic_name``
- Name strategy to use when storing schemas from the kafka rest proxy service. You can opt between ``name_strategy`` , ``record_name`` and ``topic_record_name``
- Name strategy to use when storing schemas from the kafka rest proxy service. You can opt between ``topic_name`` , ``record_name`` and ``topic_record_name``
* - ``name_strategy_validation``
- ``true``
- If enabled, validate that given schema is registered under used name strategy when producing messages from Kafka Rest
@@ -568,6 +568,17 @@ Example of complete authorization file
]
}

Karapace Schema Registry access to the schemas topic
====================================================

The principal used by the Karapace Schema Registry must have adequate access to the schemas topic (see the ``topic_name`` configuration option above).
In addition to the topic access described in the Confluent Schema Registry documentation_, the unique, single-member consumer group
used by the schema registry's consumers needs ``Describe`` and ``Read`` permissions_ on the group.
These consumer group names are unique per schema registry instance: they are prefixed by ``karapace-autogenerated-``, followed by a random string.

.. _`documentation`: https://docs.confluent.io/platform/current/schema-registry/security/index.html#authorizing-access-to-the-schemas-topic
.. _`permissions`: https://docs.confluent.io/platform/current/kafka/authorization.html#group-resource-type-operations
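
A minimal sketch of granting those group permissions with the confluent-kafka Python client's admin API (assumptions: client version >= 1.9 for ACL support, and the bootstrap address and ``User:karapace`` principal are hypothetical placeholders)::

    from confluent_kafka.admin import (
        AclBinding,
        AclOperation,
        AclPermissionType,
        AdminClient,
        ResourcePatternType,
        ResourceType,
    )

    admin = AdminClient({"bootstrap.servers": "localhost:9092"})
    bindings = [
        AclBinding(
            ResourceType.GROUP,
            "karapace-autogenerated-",  # prefix covering the random suffix
            ResourcePatternType.PREFIXED,
            "User:karapace",  # hypothetical principal of the registry
            "*",
            operation,
            AclPermissionType.ALLOW,
        )
        for operation in (AclOperation.READ, AclOperation.DESCRIBE)
    ]
    # create_acls() returns one future per binding; result() raises on failure.
    for future in admin.create_acls(bindings).values():
        future.result()
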

OAuth2 authentication and authorization of Karapace REST proxy
===================================================================

@@ -586,6 +597,22 @@ The REST proxy process manages a set of producer and consumer clients, which are

Before a client refreshes its OAuth2 JWT token, it is expected to remove its currently running consumers (e.g. after committing their offsets) and producers that use the current token.
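
A rough sketch of that teardown-and-recreate sequence against the REST proxy's consumer API (the proxy URL, group, instance name, and tokens are all hypothetical placeholders; the offset-commit and instance-deletion endpoints follow the Kafka REST v2 API)::

    import requests

    GROUP_URL = "http://localhost:8082/consumers/my-group"
    INSTANCE_URL = f"{GROUP_URL}/instances/my-consumer"
    V2 = "application/vnd.kafka.v2+json"
    OLD_TOKEN, NEW_TOKEN = "old-jwt", "new-jwt"

    # Commit offsets and remove the consumer while the old token is still valid.
    requests.post(
        f"{INSTANCE_URL}/offsets",
        headers={"Authorization": f"Bearer {OLD_TOKEN}", "Content-Type": V2},
    ).raise_for_status()
    requests.delete(
        INSTANCE_URL, headers={"Authorization": f"Bearer {OLD_TOKEN}"}
    ).raise_for_status()

    # Recreate the consumer under the refreshed token.
    requests.post(
        GROUP_URL,
        headers={"Authorization": f"Bearer {NEW_TOKEN}", "Content-Type": V2},
        json={"name": "my-consumer", "format": "json"},
    ).raise_for_status()
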

Schema Normalization
--------------------

When the ``normalize=true`` query parameter is passed to the POST ``/subjects/{subject}/versions`` endpoint or the POST ``/subjects/{subject}`` endpoint,
Karapace applies a schema normalization algorithm to ensure that the schema is stored in a canonical form.

Normalization ensures that semantically equivalent schemas are stored in the same way and are therefore considered equal.

Normalization is currently supported only for Protobuf schemas, and Karapace does not implement all of the normalization features of the Confluent Schema Registry.
At present, only the ordering of the optional fields in the schema is normalized.
Use the feature with the expectation that it will be extended, so two schemas that are semantically equivalent may still be considered
different by the normalization process in future versions of Karapace.
The safe failure mode for a normalization process is to treat two semantically equivalent schemas as different; the real problem would be treating two semantically different schemas as equivalent.
In that light, a future extension of the normalization process is not considered a breaking change but a refinement.
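
A minimal registration request with normalization enabled, sketched with the ``requests`` library (the registry URL and subject name are hypothetical placeholders)::

    import json

    import requests

    schema = 'syntax = "proto3"; message Value { string name = 1; }'
    response = requests.post(
        "http://localhost:8081/subjects/example-value/versions",
        params={"normalize": "true"},
        headers={"Content-Type": "application/vnd.schemaregistry.v1+json"},
        data=json.dumps({"schema": schema, "schemaType": "PROTOBUF"}),
    )
    response.raise_for_status()
    print(response.json())  # e.g. {"id": 1}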


Uninstall
=========

2 changes: 1 addition & 1 deletion SECURITY.md
@@ -12,7 +12,7 @@ receiving such patches depend on the CVSS v3.0 Rating:
## Reporting a Vulnerability

Please report (suspected) security vulnerabilities to our **[bug bounty
program](https://hackerone.com/aiven_ltd)**. You will receive a response from
program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from
us within 2 working days. If the issue is confirmed, we will release a patch as
soon as possible depending on impact and complexity.

90 changes: 61 additions & 29 deletions karapace/auth.py
@@ -2,6 +2,8 @@
Copyright (c) 2023 Aiven Ltd
See LICENSE for details
"""
from __future__ import annotations

from base64 import b64encode
from dataclasses import dataclass, field
from enum import Enum, unique
@@ -10,7 +12,6 @@
from karapace.rapu import JSON_CONTENT_TYPE
from karapace.statsd import StatsClient
from karapace.utils import json_decode, json_encode
from typing import List, Optional
from typing_extensions import TypedDict
from watchfiles import awatch, Change

@@ -90,15 +91,68 @@ class ACLEntryData(TypedDict):


class AuthData(TypedDict):
users: List[UserData]
permissions: List[ACLEntryData]
users: list[UserData]
permissions: list[ACLEntryData]


class ACLAuthorizer:
def __init__(self, *, user_db: dict[str, User] | None = None, permissions: list[ACLEntry] | None = None) -> None:
self.user_db = user_db or {}
self.permissions = permissions or []

def get_user(self, username: str) -> User | None:
return self.user_db.get(username)

def _check_resources(self, resources: list[str], aclentry: ACLEntry) -> bool:
for resource in resources:
if aclentry.resource.match(resource) is not None:
return True
return False

def _check_operation(self, operation: Operation, aclentry: ACLEntry) -> bool:
"""Does ACL entry allow given operation.
An entry at minimum gives Read permission. Write permission implies Read."""
return operation == Operation.Read or aclentry.operation == Operation.Write

def check_authorization(self, user: User | None, operation: Operation, resource: str) -> bool:
if user is None:
return False

for aclentry in self.permissions:
if (
aclentry.username == user.username
and self._check_operation(operation, aclentry)
and self._check_resources([resource], aclentry)
):
return True
return False

def check_authorization_any(self, user: User | None, operation: Operation, resources: list[str]) -> bool:
"""Checks that user is authorized to one of the resources in the list.
If any resource in the list matches the permission the function returns True. This indicates only that
one resource matches the permission and other resources may not.
"""
if user is None:
return False

class HTTPAuthorizer:
for aclentry in self.permissions:
if (
aclentry.username == user.username
and self._check_operation(operation, aclentry)
and self._check_resources(resources, aclentry)
):
return True
return False


class HTTPAuthorizer(ACLAuthorizer):
def __init__(self, filename: str) -> None:
super().__init__()
self._auth_filename: str = filename
self._auth_mtime: float = -1
self._refresh_auth_task: Optional[asyncio.Task] = None
self._refresh_auth_task: asyncio.Task | None = None
self._refresh_auth_awatch_stop_event = asyncio.Event()
# Once first, can raise if file not valid
self._load_authfile()
@@ -158,7 +212,7 @@ def _load_authfile(self) -> None:
ACLEntry(entry["username"], Operation(entry["operation"]), re.compile(entry["resource"]))
for entry in authdata["permissions"]
]
self.userdb = users
self.user_db = users
log.info(
"Loaded schema registry users: %s",
users,
@@ -172,28 +226,6 @@ def _load_authfile(self) -> None:
except Exception as ex:
raise InvalidConfiguration("Failed to load auth file") from ex

def check_authorization(self, user: Optional[User], operation: Operation, resource: str) -> bool:
if user is None:
return False

def check_operation(operation: Operation, aclentry: ACLEntry) -> bool:
"""Does ACL entry allow given operation.
An entry at minimum gives Read permission. Write permission implies Read."""
return operation == Operation.Read or aclentry.operation == Operation.Write

def check_resource(resource: str, aclentry: ACLEntry) -> bool:
return aclentry.resource.match(resource) is not None

for aclentry in self.permissions:
if (
aclentry.username == user.username
and check_operation(operation, aclentry)
and check_resource(resource, aclentry)
):
return True
return False

def authenticate(self, request: aiohttp.web.Request) -> User:
auth_header = request.headers.get("Authorization")
if auth_header is None:
@@ -211,7 +243,7 @@ def authenticate(self, request: aiohttp.web.Request) -> User:
text='{"message": "Unauthorized"}',
content_type=JSON_CONTENT_TYPE,
)
user = self.userdb.get(auth.login)
user = self.get_user(auth.login)
if user is None or not user.compare_password(auth.password):
raise aiohttp.web.HTTPUnauthorized(
headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'},
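
The refactor above extracts the ACL checks into a reusable ``ACLAuthorizer`` base class, which ``HTTPAuthorizer`` now inherits. A minimal sketch of exercising it directly (the ``SimpleNamespace`` stand-in only mimics the ``username`` attribute the checks read; real callers pass a ``karapace.auth.User``)::

    import re
    from types import SimpleNamespace

    from karapace.auth import ACLAuthorizer, ACLEntry, Operation

    # One entry: "alice" may write, and therefore read, subjects under "orders-".
    authorizer = ACLAuthorizer(
        permissions=[ACLEntry("alice", Operation.Write, re.compile("Subject:orders-.*"))]
    )
    alice = SimpleNamespace(username="alice")  # stand-in for karapace.auth.User

    assert authorizer.check_authorization(alice, Operation.Read, "Subject:orders-2024")
    assert not authorizer.check_authorization(alice, Operation.Read, "Subject:payments")
    assert authorizer.check_authorization_any(
        alice, Operation.Write, ["Subject:payments", "Subject:orders-2024"]
    )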