From 3b54b50e7e09c333dfd9ffdc9c033f23ccf1c767 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Thu, 14 Nov 2024 14:57:41 +0200 Subject: [PATCH 01/11] wip: Karapace SR runs on FastAPI --- src/karapace/config.py | 2 ++ src/karapace/dependencies.py | 46 ++++++++++++++++++++++++++++ src/karapace/karapace_all.py | 1 + src/karapace/schema_registry_apis.py | 6 ++-- 4 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 src/karapace/dependencies.py diff --git a/src/karapace/config.py b/src/karapace/config.py index 2d933a5cc..146215ba7 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -20,6 +20,8 @@ HOSTNAME = socket.gethostname() +HOSTNAME = socket.gethostname() + class Config(BaseSettings): access_logs_debug: bool = False diff --git a/src/karapace/dependencies.py b/src/karapace/dependencies.py new file mode 100644 index 000000000..4968ce750 --- /dev/null +++ b/src/karapace/dependencies.py @@ -0,0 +1,46 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from fastapi import Depends +from karapace.config import Config +from karapace.karapace_all import CONFIG, SCHEMA_REGISTRY +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.schema_registry_apis import KarapaceSchemaRegistryController +from karapace.statsd import StatsClient +from typing import Annotated + + +def get_config() -> Config: + return CONFIG + + +ConfigDep = Annotated[Config, Depends(get_config)] + + +async def get_schema_registry() -> KarapaceSchemaRegistry: + return SCHEMA_REGISTRY + + +SchemaRegistryDep = Annotated[KarapaceSchemaRegistry, Depends(get_schema_registry)] + + +async def get_stats( + config: ConfigDep, +) -> StatsClient: + return StatsClient(config=config) + + +StatsDep = Annotated[StatsClient, Depends(get_stats)] + + +async def get_controller( + config: ConfigDep, + stats: StatsDep, + schema_registry: SchemaRegistryDep, +) -> KarapaceSchemaRegistryController: + return KarapaceSchemaRegistryController(config=config, 
schema_registry=schema_registry, stats=stats) + + +KarapaceSchemaRegistryControllerDep = Annotated[KarapaceSchemaRegistryController, Depends(get_controller)] diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 714cd2073..54be59609 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -26,6 +26,7 @@ from typing import Final import logging +import os import sys import uvicorn diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index 56e79243c..cc91b85dd 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -837,7 +837,8 @@ async def subject_post( references = self._validate_references(schema_request=schema_request) try: - references, resolved_dependencies = self.schema_registry.resolve_references(references) + # references, resolved_dependencies = self.schema_registry.resolve_references(references) + resolved_dependencies = {} new_schema = ValidatedTypedSchema.parse( schema_type=schema_request.schema_type, schema_str=schema_request.schema_str, @@ -931,7 +932,8 @@ def get_schema_id_if_exists(self, *, subject: Subject, schema: TypedSchema, incl def get_new_schema(self, schema_request: SchemaRequest) -> ValidatedTypedSchema: references = self._validate_references(schema_request=schema_request) try: - references, new_schema_dependencies = self.schema_registry.resolve_references(references) + # references, new_schema_dependencies = self.schema_registry.resolve_references(references) + new_schema_dependencies = {} return ValidatedTypedSchema.parse( schema_type=schema_request.schema_type, schema_str=schema_request.schema_str, From 03861d427e234541ee1064609b63a83fd271ba87 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Fri, 15 Nov 2024 19:21:31 +0200 Subject: [PATCH 02/11] fix: added reference support for protobuf back --- src/karapace/schema_registry_apis.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index cc91b85dd..44d8bd128 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -837,8 +837,7 @@ async def subject_post( references = self._validate_references(schema_request=schema_request) try: - # references, resolved_dependencies = self.schema_registry.resolve_references(references) - resolved_dependencies = {} + references, resolved_dependencies = self.schema_registry.resolve_references(references) new_schema = ValidatedTypedSchema.parse( schema_type=schema_request.schema_type, schema_str=schema_request.schema_str, @@ -932,7 +931,7 @@ def get_schema_id_if_exists(self, *, subject: Subject, schema: TypedSchema, incl def get_new_schema(self, schema_request: SchemaRequest) -> ValidatedTypedSchema: references = self._validate_references(schema_request=schema_request) try: - # references, new_schema_dependencies = self.schema_registry.resolve_references(references) + references, new_schema_dependencies = self.schema_registry.resolve_references(references) new_schema_dependencies = {} return ValidatedTypedSchema.parse( schema_type=schema_request.schema_type, From 410e1da2180f7bf64db3a25953e836dd1779fb80 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Mon, 18 Nov 2024 09:41:24 +0200 Subject: [PATCH 03/11] fix: added forwarding back --- src/karapace/dependencies.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/karapace/dependencies.py b/src/karapace/dependencies.py index 4968ce750..1f6d3fba8 100644 --- a/src/karapace/dependencies.py +++ b/src/karapace/dependencies.py @@ -5,6 +5,7 @@ from fastapi import Depends from karapace.config import Config +from karapace.forward_client import ForwardClient from karapace.karapace_all import CONFIG, SCHEMA_REGISTRY from karapace.schema_registry import KarapaceSchemaRegistry from karapace.schema_registry_apis import KarapaceSchemaRegistryController @@ -44,3 +45,16 @@ async def 
get_controller( KarapaceSchemaRegistryControllerDep = Annotated[KarapaceSchemaRegistryController, Depends(get_controller)] + + +FORWARD_CLIENT: ForwardClient | None = None + + +def get_forward_client() -> ForwardClient: + global FORWARD_CLIENT + if not FORWARD_CLIENT: + FORWARD_CLIENT = ForwardClient() + return FORWARD_CLIENT + + +ForwardClientDep = Annotated[ForwardClient, Depends(get_forward_client)] From ca2f62997c53a75edc27163ec68d9fd7161ed47f Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Tue, 19 Nov 2024 10:22:50 +0200 Subject: [PATCH 04/11] fix: basic and no authentication integrated --- src/karapace/dependencies.py | 60 ------------------------------------ src/karapace/karapace_all.py | 1 - 2 files changed, 61 deletions(-) delete mode 100644 src/karapace/dependencies.py diff --git a/src/karapace/dependencies.py b/src/karapace/dependencies.py deleted file mode 100644 index 1f6d3fba8..000000000 --- a/src/karapace/dependencies.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import Depends -from karapace.config import Config -from karapace.forward_client import ForwardClient -from karapace.karapace_all import CONFIG, SCHEMA_REGISTRY -from karapace.schema_registry import KarapaceSchemaRegistry -from karapace.schema_registry_apis import KarapaceSchemaRegistryController -from karapace.statsd import StatsClient -from typing import Annotated - - -def get_config() -> Config: - return CONFIG - - -ConfigDep = Annotated[Config, Depends(get_config)] - - -async def get_schema_registry() -> KarapaceSchemaRegistry: - return SCHEMA_REGISTRY - - -SchemaRegistryDep = Annotated[KarapaceSchemaRegistry, Depends(get_schema_registry)] - - -async def get_stats( - config: ConfigDep, -) -> StatsClient: - return StatsClient(config=config) - - -StatsDep = Annotated[StatsClient, Depends(get_stats)] - - -async def get_controller( - config: ConfigDep, - stats: StatsDep, - schema_registry: SchemaRegistryDep, -) -> 
KarapaceSchemaRegistryController: - return KarapaceSchemaRegistryController(config=config, schema_registry=schema_registry, stats=stats) - - -KarapaceSchemaRegistryControllerDep = Annotated[KarapaceSchemaRegistryController, Depends(get_controller)] - - -FORWARD_CLIENT: ForwardClient | None = None - - -def get_forward_client() -> ForwardClient: - global FORWARD_CLIENT - if not FORWARD_CLIENT: - FORWARD_CLIENT = ForwardClient() - return FORWARD_CLIENT - - -ForwardClientDep = Annotated[ForwardClient, Depends(get_forward_client)] diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 54be59609..714cd2073 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -26,7 +26,6 @@ from typing import Final import logging -import os import sys import uvicorn From f4f11e01d782f9e14be52958da6de4d43ce7b629 Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 11:39:43 +0100 Subject: [PATCH 05/11] feat: move schema-registry to own module with DI - we create a standalone module for SR related components - we use DI to wire together the SR dependencies - we move the routers to own folder - we move the config initialization to DI and app startup --- src/karapace/container.py | 40 + src/karapace/karapace_all.py | 175 +--- src/karapace/logging_setup.py | 46 + src/schema_registry/__init__.py | 4 + src/schema_registry/__main__.py | 51 + src/schema_registry/container.py | 18 + src/schema_registry/factory.py | 59 ++ src/schema_registry/http_handlers/__init__.py | 34 + src/schema_registry/middlewares/__init__.py | 33 + src/schema_registry/routers/__init__.py | 4 + src/schema_registry/routers/compatibility.py | 37 + src/schema_registry/routers/config.py | 121 +++ src/schema_registry/routers/errors.py | 70 ++ src/schema_registry/routers/health.py | 67 ++ src/schema_registry/routers/metrics.py | 24 + src/schema_registry/routers/mode.py | 47 + src/schema_registry/routers/requests.py | 101 ++ src/schema_registry/routers/root.py | 
16 + src/schema_registry/routers/schemas.py | 91 ++ src/schema_registry/routers/setup.py | 25 + src/schema_registry/routers/subjects.py | 201 ++++ src/schema_registry/schema_registry_apis.py | 950 ++++++++++++++++++ src/schema_registry/user.py | 41 + 23 files changed, 2103 insertions(+), 152 deletions(-) create mode 100644 src/karapace/container.py create mode 100644 src/karapace/logging_setup.py create mode 100644 src/schema_registry/__init__.py create mode 100644 src/schema_registry/__main__.py create mode 100644 src/schema_registry/container.py create mode 100644 src/schema_registry/factory.py create mode 100644 src/schema_registry/http_handlers/__init__.py create mode 100644 src/schema_registry/middlewares/__init__.py create mode 100644 src/schema_registry/routers/__init__.py create mode 100644 src/schema_registry/routers/compatibility.py create mode 100644 src/schema_registry/routers/config.py create mode 100644 src/schema_registry/routers/errors.py create mode 100644 src/schema_registry/routers/health.py create mode 100644 src/schema_registry/routers/metrics.py create mode 100644 src/schema_registry/routers/mode.py create mode 100644 src/schema_registry/routers/requests.py create mode 100644 src/schema_registry/routers/root.py create mode 100644 src/schema_registry/routers/schemas.py create mode 100644 src/schema_registry/routers/setup.py create mode 100644 src/schema_registry/routers/subjects.py create mode 100644 src/schema_registry/schema_registry_apis.py create mode 100644 src/schema_registry/user.py diff --git a/src/karapace/container.py b/src/karapace/container.py new file mode 100644 index 000000000..7c71e99ca --- /dev/null +++ b/src/karapace/container.py @@ -0,0 +1,40 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector import containers, providers +from karapace.auth import get_authorizer, HTTPAuthorizer, NoAuthAndAuthz +from karapace.config import Config +from karapace.forward_client import ForwardClient 
+from karapace.instrumentation.prometheus import PrometheusInstrumentation +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.statsd import StatsClient + + +class KarapaceContainer(containers.DeclarativeContainer): + base_config = providers.Configuration() + config = providers.Singleton( + Config, + _env_file=base_config.karapace.env_file, + _env_file_encoding=base_config.karapace.env_file_encoding, + ) + + statsd = providers.Singleton(StatsClient, config=config) + + no_auth_authorizer = providers.Singleton(NoAuthAndAuthz) + + http_authorizer = providers.Singleton(HTTPAuthorizer, config=config) + + schema_registry = providers.Singleton(KarapaceSchemaRegistry, config=config) + + forward_client = providers.Singleton(ForwardClient) + + authorizer = providers.Factory( + get_authorizer, + config=config, + http_authorizer=http_authorizer, + no_auth_authorizer=no_auth_authorizer, + ) + + prometheus = providers.Singleton(PrometheusInstrumentation) diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 714cd2073..80e36cd43 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -4,164 +4,35 @@ """ from __future__ import annotations -from collections.abc import AsyncGenerator -from contextlib import asynccontextmanager -from fastapi import FastAPI, HTTPException, Request, status -from fastapi.exceptions import RequestValidationError -from fastapi.responses import JSONResponse -from http import HTTPStatus +from dependency_injector.wiring import inject, Provide from karapace import version as karapace_version -from karapace.auth.auth import AuthenticatorAndAuthorizer -from karapace.auth.dependencies import AuthorizationDependencyManager -from karapace.config import Config -from karapace.content_type import check_schema_headers -from karapace.dependencies.config_dependency import ConfigDependencyManager -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDependencyManager -from 
karapace.dependencies.stats_dependeny import StatsDependencyManager +from karapace.config import Config, KARAPACE_BASE_CONFIG_YAML_PATH +from karapace.container import KarapaceContainer from karapace.instrumentation.prometheus import PrometheusInstrumentation -from karapace.routers.errors import KarapaceValidationError -from karapace.schema_registry import KarapaceSchemaRegistry -from starlette.exceptions import HTTPException as StarletteHTTPException -from starlette.requests import Request as StarletteHTTPRequest -from typing import Final +from karapace.kafka_rest_apis import KafkaRest +from karapace.logging_setup import configure_logging, log_config_without_secrets +import argparse import logging import sys -import uvicorn -# from karapace.kafka_rest_apis import KafkaRest +@inject +def main( + config: Config = Provide[KarapaceContainer.config], + prometheus: PrometheusInstrumentation = Provide[KarapaceContainer.prometheus], +) -> int: + parser = argparse.ArgumentParser(prog="karapace", description="Karapace: Your Kafka essentials in one tool") + parser.add_argument("--version", action="version", help="show program version", version=karapace_version.__version__) + parser.parse_args() + configure_logging(config=config) + log_config_without_secrets(config=config) -def _configure_logging(*, config: Config) -> None: - log_handler = config.log_handler + logging.info("\n%s\nStarting %s\n%s", ("=" * 100), "Starting Karapace Rest Proxy", ("=" * 100)) + app = KafkaRest(config=config) - root_handler: logging.Handler | None = None - if "systemd" == log_handler: - from systemd import journal - - root_handler = journal.JournalHandler(SYSLOG_IDENTIFIER="karapace") - elif "stdout" == log_handler or log_handler is None: - root_handler = logging.StreamHandler(stream=sys.stdout) - else: - logging.basicConfig(level=config.log_level, format=config.log_format) - logging.getLogger().setLevel(config.log_level) - logging.warning("Log handler %s not recognized, root handler not set.", 
log_handler) - - if root_handler is not None: - root_handler.setFormatter(logging.Formatter(config.log_format)) - root_handler.setLevel(config.log_level) - root_handler.set_name(name="karapace") - logging.root.addHandler(root_handler) - - logging.root.setLevel(config.log_level) - logging.getLogger("uvicorn.error").setLevel(config.log_level) - - -@asynccontextmanager -async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]: - schema_registry: KarapaceSchemaRegistry | None = None - authorizer: AuthenticatorAndAuthorizer | None = None - try: - schema_registry = await SchemaRegistryDependencyManager.get_schema_registry() - await schema_registry.start() - await schema_registry.get_master() - authorizer = AuthorizationDependencyManager.get_authorizer() - if authorizer is not None: - await authorizer.start(StatsDependencyManager.get_stats()) - yield - finally: - if schema_registry: - await schema_registry.close() - if authorizer: - await authorizer.close() - - -def create_karapace_application(*, config: Config) -> FastAPI: - # TODO: this lifespan is SR related lifespan - app = FastAPI(lifespan=lifespan) - _configure_logging(config=config) - - config_without_secrets = {} - for key, value in config.dict().items(): - if "password" in key: - value = "****" - elif "keyfile" in key: - value = "****" - config_without_secrets[key] = value - logging.log(logging.DEBUG, "Config %r", config_without_secrets) - logging.log(logging.INFO, "Karapace version %s", karapace_version) - - @app.exception_handler(StarletteHTTPException) - async def http_exception_handler(_: StarletteHTTPRequest, exc: StarletteHTTPException): - return JSONResponse(status_code=exc.status_code, content=exc.detail) - - @app.exception_handler(RequestValidationError) - async def validation_exception_handler(_: StarletteHTTPRequest, exc: RequestValidationError): - error_code = HTTPStatus.UNPROCESSABLE_ENTITY.value - if isinstance(exc, KarapaceValidationError): - error_code = exc.error_code - message = exc.body - 
else: - message = exc.errors() - return JSONResponse( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - content={ - "error_code": error_code, - "message": message, - }, - ) - - @app.middleware("http") - async def set_content_types(request: Request, call_next): - try: - response_content_type = check_schema_headers(request) - except HTTPException as exc: - return JSONResponse( - status_code=exc.status_code, - headers=exc.headers, - content=exc.detail, - ) - - # Schema registry supports application/octet-stream, assumption is JSON object body. - # Force internally to use application/json in this case for compatibility. - if request.headers.get("Content-Type") == "application/octet-stream": - new_headers = request.headers.mutablecopy() - new_headers["Content-Type"] = "application/json" - request._headers = new_headers - request.scope.update(headers=request.headers.raw) - - response = await call_next(request) - response.headers["Content-Type"] = response_content_type - return response - - if config.karapace_registry: - from karapace.routers.compatibility_router import compatibility_router - from karapace.routers.config_router import config_router - from karapace.routers.health_router import health_router - from karapace.routers.master_available_router import master_availability_router - from karapace.routers.mode_router import mode_router - from karapace.routers.root_router import root_router - from karapace.routers.schemas_router import schemas_router - from karapace.routers.subjects_router import subjects_router - - app.include_router(compatibility_router) - app.include_router(config_router) - app.include_router(health_router) - app.include_router(master_availability_router) - app.include_router(mode_router) - app.include_router(root_router) - app.include_router(schemas_router) - app.include_router(subjects_router) - if config.karapace_rest: - # add rest router. 
- pass - - return app - - -def __old_main() -> int: try: - PrometheusInstrumentation.setup_metrics(app=app) + prometheus.setup_metrics(app=app) app.run() # `close` will be called by the callback `close_by_app` set by `KarapaceBase` except Exception as ex: # pylint: disable-broad-except app.stats.unexpected_exception(ex=ex, where="karapace") @@ -169,8 +40,8 @@ def __old_main() -> int: return 0 -CONFIG: Final = ConfigDependencyManager.get_config() - if __name__ == "__main__": - app = create_karapace_application(config=CONFIG) - uvicorn.run(app, host=CONFIG.host, port=CONFIG.port, log_level=CONFIG.log_level.lower()) + container = KarapaceContainer() + container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) + container.wire(modules=[__name__]) + sys.exit(main()) diff --git a/src/karapace/logging_setup.py b/src/karapace/logging_setup.py new file mode 100644 index 000000000..a8521601e --- /dev/null +++ b/src/karapace/logging_setup.py @@ -0,0 +1,46 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from karapace.config import Config + +import logging +import sys + + +def configure_logging(*, config: Config) -> None: + log_handler = config.log_handler + + root_handler: logging.Handler | None = None + if "systemd" == log_handler: + from systemd import journal + + root_handler = journal.JournalHandler(SYSLOG_IDENTIFIER="karapace") + elif "stdout" == log_handler or log_handler is None: + root_handler = logging.StreamHandler(stream=sys.stdout) + else: + logging.basicConfig(level=config.log_level, format=config.log_format) + logging.getLogger().setLevel(config.log_level) + logging.warning("Log handler %s not recognized, root handler not set.", log_handler) + + if root_handler is not None: + root_handler.setFormatter(logging.Formatter(config.log_format)) + root_handler.setLevel(config.log_level) + root_handler.set_name(name="karapace") + logging.root.addHandler(root_handler) + + 
logging.root.setLevel(config.log_level) + logging.getLogger("aiohttp.access").setLevel(config.log_level) + logging.getLogger("uvicorn.error").setLevel(config.log_level) + + +def log_config_without_secrets(config: Config) -> None: + config_without_secrets = {} + for key, value in config.dict().items(): + if "password" in key: + value = "****" + elif "keyfile" in key: + value = "****" + config_without_secrets[key] = value + logging.log(logging.DEBUG, "Config %r", config_without_secrets) diff --git a/src/schema_registry/__init__.py b/src/schema_registry/__init__.py new file mode 100644 index 000000000..f53be7121 --- /dev/null +++ b/src/schema_registry/__init__.py @@ -0,0 +1,4 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" diff --git a/src/schema_registry/__main__.py b/src/schema_registry/__main__.py new file mode 100644 index 000000000..0663bf774 --- /dev/null +++ b/src/schema_registry/__main__.py @@ -0,0 +1,51 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from karapace.config import KARAPACE_BASE_CONFIG_YAML_PATH +from karapace.container import KarapaceContainer +from schema_registry.container import SchemaRegistryContainer +from schema_registry.factory import create_karapace_application, karapace_schema_registry_lifespan + +import schema_registry.factory +import schema_registry.routers.compatibility +import schema_registry.routers.config +import schema_registry.routers.health +import schema_registry.routers.metrics +import schema_registry.routers.mode +import schema_registry.routers.schemas +import schema_registry.routers.subjects +import schema_registry.schema_registry_apis +import schema_registry.user +import uvicorn + +if __name__ == "__main__": + container = KarapaceContainer() + container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) + container.wire( + modules=[ + __name__, + schema_registry.schema_registry_apis, + ] + ) + + schema_registry_container = 
SchemaRegistryContainer(karapace_container=container) + schema_registry_container.wire( + modules=[ + __name__, + schema_registry.factory, + schema_registry.user, + schema_registry.routers.health, + schema_registry.routers.metrics, + schema_registry.routers.subjects, + schema_registry.routers.schemas, + schema_registry.routers.config, + schema_registry.routers.compatibility, + schema_registry.routers.mode, + ] + ) + + app = create_karapace_application(config=container.config(), lifespan=karapace_schema_registry_lifespan) + uvicorn.run( + app, host=container.config().host, port=container.config().port, log_level=container.config().log_level.lower() + ) diff --git a/src/schema_registry/container.py b/src/schema_registry/container.py new file mode 100644 index 000000000..b93bc4139 --- /dev/null +++ b/src/schema_registry/container.py @@ -0,0 +1,18 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector import containers, providers +from karapace.container import KarapaceContainer +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController + + +class SchemaRegistryContainer(containers.DeclarativeContainer): + karapace_container = providers.Container(KarapaceContainer) + schema_registry_controller = providers.Singleton( + KarapaceSchemaRegistryController, + config=karapace_container.config, + schema_registry=karapace_container.schema_registry, + stats=karapace_container.statsd, + ) diff --git a/src/schema_registry/factory.py b/src/schema_registry/factory.py new file mode 100644 index 000000000..667bdfc7d --- /dev/null +++ b/src/schema_registry/factory.py @@ -0,0 +1,59 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager +from dependency_injector.wiring import inject, Provide +from fastapi import Depends, FastAPI +from karapace import version as karapace_version +from karapace.auth import 
AuthenticatorAndAuthorizer +from karapace.config import Config +from karapace.logging_setup import configure_logging, log_config_without_secrets +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.statsd import StatsClient +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from schema_registry.container import SchemaRegistryContainer +from schema_registry.http_handlers import setup_exception_handlers +from schema_registry.middlewares import setup_middlewares +from schema_registry.routers.setup import setup_routers + +import logging + + +@asynccontextmanager +@inject +async def karapace_schema_registry_lifespan( + _: FastAPI, + stastd: StatsClient = Depends(Provide[SchemaRegistryContainer.karapace_container.statsd]), + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), +) -> AsyncGenerator[None, None]: + try: + await schema_registry.start() + await schema_registry.get_master() + await authorizer.start(stats=stastd) + + yield + finally: + if schema_registry: + await schema_registry.close() + if authorizer: + await authorizer.close() + if stastd: + stastd.close() + + +def create_karapace_application(*, config: Config, lifespan: AsyncGenerator[None, None]) -> FastAPI: + configure_logging(config=config) + log_config_without_secrets(config=config) + logging.info("Starting Karapace Schema Registry (%s)", karapace_version.__version__) + + app = FastAPI(lifespan=lifespan) + setup_routers(app=app) + setup_exception_handlers(app=app) + setup_middlewares(app=app) + + FastAPIInstrumentor.instrument_app(app) + + return app diff --git a/src/schema_registry/http_handlers/__init__.py b/src/schema_registry/http_handlers/__init__.py new file mode 100644 index 000000000..93bc853cc --- /dev/null +++ b/src/schema_registry/http_handlers/__init__.py @@ -0,0 +1,34 
@@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from fastapi import FastAPI, status +from fastapi.exceptions import RequestValidationError +from fastapi.responses import JSONResponse +from http import HTTPStatus +from schema_registry.routers.errors import KarapaceValidationError +from starlette.exceptions import HTTPException as StarletteHTTPException +from starlette.requests import Request as StarletteHTTPRequest + + +def setup_exception_handlers(app: FastAPI) -> None: + @app.exception_handler(StarletteHTTPException) + async def http_exception_handler(_: StarletteHTTPRequest, exc: StarletteHTTPException): + return JSONResponse(status_code=exc.status_code, content=exc.detail) + + @app.exception_handler(RequestValidationError) + async def validation_exception_handler(_: StarletteHTTPRequest, exc: RequestValidationError): + error_code = HTTPStatus.UNPROCESSABLE_ENTITY.value + if isinstance(exc, KarapaceValidationError): + error_code = exc.error_code + message = exc.body + else: + message = exc.errors() + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content={ + "error_code": error_code, + "message": message, + }, + ) diff --git a/src/schema_registry/middlewares/__init__.py b/src/schema_registry/middlewares/__init__.py new file mode 100644 index 000000000..b5fb2e125 --- /dev/null +++ b/src/schema_registry/middlewares/__init__.py @@ -0,0 +1,33 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from fastapi import FastAPI, HTTPException, Request +from fastapi.responses import JSONResponse +from karapace.content_type import check_schema_headers + + +def setup_middlewares(app: FastAPI) -> None: + @app.middleware("http") + async def set_content_types(request: Request, call_next): + try: + response_content_type = check_schema_headers(request) + except HTTPException as exc: + return JSONResponse( + status_code=exc.status_code, + headers=exc.headers, + content=exc.detail, + ) + + # Schema registry 
supports application/octet-stream, assumption is JSON object body. + # Force internally to use application/json in this case for compatibility. + if request.headers.get("Content-Type") == "application/octet-stream": + new_headers = request.headers.mutablecopy() + new_headers["Content-Type"] = "application/json" + request._headers = new_headers + request.scope.update(headers=request.headers.raw) + + response = await call_next(request) + response.headers["Content-Type"] = response_content_type + return response diff --git a/src/schema_registry/routers/__init__.py b/src/schema_registry/routers/__init__.py new file mode 100644 index 000000000..f53be7121 --- /dev/null +++ b/src/schema_registry/routers/__init__.py @@ -0,0 +1,4 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" diff --git a/src/schema_registry/routers/compatibility.py b/src/schema_registry/routers/compatibility.py new file mode 100644 index 000000000..0e91e3625 --- /dev/null +++ b/src/schema_registry/routers/compatibility.py @@ -0,0 +1,37 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends +from karapace.auth import AuthenticatorAndAuthorizer, Operation, User +from karapace.typing import Subject +from schema_registry.container import SchemaRegistryContainer +from schema_registry.routers.errors import unauthorized +from schema_registry.routers.requests import CompatibilityCheckResponse, SchemaRequest +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController +from schema_registry.user import get_current_user +from typing import Annotated + +compatibility_router = APIRouter( + prefix="/compatibility", + tags=["compatibility"], + responses={404: {"description": "Not found"}}, +) + + +@compatibility_router.post("/subjects/{subject}/versions/{version}", response_model_exclude_none=True) +@inject +async def compatibility_post( + subject: Subject, + version: 
str, # TODO support actual Version object + schema_request: SchemaRequest, + user: Annotated[User, Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityCheckResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.compatibility_check(subject=subject, schema_request=schema_request, version=version) diff --git a/src/schema_registry/routers/config.py b/src/schema_registry/routers/config.py new file mode 100644 index 000000000..04bd63545 --- /dev/null +++ b/src/schema_registry/routers/config.py @@ -0,0 +1,121 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends, Request +from karapace.auth import AuthenticatorAndAuthorizer, Operation, User +from karapace.forward_client import ForwardClient +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.typing import Subject +from schema_registry.container import SchemaRegistryContainer +from schema_registry.routers.errors import no_primary_url_error, unauthorized +from schema_registry.routers.requests import CompatibilityLevelResponse, CompatibilityRequest, CompatibilityResponse +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController +from schema_registry.user import get_current_user +from typing import Annotated + +config_router = APIRouter( + prefix="/config", + tags=["config"], + responses={404: {"description": "Not found"}}, +) + + +@config_router.get("") +@inject +async def config_get( + user: Annotated[User, Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = 
Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityLevelResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Read, "Config:"): + raise unauthorized() + + return await controller.config_get() + + +@config_router.put("") +@inject +async def config_put( + request: Request, + compatibility_level_request: CompatibilityRequest, + user: Annotated[User, Depends(get_current_user)], + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Write, "Config:"): + raise unauthorized() + + i_am_primary, primary_url = await schema_registry.get_master() + if i_am_primary: + return await controller.config_set(compatibility_level_request=compatibility_level_request) + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) + + +@config_router.get("/{subject}") +@inject +async def config_get_subject( + subject: Subject, + user: Annotated[User, Depends(get_current_user)], + defaultToGlobal: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityLevelResponse: + if authorizer and not 
authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.config_subject_get(subject=subject, default_to_global=defaultToGlobal) + + +@config_router.put("/{subject}") +@inject +async def config_set_subject( + request: Request, + subject: Subject, + compatibility_level_request: CompatibilityRequest, + user: Annotated[User, Depends(get_current_user)], + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): + raise unauthorized() + + i_am_primary, primary_url = await schema_registry.get_master() + if i_am_primary: + return await controller.config_subject_set(subject=subject, compatibility_level_request=compatibility_level_request) + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) + + +@config_router.delete("/{subject}") +@inject +async def config_delete_subject( + request: Request, + subject: Subject, + user: Annotated[User, Depends(get_current_user)], + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = 
Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> CompatibilityResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): + raise unauthorized() + + i_am_primary, primary_url = await schema_registry.get_master() + if i_am_primary: + return await controller.config_subject_delete(subject=subject) + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) diff --git a/src/schema_registry/routers/errors.py b/src/schema_registry/routers/errors.py new file mode 100644 index 000000000..18c80299d --- /dev/null +++ b/src/schema_registry/routers/errors.py @@ -0,0 +1,70 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from enum import Enum, unique +from fastapi import HTTPException, status +from fastapi.exceptions import RequestValidationError + + +@unique +class SchemaErrorCodes(Enum): + HTTP_BAD_REQUEST = status.HTTP_400_BAD_REQUEST + HTTP_NOT_FOUND = status.HTTP_404_NOT_FOUND + HTTP_CONFLICT = status.HTTP_409_CONFLICT + HTTP_UNPROCESSABLE_ENTITY = status.HTTP_422_UNPROCESSABLE_ENTITY + HTTP_INTERNAL_SERVER_ERROR = status.HTTP_500_INTERNAL_SERVER_ERROR + SUBJECT_NOT_FOUND = 40401 + VERSION_NOT_FOUND = 40402 + SCHEMA_NOT_FOUND = 40403 + SUBJECT_SOFT_DELETED = 40404 + SUBJECT_NOT_SOFT_DELETED = 40405 + SCHEMAVERSION_SOFT_DELETED = 40406 + SCHEMAVERSION_NOT_SOFT_DELETED = 40407 + SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_ERROR_CODE = 40408 + INVALID_VERSION_ID = 42202 + INVALID_COMPATIBILITY_LEVEL = 42203 + INVALID_SCHEMA = 42201 + INVALID_SUBJECT = 42208 + SCHEMA_TOO_LARGE_ERROR_CODE = 42209 + REFERENCES_SUPPORT_NOT_IMPLEMENTED = 44302 + REFERENCE_EXISTS = 42206 + NO_MASTER_ERROR = 50003 + + +@unique +class SchemaErrorMessages(Enum): + SUBJECT_NOT_FOUND_FMT = "Subject '{subject}' not found." + INVALID_COMPATIBILITY_LEVEL = ( + "Invalid compatibility level. 
Valid values are none, backward, " + "forward, full, backward_transitive, forward_transitive, and " + "full_transitive" + ) + SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_FMT = ( + "Subject '{subject}' does not have subject-level compatibility configured" + ) + REFERENCES_SUPPORT_NOT_IMPLEMENTED = "Schema references are not supported for '{schema_type}' schema type" + + +class KarapaceValidationError(RequestValidationError): + def __init__(self, error_code: int, error: str): + super().__init__(errors=[], body=error) + self.error_code = error_code + + +def no_primary_url_error() -> HTTPException: + return HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail={ + "error_code": SchemaErrorCodes.NO_MASTER_ERROR, + "message": "Error while forwarding the request to the master.", + }, + ) + + +def unauthorized() -> HTTPException: + return HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail={"message": "Forbidden"}, + ) diff --git a/src/schema_registry/routers/health.py b/src/schema_registry/routers/health.py new file mode 100644 index 000000000..df3a8822f --- /dev/null +++ b/src/schema_registry/routers/health.py @@ -0,0 +1,67 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends, HTTPException, status +from karapace.schema_registry import KarapaceSchemaRegistry +from pydantic import BaseModel +from schema_registry.container import SchemaRegistryContainer + + +class HealthStatus(BaseModel): + schema_registry_ready: bool + schema_registry_startup_time_sec: float + schema_registry_reader_current_offset: int + schema_registry_reader_highest_offset: int + schema_registry_is_primary: bool | None + schema_registry_is_primary_eligible: bool + schema_registry_primary_url: str | None + schema_registry_coordinator_running: bool + schema_registry_coordinator_generation_id: int + + +class HealthCheck(BaseModel): + status: HealthStatus + 
healthy: bool + + +health_router = APIRouter( + prefix="/_health", + tags=["health"], + responses={404: {"description": "Not found"}}, +) + + +@health_router.get("") +@inject +async def health( + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), +) -> HealthCheck: + starttime = 0.0 + if schema_registry.schema_reader.ready: + starttime = schema_registry.schema_reader.last_check - schema_registry.schema_reader.start_time + + cs = schema_registry.mc.get_coordinator_status() + + health_status = HealthStatus( + schema_registry_ready=schema_registry.schema_reader.ready, + schema_registry_startup_time_sec=starttime, + schema_registry_reader_current_offset=schema_registry.schema_reader.offset, + schema_registry_reader_highest_offset=schema_registry.schema_reader.highest_offset(), + schema_registry_is_primary=cs.is_primary, + schema_registry_is_primary_eligible=cs.is_primary_eligible, + schema_registry_primary_url=cs.primary_url, + schema_registry_coordinator_running=cs.is_running, + schema_registry_coordinator_generation_id=cs.group_generation_id, + ) + # if self._auth is not None: + # resp["schema_registry_authfile_timestamp"] = self._auth.authfile_last_modified + + if not await schema_registry.schema_reader.is_healthy(): + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + ) + + return HealthCheck(status=health_status, healthy=True) diff --git a/src/schema_registry/routers/metrics.py b/src/schema_registry/routers/metrics.py new file mode 100644 index 000000000..23b4b39f8 --- /dev/null +++ b/src/schema_registry/routers/metrics.py @@ -0,0 +1,24 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends, Response +from karapace.instrumentation.prometheus import PrometheusInstrumentation +from pydantic import BaseModel +from schema_registry.container import 
SchemaRegistryContainer + +metrics_router = APIRouter( + prefix=PrometheusInstrumentation.METRICS_ENDPOINT_PATH, + tags=["metrics"], + responses={404: {"description": "Not found"}}, +) + + +@metrics_router.get("") +@inject +async def metrics( + prometheus: PrometheusInstrumentation = Depends(Provide[SchemaRegistryContainer.karapace_container.prometheus]), +) -> BaseModel: + return Response(content=await prometheus.serve_metrics(), media_type=prometheus.CONTENT_TYPE_LATEST) diff --git a/src/schema_registry/routers/mode.py b/src/schema_registry/routers/mode.py new file mode 100644 index 000000000..870a876d2 --- /dev/null +++ b/src/schema_registry/routers/mode.py @@ -0,0 +1,47 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends +from karapace.auth import AuthenticatorAndAuthorizer, Operation, User +from karapace.typing import Subject +from schema_registry.container import SchemaRegistryContainer +from schema_registry.routers.errors import unauthorized +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController +from schema_registry.user import get_current_user +from typing import Annotated + +mode_router = APIRouter( + prefix="/mode", + tags=["mode"], + responses={404: {"description": "Not found"}}, +) + + +@mode_router.get("") +@inject +async def mode_get( + user: Annotated[User, Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +): + if authorizer and not authorizer.check_authorization(user, Operation.Read, "Config:"): + raise unauthorized() + + return await controller.get_global_mode() + + +@mode_router.get("/{subject}") +@inject +async def mode_get_subject( + subject: Subject, + user: Annotated[User, 
Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +): + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.get_subject_mode(subject=subject) diff --git a/src/schema_registry/routers/requests.py b/src/schema_registry/routers/requests.py new file mode 100644 index 000000000..fb4b51511 --- /dev/null +++ b/src/schema_registry/routers/requests.py @@ -0,0 +1,101 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from karapace.schema_type import SchemaType +from karapace.typing import Subject +from pydantic import BaseModel, Field, validator +from schema_registry.routers.errors import KarapaceValidationError +from typing import Any + + +class SchemaReference(BaseModel): + name: str + subject: Subject + version: int + + +class SchemaRequest(BaseModel): + schema_str: str = Field(alias="schema") + schema_type: SchemaType = Field(alias="schemaType", default=SchemaType.AVRO) + references: list[SchemaReference] | None = None + metadata: Any | None + ruleSet: Any | None + + class Config: + extra = "forbid" + + @validator("schema_str") + def validate_schema(cls, schema_str: str) -> str: + if not schema_str or not schema_str.strip(): + raise KarapaceValidationError( + error_code=42201, + error="Empty schema", + ) + return schema_str + + +class SchemaResponse(BaseModel): + subject: Subject + version: int + schema_id: int = Field(alias="id") + schema_str: str = Field(alias="schema") + schema_type: SchemaType | None = Field(alias="schemaType", default=None) + 
references: list[Any] | None = None # TODO: typing + maxId: int | None = None + + +class SchemaListingItem(BaseModel): + subject: Subject + schema_str: str = Field(alias="schema") + version: int + schema_id: int = Field(alias="id") + schema_type: SchemaType | None = Field(alias="schemaType", default=None) + references: list[Any] | None + + +class SchemaIdResponse(BaseModel): + schema_id: int = Field(alias="id") + + +class CompatibilityRequest(BaseModel): + compatibility: str + + +class CompatibilityResponse(BaseModel): + compatibility: str + + +class CompatibilityLevelResponse(BaseModel): + compatibility_level: str = Field(alias="compatibilityLevel") + + +class CompatibilityCheckResponse(BaseModel): + is_compatible: bool + messages: list[str] | None = None + + +class ModeResponse(BaseModel): + mode: str + + +class SubjectVersion(BaseModel): + subject: Subject + version: int + + +class SubjectSchemaVersionResponse(BaseModel): + subject: Subject + version: int + schema_id: int = Field(alias="id") + schema_str: str = Field(alias="schema") + references: list[Any] | None = None + schema_type: SchemaType | None = Field(alias="schemaType", default=None) + compatibility: str | None = None diff --git a/src/schema_registry/routers/root.py b/src/schema_registry/routers/root.py new file mode 100644 index 000000000..6bec6cb9c --- /dev/null +++ b/src/schema_registry/routers/root.py @@ -0,0 +1,16 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from fastapi import APIRouter + +root_router = APIRouter( + tags=["root"], + responses={404: {"description": "Not found"}}, +) + + +@root_router.get("/") +async def root() -> dict: + return {} diff --git a/src/schema_registry/routers/schemas.py b/src/schema_registry/routers/schemas.py new file mode 100644 index 000000000..d7af4cd2b --- /dev/null +++ b/src/schema_registry/routers/schemas.py @@ -0,0 +1,91 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import 
inject, Provide +from fastapi import APIRouter, Depends +from karapace.auth import AuthenticatorAndAuthorizer, User +from schema_registry.container import SchemaRegistryContainer +from schema_registry.routers.requests import SchemaListingItem, SchemasResponse, SubjectVersion +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController +from schema_registry.user import get_current_user +from typing import Annotated + +schemas_router = APIRouter( + prefix="/schemas", + tags=["schemas"], + responses={404: {"description": "Not found"}}, +) + + +# TODO is this needed? Is this actually the ids/schema/id/schema?? +@schemas_router.get("") +@inject +async def schemas_get_list( + user: Annotated[User, Depends(get_current_user)], + deleted: bool = False, + latestOnly: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[SchemaListingItem]: + return await controller.schemas_list( + deleted=deleted, + latest_only=latestOnly, + user=user, + authorizer=authorizer, + ) + + +@schemas_router.get("/ids/{schema_id}", response_model_exclude_none=True) +@inject +async def schemas_get( + user: Annotated[User, Depends(get_current_user)], + schema_id: str, # TODO: type to actual type + includeSubjects: bool = False, # TODO: include subjects? + fetchMaxId: bool = False, # TODO: fetch max id? 
+ format: str = "", + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> SchemasResponse: + return await controller.schemas_get( + schema_id=schema_id, + include_subjects=includeSubjects, + fetch_max_id=fetchMaxId, + format_serialized=format, + user=user, + authorizer=authorizer, + ) + + +# @schemas_router.get("/ids/{schema_id}/schema") +# async def schemas_get_only_id( +# controller: KarapaceSchemaRegistryControllerDep, +# ) -> SchemasResponse: +# # TODO retrieve by id only schema +# return await controller.schemas_get() + + +@schemas_router.get("/ids/{schema_id}/versions") +@inject +async def schemas_get_versions( + user: Annotated[User, Depends(get_current_user)], + schema_id: str, + deleted: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[SubjectVersion]: + return await controller.schemas_get_versions( + schema_id=schema_id, + deleted=deleted, + user=user, + authorizer=authorizer, + ) + + +@schemas_router.get("/types") +@inject +async def schemas_get_subjects_list( + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[str]: + return await controller.schemas_types() diff --git a/src/schema_registry/routers/setup.py b/src/schema_registry/routers/setup.py new file mode 100644 index 000000000..fe0b6be9b --- /dev/null +++ b/src/schema_registry/routers/setup.py @@ -0,0 +1,25 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from fastapi import FastAPI +from schema_registry.routers.compatibility import compatibility_router +from schema_registry.routers.config import config_router 
+from schema_registry.routers.health import health_router +from schema_registry.routers.metrics import metrics_router +from schema_registry.routers.mode import mode_router +from schema_registry.routers.root import root_router +from schema_registry.routers.schemas import schemas_router +from schema_registry.routers.subjects import subjects_router + + +def setup_routers(app: FastAPI) -> None: + app.include_router(compatibility_router) + app.include_router(config_router) + app.include_router(health_router) + app.include_router(mode_router) + app.include_router(root_router) + app.include_router(schemas_router) + app.include_router(subjects_router) + app.include_router(metrics_router) diff --git a/src/schema_registry/routers/subjects.py b/src/schema_registry/routers/subjects.py new file mode 100644 index 000000000..766329795 --- /dev/null +++ b/src/schema_registry/routers/subjects.py @@ -0,0 +1,201 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends, Request +from karapace.auth import AuthenticatorAndAuthorizer, Operation, User +from karapace.forward_client import ForwardClient +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.typing import Subject +from schema_registry.container import SchemaRegistryContainer +from schema_registry.routers.errors import no_primary_url_error, unauthorized +from schema_registry.routers.requests import SchemaIdResponse, SchemaRequest, SchemaResponse, SubjectSchemaVersionResponse +from schema_registry.schema_registry_apis import KarapaceSchemaRegistryController +from schema_registry.user import get_current_user +from typing import Annotated + +import logging + +LOG = logging.getLogger(__name__) + + +subjects_router = APIRouter( + prefix="/subjects", + tags=["subjects"], + responses={404: {"description": "Not found"}}, +) + + +@subjects_router.get("") +@inject +async def subjects_get( + user: 
Annotated[User, Depends(get_current_user)], + deleted: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[str]: + return await controller.subjects_list( + deleted=deleted, + user=user, + authorizer=authorizer, + ) + + +@subjects_router.post("/{subject}", response_model_exclude_none=True) +@inject +async def subjects_subject_post( + subject: Subject, + user: Annotated[User, Depends(get_current_user)], + schema_request: SchemaRequest, + deleted: bool = False, + normalize: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> SchemaResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.subjects_schema_post( + subject=subject, + schema_request=schema_request, + deleted=deleted, + normalize=normalize, + ) + + +@subjects_router.delete("/{subject}") +@inject +async def subjects_subject_delete( + request: Request, + subject: Subject, + user: Annotated[User, Depends(get_current_user)], + permanent: bool = False, + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[int]: + if authorizer and not authorizer.check_authorization(user, Operation.Write, 
f"Subject:{subject}"): + raise unauthorized() + + i_am_primary, primary_url = await schema_registry.get_master() + if i_am_primary: + return await controller.subject_delete(subject=subject, permanent=permanent) + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) + + +@subjects_router.post("/{subject}/versions") +@inject +async def subjects_subject_versions_post( + request: Request, + subject: Subject, + schema_request: SchemaRequest, + user: Annotated[User, Depends(get_current_user)], + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + normalize: bool = False, + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> SchemaIdResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): + raise unauthorized() + + # TODO: split the functionality so primary error and forwarding can be handled here + # and local/primary write is in controller. 
+ return await controller.subject_post( + subject=subject, + schema_request=schema_request, + normalize=normalize, + forward_client=forward_client, + request=request, + ) + + +@subjects_router.get("/{subject}/versions") +@inject +async def subjects_subject_versions_list( + subject: Subject, + user: Annotated[User, Depends(get_current_user)], + deleted: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[int]: + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.subject_versions_list(subject=subject, deleted=deleted) + + +@subjects_router.get("/{subject}/versions/{version}", response_model_exclude_none=True) +@inject +async def subjects_subject_version_get( + subject: Subject, + version: str, + user: Annotated[User, Depends(get_current_user)], + deleted: bool = False, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> SubjectSchemaVersionResponse: + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.subject_version_get(subject=subject, version=version, deleted=deleted) + + +@subjects_router.delete("/{subject}/versions/{version}") +@inject +async def subjects_subject_version_delete( + request: Request, + subject: Subject, + version: str, + user: Annotated[User, Depends(get_current_user)], + permanent: bool = False, + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + authorizer: AuthenticatorAndAuthorizer = 
Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> int: + if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): + raise unauthorized() + + i_am_primary, primary_url = await schema_registry.get_master() + if i_am_primary: + return await controller.subject_version_delete(subject=subject, version=version, permanent=permanent) + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) + + +@subjects_router.get("/{subject}/versions/{version}/schema") +@inject +async def subjects_subject_version_schema_get( + subject: Subject, + version: str, + user: Annotated[User, Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> dict: + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.subject_version_schema_get(subject=subject, version=version) + + +@subjects_router.get("/{subject}/versions/{version}/referencedby") +@inject +async def subjects_subject_version_referenced_by( + subject: Subject, + version: str, + user: Annotated[User, Depends(get_current_user)], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), + controller: KarapaceSchemaRegistryController = Depends(Provide[SchemaRegistryContainer.schema_registry_controller]), +) -> list[int]: + if authorizer and not authorizer.check_authorization(user, 
Operation.Read, f"Subject:{subject}"): + raise unauthorized() + + return await controller.subject_version_referencedby_get(subject=subject, version=version) diff --git a/src/schema_registry/schema_registry_apis.py b/src/schema_registry/schema_registry_apis.py new file mode 100644 index 000000000..cc9a01bb2 --- /dev/null +++ b/src/schema_registry/schema_registry_apis.py @@ -0,0 +1,950 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from avro.errors import SchemaParseException +from dependency_injector.wiring import inject, Provide +from fastapi import Depends, HTTPException, Request, Response, status +from karapace.auth import AuthenticatorAndAuthorizer, Operation, User +from karapace.compatibility import CompatibilityModes +from karapace.compatibility.jsonschema.checks import is_incompatible +from karapace.compatibility.schema_compatibility import SchemaCompatibility +from karapace.config import Config +from karapace.container import KarapaceContainer +from karapace.errors import ( + IncompatibleSchema, + InvalidReferences, + InvalidSchema, + InvalidSchemaType, + InvalidVersion, + ReferenceExistsException, + SchemasNotFoundException, + SchemaTooLargeException, + SchemaVersionNotSoftDeletedException, + SchemaVersionSoftDeletedException, + SubjectNotFoundException, + SubjectNotSoftDeletedException, + SubjectSoftDeletedException, + VersionNotFoundException, +) +from karapace.forward_client import ForwardClient +from karapace.protobuf.exception import ProtobufUnresolvedDependencyException +from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema, Versioner +from karapace.schema_references import LatestVersionReference, Reference +from karapace.schema_registry import KarapaceSchemaRegistry +from karapace.statsd import StatsClient +from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version +from karapace.utils import JSONDecodeError +from 
schema_registry.routers.errors import no_primary_url_error, SchemaErrorCodes, SchemaErrorMessages +from schema_registry.routers.requests import ( + CompatibilityCheckResponse, + CompatibilityLevelResponse, + CompatibilityRequest, + CompatibilityResponse, + ModeResponse, + SchemaIdResponse, + SchemaListingItem, + SchemaRequest, + SchemaResponse, + SchemasResponse, + SubjectSchemaVersionResponse, + SubjectVersion, +) +from typing import Any, cast + +import json +import logging +import time + +LOG = logging.getLogger(__name__) + + +class KarapaceSchemaRegistryController: + def __init__(self, config: Config, schema_registry: KarapaceSchemaRegistry, stats: StatsClient) -> None: + # super().__init__(config=config, not_ready_handler=self._forward_if_not_ready_to_serve) + + print("+++++++++========") + print(schema_registry) + + self.config = config + self._process_start_time = time.monotonic() + self.stats = stats + self.schema_registry = schema_registry + + def _add_schema_registry_routes(self) -> None: + pass + + def _subject_get(self, subject: Subject, include_deleted: bool = False) -> dict[Version, SchemaVersion]: + try: + schema_versions = self.schema_registry.subject_get(subject, include_deleted) + except SubjectNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + except SchemasNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + return schema_versions + + def _invalid_version(self, version: str | int) -> HTTPException: + """Shall be called when InvalidVersion is raised""" + return HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": 
SchemaErrorCodes.INVALID_VERSION_ID.value, + "message": ( + f"The specified version '{version}' is not a valid version id. " + 'Allowed values are between [1, 2^31-1] and the string "latest"' + ), + }, + ) + + async def compatibility_check( + self, + *, + subject: Subject, + schema_request: SchemaRequest, + version: str, + ) -> CompatibilityCheckResponse: + """Check for schema compatibility""" + try: + compatibility_mode = self.schema_registry.get_compatibility_mode(subject=subject) + except ValueError as ex: + # Using INTERNAL_SERVER_ERROR because the subject and configuration + # should have been validated before. + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail={ + "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, + "message": str(ex), + }, + ) + + new_schema = self.get_new_schema(schema_request=schema_request) + old_schema = self.get_old_schema(subject, Versioner.V(version)) # , content_type) + if compatibility_mode.is_transitive(): + # Ignore the schema version provided in the rest api call (`version`) + # Instead check against all previous versions (including `version` if existing) + result = self.schema_registry.check_schema_compatibility(new_schema, subject) + else: + # Check against the schema version provided in the rest api call (`version`) + result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) + + if is_incompatible(result): + return CompatibilityCheckResponse(is_compatible=False, messages=list(result.messages)) + return CompatibilityCheckResponse(is_compatible=True) + + @inject + async def schemas_list( + self, + *, + deleted: bool, + latest_only: bool, + user: User | None, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[KarapaceContainer.authorizer]), + ) -> list[SchemaListingItem]: + schemas = await self.schema_registry.schemas_list(include_deleted=deleted, latest_only=latest_only) + response_schemas: list[SchemaListingItem] = [] + for subject, 
schema_versions in schemas.items(): + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + continue + for schema_version in schema_versions: + references: list[Any] | None = None + if schema_version.references: + references = [r.to_dict() for r in schema_version.references] + response_schemas.append( + SchemaListingItem( + subject=schema_version.subject, + schema=schema_version.schema.schema_str, + version=schema_version.version.value, + id=schema_version.schema_id, + schemaType=schema_version.schema.schema_type, + references=references, + ) + ) + + return response_schemas + + @inject + async def schemas_get( + self, + *, + schema_id: str, + fetch_max_id: bool, + include_subjects: bool, + format_serialized: str, + user: User | None, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[KarapaceContainer.authorizer]), + ) -> SchemasResponse: + try: + parsed_schema_id = SchemaId(int(schema_id)) + except ValueError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.HTTP_NOT_FOUND.value, + "message": "HTTP 404 Not Found", + }, + ) + + def _has_subject_with_id() -> bool: + # Fast path + if authorizer is None or authorizer.check_authorization(user, Operation.Read, "Subject:*"): + return True + + subjects = self.schema_registry.database.subjects_for_schema(schema_id=parsed_schema_id) + resources = [f"Subject:{subject}" for subject in subjects] + return authorizer.check_authorization_any(user=user, operation=Operation.Read, resources=resources) + + if authorizer: + has_subject = _has_subject_with_id() + if not has_subject: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, + "message": "Schema not found", + }, + ) + + schema = self.schema_registry.schemas_get(parsed_schema_id, fetch_max_id=fetch_max_id) + print("+++++++++========") + print(schema) + if not schema: + raise 
HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, + "message": "Schema not found", + }, + ) + + schema_str = schema.schema_str + if format_serialized and schema.schema_type == SchemaType.PROTOBUF: + parsed_schema = ParsedTypedSchema.parse(schema_type=schema.schema_type, schema_str=schema_str) + schema_str = parsed_schema.serialize() + + subjects: list[Subject] | None = None + schema_type: SchemaType | None = None + references: list[Any] | None = None # TODO: typing + maxId: int | None = None + + if include_subjects: + subjects = self.schema_registry.database.subjects_for_schema(parsed_schema_id) + if schema.schema_type is not SchemaType.AVRO: + schema_type = schema.schema_type + if schema.references: + references = [r.to_dict() for r in schema.references] + if fetch_max_id: + maxId = schema.max_id + + return SchemasResponse( + schema=schema_str, + subjects=subjects, + schemaType=schema_type, + references=references, + maxId=maxId, + ) + + @inject + async def schemas_get_versions( + self, + *, + schema_id: str, + deleted: bool, + user: User | None, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[KarapaceContainer.authorizer]), + ) -> list[SubjectVersion]: + try: + schema_id_int = SchemaId(int(schema_id)) + except ValueError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.HTTP_NOT_FOUND.value, + "message": "HTTP 404 Not Found", + }, + ) + + subject_versions = [] + for subject_version in self.schema_registry.get_subject_versions_for_schema(schema_id_int, include_deleted=deleted): + subject = subject_version["subject"] + if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): + continue + subject_versions.append( + # TODO correct typing + SubjectVersion( + subject=subject_version["subject"], + version=subject_version["version"].value, + ), + ) + return subject_versions + + async def 
schemas_types(self) -> list[str]: + return ["JSON", "AVRO", "PROTOBUF"] + + async def config_get(self) -> CompatibilityLevelResponse: + # Note: The format sent by the user differs from the return value, this + # is for compatibility reasons. + return CompatibilityLevelResponse(compatibilityLevel=self.schema_registry.schema_reader.config.compatibility) + + async def config_set( + self, + *, + compatibility_level_request: CompatibilityRequest, + ) -> CompatibilityResponse: + try: + compatibility_level = CompatibilityModes(compatibility_level_request.compatibility) + except (ValueError, KeyError): + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_COMPATIBILITY_LEVEL.value, + "message": SchemaErrorMessages.INVALID_COMPATIBILITY_LEVEL.value, + }, + ) + + self.schema_registry.send_config_message(compatibility_level=compatibility_level, subject=None) + return CompatibilityResponse(compatibility=self.schema_registry.schema_reader.config.compatibility) + + async def config_subject_get( + self, + *, + subject: str, + default_to_global: bool, + ) -> CompatibilityLevelResponse: + # Config for a subject can exist without schemas so no need to check for their existence + assert self.schema_registry.schema_reader, "KarapaceSchemaRegistry not initialized. Missing call to _init" + if self.schema_registry.database.find_subject(subject=Subject(subject)) is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + + compatibility = self.schema_registry.database.get_subject_compatibility(subject=Subject(subject)) + if not compatibility and default_to_global: + compatibility = self.schema_registry.compatibility + if compatibility: + # Note: The format sent by the user differs from the return + # value, this is for compatibility reasons. 
+ return CompatibilityLevelResponse(compatibilityLevel=compatibility) + + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_ERROR_CODE.value, + "message": SchemaErrorMessages.SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_FMT.value.format(subject=subject), + }, + ) + + async def config_subject_set( + self, + *, + subject: str, + compatibility_level_request: CompatibilityRequest, + ) -> CompatibilityResponse: + try: + compatibility_level = CompatibilityModes(compatibility_level_request.compatibility) + except (ValueError, KeyError): + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_COMPATIBILITY_LEVEL.value, + "message": "Invalid compatibility level", + }, + ) + + self.schema_registry.send_config_message(compatibility_level=compatibility_level, subject=Subject(subject)) + return CompatibilityResponse(compatibility=compatibility_level.value) + + async def config_subject_delete( + self, + *, + subject: str, + ) -> CompatibilityResponse: + self.schema_registry.send_config_subject_delete_message(subject=Subject(subject)) + return CompatibilityResponse(compatibility=self.schema_registry.schema_reader.config.compatibility) + + @inject + async def subjects_list( + self, + deleted: bool, + user: User | None, + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[KarapaceContainer.authorizer]), + ) -> list[str]: + subjects = [str(subject) for subject in self.schema_registry.database.find_subjects(include_deleted=deleted)] + if authorizer: + subjects = list( + filter( + lambda subject: authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"), + subjects, + ) + ) + return subjects + + async def subject_delete( + self, + *, + subject: str, + permanent: bool, + ) -> list[int]: + try: + version_list = await self.schema_registry.subject_delete_local(subject=Subject(subject), 
permanent=permanent) + return [version.value for version in version_list] + except (SubjectNotFoundException, SchemasNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + except SubjectNotSoftDeletedException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_SOFT_DELETED.value, + "message": f"Subject '{subject}' was not deleted first before being permanently deleted", + }, + ) + except SubjectSoftDeletedException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_SOFT_DELETED.value, + "message": f"Subject '{subject}' was soft deleted.Set permanent=true to delete permanently", + }, + ) + + except ReferenceExistsException as arg: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.REFERENCE_EXISTS.value, + "message": ( + f"One or more references exist to the schema " + f"{{magic=1,keytype=SCHEMA,subject={subject},version={arg.version}}}." + ), + }, + ) + + async def subject_version_get( + self, + subject: str, + version: str, + deleted: bool, + ) -> SubjectSchemaVersionResponse: + try: + subject_data = self.schema_registry.subject_version_get( + Subject(subject), Versioner.V(version), include_deleted=deleted + ) + return SubjectSchemaVersionResponse( + subject=subject_data["subject"], + version=subject_data["version"], + id=subject_data["id"], + schema=subject_data["schema"], + references=subject_data.get("references", None), + schemaType=subject_data.get("schemaType", None), + compatibility=None, # Do not return compatibility from this endpoint. 
+ ) + except (SubjectNotFoundException, SchemasNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + except VersionNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + ) + except InvalidVersion: + raise self._invalid_version(version) + + async def subject_version_delete( + self, + *, + subject: str, + version: str, + permanent: bool, + ) -> int: + try: + resolved_version = await self.schema_registry.subject_version_delete_local( + Subject(subject), Versioner.V(version), permanent + ) + return resolved_version.value + except (SubjectNotFoundException, SchemasNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + except VersionNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + ) + except SchemaVersionSoftDeletedException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SCHEMAVERSION_SOFT_DELETED.value, + "message": ( + f"Subject '{subject}' Version {version} was soft deleted. 
" + "Set permanent=true to delete permanently" + ), + }, + ) + except SchemaVersionNotSoftDeletedException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SCHEMAVERSION_NOT_SOFT_DELETED.value, + "message": ( + f"Subject '{subject}' Version {version} was not deleted " "first before being permanently deleted" + ), + }, + ) + except ReferenceExistsException as arg: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.REFERENCE_EXISTS.value, + "message": ( + f"One or more references exist to the schema " + f"{{magic=1,keytype=SCHEMA,subject={subject},version={arg.version}}}." + ), + }, + ) + except InvalidVersion: + self._invalid_version(version) + + async def subject_version_schema_get( + self, + *, + subject: str, + version: str, + ) -> dict: + try: + subject_data = self.schema_registry.subject_version_get(Subject(subject), Versioner.V(version)) + return json.loads(cast(str, subject_data["schema"])) # TODO typing + except InvalidVersion: + raise self._invalid_version(version) + except VersionNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + ) + except (SchemasNotFoundException, SubjectNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + + async def subject_version_referencedby_get( + self, + *, + subject: str, + version, + ) -> list[int]: + referenced_by: list[int] = [] + try: + referenced_by = await self.schema_registry.subject_version_referencedby_get( + Subject(subject), Versioner.V(version) + ) + except (SubjectNotFoundException, SchemasNotFoundException): + raise HTTPException( + 
status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + except VersionNotFoundException: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + ) + except InvalidVersion: + raise self._invalid_version(version) + + return referenced_by + + async def subject_versions_list( + self, + *, + subject: str, + deleted: bool, + ) -> list[int]: + try: + schema_versions = self.schema_registry.subject_get(Subject(subject), include_deleted=deleted) + version_list = [version.value for version in schema_versions] + return version_list + except (SubjectNotFoundException, SchemasNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + + def _validate_schema_type(self, data: JsonData) -> SchemaType: + # TODO: simplify the calling code, this functionality should not be required + # for old schemas. 
+ if not isinstance(data, dict): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "error_code": SchemaErrorCodes.HTTP_BAD_REQUEST.value, + "message": "Malformed request", + }, + ) + schema_type_unparsed = data.get("schemaType", SchemaType.AVRO.value) + try: + schema_type = SchemaType(schema_type_unparsed) + except ValueError: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, + "message": f"Invalid schemaType {schema_type_unparsed}", + }, + ) + return schema_type + + def _validate_references( + self, + schema_request: SchemaRequest, + ) -> list[Reference | LatestVersionReference] | None: + references = schema_request.references + # Allow passing `null` as value for compatibility + if references is None: + return None + if references and schema_request.schema_type != SchemaType.PROTOBUF: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value, + "message": SchemaErrorMessages.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value.format( + schema_type=schema_request.schema_type.value + ), + }, + ) + + validated_references = [] + for reference in references: + version = Versioner.V(reference.version) + if version.is_latest: + validated_references.append( + LatestVersionReference( + name=reference.name, + subject=Subject(reference.subject), + ) + ) + else: + validated_references.append( + Reference( + name=reference.name, + subject=Subject(reference.subject), + version=version, + ) + ) + if validated_references: + return validated_references + return None + + async def subjects_schema_post( + self, + *, + subject: Subject, + schema_request: SchemaRequest, + deleted: bool, + normalize: bool, + ) -> SchemaResponse: + try: + subject_data = self._subject_get(subject, include_deleted=deleted) + except (SchemasNotFoundException, SubjectNotFoundException): + 
raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + references = None + new_schema_dependencies = None + references = self._validate_references(schema_request) + references, new_schema_dependencies = self.schema_registry.resolve_references(references) + + new_schema: ParsedTypedSchema | None = None + try: + # When checking if schema is already registered, allow unvalidated schema in as + # there might be stored schemas that are non-compliant from the past. + new_schema = ParsedTypedSchema.parse( + schema_type=schema_request.schema_type, + schema_str=schema_request.schema_str, + references=references, + dependencies=new_schema_dependencies, + normalize=normalize, + use_protobuf_formatter=self.config.use_protobuf_formatter, + ) + except InvalidSchema: + LOG.warning("Invalid schema: %r", schema_request.schema_str) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Error while looking up schema under subject {subject}", + }, + ) + except InvalidReferences: + human_error = "Provided references is not valid" + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_request.schema_type} references. 
Error: {human_error}", + }, + ) + + # Match schemas based on version from latest to oldest + for schema_version in sorted(subject_data.values(), key=lambda item: item.version, reverse=True): + other_references, other_dependencies = self.schema_registry.resolve_references(schema_version.references) + try: + parsed_typed_schema = ParsedTypedSchema.parse( + schema_version.schema.schema_type, + schema_version.schema.schema_str, + references=other_references, + dependencies=other_dependencies, + normalize=normalize, + ) + except InvalidSchema as e: + failed_schema_id = schema_version.schema_id + LOG.exception("Existing schema failed to parse. Id: %s", failed_schema_id) + self.stats.unexpected_exception( + ex=e, where="Matching existing schemas to posted. Failed schema id: {failed_schema_id}" + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail={ + "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, + "message": f"Error while looking up schema under subject {subject}", + }, + ) + + if schema_request.schema_type is SchemaType.JSONSCHEMA: + schema_valid = parsed_typed_schema.to_dict() == new_schema.to_dict() + else: + schema_valid = new_schema.match(parsed_typed_schema) + if parsed_typed_schema.schema_type == new_schema.schema_type and schema_valid: + schema_type: SchemaType | None = None + if schema_request.schema_type is not SchemaType.AVRO: + schema_type = schema_request.schema_type + return SchemaResponse( + subject=subject, + version=schema_version.version.value, + id=schema_version.schema_id, + schema=parsed_typed_schema.schema_str, + schemaType=schema_type, + ) + else: + LOG.debug("Schema %r did not match %r", schema_version, parsed_typed_schema) + + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, + "message": "Schema not found", + }, + ) + + async def subject_post( + self, + *, + subject: str, + schema_request: SchemaRequest, + 
normalize: bool, + forward_client: ForwardClient, + request: Request, + ) -> SchemaIdResponse | Response: + LOG.debug("POST with subject: %r, request: %r", subject, schema_request) + + references = self._validate_references(schema_request=schema_request) + + try: + references, resolved_dependencies = self.schema_registry.resolve_references(references) + new_schema = ValidatedTypedSchema.parse( + schema_type=schema_request.schema_type, + schema_str=schema_request.schema_str, + references=references, + dependencies=resolved_dependencies, + normalize=normalize, + use_protobuf_formatter=self.config.use_protobuf_formatter, + ) + except (InvalidReferences, InvalidSchema, InvalidSchemaType) as e: + LOG.warning("Invalid schema: %r", schema_request.schema_str, exc_info=True) + if isinstance(e.__cause__, (SchemaParseException, JSONDecodeError, ProtobufUnresolvedDependencyException)): + human_error = f"{e.__cause__.args[0]}" # pylint: disable=no-member + else: + from_body_schema_str = schema_request.schema_str + human_error = ( + f"Invalid schema {from_body_schema_str} with refs {references} of type {schema_request.schema_type}" + ) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_request.schema_type.value} schema. 
Error: {human_error}", + }, + ) + + schema_id = self.get_schema_id_if_exists(subject=Subject(subject), schema=new_schema, include_deleted=False) + if schema_id is not None: + return SchemaIdResponse(id=schema_id) + + i_am_primary, primary_url = await self.schema_registry.get_master() + if i_am_primary: + try: + schema_id = await self.schema_registry.write_new_schema_local(Subject(subject), new_schema, references) + return SchemaIdResponse(id=schema_id) + except InvalidSchema as ex: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_request.schema_type.value} schema. Error: {str(ex)}", + }, + ) + except IncompatibleSchema as ex: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail={ + "error_code": SchemaErrorCodes.HTTP_CONFLICT.value, + "message": str(ex), + }, + ) + except SchemaTooLargeException: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.SCHEMA_TOO_LARGE_ERROR_CODE.value, + "message": "Schema is too large", + }, + ) + except Exception as xx: + raise xx + + elif not primary_url: + raise no_primary_url_error() + else: + return await forward_client.forward_request_remote(request=request, primary_url=primary_url) + + async def get_global_mode(self) -> ModeResponse: + return ModeResponse(mode=str(self.schema_registry.get_global_mode())) + + async def get_subject_mode( + self, + *, + subject: str, + ) -> ModeResponse: + if self.schema_registry.database.find_subject(subject=Subject(subject)) is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, + "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), + }, + ) + return ModeResponse(mode=str(self.schema_registry.get_global_mode())) + + def get_schema_id_if_exists(self, *, subject: Subject, 
schema: TypedSchema, include_deleted: bool) -> SchemaId | None: + schema_id = self.schema_registry.database.get_schema_id_if_exists( + subject=subject, schema=schema, include_deleted=include_deleted + ) + return schema_id + + def get_new_schema(self, schema_request: SchemaRequest) -> ValidatedTypedSchema: + references = self._validate_references(schema_request=schema_request) + try: + references, new_schema_dependencies = self.schema_registry.resolve_references(references) + return ValidatedTypedSchema.parse( + schema_type=schema_request.schema_type, + schema_str=schema_request.schema_str, + references=references, + dependencies=new_schema_dependencies, + use_protobuf_formatter=self.config.use_protobuf_formatter, + ) + except InvalidSchema: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_request.schema_type} schema", + }, + ) + + def get_old_schema(self, subject: Subject, version: Version) -> ParsedTypedSchema: + old: JsonObject | None = None + try: + old = self.schema_registry.subject_version_get(subject=subject, version=version) + except InvalidVersion: + self._invalid_version(version.value) + except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + ) + assert old is not None + old_schema_type = self._validate_schema_type(data=old) + try: + old_references = old.get("references", None) + old_dependencies = None + if old_references: + old_references, old_dependencies = self.schema_registry.resolve_references(old_references) + old_schema = ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) + return old_schema + except InvalidSchema: + raise HTTPException( + 
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Found an invalid {old_schema_type} schema registered", + }, + ) diff --git a/src/schema_registry/user.py b/src/schema_registry/user.py new file mode 100644 index 000000000..16cd55705 --- /dev/null +++ b/src/schema_registry/user.py @@ -0,0 +1,41 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from dependency_injector.wiring import inject, Provide +from fastapi import Depends, HTTPException, status +from fastapi.security import HTTPBasic, HTTPBasicCredentials +from karapace.auth import AuthenticationError, AuthenticatorAndAuthorizer, User +from schema_registry.container import SchemaRegistryContainer +from typing import Annotated + + +@inject +async def get_current_user( + credentials: Annotated[HTTPBasicCredentials, Depends(HTTPBasic())], + authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), +) -> User: + import logging + + logging.info("get_current_user ++++++++++++=============") + logging.info(f"credentials: {credentials}") + logging.info(f"authorizer: {authorizer}") + if authorizer and not credentials: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail={"message": "Unauthorized"}, + headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'}, + ) + assert authorizer is not None + assert credentials is not None + username: str = credentials.username + password: str = credentials.password + try: + return authorizer.authenticate(username=username, password=password) + except AuthenticationError as exc: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail={"message": "Unauthorized"}, + headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'}, + ) from exc From f929af024b2fb2fbf3d74308f8db661ef2f051b5 Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 11:40:28 
+0100 Subject: [PATCH 06/11] chore: drop manual dependencies in-favour of DI --- src/karapace/{auth => }/auth.py | 21 ++++++--- src/karapace/auth/__init__.py | 0 src/karapace/auth/dependencies.py | 77 ------------------------------- src/karapace/base_config.yaml | 3 ++ 4 files changed, 17 insertions(+), 84 deletions(-) rename src/karapace/{auth => }/auth.py (95%) delete mode 100644 src/karapace/auth/__init__.py delete mode 100644 src/karapace/auth/dependencies.py create mode 100644 src/karapace/base_config.yaml diff --git a/src/karapace/auth/auth.py b/src/karapace/auth.py similarity index 95% rename from src/karapace/auth/auth.py rename to src/karapace/auth.py index 5546a43a8..cfc566cdf 100644 --- a/src/karapace/auth/auth.py +++ b/src/karapace/auth.py @@ -8,11 +8,11 @@ from dataclasses import dataclass, field from enum import Enum, unique from hmac import compare_digest -from karapace.config import InvalidConfiguration +from karapace.config import Config, InvalidConfiguration from karapace.statsd import StatsClient from karapace.utils import json_decode, json_encode -from typing import override, Protocol -from typing_extensions import TypedDict +from typing import Protocol +from typing_extensions import override, TypedDict from watchfiles import awatch, Change import argparse @@ -205,14 +205,12 @@ def check_authorization_any(self, user: User | None, operation: Operation, resou class HTTPAuthorizer(ACLAuthorizer, AuthenticatorAndAuthorizer): - def __init__(self, filename: str) -> None: + def __init__(self, config: Config) -> None: super().__init__() - self._auth_filename: str = filename + self._auth_filename: str = config.registry_authfile self._auth_mtime: float = -1 self._refresh_auth_task: asyncio.Task | None = None self._refresh_auth_awatch_stop_event = asyncio.Event() - # Once first, can raise if file not valid - self._load_authfile() @property def authfile_last_modified(self) -> float: @@ -221,6 +219,7 @@ def authfile_last_modified(self) -> float: @override 
async def start(self, stats: StatsClient) -> None: """Start authfile refresher task""" + self._load_authfile() async def _refresh_authfile() -> None: """Reload authfile, but keep old auth data if loading fails""" @@ -294,6 +293,14 @@ def authenticate(self, *, username: str, password: str) -> User: return user +def get_authorizer( + config: Config, + http_authorizer: HTTPAuthorizer, + no_auth_authorizer: NoAuthAndAuthz, +) -> AuthenticatorAndAuthorizer: + return http_authorizer if config.registry_authfile else no_auth_authorizer + + def main() -> int: parser = argparse.ArgumentParser(prog="karapace_mkpasswd", description="Karapace password hasher") parser.add_argument("-u", "--user", help="Username", type=str) diff --git a/src/karapace/auth/__init__.py b/src/karapace/auth/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/karapace/auth/dependencies.py b/src/karapace/auth/dependencies.py deleted file mode 100644 index a3001fc8f..000000000 --- a/src/karapace/auth/dependencies.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import Depends, HTTPException, Security, status -from fastapi.security import HTTPBasic, HTTPBasicCredentials -from fastapi.security.base import SecurityBase -from karapace.auth.auth import AuthenticationError, AuthenticatorAndAuthorizer, HTTPAuthorizer, NoAuthAndAuthz, User -from karapace.dependencies.config_dependency import ConfigDependencyManager -from typing import Annotated, Optional - -import logging - -LOG = logging.getLogger(__name__) - - -class AuthorizationDependencyManager: - AUTHORIZER: AuthenticatorAndAuthorizer | None = None - AUTH_SET: bool = False - SECURITY: SecurityBase | None = None - - @classmethod - def get_authorizer(cls) -> AuthenticatorAndAuthorizer: - if AuthorizationDependencyManager.AUTH_SET: - assert AuthorizationDependencyManager.AUTHORIZER - return AuthorizationDependencyManager.AUTHORIZER - - config = 
ConfigDependencyManager.get_config() - if config.registry_authfile: - AuthorizationDependencyManager.AUTHORIZER = HTTPAuthorizer(config.registry_authfile) - else: - # TODO: remove the need for empty authorization logic. - AuthorizationDependencyManager.AUTHORIZER = NoAuthAndAuthz() - AuthorizationDependencyManager.AUTH_SET = True - return AuthorizationDependencyManager.AUTHORIZER - - -AuthenticatorAndAuthorizerDep = Annotated[AuthenticatorAndAuthorizer, Depends(AuthorizationDependencyManager.get_authorizer)] - -# TODO Karapace can have authentication/authorization enabled or disabled. This code needs cleanup and better -# injection mechanism, this is fast workaround for optional user authentication and authorization. -SECURITY: SecurityBase | None = None -config = ConfigDependencyManager.get_config() -if config.registry_authfile: - SECURITY = HTTPBasic(auto_error=False) - - def get_current_user( - credentials: Annotated[Optional[HTTPBasicCredentials], Security(SECURITY)], - authorizer: AuthenticatorAndAuthorizerDep, - ) -> User: - if authorizer and not credentials: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail={"message": "Unauthorized"}, - headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'}, - ) - assert authorizer is not None - assert credentials is not None - username: str = credentials.username - password: str = credentials.password - try: - return authorizer.authenticate(username=username, password=password) - except AuthenticationError: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail={"message": "Unauthorized"}, - headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'}, - ) - -else: - - def get_current_user() -> None: - return None - - -CurrentUserDep = Annotated[Optional[User], Depends(get_current_user)] diff --git a/src/karapace/base_config.yaml b/src/karapace/base_config.yaml new file mode 100644 index 000000000..5aa4d42d2 --- /dev/null +++ 
b/src/karapace/base_config.yaml @@ -0,0 +1,3 @@ +karapace: + env_file: ${KARAPACE_DOTENV} + env_file_encoding: utf-8 From eb3f44eab53698d816e7d09308dd522431de2a3b Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 11:41:07 +0100 Subject: [PATCH 07/11] chore: fix config and set defaults --- src/karapace/config.py | 130 ++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 66 deletions(-) diff --git a/src/karapace/config.py b/src/karapace/config.py index 146215ba7..d445b56a8 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -7,25 +7,34 @@ from __future__ import annotations from collections.abc import Mapping +from copy import deepcopy from karapace.constants import DEFAULT_AIOHTTP_CLIENT_MAX_SIZE, DEFAULT_PRODUCER_MAX_REQUEST, DEFAULT_SCHEMA_TOPIC from karapace.typing import ElectionStrategy, NameStrategy from karapace.utils import json_encode from pathlib import Path -from pydantic import BaseSettings +from pydantic import BaseModel, BaseSettings, PyObject +from typing import Final import logging import os import socket import ssl +KARAPACE_ROOT: Final[Path] = Path(__file__).parent +KARAPACE_BASE_CONFIG_YAML_PATH: Final[Path] = KARAPACE_ROOT / "base_config.yaml" + HOSTNAME = socket.gethostname() HOSTNAME = socket.gethostname() +class KarapaceTags(BaseModel): + app: str = "Karapace" + + class Config(BaseSettings): access_logs_debug: bool = False - access_log_class: type | None = None + access_log_class: PyObject = "aiohttp.web_log.AccessLogger" advertised_hostname: str | None = None advertised_port: int | None = None advertised_protocol: str = "http" @@ -60,6 +69,7 @@ class Config(BaseSettings): master_eligibility: bool = True replication_factor: int = 1 security_protocol: str = "PLAINTEXT" + ssl_ciphers: str | None = None ssl_cafile: str | None = None ssl_certfile: str | None = None ssl_keyfile: str | None = None @@ -93,7 +103,7 @@ class Config(BaseSettings): waiting_time_before_acting_as_master_ms: int = 
5000 sentry: Mapping[str, object] | None = None - tags: Mapping[str, object] | None = None + tags: KarapaceTags = KarapaceTags() # add rest uri if not set # f"{new_config['advertised_protocol']}://{new_config['advertised_hostname']}:{new_config['advertised_port']}" @@ -120,6 +130,40 @@ def to_env_str(self) -> str: env_lines.append(f"{key.upper()}={value}") return "\n".join(env_lines) + def set_config_defaults(self, new_config: Mapping[str, str]) -> Config: + config = deepcopy(self) + for key, value in new_config.items(): + setattr(config, key, value) + + # Fallback to default port if `advertised_port` is not set + if config.advertised_port is None: + config.advertised_port = new_config["port"] + + # Fallback to `advertised_*` constructed URI if not set + if config.rest_base_uri is None: + config.rest_base_uri = f"{config.advertised_protocol}://{config.advertised_hostname}:{config.advertised_port}" + + # Set the aiohttp client max size if REST Proxy is enabled and producer max request configuration is altered + # from default and aiohttp client max size is not set + # Use the http request max size from the configuration without altering if set. + if ( + config.karapace_rest + and config.producer_max_request_size > DEFAULT_PRODUCER_MAX_REQUEST + and config.http_request_max_size is None + ): + # REST Proxy API configuration for producer max request size must be taken into account + # also for the aiohttp.web.Application client max size. + # Always add the aiohttp default client max size as the headroom above the producer max request size. + # The input JSON size for REST Proxy is not easy to estimate, lot of small records in single request has + # a lot of overhead due to JSON structure. 
+ config.http_request_max_size = config.producer_max_request_size + DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + elif config.http_request_max_size is None: + # Set the default aiohttp client max size + config.http_request_max_size = DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + + validate_config(config) + return config + # class ConfigDefaults(Config, total=False): # ... @@ -148,45 +192,6 @@ def parse_env_value(value: str) -> str | int | bool: return value -def set_config_defaults(config: Config) -> Config: - # Fallback to default port if `advertised_port` is not set - if config["advertised_port"] is None: - config["advertised_port"] = new_config["port"] - - # Fallback to `advertised_*` constructed URI if not set - if new_config["rest_base_uri"] is None: - new_config[ - "rest_base_uri" - ] = f"{new_config['advertised_protocol']}://{new_config['advertised_hostname']}:{new_config['advertised_port']}" - - # Tag app should always be karapace - new_config.setdefault("tags", {}) - new_config["tags"]["app"] = "Karapace" - - # Set the aiohttp client max size if REST Proxy is enabled and producer max request configuration is altered from default - # and aiohttp client max size is not set - # Use the http request max size from the configuration without altering if set. - if ( - new_config["karapace_rest"] - and new_config["producer_max_request_size"] > DEFAULT_PRODUCER_MAX_REQUEST - and new_config["http_request_max_size"] is None - ): - # REST Proxy API configuration for producer max request size must be taken into account - # also for the aiohttp.web.Application client max size. - # Always add the aiohttp default client max size as the headroom above the producer max request size. - # The input JSON size for REST Proxy is not easy to estimate, lot of small records in single request has - # a lot of overhead due to JSON structure. 
- new_config["http_request_max_size"] = new_config["producer_max_request_size"] + DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - elif new_config["http_request_max_size"] is None: - # Set the default aiohttp client max size - new_config["http_request_max_size"] = DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - - # set_settings_from_environment(new_config) - set_sentry_dsn_from_environment(new_config) - validate_config(new_config) - return new_config - - # def set_settings_from_environment(config: Config) -> None: # """The environment variables have precedence and overwrite the configuration settings.""" # for config_name in DEFAULTS: @@ -221,7 +226,7 @@ def set_sentry_dsn_from_environment(config: Config) -> None: def validate_config(config: Config) -> None: - master_election_strategy = config["master_election_strategy"] + master_election_strategy = config.master_election_strategy try: ElectionStrategy(master_election_strategy.lower()) except ValueError: @@ -230,7 +235,7 @@ def validate_config(config: Config) -> None: f"Invalid master election strategy: {master_election_strategy}, valid values are {valid_strategies}" ) from None - name_strategy = config["name_strategy"] + name_strategy = config.name_strategy try: NameStrategy(name_strategy) except ValueError: @@ -239,7 +244,7 @@ def validate_config(config: Config) -> None: f"Invalid default name strategy: {name_strategy}, valid values are {valid_strategies}" ) from None - if config["rest_authorization"] and config["sasl_bootstrap_uri"] is None: + if config.rest_authorization and config.sasl_bootstrap_uri is None: raise InvalidConfiguration( "Using 'rest_authorization' requires configuration value for 'sasl_bootstrap_uri' to be set" ) @@ -256,17 +261,10 @@ def write_env_file(dot_env_path: Path, config: Config) -> None: def read_env_file(env_file_path: str) -> Config: return Config(_env_file=env_file_path, _env_file_encoding="utf-8") - Config() - try: - config = json_decode(config_handler) - except JSONDecodeError as ex: - raise 
InvalidConfiguration("Configuration is not a valid JSON") from ex - return set_config_defaults(config) - def create_client_ssl_context(config: Config) -> ssl.SSLContext | None: # taken from conn.py, as it adds a lot more logic to the context configuration than the initial version - if config["security_protocol"] in ("PLAINTEXT", "SASL_PLAINTEXT"): + if config.security_protocol in ("PLAINTEXT", "SASL_PLAINTEXT"): return None ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS) ssl_context.options |= ssl.OP_NO_SSLv2 @@ -274,30 +272,30 @@ def create_client_ssl_context(config: Config) -> ssl.SSLContext | None: ssl_context.options |= ssl.OP_NO_TLSv1 ssl_context.options |= ssl.OP_NO_TLSv1_1 ssl_context.verify_mode = ssl.CERT_OPTIONAL - if config["ssl_check_hostname"]: + if config.ssl_check_hostname: ssl_context.check_hostname = True - if config["ssl_cafile"]: - ssl_context.load_verify_locations(config["ssl_cafile"]) + if config.ssl_cafile: + ssl_context.load_verify_locations(config.ssl_cafile) ssl_context.verify_mode = ssl.CERT_REQUIRED - if config["ssl_certfile"] and config["ssl_keyfile"]: + if config.ssl_certfile and config.ssl_keyfile: ssl_context.load_cert_chain( - certfile=config["ssl_certfile"], - keyfile=config["ssl_keyfile"], - password=config["ssl_password"], + certfile=config.ssl_certfile, + keyfile=config.ssl_keyfile, + password=config.ssl_password, ) - if config["ssl_crlfile"]: + if config.ssl_crlfile: if not hasattr(ssl, "VERIFY_CRL_CHECK_LEAF"): raise RuntimeError("This version of Python does not support ssl_crlfile!") - ssl_context.load_verify_locations(config["ssl_crlfile"]) + ssl_context.load_verify_locations(config.ssl_crlfile) ssl_context.verify_flags |= ssl.VERIFY_CRL_CHECK_LEAF - if config.get("ssl_ciphers"): - ssl_context.set_ciphers(config["ssl_ciphers"]) + if config.ssl_ciphers: + ssl_context.set_ciphers(config.ssl_ciphers) return ssl_context def create_server_ssl_context(config: Config) -> ssl.SSLContext | None: - tls_certfile = 
config["server_tls_certfile"] - tls_keyfile = config["server_tls_keyfile"] + tls_certfile = config.server_tls_certfile + tls_keyfile = config.server_tls_keyfile if tls_certfile is None: if tls_keyfile is None: # Neither config value set, do not use TLS From 82c54f0bba5eb2128f76836ea4748c328b4f8ddb Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 11:43:59 +0100 Subject: [PATCH 08/11] feat: add karapace docker CLI - we add a docker compose service which would act like a CLI - we add a dev Dockerfile - we drop python3.9 from the tests matrix as pydantic-settings has issues with python3.9 - we add .env files for both registry and rest --- .github/workflows/tests.yml | 7 +-- container/Dockerfile.dev | 62 +++++++++++++++++++++++++++ container/compose.yml | 76 ++++++++++++++++----------------- container/karapace.registry.env | 47 ++++++++++++++++++++ container/karapace.rest.env | 51 ++++++++++++++++++++++ 5 files changed, 201 insertions(+), 42 deletions(-) create mode 100644 container/Dockerfile.dev create mode 100644 container/karapace.registry.env create mode 100644 container/karapace.rest.env diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8b151f124..3423067f9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,13 +14,14 @@ env: FORCE_COLOR: 1 PIP_PROGRESS_BAR: off PYTHONUNBUFFERED: 1 + KARAPACE_DOTENV: ${{ github.workspace }}/karapace.config.env jobs: tests: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.10', '3.11', '3.12' ] env: PYTEST_ADDOPTS: >- --log-dir=/tmp/ci-logs @@ -44,11 +45,11 @@ jobs: - run: make unit-tests env: COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" - PYTEST_ARGS: "--cov=karapace --cov-append --numprocesses 4" + PYTEST_ARGS: "--cov=src --cov-append --numprocesses 4" - run: make integration-tests env: COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" - PYTEST_ARGS: "--cov=karapace 
--cov-append --random-order --numprocesses 4" + PYTEST_ARGS: "--cov=src --cov-append --random-order --numprocesses 4" - name: Archive logs uses: actions/upload-artifact@v4 diff --git a/container/Dockerfile.dev b/container/Dockerfile.dev new file mode 100644 index 000000000..2ba1c0250 --- /dev/null +++ b/container/Dockerfile.dev @@ -0,0 +1,62 @@ +# Current versions of avro and zstandard don't yet have wheels for 3.11. +FROM python:3.10.11-bullseye AS builder + +ARG KARAPACE_VERSION + +# Create, activate, and enforce usage of virtualenv. +RUN python3 -m venv /venv +ENV PATH="/venv/bin:$PATH" +ENV PIP_REQUIRE_VIRTUALENV=true + +# Install golang needed by extensions +ENV GO_VERSION=1.21.0 +ENV PATH="/usr/local/go/bin:${PATH}" +RUN wget --progress=dot:giga "https://go.dev/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" \ + && tar -C /usr/local -xzf "go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" \ + && rm "go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" + +# Copy the requirements.txt and install dependencies in venv. Using a separate +# command to use layer caching. +# +# Note: the requirements.txt is pinned, if any of the dependencies is updated +# the cache will be invalidated and the image regenerated, which is the +# intended behavior. +COPY ./requirements/requirements.txt /build/ +COPY ./requirements/requirements-dev.txt /build/ +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install -r /build/requirements.txt -r /build/requirements-dev.txt + +COPY . /build/karapace-repo +WORKDIR /build/karapace-repo +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ -z "${KARAPACE_VERSION}" ]; then \ + PRETEND_VERSION="$(python -c 'from src.karapace import version; print(version.__version__)')"; \ + else \ + PRETEND_VERSION=$KARAPACE_VERSION; \ + fi; \ + SETUPTOOLS_SCM_PRETEND_VERSION=$PRETEND_VERSION python3 -m pip install --no-deps . + +# Karapace image, i.e. production. 
+FROM python:3.10.11-slim-bullseye AS karapace + +# Setup user and directories. +RUN groupadd --system karapace \ + && useradd --system --gid karapace karapace \ + && mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace \ + && chown --recursive karapace:karapace /opt/karapace /var/log/karapace + +# Install protobuf compiler. +ARG PROTOBUF_COMPILER_VERSION="3.12.4-1+deb11u1" +RUN apt-get update \ + && apt-get install --assume-yes --no-install-recommends \ + protobuf-compiler=$PROTOBUF_COMPILER_VERSION \ + && rm -rf /var/lib/apt/lists/* + +# Copy virtualenv from builder and activate it. +COPY --from=builder /venv /venv +ENV PATH="/venv/bin:$PATH" + +COPY ./container/healthcheck.py /opt/karapace + +WORKDIR /opt/karapace +USER karapace diff --git a/container/compose.yml b/container/compose.yml index fa2c53265..87106ee90 100644 --- a/container/compose.yml +++ b/container/compose.yml @@ -4,7 +4,7 @@ services: zookeeper: image: confluentinc/cp-zookeeper:latest ports: - - "2181:2181" + - 2181:2181 environment: ZOOKEEPER_CLIENT_PORT: 2181 ZOOKEEPER_TICK_TIME: 2000 @@ -14,8 +14,8 @@ services: depends_on: - zookeeper ports: - - "9101:9101" # JMX - - "9092:9092" # Kafka + - 9101:9101 # JMX + - 9092:9092 # Kafka environment: # Listeners: # PLAINTEXT_HOST -> Expose kafka to the host network @@ -23,7 +23,7 @@ services: KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 - KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://karapace-registry:8081 + KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://karapace-schema-registry:8081 # Metrics: KAFKA_JMX_PORT: 9101 KAFKA_JMX_HOSTNAME: localhost @@ -54,62 +54,60 @@ services: KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: 6000 KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181" - karapace-registry: + karapace-schema-registry: image: ghcr.io/aiven-open/karapace:develop build: context: .. 
dockerfile: container/Dockerfile entrypoint: - - /bin/bash - - /opt/karapace/start.sh - - registry + - python3 + - -m + - schema_registry depends_on: - kafka ports: - - "8081:8081" + - 8081:8081 + volumes: + - ./karapace.registry.env:/opt/karapace/karapace.env environment: - KARAPACE_ADVERTISED_HOSTNAME: karapace-registry - KARAPACE_BOOTSTRAP_URI: kafka:29092 + KARAPACE_DOTENV: /opt/karapace/karapace.env KARAPACE_PORT: 8081 - KARAPACE_HOST: 0.0.0.0 - KARAPACE_CLIENT_ID: karapace - KARAPACE_GROUP_ID: karapace-registry - KARAPACE_MASTER_ELIGIBILITY: "true" - KARAPACE_TOPIC_NAME: _schemas - KARAPACE_LOG_LEVEL: WARNING - KARAPACE_COMPATIBILITY: FULL - KARAPACE_STATSD_HOST: statsd-exporter - KARAPACE_STATSD_PORT: 8125 - KARAPACE_KAFKA_SCHEMA_READER_STRICT_MODE: false - KARAPACE_KAFKA_RETRIABLE_ERRORS_SILENCED: true - karapace-rest: + karapace-rest-proxy: image: ghcr.io/aiven-open/karapace:develop build: context: .. dockerfile: container/Dockerfile entrypoint: - - /bin/bash - - /opt/karapace/start.sh - - rest + - python3 + - -m + - karapace.karapace_all depends_on: - kafka - - karapace-registry + - karapace-schema-registry ports: - - "8082:8082" + - 8082:8082 + volumes: + - ./karapace.rest.env:/opt/karapace/karapace.env environment: + KARAPACE_DOTENV: /opt/karapace/karapace.env KARAPACE_PORT: 8082 - KARAPACE_HOST: 0.0.0.0 - KARAPACE_ADVERTISED_HOSTNAME: karapace-rest - KARAPACE_BOOTSTRAP_URI: kafka:29092 - KARAPACE_REGISTRY_HOST: karapace-registry - KARAPACE_REGISTRY_PORT: 8081 - KARAPACE_ADMIN_METADATA_MAX_AGE: 0 - KARAPACE_LOG_LEVEL: WARNING - KARAPACE_STATSD_HOST: statsd-exporter - KARAPACE_STATSD_PORT: 8125 - KARAPACE_KAFKA_SCHEMA_READER_STRICT_MODE: false - KARAPACE_KAFKA_RETRIABLE_ERRORS_SILENCED: true + + karapace-cli: + image: ghcr.io/aiven-open/karapace:cli + build: + context: .. 
+ dockerfile: container/Dockerfile.dev + tty: true + depends_on: + - kafka + - karapace-schema-registry + - karapace-rest-proxy + volumes: + - ../tests:/opt/karapace/tests + - ../karapace.config.env:/opt/karapace/karapace.env + environment: + KARAPACE_DOTENV: /opt/karapace/karapace.env prometheus: image: prom/prometheus diff --git a/container/karapace.registry.env b/container/karapace.registry.env new file mode 100644 index 000000000..cd757a99b --- /dev/null +++ b/container/karapace.registry.env @@ -0,0 +1,47 @@ +KARAPACE_DOTENV=/opt/karapace/karapace.env +ACCESS_LOGS_DEBUG=False +ADVERTISED_HOSTNAME=karapace-schema-registry +ADVERTISED_PORT=8081 +ADVERTISED_PROTOCOL=http +BOOTSTRAP_URI=kafka:29092 +CLIENT_ID=karapace-schema-registry +COMPATIBILITY=BACKWARD +CONNECTIONS_MAX_IDLE_MS=15000 +CONSUMER_ENABLE_AUTO_COMMIT=True +CONSUMER_REQUEST_TIMEOUT_MS=11000 +CONSUMER_REQUEST_MAX_BYTES=67108864 +CONSUMER_IDLE_DISCONNECT_TIMEOUT=0 +FETCH_MIN_BYTES=1 +GROUP_ID=karapace-schema-registry +HOST=0.0.0.0 +PORT=8081 +REGISTRY_HOST=karapace-schema-registry +REGISTRY_PORT=8081 +REST_AUTHORIZATION=False +LOG_HANDLER=stdout +LOG_LEVEL=WARNING +LOG_FORMAT=%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s +MASTER_ELIGIBILITY=True +REPLICATION_FACTOR=1 +SECURITY_PROTOCOL=PLAINTEXT +SSL_CHECK_HOSTNAME=True +TOPIC_NAME=_schemas +METADATA_MAX_AGE_MS=60000 +ADMIN_METADATA_MAX_AGE=5 +PRODUCER_ACKS=1 +PRODUCER_COUNT=5 +PRODUCER_LINGER_MS=100 +PRODUCER_MAX_REQUEST_SIZE=1048576 +SESSION_TIMEOUT_MS=10000 +KARAPACE_REST=False +KARAPACE_REGISTRY=True +KARAPACE_PORT=8081 +NAME_STRATEGY=topic_name +NAME_STRATEGY_VALIDATION=True +MASTER_ELECTION_STRATEGY=lowest +PROTOBUF_RUNTIME_DIRECTORY=runtime +STATSD_HOST=statsd-exporter +STATSD_PORT=8125 +KAFKA_SCHEMA_READER_STRICT_MODE=False +KAFKA_RETRIABLE_ERRORS_SILENCED=True +USE_PROTOBUF_FORMATTER=False diff --git a/container/karapace.rest.env b/container/karapace.rest.env new file mode 100644 index 000000000..3df13f3b2 --- 
/dev/null +++ b/container/karapace.rest.env @@ -0,0 +1,51 @@ +KARAPACE_DOTENV=/opt/karapace/karapace.env +ACCESS_LOGS_DEBUG=False +# ACCESS_LOG_CLASS=karapace.utils.DebugAccessLogger +ACCESS_LOG_CLASS=aiohttp.web_log.AccessLogger +ADVERTISED_HOSTNAME=karapace-rest-proxy +ADVERTISED_PORT=8082 +ADVERTISED_PROTOCOL=http +BOOTSTRAP_URI=kafka:29092 +CLIENT_ID=karapace-rest-proxy +COMPATIBILITY=BACKWARD +CONNECTIONS_MAX_IDLE_MS=15000 +CONSUMER_ENABLE_AUTO_COMMIT=True +CONSUMER_REQUEST_TIMEOUT_MS=11000 +CONSUMER_REQUEST_MAX_BYTES=67108864 +CONSUMER_IDLE_DISCONNECT_TIMEOUT=0 +FETCH_MIN_BYTES=1 +GROUP_ID=karapace-rest-proxy +HOST=0.0.0.0 +PORT=8082 +REGISTRY_HOST=karapace-schema-registry +REGISTRY_PORT=8081 +REST_AUTHORIZATION=False +LOG_HANDLER=stdout +LOG_LEVEL=WARNING +LOG_FORMAT=%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s +MASTER_ELIGIBILITY=True +REPLICATION_FACTOR=1 +SECURITY_PROTOCOL=PLAINTEXT +SSL_CHECK_HOSTNAME=True +TOPIC_NAME=_schemas +METADATA_MAX_AGE_MS=60000 +ADMIN_METADATA_MAX_AGE=5 +PRODUCER_ACKS=1 +PRODUCER_COUNT=5 +PRODUCER_LINGER_MS=100 +PRODUCER_MAX_REQUEST_SIZE=1048576 +SESSION_TIMEOUT_MS=10000 +KARAPACE_REST=True +KARAPACE_REGISTRY=False +KARAPACE_PORT=8082 +NAME_STRATEGY=topic_name +NAME_STRATEGY_VALIDATION=True +MASTER_ELECTION_STRATEGY=lowest +PROTOBUF_RUNTIME_DIRECTORY=runtime +STATSD_HOST=statsd-exporter +STATSD_PORT=8125 +KAFKA_SCHEMA_READER_STRICT_MODE=False +KAFKA_RETRIABLE_ERRORS_SILENCED=True +USE_PROTOBUF_FORMATTER=False +HTTP_REQUEST_MAX_SIZE=1048576 +TAGS='{ "app": "karapace-rest-proxy" }' From 0b8a2c616a280031257a5112668e330b04c811a3 Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 11:56:48 +0100 Subject: [PATCH 09/11] feat: run unit-tests in the cli docker container - we replace use the karapace container to load the default config --- GNUmakefile | 11 ++- tests/conftest.py | 15 ++++ tests/unit/backup/test_api.py | 52 +++++++----- .../test_rest_proxy_cluster_metadata_cache.py | 71 
+++++++++------- tests/unit/protobuf/test_protoc.py | 6 +- tests/unit/test_authentication.py | 35 +++++--- tests/unit/test_config.py | 28 +++---- tests/unit/test_in_memory_database.py | 6 +- tests/unit/test_kafka_error_handler.py | 13 +-- tests/unit/test_protobuf_serialization.py | 36 ++++----- tests/unit/test_rapu.py | 12 +-- tests/unit/test_rest_auth.py | 14 ++-- tests/unit/test_schema_reader.py | 39 +++++---- tests/unit/test_schema_registry_api.py | 81 +++++++++++-------- tests/unit/test_serialization.py | 38 ++++----- 15 files changed, 270 insertions(+), 187 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 88c14daa9..b8305d65a 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -3,7 +3,9 @@ SHELL := /usr/bin/env bash VENV_DIR ?= $(CURDIR)/venv PIP ?= pip3 --disable-pip-version-check --no-input --require-virtualenv PYTHON ?= python3 -PYTHON_VERSION ?= 3.9 +PYTHON_VERSION ?= 3.10 +DOCKER_COMPOSE ?= docker compose +KARAPACE-CLI ?= $(DOCKER_COMPOSE) -f container/compose.yml run karapace-cli define PIN_VERSIONS_COMMAND pip install pip-tools && \ @@ -102,3 +104,10 @@ schema: .PHONY: pin-requirements pin-requirements: docker run -e CUSTOM_COMPILE_COMMAND='make pin-requirements' -it -v .:/karapace --security-opt label=disable python:$(PYTHON_VERSION)-bullseye /bin/bash -c "$(PIN_VERSIONS_COMMAND)" + +.PHONY: unit-tests-in-docker +unit-tests-in-docker: export PYTEST_ARGS ?= +unit-tests-in-docker: + rm -fr runtime/* + $(KARAPACE-CLI) $(PYTHON) -m pytest -s -vvv $(PYTEST_ARGS) tests/unit/ + rm -fr runtime/* diff --git a/tests/conftest.py b/tests/conftest.py index d62663633..91fb0b02d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,10 @@ See LICENSE for details """ from avro.compatibility import SchemaCompatibilityResult +from karapace.config import KARAPACE_BASE_CONFIG_YAML_PATH +from karapace.container import KarapaceContainer from pathlib import Path +from schema_registry.container import SchemaRegistryContainer from tempfile import mkstemp from 
typing import Optional @@ -179,3 +182,15 @@ def fixture_tmp_file(): path = Path(str_path) yield path path.unlink() + + +@pytest.fixture(name="karapace_container", scope="session") +def fixture_karapace_container() -> KarapaceContainer: + container = KarapaceContainer() + container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) + return container + + +@pytest.fixture +def schema_registry_container(karapace_container: KarapaceContainer) -> SchemaRegistryContainer: + return SchemaRegistryContainer(karapace_container=karapace_container) diff --git a/tests/unit/backup/test_api.py b/tests/unit/backup/test_api.py index 983beb786..3df4d028d 100644 --- a/tests/unit/backup/test_api.py +++ b/tests/unit/backup/test_api.py @@ -5,7 +5,6 @@ from __future__ import annotations from aiokafka.errors import KafkaError, TopicAlreadyExistsError -from karapace import config from karapace.backup.api import ( _admin, _consumer, @@ -22,6 +21,7 @@ from karapace.backup.errors import BackupError, PartitionCountError from karapace.config import Config from karapace.constants import DEFAULT_SCHEMA_TOPIC +from karapace.container import KarapaceContainer from karapace.kafka.consumer import KafkaConsumer, PartitionMetadata from karapace.kafka.producer import KafkaProducer from pathlib import Path @@ -41,10 +41,12 @@ class TestAdmin: @mock.patch("time.sleep", autospec=True) @patch_admin_new - def test_retries_on_kafka_error(self, admin_new: MagicMock, sleep_mock: MagicMock) -> None: + def test_retries_on_kafka_error( + self, admin_new: MagicMock, sleep_mock: MagicMock, karapace_container: KarapaceContainer + ) -> None: admin_mock = admin_new.return_value admin_new.side_effect = [KafkaError("1"), KafkaError("2"), admin_mock] - with _admin(config.DEFAULTS) as admin: + with _admin(karapace_container.config()) as admin: assert admin is admin_mock assert sleep_mock.call_count == 2 # proof that we waited between retries @@ -56,41 +58,48 @@ def 
test_reraises_unknown_exceptions( admin_new: MagicMock, sleep_mock: MagicMock, e: type[BaseException], + karapace_container: KarapaceContainer, ) -> None: admin_new.side_effect = e - with pytest.raises(e), _admin(config.DEFAULTS): + with pytest.raises(e), _admin(karapace_container.config()): pass assert sleep_mock.call_count == 0 # proof that we did not retry class TestHandleRestoreTopic: @patch_admin_new - def test_calls_admin_create_topics(self, admin_new: MagicMock) -> None: + def test_calls_admin_create_topics(self, admin_new: MagicMock, karapace_container: KarapaceContainer) -> None: new_topic: MagicMock = admin_new.return_value.new_topic topic_configs = {"cleanup.policy": "compact"} - _maybe_create_topic(DEFAULT_SCHEMA_TOPIC, config=config.DEFAULTS, replication_factor=1, topic_configs=topic_configs) + _maybe_create_topic( + DEFAULT_SCHEMA_TOPIC, config=karapace_container.config(), replication_factor=1, topic_configs=topic_configs + ) new_topic.assert_called_once_with( DEFAULT_SCHEMA_TOPIC, num_partitions=1, - replication_factor=config.DEFAULTS["replication_factor"], + replication_factor=karapace_container.config().replication_factor, config=topic_configs, ) @patch_admin_new - def test_gracefully_handles_topic_already_exists_error(self, admin_new: MagicMock) -> None: + def test_gracefully_handles_topic_already_exists_error( + self, admin_new: MagicMock, karapace_container: KarapaceContainer + ) -> None: new_topic: MagicMock = admin_new.return_value.new_topic new_topic.side_effect = TopicAlreadyExistsError() - _maybe_create_topic(DEFAULT_SCHEMA_TOPIC, config=config.DEFAULTS, replication_factor=1, topic_configs={}) + _maybe_create_topic(DEFAULT_SCHEMA_TOPIC, config=karapace_container.config(), replication_factor=1, topic_configs={}) new_topic.assert_called_once() @patch_admin_new - def test_retries_for_kafka_errors(self, admin_new: MagicMock) -> None: + def test_retries_for_kafka_errors(self, admin_new: MagicMock, karapace_container: KarapaceContainer) -> None: 
new_topic: MagicMock = admin_new.return_value.new_topic new_topic.side_effect = [KafkaError("1"), KafkaError("2"), None] with mock.patch("time.sleep", autospec=True): - _maybe_create_topic(DEFAULT_SCHEMA_TOPIC, config=config.DEFAULTS, replication_factor=1, topic_configs={}) + _maybe_create_topic( + DEFAULT_SCHEMA_TOPIC, config=karapace_container.config(), replication_factor=1, topic_configs={} + ) assert new_topic.call_count == 3 @@ -98,17 +107,19 @@ def test_retries_for_kafka_errors(self, admin_new: MagicMock) -> None: def test_noop_for_custom_name_on_legacy_versions( self, admin_new: MagicMock, + karapace_container: KarapaceContainer, ) -> None: new_topic: MagicMock = admin_new.return_value.new_topic assert "custom-name" != DEFAULT_SCHEMA_TOPIC instruction = RestoreTopicLegacy(topic_name="custom-name", partition_count=1) - _handle_restore_topic_legacy(instruction, config.DEFAULTS) + _handle_restore_topic_legacy(instruction, karapace_container.config()) new_topic.assert_not_called() @patch_admin_new def test_allows_custom_name_on_v3( self, admin_new: MagicMock, + karapace_container: KarapaceContainer, ) -> None: new_topic: MagicMock = admin_new.return_value.new_topic topic_name = "custom-name" @@ -117,7 +128,7 @@ def test_allows_custom_name_on_v3( instruction = RestoreTopic( topic_name="custom-name", partition_count=1, replication_factor=2, topic_configs=topic_configs ) - _handle_restore_topic(instruction, config.DEFAULTS) + _handle_restore_topic(instruction, karapace_container.config()) new_topic.assert_called_once_with(topic_name, num_partitions=1, replication_factor=2, config=topic_configs) @@ -125,11 +136,12 @@ def test_allows_custom_name_on_v3( def test_skip_topic_creation( self, admin_new: MagicMock, + karapace_container: KarapaceContainer, ) -> None: new_topic: MagicMock = admin_new.return_value.new_topic _handle_restore_topic( RestoreTopic(topic_name="custom-name", partition_count=1, replication_factor=2, topic_configs={}), - config.DEFAULTS, + 
karapace_container.config(), skip_topic_creation=True, ) _handle_restore_topic_legacy( @@ -137,7 +149,7 @@ def test_skip_topic_creation( topic_name="custom-name", partition_count=1, ), - config.DEFAULTS, + karapace_container.config(), skip_topic_creation=True, ) @@ -171,11 +183,12 @@ def test_auto_closing( client_class: type[KafkaConsumer | KafkaProducer], partitions_method: FunctionType, close_method_name: str, + karapace_container: KarapaceContainer, ) -> None: with mock.patch(f"{client_class.__module__}.{client_class.__qualname__}.__new__", autospec=True) as client_ctor: client_mock = client_ctor.return_value getattr(client_mock, partitions_method.__name__).return_value = self._partition_metadata() - with ctx_mng(config.DEFAULTS, "topic") as client: + with ctx_mng(karapace_container.config(), "topic") as client: assert client is client_mock assert getattr(client_mock, close_method_name).call_count == 1 @@ -194,12 +207,13 @@ def test_raises_partition_count_error_for_unexpected_count( partitions_method: FunctionType, partition_count: int, close_method_name: str, + karapace_container: KarapaceContainer, ) -> None: with mock.patch(f"{client_class.__module__}.{client_class.__qualname__}.__new__", autospec=True) as client_ctor: client_mock = client_ctor.return_value getattr(client_mock, partitions_method.__name__).return_value = self._partition_metadata(partition_count) with pytest.raises(PartitionCountError): - with ctx_mng(config.DEFAULTS, "topic") as client: + with ctx_mng(karapace_container.config(), "topic") as client: assert client == client_mock assert getattr(client_mock, close_method_name).call_count == 1 @@ -271,6 +285,6 @@ def test_returns_option_if_given(self) -> None: fake_config = cast(Config, {}) assert normalize_topic_name("some-topic", fake_config) == "some-topic" - def test_defaults_to_config(self) -> None: - fake_config = cast(Config, {"topic_name": "default-topic"}) + def test_defaults_to_config(self, karapace_container: KarapaceContainer) -> None: 
+ fake_config = karapace_container.config().set_config_defaults({"topic_name": "default-topic"}) assert normalize_topic_name(None, fake_config) == "default-topic" diff --git a/tests/unit/kafka_rest_apis/test_rest_proxy_cluster_metadata_cache.py b/tests/unit/kafka_rest_apis/test_rest_proxy_cluster_metadata_cache.py index b47fb5e02..d1227fbc2 100644 --- a/tests/unit/kafka_rest_apis/test_rest_proxy_cluster_metadata_cache.py +++ b/tests/unit/kafka_rest_apis/test_rest_proxy_cluster_metadata_cache.py @@ -3,7 +3,8 @@ Copyright (c) 2024 Aiven Ltd See LICENSE for details """ -from karapace.config import DEFAULTS + +from karapace.container import KarapaceContainer from karapace.kafka_rest_apis import UserRestProxy from karapace.serialization import SchemaRegistrySerializer from unittest.mock import patch @@ -11,10 +12,10 @@ import copy -def user_rest_proxy(max_age_metadata: int = 5) -> UserRestProxy: - configs = {**DEFAULTS, **{"admin_metadata_max_age": max_age_metadata}} - serializer = SchemaRegistrySerializer(configs) - return UserRestProxy(configs, 1, serializer, auth_expiry=None, verify_connection=False) +def user_rest_proxy(karapace_container: KarapaceContainer, max_age_metadata: int = 5) -> UserRestProxy: + config = karapace_container.config().set_config_defaults({"admin_metadata_max_age": max_age_metadata}) + serializer = SchemaRegistrySerializer(config=config) + return UserRestProxy(config, 1, serializer, auth_expiry=None, verify_connection=False) EMPTY_REPLY = { @@ -158,8 +159,8 @@ def user_rest_proxy(max_age_metadata: int = 5) -> UserRestProxy: } -async def test_cache_is_evicted_after_expiration_global_initially() -> None: - proxy = user_rest_proxy() +async def test_cache_is_evicted_after_expiration_global_initially(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container) with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=EMPTY_REPLY ) as mocked_cluster_metadata: @@ -167,8 +168,8 @@ async def 
test_cache_is_evicted_after_expiration_global_initially() -> None: mocked_cluster_metadata.assert_called_once_with(None) # "initially the metadata are always old" -async def test_no_topic_means_all_metadata() -> None: - proxy = user_rest_proxy() +async def test_no_topic_means_all_metadata(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container) with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=EMPTY_REPLY ) as mocked_cluster_metadata: @@ -176,8 +177,8 @@ async def test_no_topic_means_all_metadata() -> None: mocked_cluster_metadata.assert_called_once_with(None) -async def test_cache_is_evicted_after_expiration_global() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_cache_is_evicted_after_expiration_global(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=EMPTY_REPLY @@ -187,8 +188,8 @@ async def test_cache_is_evicted_after_expiration_global() -> None: mocked_cluster_metadata.assert_called_once_with(None) # "metadata old require a refresh" -async def test_global_cache_is_used_for_single_topic() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_global_cache_is_used_for_single_topic(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=ALL_TOPIC_REQUEST @@ -214,8 +215,8 @@ async def test_global_cache_is_used_for_single_topic() -> None: ), "the result should still be cached since we marked it as ready at time 11 and we are at 14" -async def test_cache_is_evicted_if_one_topic_is_expired() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def 
test_cache_is_evicted_if_one_topic_is_expired(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=ALL_TOPIC_REQUEST @@ -234,8 +235,8 @@ async def test_cache_is_evicted_if_one_topic_is_expired() -> None: assert mocked_cluster_metadata.call_count == 1, "topic_b should be evicted" -async def test_cache_is_evicted_if_a_topic_was_never_queries() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_cache_is_evicted_if_a_topic_was_never_queries(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=ALL_TOPIC_REQUEST @@ -254,8 +255,8 @@ async def test_cache_is_evicted_if_a_topic_was_never_queries() -> None: assert mocked_cluster_metadata.call_count == 1, "topic_b is not present in the cache, should call the refresh" -async def test_cache_is_used_if_topic_requested_is_updated() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_cache_is_used_if_topic_requested_is_updated(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( "karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=TOPIC_REQUEST @@ -272,8 +273,8 @@ async def test_cache_is_used_if_topic_requested_is_updated() -> None: assert mocked_cluster_metadata.call_count == 0, "topic_a cache its present, should be used" -async def test_update_global_cache() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_global_cache(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 with patch( 
"karapace.kafka.admin.KafkaAdminClient.cluster_metadata", return_value=TOPIC_REQUEST @@ -292,8 +293,8 @@ async def test_update_global_cache() -> None: assert mocked_cluster_metadata.call_count == 0, "should call the server since the cache its expired" -async def test_update_topic_cache_do_not_evict_all_the_global_cache() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_topic_cache_do_not_evict_all_the_global_cache(karapace_container: KarapaceContainer) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 proxy._cluster_metadata = ALL_TOPIC_REQUEST proxy._cluster_metadata_topic_birth = {"topic_a": 0, "topic_b": 200, "__consumer_offsets": 200} @@ -317,8 +318,10 @@ async def test_update_topic_cache_do_not_evict_all_the_global_cache() -> None: ), "we should call the server since the previous time of caching for the topic_a was 0" -async def test_update_local_cache_does_not_evict_all_the_global_cache_if_no_new_data() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_local_cache_does_not_evict_all_the_global_cache_if_no_new_data( + karapace_container: KarapaceContainer, +) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 proxy._cluster_metadata_complete = True proxy._cluster_metadata = ALL_TOPIC_REQUEST @@ -346,8 +349,10 @@ async def test_update_local_cache_does_not_evict_all_the_global_cache_if_no_new_ ), "we should call the server since the previous time of caching for the topic_a was 0" -async def test_update_local_cache_not_evict_all_the_global_cache_if_changed_replica_data() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_local_cache_not_evict_all_the_global_cache_if_changed_replica_data( + karapace_container: KarapaceContainer, +) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 
proxy._cluster_metadata_complete = True proxy._cluster_metadata = ALL_TOPIC_REQUEST @@ -360,8 +365,10 @@ async def test_update_local_cache_not_evict_all_the_global_cache_if_changed_repl assert not proxy._cluster_metadata_complete, "new replica data incoming, should update the global metadata next!" -async def test_update_local_cache_not_evict_all_the_global_cache_if_new_topic_data() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_local_cache_not_evict_all_the_global_cache_if_new_topic_data( + karapace_container: KarapaceContainer, +) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 proxy._cluster_metadata_complete = True proxy._cluster_metadata = ALL_TOPIC_REQUEST @@ -374,8 +381,10 @@ async def test_update_local_cache_not_evict_all_the_global_cache_if_new_topic_da assert not proxy._cluster_metadata_complete, "new topic data incoming, should update the global metadata next!" -async def test_update_local_cache_not_evict_all_the_global_cache_if_new_broker_data() -> None: - proxy = user_rest_proxy(max_age_metadata=10) +async def test_update_local_cache_not_evict_all_the_global_cache_if_new_broker_data( + karapace_container: KarapaceContainer, +) -> None: + proxy = user_rest_proxy(karapace_container, max_age_metadata=10) proxy._global_metadata_birth = 0 proxy._cluster_metadata_complete = True proxy._cluster_metadata = ALL_TOPIC_REQUEST diff --git a/tests/unit/protobuf/test_protoc.py b/tests/unit/protobuf/test_protoc.py index f044f1abe..d61648d9e 100644 --- a/tests/unit/protobuf/test_protoc.py +++ b/tests/unit/protobuf/test_protoc.py @@ -2,7 +2,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from karapace import config +from karapace.container import KarapaceContainer from karapace.protobuf.io import calculate_class_name from karapace.protobuf.kotlin_wrapper import trim_margin @@ -14,7 +14,7 @@ log = logging.getLogger(__name__) -def test_protoc() -> None: +def 
test_protoc(karapace_container: KarapaceContainer) -> None: proto: str = """ |syntax = "proto3"; |package com.instaclustr.protobuf; @@ -28,7 +28,7 @@ def test_protoc() -> None: """ proto = trim_margin(proto) - directory = config.DEFAULTS["protobuf_runtime_directory"] + directory = karapace_container.config().protobuf_runtime_directory proto_name = calculate_class_name(str(proto)) proto_path = f"{directory}/{proto_name}.proto" class_path = f"{directory}/{proto_name}_pb2.py" diff --git a/tests/unit/test_authentication.py b/tests/unit/test_authentication.py index 40abc5c01..9834865fb 100644 --- a/tests/unit/test_authentication.py +++ b/tests/unit/test_authentication.py @@ -4,8 +4,9 @@ """ from __future__ import annotations +from collections.abc import Mapping from http import HTTPStatus -from karapace.config import ConfigDefaults, set_config_defaults +from karapace.container import KarapaceContainer from karapace.kafka_rest_apis.authentication import ( get_auth_config_from_header, get_expiration_time_from_header, @@ -13,6 +14,7 @@ SimpleOauthTokenProvider, ) from karapace.rapu import HTTPResponse, JSON_CONTENT_TYPE +from typing import Any import base64 import datetime @@ -31,11 +33,11 @@ def _assert_unauthorized_http_response(http_response: HTTPResponse) -> None: "auth_header", (None, "Digest foo=bar"), ) -def test_get_auth_config_from_header_raises_unauthorized_on_invalid_header(auth_header: str | None) -> None: - config = set_config_defaults({}) - +def test_get_auth_config_from_header_raises_unauthorized_on_invalid_header( + karapace_container: KarapaceContainer, auth_header: str | None +) -> None: with pytest.raises(HTTPResponse) as exc_info: - get_auth_config_from_header(auth_header, config) + get_auth_config_from_header(auth_header, karapace_container.config()) _assert_unauthorized_http_response(exc_info.value) @@ -66,9 +68,12 @@ def test_get_auth_config_from_header_raises_unauthorized_on_invalid_header(auth_ ), ) def test_get_auth_config_from_header( - 
auth_header: str, config_override: ConfigDefaults, expected_auth_config: ConfigDefaults + karapace_container: KarapaceContainer, + auth_header: str, + config_override: Mapping[str, Any], + expected_auth_config: Mapping[str, Any], ) -> None: - config = set_config_defaults(config_override) + config = karapace_container.config().set_config_defaults(new_config=config_override) auth_config = get_auth_config_from_header(auth_header, config) assert auth_config == expected_auth_config @@ -109,9 +114,11 @@ def test_simple_oauth_token_provider_returns_configured_token_and_expiry() -> No assert token_provider.token_with_expiry() == (token, expiry_timestamp) -def test_get_client_auth_parameters_from_config_sasl_plain() -> None: - config = set_config_defaults( - {"sasl_mechanism": "PLAIN", "sasl_plain_username": "username", "sasl_plain_password": "password"} +def test_get_client_auth_parameters_from_config_sasl_plain( + karapace_container: KarapaceContainer, +) -> None: + config = karapace_container.config().set_config_defaults( + new_config={"sasl_mechanism": "PLAIN", "sasl_plain_username": "username", "sasl_plain_password": "password"}, ) client_auth_params = get_kafka_client_auth_parameters_from_config(config) @@ -123,10 +130,14 @@ def test_get_client_auth_parameters_from_config_sasl_plain() -> None: } -def test_get_client_auth_parameters_from_config_oauth() -> None: +def test_get_client_auth_parameters_from_config_oauth( + karapace_container: KarapaceContainer, +) -> None: expiry_timestamp = 1697013997 token = jwt.encode({"exp": expiry_timestamp}, "secret") - config = set_config_defaults({"sasl_mechanism": "OAUTHBEARER", "sasl_oauth_token": token}) + config = karapace_container.config().set_config_defaults( + new_config={"sasl_mechanism": "OAUTHBEARER", "sasl_oauth_token": token} + ) client_auth_params = get_kafka_client_auth_parameters_from_config(config) diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index b8475e1c6..79ce7da78 100644 --- 
a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -4,55 +4,55 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from karapace.config import set_config_defaults from karapace.constants import DEFAULT_AIOHTTP_CLIENT_MAX_SIZE, DEFAULT_PRODUCER_MAX_REQUEST +from karapace.container import KarapaceContainer -def test_http_request_max_size() -> None: - config = set_config_defaults( +def test_http_request_max_size(karapace_container: KarapaceContainer) -> None: + config = karapace_container.config().set_config_defaults( { "karapace_rest": False, "producer_max_request_size": DEFAULT_PRODUCER_MAX_REQUEST + 1024, } ) - assert config["http_request_max_size"] == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + assert config.http_request_max_size == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - config = set_config_defaults( + config = karapace_container.config().set_config_defaults( { "karapace_rest": False, "http_request_max_size": 1024, } ) - assert config["http_request_max_size"] == 1024 + assert config.http_request_max_size == 1024 - config = set_config_defaults( + config = karapace_container.config().set_config_defaults( { "karapace_rest": True, } ) - assert config["http_request_max_size"] == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + assert config.http_request_max_size == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - config = set_config_defaults( + config = karapace_container.config().set_config_defaults( { "karapace_rest": True, "producer_max_request_size": 1024, } ) - assert config["http_request_max_size"] == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + assert config.http_request_max_size == DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - config = set_config_defaults( + config = karapace_container.config().set_config_defaults( { "karapace_rest": True, "producer_max_request_size": DEFAULT_PRODUCER_MAX_REQUEST + 1024, } ) - assert config["http_request_max_size"] == DEFAULT_PRODUCER_MAX_REQUEST + 1024 + DEFAULT_AIOHTTP_CLIENT_MAX_SIZE + assert config.http_request_max_size == DEFAULT_PRODUCER_MAX_REQUEST + 1024 + 
DEFAULT_AIOHTTP_CLIENT_MAX_SIZE - config = set_config_defaults( + config = karapace_container.config().set_config_defaults( { "karapace_rest": True, "producer_max_request_size": DEFAULT_PRODUCER_MAX_REQUEST + 1024, "http_request_max_size": 1024, } ) - assert config["http_request_max_size"] == 1024 + assert config.http_request_max_size == 1024 diff --git a/tests/unit/test_in_memory_database.py b/tests/unit/test_in_memory_database.py index a3720940d..2a0156567 100644 --- a/tests/unit/test_in_memory_database.py +++ b/tests/unit/test_in_memory_database.py @@ -7,8 +7,8 @@ from collections import defaultdict from collections.abc import Iterable, Sequence from confluent_kafka.cimpl import KafkaError -from karapace.config import DEFAULTS from karapace.constants import DEFAULT_SCHEMA_TOPIC +from karapace.container import KarapaceContainer from karapace.in_memory_database import InMemoryDatabase, KarapaceDatabase, Subject, SubjectData from karapace.kafka.types import Timestamp from karapace.key_format import KeyFormatter @@ -214,7 +214,7 @@ def compute_schema_id_to_subjects( return schema_id_to_duplicated_subjects -def test_can_ingest_schemas_from_log() -> None: +def test_can_ingest_schemas_from_log(karapace_container: KarapaceContainer) -> None: """ Test for the consistency of a backup, this checks that each SchemaID its unique in the backup. 
The format of the log its the one obtained by running: @@ -228,7 +228,7 @@ def test_can_ingest_schemas_from_log() -> None: database = WrappedInMemoryDatabase() schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=OffsetWatcher(), key_formatter=KeyFormatter(), master_coordinator=None, diff --git a/tests/unit/test_kafka_error_handler.py b/tests/unit/test_kafka_error_handler.py index 45e9fea1b..183205137 100644 --- a/tests/unit/test_kafka_error_handler.py +++ b/tests/unit/test_kafka_error_handler.py @@ -3,6 +3,7 @@ See LICENSE for details """ from _pytest.logging import LogCaptureFixture +from karapace.container import KarapaceContainer from karapace.errors import CorruptKafkaRecordException from karapace.kafka_error_handler import KafkaErrorHandler, KafkaErrorLocation @@ -12,11 +13,13 @@ @pytest.fixture(name="kafka_error_handler") -def fixture_kafka_error_handler() -> KafkaErrorHandler: - config = { - "kafka_schema_reader_strict_mode": False, - "kafka_retriable_errors_silenced": True, - } +def fixture_kafka_error_handler(karapace_container: KarapaceContainer) -> KafkaErrorHandler: + config = karapace_container.config().set_config_defaults( + { + "kafka_schema_reader_strict_mode": False, + "kafka_retriable_errors_silenced": True, + } + ) return KafkaErrorHandler(config=config) diff --git a/tests/unit/test_protobuf_serialization.py b/tests/unit/test_protobuf_serialization.py index ee2586d63..1cb013538 100644 --- a/tests/unit/test_protobuf_serialization.py +++ b/tests/unit/test_protobuf_serialization.py @@ -2,7 +2,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from karapace.config import read_config +from karapace.container import KarapaceContainer from karapace.dependency import Dependency from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.schema_models import ParsedTypedSchema, SchemaType, Versioner @@ -11,11 +11,11 @@ InvalidMessageHeader, InvalidMessageSchema, InvalidPayload, + 
SchemaRegistryClient, SchemaRegistrySerializer, START_BYTE, ) from karapace.typing import Subject -from pathlib import Path from tests.utils import schema_protobuf, test_fail_objects_protobuf, test_objects_protobuf from unittest.mock import call, Mock @@ -27,16 +27,16 @@ log = logging.getLogger(__name__) -async def make_ser_deser(config_path: str, mock_client) -> SchemaRegistrySerializer: - with open(config_path, encoding="utf8") as handler: - config = read_config(handler) - serializer = SchemaRegistrySerializer(config=config) +async def make_ser_deser( + karapace_container: KarapaceContainer, mock_client: SchemaRegistryClient +) -> SchemaRegistrySerializer: + serializer = SchemaRegistrySerializer(config=karapace_container.config()) await serializer.registry_client.close() serializer.registry_client = mock_client return serializer -async def test_happy_flow(default_config_path: Path): +async def test_happy_flow(karapace_container: KarapaceContainer): mock_protobuf_registry_client = Mock() schema_for_id_one_future = asyncio.Future() schema_for_id_one_future.set_result( @@ -49,7 +49,7 @@ async def test_happy_flow(default_config_path: Path): ) mock_protobuf_registry_client.get_schema.return_value = get_latest_schema_future - serializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + serializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) assert len(serializer.ids_to_schemas) == 0 schema = await serializer.get_schema_for_subject("top") for o in test_objects_protobuf: @@ -62,7 +62,7 @@ async def test_happy_flow(default_config_path: Path): assert mock_protobuf_registry_client.method_calls == [call.get_schema("top"), call.get_schema_for_id(1)] -async def test_happy_flow_references(default_config_path: Path): +async def test_happy_flow_references(karapace_container: KarapaceContainer): no_ref_schema_str = """ |syntax = "proto3"; | @@ -117,7 +117,7 @@ async def test_happy_flow_references(default_config_path: Path): 
get_latest_schema_future.set_result((1, ref_schema, Versioner.V(1))) mock_protobuf_registry_client.get_schema.return_value = get_latest_schema_future - serializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + serializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) assert len(serializer.ids_to_schemas) == 0 schema = await serializer.get_schema_for_subject("top") for o in test_objects: @@ -130,7 +130,7 @@ async def test_happy_flow_references(default_config_path: Path): assert mock_protobuf_registry_client.method_calls == [call.get_schema("top"), call.get_schema_for_id(1)] -async def test_happy_flow_references_two(default_config_path: Path): +async def test_happy_flow_references_two(karapace_container: KarapaceContainer): no_ref_schema_str = """ |syntax = "proto3"; | @@ -204,7 +204,7 @@ async def test_happy_flow_references_two(default_config_path: Path): get_latest_schema_future.set_result((1, ref_schema_two, Versioner.V(1))) mock_protobuf_registry_client.get_schema.return_value = get_latest_schema_future - serializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + serializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) assert len(serializer.ids_to_schemas) == 0 schema = await serializer.get_schema_for_subject("top") for o in test_objects: @@ -217,7 +217,7 @@ async def test_happy_flow_references_two(default_config_path: Path): assert mock_protobuf_registry_client.method_calls == [call.get_schema("top"), call.get_schema_for_id(1)] -async def test_serialization_fails(default_config_path: Path): +async def test_serialization_fails(karapace_container: KarapaceContainer): mock_protobuf_registry_client = Mock() get_latest_schema_future = asyncio.Future() get_latest_schema_future.set_result( @@ -225,7 +225,7 @@ async def test_serialization_fails(default_config_path: Path): ) mock_protobuf_registry_client.get_schema.return_value = get_latest_schema_future - 
serializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + serializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) with pytest.raises(InvalidMessageSchema): schema = await serializer.get_schema_for_subject("top") await serializer.serialize(schema, test_fail_objects_protobuf[0]) @@ -240,10 +240,10 @@ async def test_serialization_fails(default_config_path: Path): assert mock_protobuf_registry_client.method_calls == [call.get_schema("top")] -async def test_deserialization_fails(default_config_path: Path): +async def test_deserialization_fails(karapace_container: KarapaceContainer): mock_protobuf_registry_client = Mock() - deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + deserializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) invalid_header_payload = struct.pack(">bII", 1, 500, 500) with pytest.raises(InvalidMessageHeader): await deserializer.deserialize(invalid_header_payload) @@ -259,10 +259,10 @@ async def test_deserialization_fails(default_config_path: Path): assert mock_protobuf_registry_client.method_calls == [call.get_schema_for_id(500)] -async def test_deserialization_fails2(default_config_path: Path): +async def test_deserialization_fails2(karapace_container: KarapaceContainer): mock_protobuf_registry_client = Mock() - deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + deserializer = await make_ser_deser(karapace_container, mock_protobuf_registry_client) invalid_header_payload = struct.pack(">bII", 1, 500, 500) with pytest.raises(InvalidMessageHeader): await deserializer.deserialize(invalid_header_payload) diff --git a/tests/unit/test_rapu.py b/tests/unit/test_rapu.py index cde68e2be..ba5c77e8c 100644 --- a/tests/unit/test_rapu.py +++ b/tests/unit/test_rapu.py @@ -5,7 +5,7 @@ from _pytest.logging import LogCaptureFixture from aiohttp.client_exceptions import ClientConnectionError from 
aiohttp.web import Request -from karapace.config import DEFAULTS +from karapace.container import KarapaceContainer from karapace.karapace import KarapaceBase from karapace.rapu import HTTPRequest, REST_ACCEPT_RE, REST_CONTENT_TYPE_RE from karapace.statsd import StatsClient @@ -167,12 +167,14 @@ def test_content_type_re(): @pytest.mark.parametrize("connection_error", (ConnectionError(), ClientConnectionError())) -async def test_raise_connection_error_handling(connection_error: BaseException) -> None: +async def test_raise_connection_error_handling( + karapace_container: KarapaceContainer, connection_error: BaseException +) -> None: request_mock = Mock(spec=Request) request_mock.read.side_effect = connection_error callback_mock = Mock() - app = KarapaceBase(config=DEFAULTS) + app = KarapaceBase(config=karapace_container.config()) response = await app._handle_request( # pylint: disable=protected-access request=request_mock, @@ -185,8 +187,8 @@ async def test_raise_connection_error_handling(connection_error: BaseException) callback_mock.assert_not_called() -async def test_close_by_app(caplog: LogCaptureFixture) -> None: - app = KarapaceBase(config=DEFAULTS) +async def test_close_by_app(caplog: LogCaptureFixture, karapace_container: KarapaceContainer) -> None: + app = KarapaceBase(config=karapace_container.config()) app.stats = Mock(spec=StatsClient) with caplog.at_level(logging.WARNING, logger="karapace.rapu"): diff --git a/tests/unit/test_rest_auth.py b/tests/unit/test_rest_auth.py index 86bb14b8a..ad2d54057 100644 --- a/tests/unit/test_rest_auth.py +++ b/tests/unit/test_rest_auth.py @@ -5,7 +5,7 @@ """ from __future__ import annotations -from karapace.config import set_config_defaults +from karapace.container import KarapaceContainer from karapace.kafka_rest_apis import AUTH_EXPIRY_TOLERANCE, KafkaRest, UserRestProxy from unittest.mock import call, Mock @@ -34,8 +34,8 @@ def _create_mock_proxy( return proxy -async def test_rest_proxy_janitor_expiring_credentials() -> 
None: - config = set_config_defaults( +async def test_rest_proxy_janitor_expiring_credentials(karapace_container: KarapaceContainer) -> None: + config = karapace_container.config().set_config_defaults( { "rest_authorization": True, "sasl_bootstrap_uri": "localhost:9094", @@ -92,8 +92,8 @@ async def test_rest_proxy_janitor_expiring_credentials() -> None: assert unused_proxy_expiring_later_than_tolerance.method_calls == [call.num_consumers(), call.aclose()] -async def test_rest_proxy_janitor_default() -> None: - config = set_config_defaults( +async def test_rest_proxy_janitor_default(karapace_container: KarapaceContainer) -> None: + config = karapace_container.config().set_config_defaults( { "rest_authorization": True, "sasl_bootstrap_uri": "localhost:9094", @@ -148,8 +148,8 @@ async def test_rest_proxy_janitor_default() -> None: assert active_proxy_with_consumers.method_calls == [call.num_consumers()] -async def test_rest_proxy_janitor_destructive() -> None: - config = set_config_defaults( +async def test_rest_proxy_janitor_destructive(karapace_container: KarapaceContainer) -> None: + config = karapace_container.config().set_config_defaults( { "rest_authorization": True, "sasl_bootstrap_uri": "localhost:9094", diff --git a/tests/unit/test_schema_reader.py b/tests/unit/test_schema_reader.py index d500c5b03..1134b6ae8 100644 --- a/tests/unit/test_schema_reader.py +++ b/tests/unit/test_schema_reader.py @@ -9,7 +9,7 @@ from concurrent.futures import Future, ThreadPoolExecutor from confluent_kafka import Message from dataclasses import dataclass -from karapace.config import DEFAULTS +from karapace.container import KarapaceContainer from karapace.errors import CorruptKafkaRecordException, ShutdownException from karapace.in_memory_database import InMemoryDatabase from karapace.kafka.consumer import KafkaConsumer @@ -154,7 +154,7 @@ class ReadinessTestCase(BaseTestCase): ), ], ) -def test_readiness_check(testcase: ReadinessTestCase) -> None: +def 
test_readiness_check(testcase: ReadinessTestCase, karapace_container: KarapaceContainer) -> None: key_formatter_mock = Mock() consumer_mock = Mock() consumer_mock.consume.return_value = [] @@ -163,7 +163,7 @@ def test_readiness_check(testcase: ReadinessTestCase) -> None: offset_watcher = OffsetWatcher() schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=offset_watcher, key_formatter=key_formatter_mock, master_coordinator=None, @@ -176,7 +176,7 @@ def test_readiness_check(testcase: ReadinessTestCase) -> None: assert schema_reader.ready() is testcase.expected -def test_num_max_messages_to_consume_moved_to_one_after_ready() -> None: +def test_num_max_messages_to_consume_moved_to_one_after_ready(karapace_container: KarapaceContainer) -> None: key_formatter_mock = Mock() consumer_mock = Mock() consumer_mock.consume.return_value = [] @@ -185,7 +185,7 @@ def test_num_max_messages_to_consume_moved_to_one_after_ready() -> None: offset_watcher = OffsetWatcher() schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=offset_watcher, key_formatter=key_formatter_mock, master_coordinator=None, @@ -200,7 +200,9 @@ def test_num_max_messages_to_consume_moved_to_one_after_ready() -> None: assert schema_reader.max_messages_to_process == MAX_MESSAGES_TO_CONSUME_AFTER_STARTUP -def test_schema_reader_can_end_to_ready_state_if_last_message_is_invalid_in_schemas_topic() -> None: +def test_schema_reader_can_end_to_ready_state_if_last_message_is_invalid_in_schemas_topic( + karapace_container: KarapaceContainer, +) -> None: key_formatter_mock = Mock(spec=KeyFormatter) consumer_mock = Mock(spec=KafkaConsumer) @@ -230,7 +232,7 @@ def test_schema_reader_can_end_to_ready_state_if_last_message_is_invalid_in_sche offset_watcher = OffsetWatcher() schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=offset_watcher, 
key_formatter=key_formatter_mock, master_coordinator=None, @@ -255,7 +257,7 @@ def test_schema_reader_can_end_to_ready_state_if_last_message_is_invalid_in_sche assert schema_reader.max_messages_to_process == MAX_MESSAGES_TO_CONSUME_AFTER_STARTUP -def test_soft_deleted_schema_storing() -> None: +def test_soft_deleted_schema_storing(karapace_container: KarapaceContainer) -> None: """This tests a case when _schemas has been compacted and only the soft deleted version of the schema is present. """ @@ -287,7 +289,7 @@ def test_soft_deleted_schema_storing() -> None: offset_watcher = OffsetWatcher() schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=offset_watcher, key_formatter=key_formatter_mock, master_coordinator=None, @@ -302,14 +304,14 @@ def test_soft_deleted_schema_storing() -> None: assert soft_deleted_stored_schema is not None -def test_handle_msg_delete_subject_logs(caplog: LogCaptureFixture) -> None: +def test_handle_msg_delete_subject_logs(caplog: LogCaptureFixture, karapace_container: KarapaceContainer) -> None: database_mock = Mock(spec=InMemoryDatabase) database_mock.find_subject.return_value = True database_mock.find_subject_schemas.return_value = { Version(1): "SchemaVersion" } # `SchemaVersion` is an actual object, simplified for test schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=OffsetWatcher(), key_formatter=KeyFormatter(), master_coordinator=None, @@ -376,7 +378,9 @@ class HealthCheckTestCase(BaseTestCase): ), ], ) -async def test_schema_reader_health_check(testcase: HealthCheckTestCase, monkeypatch: MonkeyPatch) -> None: +async def test_schema_reader_health_check( + testcase: HealthCheckTestCase, monkeypatch: MonkeyPatch, karapace_container: KarapaceContainer +) -> None: offset_watcher = OffsetWatcher() key_formatter_mock = Mock() admin_client_mock = Mock() @@ -386,10 +390,10 @@ async def test_schema_reader_health_check(testcase: 
HealthCheckTestCase, monkeyp emtpy_future.set_exception(testcase.check_topic_error) else: emtpy_future.set_result(None) - admin_client_mock.describe_topics.return_value = {DEFAULTS["topic_name"]: emtpy_future} + admin_client_mock.describe_topics.return_value = {karapace_container.config().topic_name: emtpy_future} schema_reader = KafkaSchemaReader( - config=DEFAULTS, + config=karapace_container.config(), offset_watcher=offset_watcher, key_formatter=key_formatter_mock, master_coordinator=None, @@ -415,7 +419,9 @@ class KafkaMessageHandlingErrorTestCase(BaseTestCase): @pytest.fixture(name="schema_reader_with_consumer_messages_factory") -def fixture_schema_reader_with_consumer_messages_factory() -> Callable[[tuple[list[Message]]], KafkaSchemaReader]: +def fixture_schema_reader_with_consumer_messages_factory( + karapace_container: KarapaceContainer, +) -> Callable[[tuple[list[Message]]], KafkaSchemaReader]: def factory(consumer_messages: tuple[list[Message]]) -> KafkaSchemaReader: key_formatter_mock = Mock(spec=KeyFormatter) consumer_mock = Mock(spec=KafkaConsumer) @@ -425,8 +431,7 @@ def factory(consumer_messages: tuple[list[Message]]) -> KafkaSchemaReader: consumer_mock.get_watermark_offsets.return_value = (0, 4) # Update the config to run the schema reader in strict mode so errors can be raised - config = DEFAULTS.copy() - config["kafka_schema_reader_strict_mode"] = True + config = karapace_container.config().set_config_defaults({"kafka_schema_reader_strict_mode": True}) offset_watcher = OffsetWatcher() schema_reader = KafkaSchemaReader( diff --git a/tests/unit/test_schema_registry_api.py b/tests/unit/test_schema_registry_api.py index b4d87f35b..f21f47097 100644 --- a/tests/unit/test_schema_registry_api.py +++ b/tests/unit/test_schema_registry_api.py @@ -2,64 +2,79 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from aiohttp.test_utils import TestClient, TestServer -from karapace.config import DEFAULTS, set_config_defaults +from fastapi.exceptions 
import HTTPException from karapace.rapu import HTTPResponse +from karapace.schema_models import SchemaType, ValidatedTypedSchema from karapace.schema_reader import KafkaSchemaReader -from karapace.schema_registry import KarapaceSchemaRegistry -from karapace.schema_registry_apis import KarapaceSchemaRegistryController -from unittest.mock import ANY, AsyncMock, Mock, patch, PropertyMock +from schema_registry.container import SchemaRegistryContainer +from unittest.mock import Mock, patch, PropertyMock import asyncio +import json import pytest +TYPED_AVRO_SCHEMA = ValidatedTypedSchema.parse( + SchemaType.AVRO, + json.dumps( + { + "namespace": "io.aiven.data", + "name": "Test", + "type": "record", + "fields": [ + { + "name": "attr1", + "type": ["null", "string"], + } + ], + } + ), +) -async def test_validate_schema_request_body() -> None: - controller = KarapaceSchemaRegistryController(config=set_config_defaults(DEFAULTS)) - controller._validate_schema_request_body( # pylint: disable=W0212 - "application/json", {"schema": "{}", "schemaType": "JSON", "references": [], "metadata": {}, "ruleSet": {}} +async def test_validate_schema_request_body(schema_registry_container: SchemaRegistryContainer) -> None: + schema_registry_container.schema_registry_controller()._validate_schema_type( # pylint: disable=W0212 + {"schema": "{}", "schemaType": "JSON", "references": [], "metadata": {}, "ruleSet": {}} ) - with pytest.raises(HTTPResponse) as exc_info: - controller._validate_schema_request_body( # pylint: disable=W0212 - "application/json", - {"schema": "{}", "schemaType": "JSON", "references": [], "unexpected_field_name": {}, "ruleSet": {}}, + with pytest.raises(HTTPException) as exc_info: + schema_registry_container.schema_registry_controller()._validate_schema_type( # pylint: disable=W0212 + {"schema": "{}", "schemaType": "DOES_NOT_EXIST", "references": [], "unexpected_field_name": {}, "ruleSet": {}}, ) - assert exc_info.type is HTTPResponse - assert str(exc_info.value) == 
"HTTPResponse 422" + assert exc_info.type is HTTPException + assert str(exc_info.value) == "422: {'error_code': 422, 'message': 'Invalid schemaType DOES_NOT_EXIST'}" -async def test_forward_when_not_ready() -> None: - with patch("karapace.schema_registry_apis.KarapaceSchemaRegistry") as schema_registry_class: +async def test_forward_when_not_ready(schema_registry_container: SchemaRegistryContainer) -> None: + with patch("karapace.container.KarapaceSchemaRegistry") as schema_registry_class: schema_reader_mock = Mock(spec=KafkaSchemaReader) - ready_property_mock = PropertyMock(return_value=lambda: False) - schema_registry = AsyncMock(spec=KarapaceSchemaRegistry) + ready_property_mock = PropertyMock(return_value=False) type(schema_reader_mock).ready = ready_property_mock - schema_registry.schema_reader = schema_reader_mock - schema_registry_class.return_value = schema_registry + schema_registry_class.schema_reader = schema_reader_mock - schema_registry.get_master.return_value = (False, "http://primary-url") + schema_registry_class.schemas_get.return_value = TYPED_AVRO_SCHEMA + schema_registry_class.get_master.return_value = (False, "http://primary-url") close_future_result = asyncio.Future() close_future_result.set_result(True) close_func = Mock() close_func.return_value = close_future_result - schema_registry.close = close_func + schema_registry_class.close = close_func + + schema_registry_container.karapace_container().schema_registry = schema_registry_class + controller = schema_registry_container.schema_registry_controller() + controller.schema_registry = schema_registry_class - controller = KarapaceSchemaRegistryController(config=set_config_defaults(DEFAULTS)) mock_forward_func_future = asyncio.Future() mock_forward_func_future.set_exception(HTTPResponse({"mock": "response"})) mock_forward_func = Mock() mock_forward_func.return_value = mock_forward_func_future controller._forward_request_remote = mock_forward_func # pylint: disable=protected-access - test_server 
= TestServer(controller.app) - async with TestClient(test_server) as client: - await client.get("/schemas/ids/1", headers={"Content-Type": "application/json"}) - - ready_property_mock.assert_called_once() - schema_registry.get_master.assert_called_once() - mock_forward_func.assert_called_once_with( - request=ANY, body=None, url="http://primary-url/schemas/ids/1", content_type="application/json", method="GET" - ) + assert await controller.schemas_get( + schema_id=1, + include_subjects=False, + fetch_max_id=False, + format_serialized="", + user=None, + authorizer=None, + ) diff --git a/tests/unit/test_serialization.py b/tests/unit/test_serialization.py index a21d3bc00..041df44ab 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -2,8 +2,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from karapace.client import Path -from karapace.config import DEFAULTS, read_config +from karapace.container import KarapaceContainer from karapace.schema_models import SchemaType, ValidatedTypedSchema, Versioner from karapace.serialization import ( flatten_unions, @@ -12,6 +11,7 @@ InvalidMessageHeader, InvalidMessageSchema, InvalidPayload, + SchemaRegistryClient, SchemaRegistrySerializer, START_BYTE, write_value, @@ -109,16 +109,16 @@ ) -async def make_ser_deser(config_path: str, mock_client) -> SchemaRegistrySerializer: - with open(config_path, encoding="utf8") as handler: - config = read_config(handler) - serializer = SchemaRegistrySerializer(config=config) +async def make_ser_deser( + karapace_container: KarapaceContainer, mock_client: SchemaRegistryClient +) -> SchemaRegistrySerializer: + serializer = SchemaRegistrySerializer(config=karapace_container.config()) await serializer.registry_client.close() serializer.registry_client = mock_client return serializer -async def test_happy_flow(default_config_path: Path): +async def test_happy_flow(karapace_container: KarapaceContainer): mock_registry_client = Mock() 
get_latest_schema_future = asyncio.Future() get_latest_schema_future.set_result((1, ValidatedTypedSchema.parse(SchemaType.AVRO, schema_avro_json), Versioner.V(1))) @@ -127,7 +127,7 @@ async def test_happy_flow(default_config_path: Path): schema_for_id_one_future.set_result((ValidatedTypedSchema.parse(SchemaType.AVRO, schema_avro_json), [Subject("stub")])) mock_registry_client.get_schema_for_id.return_value = schema_for_id_one_future - serializer = await make_ser_deser(default_config_path, mock_registry_client) + serializer = await make_ser_deser(karapace_container, mock_registry_client) assert len(serializer.ids_to_schemas) == 0 schema = await serializer.get_schema_for_subject(Subject("top")) for o in test_objects_avro: @@ -213,7 +213,7 @@ def test_flatten_unions_map() -> None: assert flatten_unions(typed_schema.schema, record) == flatten_record -def test_avro_json_write_invalid() -> None: +def test_avro_json_write_invalid(karapace_container: KarapaceContainer) -> None: schema = { "namespace": "io.aiven.data", "name": "Test", @@ -236,10 +236,10 @@ def test_avro_json_write_invalid() -> None: for record in records: with pytest.raises(avro.errors.AvroTypeException): - write_value(DEFAULTS, typed_schema, bio, record) + write_value(karapace_container.config(), typed_schema, bio, record) -def test_avro_json_write_accepts_json_encoded_data_without_tagged_unions() -> None: +def test_avro_json_write_accepts_json_encoded_data_without_tagged_unions(karapace_container: KarapaceContainer) -> None: """Backwards compatibility test for Avro data using JSON encoding. 
The initial behavior of the API was incorrect, and it accept data with @@ -299,24 +299,24 @@ def test_avro_json_write_accepts_json_encoded_data_without_tagged_unions() -> No buffer_a = io.BytesIO() buffer_b = io.BytesIO() - write_value(DEFAULTS, typed_schema, buffer_a, properly_tagged_encoding_a) - write_value(DEFAULTS, typed_schema, buffer_b, missing_tag_encoding_a) + write_value(karapace_container.config(), typed_schema, buffer_a, properly_tagged_encoding_a) + write_value(karapace_container.config(), typed_schema, buffer_b, missing_tag_encoding_a) assert buffer_a.getbuffer() == buffer_b.getbuffer() buffer_a = io.BytesIO() buffer_b = io.BytesIO() - write_value(DEFAULTS, typed_schema, buffer_a, properly_tagged_encoding_b) - write_value(DEFAULTS, typed_schema, buffer_b, missing_tag_encoding_b) + write_value(karapace_container.config(), typed_schema, buffer_a, properly_tagged_encoding_b) + write_value(karapace_container.config(), typed_schema, buffer_b, missing_tag_encoding_b) assert buffer_a.getbuffer() == buffer_b.getbuffer() -async def test_serialization_fails(default_config_path: Path): +async def test_serialization_fails(karapace_container: KarapaceContainer): mock_registry_client = Mock() get_latest_schema_future = asyncio.Future() get_latest_schema_future.set_result((1, ValidatedTypedSchema.parse(SchemaType.AVRO, schema_avro_json), Versioner.V(1))) mock_registry_client.get_schema.return_value = get_latest_schema_future - serializer = await make_ser_deser(default_config_path, mock_registry_client) + serializer = await make_ser_deser(karapace_container, mock_registry_client) with pytest.raises(InvalidMessageSchema): schema = await serializer.get_schema_for_subject(Subject("topic")) await serializer.serialize(schema, {"foo": "bar"}) @@ -324,13 +324,13 @@ async def test_serialization_fails(default_config_path: Path): assert mock_registry_client.method_calls == [call.get_schema("topic")] -async def test_deserialization_fails(default_config_path: Path): +async def 
test_deserialization_fails(karapace_container: KarapaceContainer): mock_registry_client = Mock() schema_for_id_one_future = asyncio.Future() schema_for_id_one_future.set_result((ValidatedTypedSchema.parse(SchemaType.AVRO, schema_avro_json), [Subject("stub")])) mock_registry_client.get_schema_for_id.return_value = schema_for_id_one_future - deserializer = await make_ser_deser(default_config_path, mock_registry_client) + deserializer = await make_ser_deser(karapace_container, mock_registry_client) invalid_header_payload = struct.pack(">bII", 1, 500, 500) with pytest.raises(InvalidMessageHeader): await deserializer.deserialize(invalid_header_payload) From c36507e7cbb24bba2e4773a0821b335ca26b3eea Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 12:11:49 +0100 Subject: [PATCH 10/11] feat: github actions to run docker unit tests --- .github/workflows/tests.yml | 43 +++++++++++++++++++++++++++++++++++-- bin/smoke-test-registry.sh | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3423067f9..1ce27c993 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ env: KARAPACE_DOTENV: ${{ github.workspace }}/karapace.config.env jobs: - tests: + unit-tests: runs-on: ubuntu-latest strategy: matrix: @@ -41,11 +41,49 @@ jobs: with: go-version: '1.21.0' + - name: Install requirements + run: make install + + - name: Resolve Karapace version + run: | + source ./venv/bin/activate + KARAPACE_VERSION=$(python -c "from karapace import version; print(version.__version__)") + echo KARAPACE_VERSION=$KARAPACE_VERSION >> $GITHUB_ENV + + - name: Run containers + run: KARAPACE_VERSION=${{ env.KARAPACE_VERSION }} docker compose --file=container/compose.yml up --build --wait --detach + - run: make install-dev - - run: make unit-tests + - run: make unit-tests-in-docker env: COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" PYTEST_ARGS: "--cov=src 
--cov-append --numprocesses 4" + KARAPACE_VERSION=: ${{ env.KARAPACE_VERSION }} + + integration-tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ '3.10', '3.11', '3.12' ] + env: + PYTEST_ADDOPTS: >- + --log-dir=/tmp/ci-logs + --log-file=/tmp/ci-logs/pytest.log + --showlocals + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + cache: pip + python-version: ${{ matrix.python-version }} + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21.0' + - run: make integration-tests env: COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" @@ -57,6 +95,7 @@ jobs: with: name: karapace-integration-test-logs-${{ matrix.python-version }} path: /tmp/ci-logs + - name: Archive coverage file uses: actions/upload-artifact@v4 with: diff --git a/bin/smoke-test-registry.sh b/bin/smoke-test-registry.sh index 71f4e4fc7..aab54ac76 100755 --- a/bin/smoke-test-registry.sh +++ b/bin/smoke-test-registry.sh @@ -6,6 +6,7 @@ for ((i = 0; i <= retries; i++)); do response=$( curl --silent --verbose --fail --request POST \ --header 'Content-Type: application/vnd.schemaregistry.v1+json' \ + --header 'Authorization: Basic Og==' \ --data '{"schema": "{\"type\": \"record\", \"name\": \"Obj\", \"fields\":[{\"name\": \"age\", \"type\": \"int\"}]}"}' \ http://localhost:8081/subjects/test-key/versions ) From af56c95627d730be20b37e98663041aacd744a0c Mon Sep 17 00:00:00 2001 From: Emmanuel Evbuomwan Date: Wed, 27 Nov 2024 12:40:59 +0100 Subject: [PATCH 11/11] feat: e2e tests within docker - we split out e2e tests into own folder - e2e tests will run against the service in docker compose - we start by splitting out the prometheus and kafka e2e tests which do not need local running services --- .coveragerc | 6 +- .dockerignore | 1 - .github/workflows/container-smoke-test.yml | 20 +- .github/workflows/tests.yml | 65 +- .gitignore | 5 +- GNUmakefile | 37 +- ...-test-rest.sh => 
smoke-test-rest-proxy.sh} | 2 +- ...istry.sh => smoke-test-schema-registry.sh} | 3 +- container/Dockerfile | 4 - container/Dockerfile.dev | 59 +- container/compose.yml | 55 +- container/karapace.registry.env | 47 - container/karapace.rest.env | 51 - container/start.sh | 56 - karapace.config.env | 45 - mypy.ini | 8 +- pyproject.toml | 7 +- requirements/requirements-dev.txt | 169 ++- requirements/requirements-typing.txt | 160 ++- requirements/requirements.txt | 155 ++- src/karapace/auth.py | 10 +- src/karapace/base_config.yaml | 3 - src/karapace/config.py | 28 +- src/karapace/container.py | 9 +- .../dependencies/config_dependency.py | 23 - .../dependencies/controller_dependency.py | 23 - .../dependencies/forward_client_dependency.py | 20 - .../schema_registry_dependency.py | 24 - src/karapace/dependencies/stats_dependeny.py | 23 - src/karapace/instrumentation/prometheus.py | 1 + .../kafka_rest_apis/consumer_manager.py | 2 +- src/karapace/karapace_all.py | 3 +- src/karapace/protobuf/io.py | 2 +- src/karapace/routers/compatibility_router.py | 33 - src/karapace/routers/config_router.py | 113 -- src/karapace/routers/errors.py | 56 - src/karapace/routers/health_router.py | 64 -- src/karapace/routers/mode_router.py | 42 - src/karapace/routers/requests.py | 101 -- src/karapace/routers/root_router.py | 16 - src/karapace/routers/schemas_router.py | 83 -- src/karapace/routers/subjects_router.py | 189 ---- src/karapace/schema_registry_apis.py | 982 ------------------ src/karapace/typing.py | 5 +- src/schema_registry/__main__.py | 4 +- src/schema_registry/routers/health.py | 8 +- .../routers/master_availability.py} | 20 +- src/schema_registry/routers/requests.py | 15 +- src/schema_registry/routers/setup.py | 2 + src/schema_registry/schema_registry_apis.py | 5 - src/schema_registry/user.py | 15 +- tests/conftest.py | 5 +- tests/e2e/__init__.py | 4 + tests/e2e/conftest.py | 128 +++ .../e2e/instrumentation}/__init__.py | 0 .../instrumentation/test_prometheus.py | 0 
.../instrumentation => e2e/kafka}/__init__.py | 0 .../{integration => e2e}/kafka/test_admin.py | 0 .../kafka/test_consumer.py | 0 .../kafka/test_producer.py | 0 .../kafka => e2e/schema_registry}/__init__.py | 0 .../schema_registry/test_jsonschema.py | 0 tests/e2e/test_karapace.py | 64 ++ tests/integration/conftest.py | 39 +- tests/integration/schema_registry/__init__.py | 0 ...est_dependencies_compatibility_protobuf.py | 13 +- tests/integration/test_karapace.py | 54 - tests/integration/test_master_coordinator.py | 1 + tests/integration/test_schema.py | 38 +- tests/integration/test_schema_protobuf.py | 65 +- .../integration/test_schema_registry_auth.py | 2 + tests/integration/utils/cluster.py | 65 +- tests/integration/utils/zookeeper.py | 2 +- tests/utils.py | 8 +- 74 files changed, 996 insertions(+), 2371 deletions(-) rename bin/{smoke-test-rest.sh => smoke-test-rest-proxy.sh} (80%) rename bin/{smoke-test-registry.sh => smoke-test-schema-registry.sh} (84%) delete mode 100644 container/karapace.registry.env delete mode 100644 container/karapace.rest.env delete mode 100755 container/start.sh delete mode 100644 karapace.config.env delete mode 100644 src/karapace/base_config.yaml delete mode 100644 src/karapace/dependencies/config_dependency.py delete mode 100644 src/karapace/dependencies/controller_dependency.py delete mode 100644 src/karapace/dependencies/forward_client_dependency.py delete mode 100644 src/karapace/dependencies/schema_registry_dependency.py delete mode 100644 src/karapace/dependencies/stats_dependeny.py delete mode 100644 src/karapace/routers/compatibility_router.py delete mode 100644 src/karapace/routers/config_router.py delete mode 100644 src/karapace/routers/errors.py delete mode 100644 src/karapace/routers/health_router.py delete mode 100644 src/karapace/routers/mode_router.py delete mode 100644 src/karapace/routers/requests.py delete mode 100644 src/karapace/routers/root_router.py delete mode 100644 src/karapace/routers/schemas_router.py delete 
mode 100644 src/karapace/routers/subjects_router.py delete mode 100644 src/karapace/schema_registry_apis.py rename src/{karapace/routers/master_available_router.py => schema_registry/routers/master_availability.py} (71%) create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/conftest.py rename {src/karapace/routers => tests/e2e/instrumentation}/__init__.py (100%) rename tests/{integration => e2e}/instrumentation/test_prometheus.py (100%) rename tests/{integration/instrumentation => e2e/kafka}/__init__.py (100%) rename tests/{integration => e2e}/kafka/test_admin.py (100%) rename tests/{integration => e2e}/kafka/test_consumer.py (100%) rename tests/{integration => e2e}/kafka/test_producer.py (100%) rename tests/{integration/kafka => e2e/schema_registry}/__init__.py (100%) rename tests/{integration => e2e}/schema_registry/test_jsonschema.py (100%) create mode 100644 tests/e2e/test_karapace.py delete mode 100644 tests/integration/schema_registry/__init__.py delete mode 100644 tests/integration/test_karapace.py diff --git a/.coveragerc b/.coveragerc index 2a6a5d055..b473a4c14 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,8 @@ [run] branch = true relative_files = true -source = src/karapace +source = src +disable_warnings = module-not-measured, no-data-collected + +[report] +skip_empty = true diff --git a/.dockerignore b/.dockerignore index 57efb59ad..4b946a334 100644 --- a/.dockerignore +++ b/.dockerignore @@ -10,7 +10,6 @@ !LICENSE !pyproject.toml !setup.py -!container/start.sh !container/healthcheck.py # Ignore some files in source directories. 
diff --git a/.github/workflows/container-smoke-test.yml b/.github/workflows/container-smoke-test.yml index cced926bd..25cdc08c9 100644 --- a/.github/workflows/container-smoke-test.yml +++ b/.github/workflows/container-smoke-test.yml @@ -26,14 +26,22 @@ jobs: KARAPACE_VERSION=$(python -c "from karapace import version; print(version.__version__)") echo KARAPACE_VERSION=$KARAPACE_VERSION >> $GITHUB_ENV - - name: Build container - run: docker build --build-arg KARAPACE_VERSION=${{ env.KARAPACE_VERSION }} --file=container/Dockerfile . + - run: echo "RUNNER_UID=$(id -u)" >> $GITHUB_ENV + - run: echo "RUNNER_GID=$(id -g)" >> $GITHUB_ENV - name: Run container - run: docker compose --file=container/compose.yml up --build --wait --detach + run: make start-karapace-docker-resources + env: + KARAPACE_VERSION: ${{ env.KARAPACE_VERSION }} + RUNNER_UID: ${{ env.RUNNER_UID }} + RUNNER_GID: ${{ env.RUNNER_GID }} - - name: Smoke test registry - run: bin/smoke-test-registry.sh + - name: Smoke test schema registry + run: bin/smoke-test-schema-registry.sh + env: + KARAPACE_PORT: 8081 - name: Smoke test REST proxy - run: bin/smoke-test-rest.sh + run: bin/smoke-test-rest-proxy.sh + env: + KARAPACE_PORT: 8082 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1ce27c993..60df28286 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,14 +14,13 @@ env: FORCE_COLOR: 1 PIP_PROGRESS_BAR: off PYTHONUNBUFFERED: 1 - KARAPACE_DOTENV: ${{ github.workspace }}/karapace.config.env jobs: - unit-tests: + tests: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] env: PYTEST_ADDOPTS: >- --log-dir=/tmp/ci-logs @@ -41,53 +40,35 @@ jobs: with: go-version: '1.21.0' - - name: Install requirements - run: make install - - name: Resolve Karapace version - run: | - source ./venv/bin/activate - KARAPACE_VERSION=$(python -c "from karapace import version; 
print(version.__version__)") - echo KARAPACE_VERSION=$KARAPACE_VERSION >> $GITHUB_ENV + run: echo KARAPACE_VERSION=4.1.1.dev44+gac20eeed.d20241205 >> $GITHUB_ENV - - name: Run containers - run: KARAPACE_VERSION=${{ env.KARAPACE_VERSION }} docker compose --file=container/compose.yml up --build --wait --detach + - run: echo "RUNNER_UID=$(id -u)" >> $GITHUB_ENV + - run: echo "RUNNER_GID=$(id -g)" >> $GITHUB_ENV - - run: make install-dev - run: make unit-tests-in-docker env: - COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" - PYTEST_ARGS: "--cov=src --cov-append --numprocesses 4" - KARAPACE_VERSION=: ${{ env.KARAPACE_VERSION }} - - integration-tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ '3.10', '3.11', '3.12' ] - env: - PYTEST_ADDOPTS: >- - --log-dir=/tmp/ci-logs - --log-file=/tmp/ci-logs/pytest.log - --showlocals - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - cache: pip - python-version: ${{ matrix.python-version }} + KARAPACE_VERSION: ${{ env.KARAPACE_VERSION }} + RUNNER_UID: ${{ env.RUNNER_UID }} + RUNNER_GID: ${{ env.RUNNER_GID }} + COVERAGE_FILE: "/opt/karapace/coverage/.coverage.${{ matrix.python-version }}" + PYTEST_ARGS: "--cov=karapace --cov=schema_registry --cov-append --numprocesses 4" - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.21.0' + - run: make e2e-tests-in-docker + env: + KARAPACE_VERSION: ${{ env.KARAPACE_VERSION }} + RUNNER_UID: ${{ env.RUNNER_UID }} + RUNNER_GID: ${{ env.RUNNER_GID }} + COVERAGE_FILE: "/opt/karapace/coverage/.coverage.${{ matrix.python-version }}" + PYTEST_ARGS: "--cov=karapace --cov=schema_registry --cov-append --numprocesses 4" - - run: make integration-tests + - run: make integration-tests-in-docker env: - COVERAGE_FILE: ".coverage.${{ matrix.python-version }}" - PYTEST_ARGS: "--cov=src --cov-append --random-order --numprocesses 4" + KARAPACE_VERSION: ${{ env.KARAPACE_VERSION 
}} + RUNNER_UID: ${{ env.RUNNER_UID }} + RUNNER_GID: ${{ env.RUNNER_GID }} + COVERAGE_FILE: "/opt/karapace/coverage/.coverage.${{ matrix.python-version }}" + PYTEST_ARGS: "--cov=karapace --cov=schema_registry --cov-append --random-order --numprocesses 4" - name: Archive logs uses: actions/upload-artifact@v4 diff --git a/.gitignore b/.gitignore index 612ad46b2..59077267f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,12 +10,13 @@ __pycache__/ /build/ /dist/ -/karapace.egg-info/ +src/karapace.egg-info/ /karapace-rpm-src.tar /kafka_*.tgz /kafka_*/ venv -/karapace/version.py +*.so +src/karapace/version.py .run .python-version .hypothesis/ diff --git a/GNUmakefile b/GNUmakefile index b8305d65a..032def928 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -3,9 +3,9 @@ SHELL := /usr/bin/env bash VENV_DIR ?= $(CURDIR)/venv PIP ?= pip3 --disable-pip-version-check --no-input --require-virtualenv PYTHON ?= python3 -PYTHON_VERSION ?= 3.10 +PYTHON_VERSION ?= 3.9 DOCKER_COMPOSE ?= docker compose -KARAPACE-CLI ?= $(DOCKER_COMPOSE) -f container/compose.yml run karapace-cli +KARAPACE-CLI ?= $(DOCKER_COMPOSE) -f container/compose.yml run --rm karapace-cli define PIN_VERSIONS_COMMAND pip install pip-tools && \ @@ -105,9 +105,40 @@ schema: pin-requirements: docker run -e CUSTOM_COMPILE_COMMAND='make pin-requirements' -it -v .:/karapace --security-opt label=disable python:$(PYTHON_VERSION)-bullseye /bin/bash -c "$(PIN_VERSIONS_COMMAND)" +.PHONY: start-karapace-docker-resources +start-karapace-docker-resources: export KARAPACE_VERSION ?= 4.1.1.dev44+gac20eeed.d20241205 +start-karapace-docker-resources: + sudo touch .coverage.3.9 .coverage.3.10 .coverage.3.11 .coverage.3.12 + sudo chown ${RUNNER_UID}:${RUNNER_GID} .coverage.3.9 .coverage.3.10 .coverage.3.11 .coverage.3.12 + $(DOCKER_COMPOSE) -f container/compose.yml up -d --build --wait --detach + .PHONY: unit-tests-in-docker unit-tests-in-docker: export PYTEST_ARGS ?= -unit-tests-in-docker: +unit-tests-in-docker: 
start-karapace-docker-resources rm -fr runtime/* $(KARAPACE-CLI) $(PYTHON) -m pytest -s -vvv $(PYTEST_ARGS) tests/unit/ rm -fr runtime/* + +.PHONY: e2e-tests-in-docker +e2e-tests-in-docker: export PYTEST_ARGS ?= +e2e-tests-in-docker: start-karapace-docker-resources + rm -fr runtime/* + sleep 10 + $(KARAPACE-CLI) $(PYTHON) -m pytest -s -vvv $(PYTEST_ARGS) tests/e2e/test_karapace.py + rm -fr runtime/* + +.PHONY: integration-tests-in-docker +integration-tests-in-docker: export PYTEST_ARGS ?= +integration-tests-in-docker: start-karapace-docker-resources + rm -fr runtime/* + sleep 10 + $(KARAPACE-CLI) $(PYTHON) -m pytest -s -vvv $(PYTEST_ARGS) tests/integration/ + rm -fr runtime/* + +.PHONY: type-check-mypy-in-docker +type-check-mypy-in-docker: start-karapace-docker-resources + $(KARAPACE-CLI) $(PYTHON) -m mypy src + +.PHONY: cli +cli: start-karapace-docker-resources + $(KARAPACE-CLI) bash diff --git a/bin/smoke-test-rest.sh b/bin/smoke-test-rest-proxy.sh similarity index 80% rename from bin/smoke-test-rest.sh rename to bin/smoke-test-rest-proxy.sh index 665ab08e5..58ea1b594 100755 --- a/bin/smoke-test-rest.sh +++ b/bin/smoke-test-rest-proxy.sh @@ -3,7 +3,7 @@ retries=5 for ((i = 0; i <= retries; i++)); do - response=$(curl --silent --verbose --fail http://localhost:8082/topics) + response=$(curl --silent --verbose --fail "http://localhost:$KARAPACE_PORT/topics") if [[ $response == '["_schemas","__consumer_offsets"]' ]]; then echo "Ok!" 
diff --git a/bin/smoke-test-registry.sh b/bin/smoke-test-schema-registry.sh similarity index 84% rename from bin/smoke-test-registry.sh rename to bin/smoke-test-schema-registry.sh index aab54ac76..0e3295dae 100755 --- a/bin/smoke-test-registry.sh +++ b/bin/smoke-test-schema-registry.sh @@ -6,9 +6,8 @@ for ((i = 0; i <= retries; i++)); do response=$( curl --silent --verbose --fail --request POST \ --header 'Content-Type: application/vnd.schemaregistry.v1+json' \ - --header 'Authorization: Basic Og==' \ --data '{"schema": "{\"type\": \"record\", \"name\": \"Obj\", \"fields\":[{\"name\": \"age\", \"type\": \"int\"}]}"}' \ - http://localhost:8081/subjects/test-key/versions + "http://localhost:$KARAPACE_PORT/subjects/test-key/versions" ) if [[ $response == '{"id":1}' ]]; then diff --git a/container/Dockerfile b/container/Dockerfile index 2e1544319..55ca06e1c 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -55,10 +55,6 @@ RUN apt-get update \ COPY --from=builder /venv /venv ENV PATH="/venv/bin:$PATH" -COPY ./container/start.sh /opt/karapace -RUN chmod 500 /opt/karapace/start.sh \ - && chown karapace:karapace /opt/karapace/start.sh - COPY ./container/healthcheck.py /opt/karapace WORKDIR /opt/karapace diff --git a/container/Dockerfile.dev b/container/Dockerfile.dev index 2ba1c0250..37a6cb5ab 100644 --- a/container/Dockerfile.dev +++ b/container/Dockerfile.dev @@ -2,10 +2,17 @@ FROM python:3.10.11-bullseye AS builder ARG KARAPACE_VERSION +ARG RUNNER_UID +ARG RUNNER_GID + +# Setup files and directories. +RUN mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace /opt/karapace/coverage \ +&& touch /opt/karapace/coverage/.coverage.3.9 /opt/karapace/coverage/.coverage.3.10 /opt/karapace/coverage/.coverage.3.11 /opt/karapace/coverage/.coverage.3.12 \ +&& chown --recursive "$RUNNER_UID:$RUNNER_GID" /opt/karapace /opt/karapace/coverage /var/log/karapace # Create, activate, and enforce usage of virtualenv. 
-RUN python3 -m venv /venv -ENV PATH="/venv/bin:$PATH" +RUN python3 -m venv /opt/karapace/venv +ENV PATH="/opt/karapace/venv/bin:$PATH" ENV PIP_REQUIRE_VIRTUALENV=true # Install golang needed by extensions @@ -15,36 +22,6 @@ RUN wget --progress=dot:giga "https://go.dev/dl/go${GO_VERSION}.linux-$(dpkg --p && tar -C /usr/local -xzf "go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" \ && rm "go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" -# Copy the requirements.txt and install dependencies in venv. Using a separate -# command to use layer caching. -# -# Note: the requirements.txt is pinned, if any of the dependencies is updated -# the cache will be invalidated and the image regenerated, which is the -# intended behavior. -COPY ./requirements/requirements.txt /build/ -COPY ./requirements/requirements-dev.txt /build/ -RUN --mount=type=cache,target=/root/.cache/pip \ - python3 -m pip install -r /build/requirements.txt -r /build/requirements-dev.txt - -COPY . /build/karapace-repo -WORKDIR /build/karapace-repo -RUN --mount=type=cache,target=/root/.cache/pip \ - if [ -z "${KARAPACE_VERSION}" ]; then \ - PRETEND_VERSION="$(python -c 'from src.karapace import version; print(version.__version__)')"; \ - else \ - PRETEND_VERSION=$KARAPACE_VERSION; \ - fi; \ - SETUPTOOLS_SCM_PRETEND_VERSION=$PRETEND_VERSION python3 -m pip install --no-deps . - -# Karapace image, i.e. production. -FROM python:3.10.11-slim-bullseye AS karapace - -# Setup user and directories. -RUN groupadd --system karapace \ - && useradd --system --gid karapace karapace \ - && mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace \ - && chown --recursive karapace:karapace /opt/karapace /var/log/karapace - # Install protobuf compiler. ARG PROTOBUF_COMPILER_VERSION="3.12.4-1+deb11u1" RUN apt-get update \ @@ -52,11 +29,17 @@ RUN apt-get update \ protobuf-compiler=$PROTOBUF_COMPILER_VERSION \ && rm -rf /var/lib/apt/lists/* -# Copy virtualenv from builder and activate it. 
-COPY --from=builder /venv /venv -ENV PATH="/venv/bin:$PATH" - -COPY ./container/healthcheck.py /opt/karapace +# Install Java via openjdk-11 +COPY --from=openjdk:11 /usr/local/openjdk-11 /usr/local/openjdk-11 +ENV JAVA_HOME /usr/local/openjdk-11 +RUN update-alternatives --install /usr/bin/java java /usr/local/openjdk-11/bin/java 1 WORKDIR /opt/karapace -USER karapace + +COPY ./requirements /opt/karapace/requirements +RUN python3 -m pip install -r /opt/karapace/requirements/requirements.txt -r /opt/karapace/requirements/requirements-dev.txt -r /opt/karapace/requirements/requirements-typing.txt + +COPY . . +RUN SETUPTOOLS_SCM_PRETEND_VERSION=$KARAPACE_VERSION python3 -m pip install . + +ENV PYTHONPATH="/opt/karapace/src:$PYTHONPATH" diff --git a/container/compose.yml b/container/compose.yml index 87106ee90..4b7d8728b 100644 --- a/container/compose.yml +++ b/container/compose.yml @@ -59,6 +59,8 @@ services: build: context: .. dockerfile: container/Dockerfile + args: + KARAPACE_VERSION: $KARAPACE_VERSION entrypoint: - python3 - -m @@ -67,17 +69,30 @@ services: - kafka ports: - 8081:8081 - volumes: - - ./karapace.registry.env:/opt/karapace/karapace.env environment: - KARAPACE_DOTENV: /opt/karapace/karapace.env + KARAPACE_KARAPACE_REGISTRY: true + KARAPACE_ADVERTISED_HOSTNAME: karapace-schema-registry + KARAPACE_BOOTSTRAP_URI: kafka:29092 KARAPACE_PORT: 8081 + KARAPACE_HOST: 0.0.0.0 + KARAPACE_CLIENT_ID: karapace-schema-registry + KARAPACE_GROUP_ID: karapace-schema-registry + KARAPACE_MASTER_ELIGIBILITY: true + KARAPACE_TOPIC_NAME: _schemas + KARAPACE_LOG_LEVEL: DEBUG + KARAPACE_COMPATIBILITY: FULL + KARAPACE_STATSD_HOST: statsd-exporter + KARAPACE_STATSD_PORT: 8125 + KARAPACE_KAFKA_SCHEMA_READER_STRICT_MODE: false + KARAPACE_KAFKA_RETRIABLE_ERRORS_SILENCED: true karapace-rest-proxy: image: ghcr.io/aiven-open/karapace:develop build: context: .. 
dockerfile: container/Dockerfile + args: + KARAPACE_VERSION: $KARAPACE_VERSION entrypoint: - python3 - -m @@ -87,17 +102,30 @@ services: - karapace-schema-registry ports: - 8082:8082 - volumes: - - ./karapace.rest.env:/opt/karapace/karapace.env environment: - KARAPACE_DOTENV: /opt/karapace/karapace.env + KARAPACE_KARAPACE_REST: true KARAPACE_PORT: 8082 + KARAPACE_HOST: 0.0.0.0 + KARAPACE_ADVERTISED_HOSTNAME: karapace-rest-proxy + KARAPACE_BOOTSTRAP_URI: kafka:29092 + KARAPACE_REGISTRY_HOST: karapace-schema-registry + KARAPACE_REGISTRY_PORT: 8081 + KARAPACE_ADMIN_METADATA_MAX_AGE: 0 + KARAPACE_LOG_LEVEL: DEBUG + KARAPACE_STATSD_HOST: statsd-exporter + KARAPACE_STATSD_PORT: 8125 + KARAPACE_KAFKA_SCHEMA_READER_STRICT_MODE: false + KARAPACE_KAFKA_RETRIABLE_ERRORS_SILENCED: true karapace-cli: image: ghcr.io/aiven-open/karapace:cli build: context: .. dockerfile: container/Dockerfile.dev + args: + KARAPACE_VERSION: $KARAPACE_VERSION + RUNNER_UID: $RUNNER_UID + RUNNER_GID: $RUNNER_GID tty: true depends_on: - kafka @@ -105,9 +133,20 @@ services: - karapace-rest-proxy volumes: - ../tests:/opt/karapace/tests - - ../karapace.config.env:/opt/karapace/karapace.env + - ../pytest.ini:/opt/karapace/pytest.ini + - ../mypy.ini:/opt/karapace/mypy.ini + - ../.flake8:/opt/karapace/.flake8 + - ../.isort.cfg:/opt/karapace/.isort.cfg + - ../.pre-commit-config.yaml:/opt/karapace/.pre-commit-config.yaml + - ../.pylintrc:/opt/karapace/.pylintrc + - ../.coveragerc:/opt/karapace/.coveragerc + - ../.coverage.3.9:/opt/karapace/coverage/.coverage.3.9 + - ../.coverage.3.10:/opt/karapace/coverage/.coverage.3.10 + - ../.coverage.3.11:/opt/karapace/coverage/.coverage.3.11 + - ../.coverage.3.12:/opt/karapace/coverage/.coverage.3.12 environment: - KARAPACE_DOTENV: /opt/karapace/karapace.env + - COVERAGE_FILE + - COVERAGE_RCFILE=/opt/karapace/.coveragerc prometheus: image: prom/prometheus diff --git a/container/karapace.registry.env b/container/karapace.registry.env deleted file mode 100644 index 
cd757a99b..000000000 --- a/container/karapace.registry.env +++ /dev/null @@ -1,47 +0,0 @@ -KARAPACE_DOTENV=/opt/karapace/karapace.env -ACCESS_LOGS_DEBUG=False -ADVERTISED_HOSTNAME=karapace-schema-registry -ADVERTISED_PORT=8081 -ADVERTISED_PROTOCOL=http -BOOTSTRAP_URI=kafka:29092 -CLIENT_ID=karapace-schema-registry -COMPATIBILITY=BACKWARD -CONNECTIONS_MAX_IDLE_MS=15000 -CONSUMER_ENABLE_AUTO_COMMIT=True -CONSUMER_REQUEST_TIMEOUT_MS=11000 -CONSUMER_REQUEST_MAX_BYTES=67108864 -CONSUMER_IDLE_DISCONNECT_TIMEOUT=0 -FETCH_MIN_BYTES=1 -GROUP_ID=karapace-schema-registry -HOST=0.0.0.0 -PORT=8081 -REGISTRY_HOST=karapace-schema-registry -REGISTRY_PORT=8081 -REST_AUTHORIZATION=False -LOG_HANDLER=stdout -LOG_LEVEL=WARNING -LOG_FORMAT=%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s -MASTER_ELIGIBILITY=True -REPLICATION_FACTOR=1 -SECURITY_PROTOCOL=PLAINTEXT -SSL_CHECK_HOSTNAME=True -TOPIC_NAME=_schemas -METADATA_MAX_AGE_MS=60000 -ADMIN_METADATA_MAX_AGE=5 -PRODUCER_ACKS=1 -PRODUCER_COUNT=5 -PRODUCER_LINGER_MS=100 -PRODUCER_MAX_REQUEST_SIZE=1048576 -SESSION_TIMEOUT_MS=10000 -KARAPACE_REST=False -KARAPACE_REGISTRY=True -KARAPACE_PORT=8081 -NAME_STRATEGY=topic_name -NAME_STRATEGY_VALIDATION=True -MASTER_ELECTION_STRATEGY=lowest -PROTOBUF_RUNTIME_DIRECTORY=runtime -STATSD_HOST=statsd-exporter -STATSD_PORT=8125 -KAFKA_SCHEMA_READER_STRICT_MODE=False -KAFKA_RETRIABLE_ERRORS_SILENCED=True -USE_PROTOBUF_FORMATTER=False diff --git a/container/karapace.rest.env b/container/karapace.rest.env deleted file mode 100644 index 3df13f3b2..000000000 --- a/container/karapace.rest.env +++ /dev/null @@ -1,51 +0,0 @@ -KARAPACE_DOTENV=/opt/karapace/karapace.env -ACCESS_LOGS_DEBUG=False -# ACCESS_LOG_CLASS=karapace.utils.DebugAccessLogger -ACCESS_LOG_CLASS=aiohttp.web_log.AccessLogger -ADVERTISED_HOSTNAME=karapace-rest-proxy -ADVERTISED_PORT=8082 -ADVERTISED_PROTOCOL=http -BOOTSTRAP_URI=kafka:29092 -CLIENT_ID=karapace-rest-proxy -COMPATIBILITY=BACKWARD 
-CONNECTIONS_MAX_IDLE_MS=15000 -CONSUMER_ENABLE_AUTO_COMMIT=True -CONSUMER_REQUEST_TIMEOUT_MS=11000 -CONSUMER_REQUEST_MAX_BYTES=67108864 -CONSUMER_IDLE_DISCONNECT_TIMEOUT=0 -FETCH_MIN_BYTES=1 -GROUP_ID=karapace-rest-proxy -HOST=0.0.0.0 -PORT=8082 -REGISTRY_HOST=karapace-schema-registry -REGISTRY_PORT=8081 -REST_AUTHORIZATION=False -LOG_HANDLER=stdout -LOG_LEVEL=WARNING -LOG_FORMAT=%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s -MASTER_ELIGIBILITY=True -REPLICATION_FACTOR=1 -SECURITY_PROTOCOL=PLAINTEXT -SSL_CHECK_HOSTNAME=True -TOPIC_NAME=_schemas -METADATA_MAX_AGE_MS=60000 -ADMIN_METADATA_MAX_AGE=5 -PRODUCER_ACKS=1 -PRODUCER_COUNT=5 -PRODUCER_LINGER_MS=100 -PRODUCER_MAX_REQUEST_SIZE=1048576 -SESSION_TIMEOUT_MS=10000 -KARAPACE_REST=True -KARAPACE_REGISTRY=False -KARAPACE_PORT=8082 -NAME_STRATEGY=topic_name -NAME_STRATEGY_VALIDATION=True -MASTER_ELECTION_STRATEGY=lowest -PROTOBUF_RUNTIME_DIRECTORY=runtime -STATSD_HOST=statsd-exporter -STATSD_PORT=8125 -KAFKA_SCHEMA_READER_STRICT_MODE=False -KAFKA_RETRIABLE_ERRORS_SILENCED=True -USE_PROTOBUF_FORMATTER=False -HTTP_REQUEST_MAX_SIZE=1048576 -TAGS='{ "app": "karapace-rest-proxy" }' diff --git a/container/start.sh b/container/start.sh deleted file mode 100755 index 95ac86aa2..000000000 --- a/container/start.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -set -Eeuo pipefail - -# Configuration is done using environment variables. The environment variable -# names are the same as the configuration keys, all letters in caps, and always -# start with `KARAPACE_`. - -# In the code below the expression ${var+isset} is used to check if the -# variable was defined, and ${var-isunset} if not. 
-# -# Ref: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_02 - -case $1 in -rest) - # Reexport variables for compatibility - [[ -n ${KARAPACE_REST_ADVERTISED_HOSTNAME+isset} ]] && export KARAPACE_ADVERTISED_HOSTNAME="${KARAPACE_REST_ADVERTISED_HOSTNAME}" - [[ -n ${KARAPACE_REST_BOOTSTRAP_URI+isset} ]] && export KARAPACE_BOOTSTRAP_URI="${KARAPACE_REST_BOOTSTRAP_URI}" - [[ -n ${KARAPACE_REST_REGISTRY_HOST+isset} ]] && export KARAPACE_REGISTRY_HOST="${KARAPACE_REST_REGISTRY_HOST}" - [[ -n ${KARAPACE_REST_REGISTRY_PORT+isset} ]] && export KARAPACE_REGISTRY_PORT="${KARAPACE_REST_REGISTRY_PORT}" - [[ -n ${KARAPACE_REST_HOST+isset} ]] && export KARAPACE_HOST="${KARAPACE_REST_HOST}" - [[ -n ${KARAPACE_REST_PORT+isset} ]] && export KARAPACE_PORT="${KARAPACE_REST_PORT}" - [[ -n ${KARAPACE_REST_ADMIN_METADATA_MAX_AGE+isset} ]] && export KARAPACE_ADMIN_METADATA_MAX_AGE="${KARAPACE_REST_ADMIN_METADATA_MAX_AGE}" - [[ -n ${KARAPACE_REST_LOG_LEVEL+isset} ]] && export KARAPACE_LOG_LEVEL="${KARAPACE_REST_LOG_LEVEL}" - export KARAPACE_REST=1 - echo "{}" >/opt/karapace/rest.config.json - - echo "Starting Karapace REST API" - exec python3 -m karapace.karapace_all /opt/karapace/rest.config.json - ;; -registry) - # Reexport variables for compatibility - [[ -n ${KARAPACE_REGISTRY_ADVERTISED_HOSTNAME+isset} ]] && export KARAPACE_ADVERTISED_HOSTNAME="${KARAPACE_REGISTRY_ADVERTISED_HOSTNAME}" - [[ -n ${KARAPACE_REGISTRY_BOOTSTRAP_URI+isset} ]] && export KARAPACE_BOOTSTRAP_URI="${KARAPACE_REGISTRY_BOOTSTRAP_URI}" - [[ -n ${KARAPACE_REGISTRY_HOST+isset} ]] && export KARAPACE_HOST="${KARAPACE_REGISTRY_HOST}" - [[ -n ${KARAPACE_REGISTRY_PORT+isset} ]] && export KARAPACE_PORT="${KARAPACE_REGISTRY_PORT}" - [[ -n ${KARAPACE_REGISTRY_CLIENT_ID+isset} ]] && export KARAPACE_CLIENT_ID="${KARAPACE_REGISTRY_CLIENT_ID}" - [[ -n ${KARAPACE_REGISTRY_GROUP_ID+isset} ]] && export KARAPACE_GROUP_ID="${KARAPACE_REGISTRY_GROUP_ID}" - # Map misspelled environment variables to 
correct spelling for backwards compatibility. - [[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBITY}" - [[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBILITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBILITY}" - [[ -n ${KARAPACE_REGISTRY_TOPIC_NAME+isset} ]] && export KARAPACE_TOPIC_NAME="${KARAPACE_REGISTRY_TOPIC_NAME}" - [[ -n ${KARAPACE_REGISTRY_COMPATIBILITY+isset} ]] && export KARAPACE_COMPATIBILITY="${KARAPACE_REGISTRY_COMPATIBILITY}" - [[ -n ${KARAPACE_REGISTRY_LOG_LEVEL+isset} ]] && export KARAPACE_LOG_LEVEL="${KARAPACE_REGISTRY_LOG_LEVEL}" - export KARAPACE_REGISTRY=1 - echo "{}" >/opt/karapace/registry.config.json - - echo "Starting Karapace Schema Registry" - exec python3 -m karapace.karapace_all /opt/karapace/registry.config.json - ;; -*) - echo "usage: start-karapace.sh " - exit 0 - ;; -esac - -wait diff --git a/karapace.config.env b/karapace.config.env deleted file mode 100644 index 70cf0c616..000000000 --- a/karapace.config.env +++ /dev/null @@ -1,45 +0,0 @@ -ACCESS_LOGS_DEBUG=False -ADVERTISED_HOSTNAME=127.0.0.1 -ADVERTISED_PORT=8081 -ADVERTISED_PROTOCOL=http -BOOTSTRAP_URI=127.0.0.1:9092 -CLIENT_ID=sr-1 -COMPATIBILITY=BACKWARD -CONNECTIONS_MAX_IDLE_MS=15000 -CONSUMER_ENABLE_AUTO_COMMIT=True -CONSUMER_REQUEST_TIMEOUT_MS=11000 -CONSUMER_REQUEST_MAX_BYTES=67108864 -CONSUMER_IDLE_DISCONNECT_TIMEOUT=0 -FETCH_MIN_BYTES=1 -GROUP_ID=group_id8357e932 -HOST=127.0.0.1 -PORT=8081 -REGISTRY_HOST=127.0.0.1 -REGISTRY_PORT=8081 -REST_AUTHORIZATION=False -LOG_HANDLER=stdout -LOG_LEVEL=DEBUG -LOG_FORMAT=%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s -MASTER_ELIGIBILITY=True -REPLICATION_FACTOR=1 -SECURITY_PROTOCOL=PLAINTEXT -SSL_CHECK_HOSTNAME=True -TOPIC_NAME=_schemas913ed946 -METADATA_MAX_AGE_MS=60000 -ADMIN_METADATA_MAX_AGE=5 -PRODUCER_ACKS=1 -PRODUCER_COUNT=5 -PRODUCER_LINGER_MS=100 -PRODUCER_MAX_REQUEST_SIZE=1048576 
-SESSION_TIMEOUT_MS=10000 -KARAPACE_REST=False -KARAPACE_REGISTRY=True -NAME_STRATEGY=topic_name -NAME_STRATEGY_VALIDATION=True -MASTER_ELECTION_STRATEGY=lowest -PROTOBUF_RUNTIME_DIRECTORY=runtime -STATSD_HOST=127.0.0.1 -STATSD_PORT=8125 -KAFKA_SCHEMA_READER_STRICT_MODE=False -KAFKA_RETRIABLE_ERRORS_SILENCED=True -USE_PROTOBUF_FORMATTER=False diff --git a/mypy.ini b/mypy.ini index c4ef8efd1..981e4061c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -15,7 +15,7 @@ warn_no_return = True warn_unreachable = True strict_equality = True -[mypy-karapace.schema_registry_apis] +[mypy-schema_registry.schema_registry_apis] ignore_errors = True [mypy-karapace.compatibility.jsonschema.checks] @@ -88,3 +88,9 @@ ignore_missing_imports = True [mypy-systemd.*] ignore_missing_imports = True + +[mypy-confluent_kafka.*] +ignore_missing_imports = True + +[mypy-isodate.*] +ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index f1f9016cb..9df5adf5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,8 @@ dependencies = [ "lz4", "networkx < 4", "protobuf < 4", - "pydantic == 1.10.17", + "pydantic == 2.10.2", + "pydantic-settings == 2.6.1", "pyjwt >= 2.4.0 , < 3", "python-dateutil < 3", "python-snappy", @@ -33,6 +34,10 @@ dependencies = [ "zstandard", "prometheus-client == 0.20.0", "yarl == 1.12.1", + "opentelemetry-api == 1.28.2", + "opentelemetry-sdk == 1.28.2", + "opentelemetry-instrumentation-fastapi == 0.49b2", + "dependency-injector == 4.43.0", # Patched dependencies # diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 9848f80e0..bb95b5fc9 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -6,17 +6,24 @@ # accept-types==0.4.1 # via karapace (/karapace/pyproject.toml) -aiohappyeyeballs==2.4.3 +aiohappyeyeballs==2.4.4 # via aiohttp -aiohttp==3.10.10 +aiohttp==3.10.11 # via karapace (/karapace/pyproject.toml) aiokafka==0.10.0 # via karapace (/karapace/pyproject.toml) aiosignal==1.3.1 
# via aiohttp -anyio==4.6.2.post1 - # via watchfiles -async-timeout==4.0.3 +annotated-types==0.7.0 + # via pydantic +anyio==4.7.0 + # via + # httpx + # starlette + # watchfiles +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi +async-timeout==5.0.1 # via # aiohttp # aiokafka @@ -38,20 +45,36 @@ cachetools==5.3.3 certifi==2024.8.30 # via # geventhttpclient + # httpcore + # httpx # requests # sentry-sdk charset-normalizer==3.4.0 # via requests click==8.1.7 - # via flask + # via + # flask + # rich-toolkit + # typer + # uvicorn configargparse==1.7 # via locust confluent-kafka==2.4.0 # via karapace (/karapace/pyproject.toml) -coverage[toml]==7.6.4 +coverage[toml]==7.6.9 # via pytest-cov cramjam==2.9.0 # via python-snappy +dependency-injector==4.43.0 + # via karapace (/karapace/pyproject.toml) +deprecated==1.2.15 + # via + # opentelemetry-api + # opentelemetry-semantic-conventions +dnspython==2.7.0 + # via email-validator +email-validator==2.2.0 + # via fastapi exceptiongroup==1.2.2 # via # anyio @@ -61,9 +84,13 @@ execnet==2.1.1 # via pytest-xdist fancycompleter==0.9.1 # via pdbpp +fastapi[standard]==0.115.5 + # via karapace (/karapace/pyproject.toml) +fastapi-cli[standard]==0.0.6 + # via fastapi filelock==3.16.1 # via karapace (/karapace/pyproject.toml) -flask==3.0.3 +flask==3.1.0 # via # flask-cors # flask-login @@ -80,19 +107,33 @@ gevent==24.11.1 # via # geventhttpclient # locust -geventhttpclient==2.3.1 +geventhttpclient==2.3.3 # via locust greenlet==3.1.1 # via gevent -hypothesis==6.118.8 +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.7 + # via httpx +httptools==0.6.4 + # via uvicorn +httpx==0.28.0 + # via fastapi +hypothesis==6.122.1 # via karapace (/karapace/pyproject.toml) idna==3.10 # via # anyio + # email-validator + # httpx # requests # yarl importlib-metadata==8.5.0 - # via flask + # via + # flask + # opentelemetry-api iniconfig==2.0.0 # via pytest isodate==0.7.2 @@ -100,12 +141,14 @@ isodate==0.7.2 itsdangerous==2.2.0 # via flask 
jinja2==3.1.4 - # via flask + # via + # fastapi + # flask jsonschema==4.23.0 # via karapace (/karapace/pyproject.toml) jsonschema-specifications==2024.10.1 # via jsonschema -locust==2.32.2 +locust==2.32.4 # via karapace (/karapace/pyproject.toml) lz4==4.3.3 # via karapace (/karapace/pyproject.toml) @@ -125,9 +168,38 @@ multidict==6.1.0 # yarl networkx==3.2.1 # via karapace (/karapace/pyproject.toml) +opentelemetry-api==1.28.2 + # via + # karapace (/karapace/pyproject.toml) + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-instrumentation==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-asgi==0.49b2 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.49b2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-sdk==1.28.2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-semantic-conventions==0.49b2 + # via + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk +opentelemetry-util-http==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi packaging==24.2 # via # aiokafka + # opentelemetry-instrumentation # pytest pdbpp==0.10.3 # via karapace (/karapace/pyproject.toml) @@ -142,15 +214,24 @@ psutil==6.1.0 # karapace (/karapace/pyproject.toml) # locust # pytest-xdist +pydantic==2.10.2 + # via + # fastapi + # karapace (/karapace/pyproject.toml) + # pydantic-settings +pydantic-core==2.27.1 + # via pydantic +pydantic-settings==2.6.1 + # via karapace (/karapace/pyproject.toml) pygments==2.18.0 # via # pdbpp # rich -pyjwt==2.9.0 +pyjwt==2.10.1 # via karapace (/karapace/pyproject.toml) pyrepl==0.9.0 # via fancycompleter -pytest==8.3.3 +pytest==8.3.4 # via # karapace 
(/karapace/pyproject.toml) # pytest-cov @@ -167,8 +248,16 @@ pytest-xdist[psutil]==3.6.1 # via karapace (/karapace/pyproject.toml) python-dateutil==2.9.0.post0 # via karapace (/karapace/pyproject.toml) +python-dotenv==1.0.1 + # via + # pydantic-settings + # uvicorn +python-multipart==0.0.19 + # via fastapi python-snappy==0.7.3 # via karapace (/karapace/pyproject.toml) +pyyaml==6.0.2 + # via uvicorn pyzmq==26.2.0 # via locust referencing==0.35.1 @@ -180,32 +269,54 @@ requests==2.32.3 # karapace (/karapace/pyproject.toml) # locust rich==13.7.1 - # via karapace (/karapace/pyproject.toml) -rpds-py==0.21.0 + # via + # karapace (/karapace/pyproject.toml) + # rich-toolkit + # typer +rich-toolkit==0.12.0 + # via fastapi-cli +rpds-py==0.22.3 # via # jsonschema # referencing -sentry-sdk==2.18.0 +sentry-sdk==2.19.2 # via karapace (/karapace/pyproject.toml) +shellingham==1.5.4 + # via typer six==1.16.0 - # via python-dateutil + # via + # dependency-injector + # python-dateutil sniffio==1.3.1 # via anyio sortedcontainers==2.4.0 # via hypothesis +starlette==0.41.3 + # via fastapi tenacity==9.0.0 # via karapace (/karapace/pyproject.toml) -tomli==2.1.0 +tomli==2.2.1 # via # coverage # locust # pytest +typer==0.15.1 + # via fastapi-cli typing-extensions==4.12.2 # via # anyio + # asgiref + # fastapi # karapace (/karapace/pyproject.toml) # locust # multidict + # opentelemetry-sdk + # pydantic + # pydantic-core + # rich-toolkit + # starlette + # typer + # uvicorn ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.3 @@ -213,8 +324,18 @@ urllib3==2.2.3 # geventhttpclient # requests # sentry-sdk +uvicorn[standard]==0.32.1 + # via + # fastapi + # fastapi-cli +uvloop==0.21.0 + # via uvicorn watchfiles==0.24.0 - # via karapace (/karapace/pyproject.toml) + # via + # karapace (/karapace/pyproject.toml) + # uvicorn +websockets==14.1 + # via uvicorn werkzeug==3.1.3 # via # flask @@ -222,6 +343,10 @@ werkzeug==3.1.3 # locust wmctrl==0.5 # via pdbpp +wrapt==1.17.0 + # via + # 
deprecated + # opentelemetry-instrumentation xxhash==3.5.0 # via karapace (/karapace/pyproject.toml) yarl==1.12.1 @@ -232,7 +357,7 @@ zipp==3.21.0 # via importlib-metadata zope-event==5.0 # via gevent -zope-interface==7.1.1 +zope-interface==7.2 # via gevent zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) diff --git a/requirements/requirements-typing.txt b/requirements/requirements-typing.txt index aef63ee86..eec3f3d32 100644 --- a/requirements/requirements-typing.txt +++ b/requirements/requirements-typing.txt @@ -6,17 +6,24 @@ # accept-types==0.4.1 # via karapace (/karapace/pyproject.toml) -aiohappyeyeballs==2.4.3 +aiohappyeyeballs==2.4.4 # via aiohttp -aiohttp==3.10.10 +aiohttp==3.10.11 # via karapace (/karapace/pyproject.toml) aiokafka==0.10.0 # via karapace (/karapace/pyproject.toml) aiosignal==1.3.1 # via aiohttp -anyio==4.6.2.post1 - # via watchfiles -async-timeout==4.0.3 +annotated-types==0.7.0 + # via pydantic +anyio==4.7.0 + # via + # httpx + # starlette + # watchfiles +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi +async-timeout==5.0.1 # via # aiohttp # aiokafka @@ -30,23 +37,61 @@ avro @ https://github.com/aiven/avro/archive/5a82d57f2a650fd87c819a30e433f1abb2c cachetools==5.3.3 # via karapace (/karapace/pyproject.toml) certifi==2024.8.30 - # via sentry-sdk + # via + # httpcore + # httpx + # sentry-sdk +click==8.1.7 + # via + # rich-toolkit + # typer + # uvicorn confluent-kafka==2.4.0 # via karapace (/karapace/pyproject.toml) cramjam==2.9.0 # via python-snappy +dependency-injector==4.43.0 + # via karapace (/karapace/pyproject.toml) +deprecated==1.2.15 + # via + # opentelemetry-api + # opentelemetry-semantic-conventions +dnspython==2.7.0 + # via email-validator +email-validator==2.2.0 + # via fastapi exceptiongroup==1.2.2 # via anyio +fastapi[standard]==0.115.5 + # via karapace (/karapace/pyproject.toml) +fastapi-cli[standard]==0.0.6 + # via fastapi frozenlist==1.5.0 # via # aiohttp # aiosignal +h11==0.14.0 + # via + # httpcore + # 
uvicorn +httpcore==1.0.7 + # via httpx +httptools==0.6.4 + # via uvicorn +httpx==0.28.0 + # via fastapi idna==3.10 # via # anyio + # email-validator + # httpx # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api isodate==0.7.2 # via karapace (/karapace/pyproject.toml) +jinja2==3.1.4 + # via fastapi jsonschema==4.23.0 # via karapace (/karapace/pyproject.toml) jsonschema-specifications==2024.10.1 @@ -55,6 +100,8 @@ lz4==4.3.3 # via karapace (/karapace/pyproject.toml) markdown-it-py==3.0.0 # via rich +markupsafe==3.0.2 + # via jinja2 mdurl==0.1.2 # via markdown-it-py multidict==6.1.0 @@ -67,41 +114,101 @@ mypy-extensions==1.0.0 # via mypy networkx==3.2.1 # via karapace (/karapace/pyproject.toml) +opentelemetry-api==1.28.2 + # via + # karapace (/karapace/pyproject.toml) + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-instrumentation==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-asgi==0.49b2 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.49b2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-sdk==1.28.2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-semantic-conventions==0.49b2 + # via + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk +opentelemetry-util-http==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi packaging==24.2 - # via aiokafka + # via + # aiokafka + # opentelemetry-instrumentation prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 # via karapace (/karapace/pyproject.toml) +pydantic==2.10.2 + # via + # fastapi + # karapace (/karapace/pyproject.toml) + # pydantic-settings +pydantic-core==2.27.1 + # via 
pydantic +pydantic-settings==2.6.1 + # via karapace (/karapace/pyproject.toml) pygments==2.18.0 # via rich -pyjwt==2.9.0 +pyjwt==2.10.1 # via karapace (/karapace/pyproject.toml) python-dateutil==2.9.0.post0 # via karapace (/karapace/pyproject.toml) +python-dotenv==1.0.1 + # via + # pydantic-settings + # uvicorn +python-multipart==0.0.19 + # via fastapi python-snappy==0.7.3 # via karapace (/karapace/pyproject.toml) +pyyaml==6.0.2 + # via uvicorn referencing==0.35.1 # via # jsonschema # jsonschema-specifications # types-jsonschema rich==13.7.1 - # via karapace (/karapace/pyproject.toml) -rpds-py==0.21.0 + # via + # karapace (/karapace/pyproject.toml) + # rich-toolkit + # typer +rich-toolkit==0.12.0 + # via fastapi-cli +rpds-py==0.22.3 # via # jsonschema # referencing -sentry-sdk==2.18.0 +sentry-sdk==2.19.2 # via karapace (/karapace/pyproject.toml) +shellingham==1.5.4 + # via typer six==1.16.0 - # via python-dateutil + # via + # dependency-injector + # python-dateutil sniffio==1.3.1 # via anyio +starlette==0.41.3 + # via fastapi tenacity==9.0.0 # via karapace (/karapace/pyproject.toml) -tomli==2.1.0 +tomli==2.2.1 # via mypy +typer==0.15.1 + # via fastapi-cli types-cachetools==5.5.0.20240820 # via karapace (/karapace/pyproject.toml) types-jsonschema==4.23.0.20240813 @@ -111,20 +218,45 @@ types-protobuf==3.20.4.6 typing-extensions==4.12.2 # via # anyio + # asgiref + # fastapi # karapace (/karapace/pyproject.toml) # multidict # mypy + # opentelemetry-sdk + # pydantic + # pydantic-core + # rich-toolkit + # starlette + # typer + # uvicorn ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.3 # via sentry-sdk +uvicorn[standard]==0.32.1 + # via + # fastapi + # fastapi-cli +uvloop==0.21.0 + # via uvicorn watchfiles==0.24.0 - # via karapace (/karapace/pyproject.toml) + # via + # karapace (/karapace/pyproject.toml) + # uvicorn +websockets==14.1 + # via uvicorn +wrapt==1.17.0 + # via + # deprecated + # opentelemetry-instrumentation xxhash==3.5.0 # via karapace 
(/karapace/pyproject.toml) yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) +zipp==3.21.0 + # via importlib-metadata zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 5bb9cf22e..4a32fdcd3 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -6,17 +6,24 @@ # accept-types==0.4.1 # via karapace (/karapace/pyproject.toml) -aiohappyeyeballs==2.4.3 +aiohappyeyeballs==2.4.4 # via aiohttp -aiohttp==3.10.10 +aiohttp==3.10.11 # via karapace (/karapace/pyproject.toml) aiokafka==0.10.0 # via karapace (/karapace/pyproject.toml) aiosignal==1.3.1 # via aiohttp -anyio==4.6.2.post1 - # via watchfiles -async-timeout==4.0.3 +annotated-types==0.7.0 + # via pydantic +anyio==4.7.0 + # via + # httpx + # starlette + # watchfiles +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi +async-timeout==5.0.1 # via # aiohttp # aiokafka @@ -29,22 +36,61 @@ avro @ https://github.com/aiven/avro/archive/5a82d57f2a650fd87c819a30e433f1abb2c # via karapace (/karapace/pyproject.toml) cachetools==5.3.3 # via karapace (/karapace/pyproject.toml) +certifi==2024.8.30 + # via + # httpcore + # httpx +click==8.1.7 + # via + # rich-toolkit + # typer + # uvicorn confluent-kafka==2.4.0 # via karapace (/karapace/pyproject.toml) cramjam==2.9.0 # via python-snappy +dependency-injector==4.43.0 + # via karapace (/karapace/pyproject.toml) +deprecated==1.2.15 + # via + # opentelemetry-api + # opentelemetry-semantic-conventions +dnspython==2.7.0 + # via email-validator +email-validator==2.2.0 + # via fastapi exceptiongroup==1.2.2 # via anyio +fastapi[standard]==0.115.5 + # via karapace (/karapace/pyproject.toml) +fastapi-cli[standard]==0.0.6 + # via fastapi frozenlist==1.5.0 # via # aiohttp # aiosignal +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.7 + # via httpx +httptools==0.6.4 + # via uvicorn +httpx==0.28.0 + # via fastapi idna==3.10 # via # anyio + # 
email-validator + # httpx # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api isodate==0.7.2 # via karapace (/karapace/pyproject.toml) +jinja2==3.1.4 + # via fastapi jsonschema==4.23.0 # via karapace (/karapace/pyproject.toml) jsonschema-specifications==2024.10.1 @@ -53,6 +99,8 @@ lz4==4.3.3 # via karapace (/karapace/pyproject.toml) markdown-it-py==3.0.0 # via rich +markupsafe==3.0.2 + # via jinja2 mdurl==0.1.2 # via markdown-it-py multidict==6.1.0 @@ -61,50 +109,135 @@ multidict==6.1.0 # yarl networkx==3.2.1 # via karapace (/karapace/pyproject.toml) +opentelemetry-api==1.28.2 + # via + # karapace (/karapace/pyproject.toml) + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-instrumentation==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-asgi==0.49b2 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.49b2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-sdk==1.28.2 + # via karapace (/karapace/pyproject.toml) +opentelemetry-semantic-conventions==0.49b2 + # via + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-sdk +opentelemetry-util-http==0.49b2 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi packaging==24.2 - # via aiokafka + # via + # aiokafka + # opentelemetry-instrumentation prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 # via karapace (/karapace/pyproject.toml) +pydantic==2.10.2 + # via + # fastapi + # karapace (/karapace/pyproject.toml) + # pydantic-settings +pydantic-core==2.27.1 + # via pydantic +pydantic-settings==2.6.1 + # via karapace (/karapace/pyproject.toml) pygments==2.18.0 # via rich -pyjwt==2.9.0 +pyjwt==2.10.1 # via 
karapace (/karapace/pyproject.toml) python-dateutil==2.9.0.post0 # via karapace (/karapace/pyproject.toml) +python-dotenv==1.0.1 + # via + # pydantic-settings + # uvicorn +python-multipart==0.0.19 + # via fastapi python-snappy==0.7.3 # via karapace (/karapace/pyproject.toml) +pyyaml==6.0.2 + # via uvicorn referencing==0.35.1 # via # jsonschema # jsonschema-specifications rich==13.7.1 - # via karapace (/karapace/pyproject.toml) -rpds-py==0.21.0 + # via + # karapace (/karapace/pyproject.toml) + # rich-toolkit + # typer +rich-toolkit==0.12.0 + # via fastapi-cli +rpds-py==0.22.3 # via # jsonschema # referencing +shellingham==1.5.4 + # via typer six==1.16.0 - # via python-dateutil + # via + # dependency-injector + # python-dateutil sniffio==1.3.1 # via anyio +starlette==0.41.3 + # via fastapi tenacity==9.0.0 # via karapace (/karapace/pyproject.toml) +typer==0.15.1 + # via fastapi-cli typing-extensions==4.12.2 # via # anyio + # asgiref + # fastapi # karapace (/karapace/pyproject.toml) # multidict + # opentelemetry-sdk + # pydantic + # pydantic-core + # rich-toolkit + # starlette + # typer + # uvicorn ujson==5.10.0 # via karapace (/karapace/pyproject.toml) +uvicorn[standard]==0.32.1 + # via + # fastapi + # fastapi-cli +uvloop==0.21.0 + # via uvicorn watchfiles==0.24.0 - # via karapace (/karapace/pyproject.toml) + # via + # karapace (/karapace/pyproject.toml) + # uvicorn +websockets==14.1 + # via uvicorn +wrapt==1.17.0 + # via + # deprecated + # opentelemetry-instrumentation xxhash==3.5.0 # via karapace (/karapace/pyproject.toml) yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) +zipp==3.21.0 + # via importlib-metadata zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) diff --git a/src/karapace/auth.py b/src/karapace/auth.py index cfc566cdf..e801bfae9 100644 --- a/src/karapace/auth.py +++ b/src/karapace/auth.py @@ -11,7 +11,7 @@ from karapace.config import Config, InvalidConfiguration from karapace.statsd import StatsClient from karapace.utils 
import json_decode, json_encode -from typing import Protocol +from typing import Final, Protocol from typing_extensions import override, TypedDict from watchfiles import awatch, Change @@ -114,6 +114,8 @@ def check_authorization_any(self, user: User | None, operation: Operation, resou class AuthenticatorAndAuthorizer(AuthenticateProtocol, AuthorizeProtocol): + MUST_AUTHENTICATE: Final[bool] = True + async def close(self) -> None: ... @@ -122,6 +124,8 @@ async def start(self, stats: StatsClient) -> None: class NoAuthAndAuthz(AuthenticatorAndAuthorizer): + MUST_AUTHENTICATE: Final[bool] = False + @override def authenticate(self, *, username: str, password: str) -> User: return None @@ -205,9 +209,9 @@ def check_authorization_any(self, user: User | None, operation: Operation, resou class HTTPAuthorizer(ACLAuthorizer, AuthenticatorAndAuthorizer): - def __init__(self, config: Config) -> None: + def __init__(self, auth_file: str) -> None: super().__init__() - self._auth_filename: str = config.registry_authfile + self._auth_filename: str = auth_file self._auth_mtime: float = -1 self._refresh_auth_task: asyncio.Task | None = None self._refresh_auth_awatch_stop_event = asyncio.Event() diff --git a/src/karapace/base_config.yaml b/src/karapace/base_config.yaml deleted file mode 100644 index 5aa4d42d2..000000000 --- a/src/karapace/base_config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -karapace: - env_file: ${KARAPACE_DOTENV} - env_file_encoding: utf-8 diff --git a/src/karapace/config.py b/src/karapace/config.py index d445b56a8..fde708361 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -12,19 +12,14 @@ from karapace.typing import ElectionStrategy, NameStrategy from karapace.utils import json_encode from pathlib import Path -from pydantic import BaseModel, BaseSettings, PyObject -from typing import Final +from pydantic import BaseModel, ImportString +from pydantic_settings import BaseSettings, SettingsConfigDict import logging import os import socket import ssl 
-KARAPACE_ROOT: Final[Path] = Path(__file__).parent -KARAPACE_BASE_CONFIG_YAML_PATH: Final[Path] = KARAPACE_ROOT / "base_config.yaml" - -HOSTNAME = socket.gethostname() - HOSTNAME = socket.gethostname() @@ -33,12 +28,14 @@ class KarapaceTags(BaseModel): class Config(BaseSettings): + model_config = SettingsConfigDict(env_prefix="karapace_", env_ignore_empty=True) + access_logs_debug: bool = False - access_log_class: PyObject = "aiohttp.web_log.AccessLogger" + access_log_class: ImportString = "karapace.utils.DebugAccessLogger" advertised_hostname: str | None = None advertised_port: int | None = None advertised_protocol: str = "http" - bootstrap_uri: str = "127.0.0.1:9092" + bootstrap_uri: str = "kafka:29092" sasl_bootstrap_uri: str | None = None client_id: str = "sr-1" compatibility: str = "BACKWARD" @@ -55,7 +52,7 @@ class Config(BaseSettings): port: int = 8081 server_tls_certfile: str | None = None server_tls_keyfile: str | None = None - registry_host: str = "127.0.0.1" + registry_host: str = "karapace-schema-registry" registry_port: int = 8081 registry_user: str | None = None registry_password: str | None = None @@ -95,7 +92,7 @@ class Config(BaseSettings): name_strategy_validation: bool = True master_election_strategy: str = "lowest" protobuf_runtime_directory: str = "runtime" - statsd_host: str = "127.0.0.1" + statsd_host: str = "statsd-exporter" statsd_port: int = 8125 kafka_schema_reader_strict_mode: bool = False kafka_retriable_errors_silenced: bool = True @@ -130,14 +127,15 @@ def to_env_str(self) -> str: env_lines.append(f"{key.upper()}={value}") return "\n".join(env_lines) - def set_config_defaults(self, new_config: Mapping[str, str]) -> Config: + def set_config_defaults(self, new_config: Mapping[str, str] | None = None) -> Config: config = deepcopy(self) - for key, value in new_config.items(): - setattr(config, key, value) + if new_config: + for key, value in new_config.items(): + setattr(config, key, value) # Fallback to default port if `advertised_port` 
is not set if config.advertised_port is None: - config.advertised_port = new_config["port"] + config.advertised_port = config.port # Fallback to `advertised_*` constructed URI if not set if config.rest_base_uri is None: diff --git a/src/karapace/container.py b/src/karapace/container.py index 7c71e99ca..951956bf2 100644 --- a/src/karapace/container.py +++ b/src/karapace/container.py @@ -13,18 +13,13 @@ class KarapaceContainer(containers.DeclarativeContainer): - base_config = providers.Configuration() - config = providers.Singleton( - Config, - _env_file=base_config.karapace.env_file, - _env_file_encoding=base_config.karapace.env_file_encoding, - ) + config = providers.Singleton(Config) statsd = providers.Singleton(StatsClient, config=config) no_auth_authorizer = providers.Singleton(NoAuthAndAuthz) - http_authorizer = providers.Singleton(HTTPAuthorizer, config=config) + http_authorizer = providers.Singleton(HTTPAuthorizer, auth_file=config().registry_authfile) schema_registry = providers.Singleton(KarapaceSchemaRegistry, config=config) diff --git a/src/karapace/dependencies/config_dependency.py b/src/karapace/dependencies/config_dependency.py deleted file mode 100644 index 9c299b725..000000000 --- a/src/karapace/dependencies/config_dependency.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import Depends -from karapace.config import Config -from typing import Annotated - -import os - -env_file = os.environ.get("KARAPACE_DOTENV", None) - - -class ConfigDependencyManager: - CONFIG = Config(_env_file=env_file, _env_file_encoding="utf-8") - - @classmethod - def get_config(cls) -> Config: - return ConfigDependencyManager.CONFIG - - -ConfigDep = Annotated[Config, Depends(ConfigDependencyManager.get_config)] diff --git a/src/karapace/dependencies/controller_dependency.py b/src/karapace/dependencies/controller_dependency.py deleted file mode 100644 index e056b52c2..000000000 --- 
a/src/karapace/dependencies/controller_dependency.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - - -from fastapi import Depends -from karapace.dependencies.config_dependency import ConfigDep -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDep -from karapace.dependencies.stats_dependeny import StatsDep -from karapace.schema_registry_apis import KarapaceSchemaRegistryController -from typing import Annotated - - -async def get_controller( - config: ConfigDep, - stats: StatsDep, - schema_registry: SchemaRegistryDep, -) -> KarapaceSchemaRegistryController: - return KarapaceSchemaRegistryController(config=config, schema_registry=schema_registry, stats=stats) - - -KarapaceSchemaRegistryControllerDep = Annotated[KarapaceSchemaRegistryController, Depends(get_controller)] diff --git a/src/karapace/dependencies/forward_client_dependency.py b/src/karapace/dependencies/forward_client_dependency.py deleted file mode 100644 index 57459c371..000000000 --- a/src/karapace/dependencies/forward_client_dependency.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import Depends -from karapace.forward_client import ForwardClient -from typing import Annotated - -FORWARD_CLIENT: ForwardClient | None = None - - -def get_forward_client() -> ForwardClient: - global FORWARD_CLIENT - if not FORWARD_CLIENT: - FORWARD_CLIENT = ForwardClient() - return FORWARD_CLIENT - - -ForwardClientDep = Annotated[ForwardClient, Depends(get_forward_client)] diff --git a/src/karapace/dependencies/schema_registry_dependency.py b/src/karapace/dependencies/schema_registry_dependency.py deleted file mode 100644 index 68d9b0700..000000000 --- a/src/karapace/dependencies/schema_registry_dependency.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import Depends -from karapace.dependencies.config_dependency 
import ConfigDependencyManager -from karapace.schema_registry import KarapaceSchemaRegistry -from typing import Annotated - - -class SchemaRegistryDependencyManager: - SCHEMA_REGISTRY: KarapaceSchemaRegistry | None = None - - @classmethod - async def get_schema_registry(cls) -> KarapaceSchemaRegistry: - if not SchemaRegistryDependencyManager.SCHEMA_REGISTRY: - SchemaRegistryDependencyManager.SCHEMA_REGISTRY = KarapaceSchemaRegistry( - config=ConfigDependencyManager.get_config() - ) - return SchemaRegistryDependencyManager.SCHEMA_REGISTRY - - -SchemaRegistryDep = Annotated[KarapaceSchemaRegistry, Depends(SchemaRegistryDependencyManager.get_schema_registry)] diff --git a/src/karapace/dependencies/stats_dependeny.py b/src/karapace/dependencies/stats_dependeny.py deleted file mode 100644 index 98c116dac..000000000 --- a/src/karapace/dependencies/stats_dependeny.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - - -from fastapi import Depends -from karapace.dependencies.config_dependency import ConfigDependencyManager -from karapace.statsd import StatsClient -from typing import Annotated - - -class StatsDependencyManager: - STATS_CLIENT: StatsClient | None = None - - @classmethod - def get_stats(cls) -> StatsClient: - if not StatsDependencyManager.STATS_CLIENT: - StatsDependencyManager.STATS_CLIENT = StatsClient(config=ConfigDependencyManager.get_config()) - return StatsDependencyManager.STATS_CLIENT - - -StatsDep = Annotated[StatsClient, Depends(StatsDependencyManager.get_stats)] diff --git a/src/karapace/instrumentation/prometheus.py b/src/karapace/instrumentation/prometheus.py index 1336b4ab0..90d260057 100644 --- a/src/karapace/instrumentation/prometheus.py +++ b/src/karapace/instrumentation/prometheus.py @@ -22,6 +22,7 @@ class PrometheusInstrumentation: METRICS_ENDPOINT_PATH: Final[str] = "/metrics" + CONTENT_TYPE_LATEST: Final[str] = "text/plain; version=0.0.4; charset=utf-8" START_TIME_REQUEST_KEY: Final[str] = 
"start_time" registry: Final[CollectorRegistry] = CollectorRegistry() diff --git a/src/karapace/kafka_rest_apis/consumer_manager.py b/src/karapace/kafka_rest_apis/consumer_manager.py index af94b5a82..277a0aca1 100644 --- a/src/karapace/kafka_rest_apis/consumer_manager.py +++ b/src/karapace/kafka_rest_apis/consumer_manager.py @@ -477,7 +477,7 @@ async def fetch(self, internal_name: tuple[str, str], content_type: str, formats timeout = ( int(query_params["timeout"]) if "timeout" in query_params - else consumer_config.consumer.request.timeout.ms + else consumer_config["consumer.request.timeout.ms"] ) # we get to be more in line with the confluent proxy by doing a bunch of fetches each time and # respecting the max fetch request size diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 80e36cd43..8090216e9 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -6,7 +6,7 @@ from dependency_injector.wiring import inject, Provide from karapace import version as karapace_version -from karapace.config import Config, KARAPACE_BASE_CONFIG_YAML_PATH +from karapace.config import Config from karapace.container import KarapaceContainer from karapace.instrumentation.prometheus import PrometheusInstrumentation from karapace.kafka_rest_apis import KafkaRest @@ -42,6 +42,5 @@ def main( if __name__ == "__main__": container = KarapaceContainer() - container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) container.wire(modules=[__name__]) sys.exit(main()) diff --git a/src/karapace/protobuf/io.py b/src/karapace/protobuf/io.py index 36c76e491..89cdd26f1 100644 --- a/src/karapace/protobuf/io.py +++ b/src/karapace/protobuf/io.py @@ -97,7 +97,7 @@ def get_protobuf_class_instance( class_name: str, cfg: Config, ) -> _ProtobufModel: - directory = Path(cfg["protobuf_runtime_directory"]) + directory = Path(cfg.protobuf_runtime_directory) deps_list = crawl_dependencies(schema) root_class_name = "" for 
value in deps_list.values(): diff --git a/src/karapace/routers/compatibility_router.py b/src/karapace/routers/compatibility_router.py deleted file mode 100644 index 0db406d2a..000000000 --- a/src/karapace/routers/compatibility_router.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter -from karapace.auth.auth import Operation -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep, CurrentUserDep -from karapace.dependencies.controller_dependency import KarapaceSchemaRegistryControllerDep -from karapace.routers.errors import unauthorized -from karapace.routers.requests import CompatibilityCheckResponse, SchemaRequest -from karapace.typing import Subject - -compatibility_router = APIRouter( - prefix="/compatibility", - tags=["compatibility"], - responses={404: {"description": "Not found"}}, -) - - -@compatibility_router.post("/subjects/{subject}/versions/{version}", response_model_exclude_none=True) -async def compatibility_post( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - version: str, # TODO support actual Version object - schema_request: SchemaRequest, -) -> CompatibilityCheckResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.compatibility_check(subject=subject, schema_request=schema_request, version=version) diff --git a/src/karapace/routers/config_router.py b/src/karapace/routers/config_router.py deleted file mode 100644 index a83f24f60..000000000 --- a/src/karapace/routers/config_router.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter, Request -from karapace.auth.auth import Operation -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep, CurrentUserDep -from 
karapace.dependencies.controller_dependency import KarapaceSchemaRegistryControllerDep -from karapace.dependencies.forward_client_dependency import ForwardClientDep -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDep -from karapace.routers.errors import no_primary_url_error, unauthorized -from karapace.routers.requests import CompatibilityLevelResponse, CompatibilityRequest, CompatibilityResponse -from karapace.typing import Subject - -config_router = APIRouter( - prefix="/config", - tags=["config"], - responses={404: {"description": "Not found"}}, -) - - -@config_router.get("") -async def config_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, -) -> CompatibilityLevelResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Read, "Config:"): - raise unauthorized() - - return await controller.config_get() - - -@config_router.put("") -async def config_put( - request: Request, - controller: KarapaceSchemaRegistryControllerDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - compatibility_level_request: CompatibilityRequest, -) -> CompatibilityResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Write, "Config:"): - raise unauthorized() - - i_am_primary, primary_url = await schema_registry.get_master() - if i_am_primary: - return await controller.config_set(compatibility_level_request=compatibility_level_request) - elif not primary_url: - raise no_primary_url_error() - else: - return await forward_client.forward_request_remote(request=request, primary_url=primary_url) - - -@config_router.get("/{subject}") -async def config_get_subject( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - defaultToGlobal: bool = False, -) -> 
CompatibilityLevelResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.config_subject_get(subject=subject, default_to_global=defaultToGlobal) - - -@config_router.put("/{subject}") -async def config_set_subject( - request: Request, - controller: KarapaceSchemaRegistryControllerDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - compatibility_level_request: CompatibilityRequest, -) -> CompatibilityResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): - raise unauthorized() - - i_am_primary, primary_url = await schema_registry.get_master() - if i_am_primary: - return await controller.config_subject_set(subject=subject, compatibility_level_request=compatibility_level_request) - elif not primary_url: - raise no_primary_url_error() - else: - return await forward_client.forward_request_remote(request=request, primary_url=primary_url) - - -@config_router.delete("/{subject}") -async def config_delete_subject( - request: Request, - controller: KarapaceSchemaRegistryControllerDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, -) -> CompatibilityResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): - raise unauthorized() - - i_am_primary, primary_url = await schema_registry.get_master() - if i_am_primary: - return await controller.config_subject_delete(subject=subject) - elif not primary_url: - raise no_primary_url_error() - else: - return await forward_client.forward_request_remote(request=request, primary_url=primary_url) diff --git a/src/karapace/routers/errors.py b/src/karapace/routers/errors.py deleted file mode 100644 
index a16c9797a..000000000 --- a/src/karapace/routers/errors.py +++ /dev/null @@ -1,56 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from enum import Enum, unique -from fastapi import HTTPException, status -from fastapi.exceptions import RequestValidationError - - -@unique -class SchemaErrorCodes(Enum): - HTTP_BAD_REQUEST = status.HTTP_400_BAD_REQUEST - HTTP_NOT_FOUND = status.HTTP_404_NOT_FOUND - HTTP_CONFLICT = status.HTTP_409_CONFLICT - HTTP_UNPROCESSABLE_ENTITY = status.HTTP_422_UNPROCESSABLE_ENTITY - HTTP_INTERNAL_SERVER_ERROR = status.HTTP_500_INTERNAL_SERVER_ERROR - SUBJECT_NOT_FOUND = 40401 - VERSION_NOT_FOUND = 40402 - SCHEMA_NOT_FOUND = 40403 - SUBJECT_SOFT_DELETED = 40404 - SUBJECT_NOT_SOFT_DELETED = 40405 - SCHEMAVERSION_SOFT_DELETED = 40406 - SCHEMAVERSION_NOT_SOFT_DELETED = 40407 - SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_ERROR_CODE = 40408 - INVALID_VERSION_ID = 42202 - INVALID_COMPATIBILITY_LEVEL = 42203 - INVALID_SCHEMA = 42201 - INVALID_SUBJECT = 42208 - SCHEMA_TOO_LARGE_ERROR_CODE = 42209 - REFERENCES_SUPPORT_NOT_IMPLEMENTED = 44302 - REFERENCE_EXISTS = 42206 - NO_MASTER_ERROR = 50003 - - -class KarapaceValidationError(RequestValidationError): - def __init__(self, error_code: int, error: str): - super().__init__(errors=[], body=error) - self.error_code = error_code - - -def no_primary_url_error() -> HTTPException: - return HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "error_code": SchemaErrorCodes.NO_MASTER_ERROR, - "message": "Error while forwarding the request to the master.", - }, - ) - - -def unauthorized() -> HTTPException: - return HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail={"message": "Forbidden"}, - ) diff --git a/src/karapace/routers/health_router.py b/src/karapace/routers/health_router.py deleted file mode 100644 index 950e08cfc..000000000 --- a/src/karapace/routers/health_router.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd 
-See LICENSE for details -""" - -from fastapi import APIRouter, HTTPException, status -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDep -from pydantic import BaseModel - - -class HealthStatus(BaseModel): - schema_registry_ready: bool - schema_registry_startup_time_sec: float - schema_registry_reader_current_offset: int - schema_registry_reader_highest_offset: int - schema_registry_is_primary: bool | None - schema_registry_is_primary_eligible: bool - schema_registry_primary_url: str | None - schema_registry_coordinator_running: bool - schema_registry_coordinator_generation_id: int - - -class HealthCheck(BaseModel): - status: HealthStatus - healthy: bool - - -health_router = APIRouter( - prefix="/_health", - tags=["health"], - responses={404: {"description": "Not found"}}, -) - - -@health_router.get("") -async def health( - schema_registry: SchemaRegistryDep, -) -> HealthCheck: - starttime = 0.0 - if schema_registry.schema_reader.ready(): - starttime = schema_registry.schema_reader.last_check - schema_registry.schema_reader.start_time - - cs = schema_registry.mc.get_coordinator_status() - - health_status = HealthStatus( - schema_registry_ready=schema_registry.schema_reader.ready(), - schema_registry_startup_time_sec=starttime, - schema_registry_reader_current_offset=schema_registry.schema_reader.offset, - schema_registry_reader_highest_offset=schema_registry.schema_reader.highest_offset(), - schema_registry_is_primary=cs.is_primary, - schema_registry_is_primary_eligible=cs.is_primary_eligible, - schema_registry_primary_url=cs.primary_url, - schema_registry_coordinator_running=cs.is_running, - schema_registry_coordinator_generation_id=cs.group_generation_id, - ) - # if self._auth is not None: - # resp["schema_registry_authfile_timestamp"] = self._auth.authfile_last_modified - - if not await schema_registry.schema_reader.is_healthy(): - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - ) - - return 
HealthCheck(status=health_status, healthy=True) diff --git a/src/karapace/routers/mode_router.py b/src/karapace/routers/mode_router.py deleted file mode 100644 index d8c98363a..000000000 --- a/src/karapace/routers/mode_router.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter -from karapace.auth.auth import Operation -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep, CurrentUserDep -from karapace.dependencies.controller_dependency import KarapaceSchemaRegistryControllerDep -from karapace.routers.errors import unauthorized -from karapace.typing import Subject - -mode_router = APIRouter( - prefix="/mode", - tags=["mode"], - responses={404: {"description": "Not found"}}, -) - - -@mode_router.get("") -async def mode_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, -): - if authorizer and not authorizer.check_authorization(user, Operation.Read, "Config:"): - raise unauthorized() - - return await controller.get_global_mode() - - -@mode_router.get("/{subject}") -async def mode_get_subject( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, -): - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.get_subject_mode(subject=subject) diff --git a/src/karapace/routers/requests.py b/src/karapace/routers/requests.py deleted file mode 100644 index 8400f629d..000000000 --- a/src/karapace/routers/requests.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from karapace.routers.errors import KarapaceValidationError -from karapace.schema_type import SchemaType -from karapace.typing import Subject -from pydantic import BaseModel, Field, validator -from typing import Any - - 
-class SchemaReference(BaseModel): - name: str - subject: Subject - version: int - - -class SchemaRequest(BaseModel): - schema_str: str = Field(alias="schema") - schema_type: SchemaType = Field(alias="schemaType", default=SchemaType.AVRO) - references: list[SchemaReference] | None = None - metadata: Any | None - ruleSet: Any | None - - class Config: - extra = "forbid" - - @validator("schema_str") - def validate_schema(cls, schema_str: str) -> str: - if not schema_str and not schema_str.strip(): - raise KarapaceValidationError( - error_code=42201, - error="Empty schema", - ) - return schema_str - - -class SchemaResponse(BaseModel): - subject: Subject - version: int - schema_id: int = Field(alias="id") - schema_str: str = Field(alias="schema") - schema_type: SchemaType | None = Field(alias="schemaType", default=None) - - -class SchemasResponse(BaseModel): - schema_str: str = Field(alias="schema") - subjects: list[Subject] | None = None - schema_type: SchemaType | None = Field(alias="schemaType", default=None) - references: list[Any] | None = None # TODO: typing - maxId: int | None = None - - -class SchemaListingItem(BaseModel): - subject: Subject - schema_str: str = Field(alias="schema") - version: int - schema_id: int = Field(alias="id") - schema_type: SchemaType | None = Field(alias="schemaType", default=None) - references: list[Any] | None - - -class SchemaIdResponse(BaseModel): - schema_id: int = Field(alias="id") - - -class CompatibilityRequest(BaseModel): - compatibility: str - - -class CompatibilityResponse(BaseModel): - compatibility: str - - -class CompatibilityLevelResponse(BaseModel): - compatibility_level: str = Field(alias="compatibilityLevel") - - -class CompatibilityCheckResponse(BaseModel): - is_compatible: bool - messages: list[str] | None = None - - -class ModeResponse(BaseModel): - mode: str - - -class SubjectVersion(BaseModel): - subject: Subject - version: int - - -class SubjectSchemaVersionResponse(BaseModel): - subject: Subject - version: int - 
schema_id: int = Field(alias="id") - schema_str: str = Field(alias="schema") - references: list[Any] | None = None - schema_type: SchemaType | None = Field(alias="schemaType", default=None) - compatibility: str | None = None diff --git a/src/karapace/routers/root_router.py b/src/karapace/routers/root_router.py deleted file mode 100644 index 6bec6cb9c..000000000 --- a/src/karapace/routers/root_router.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter - -root_router = APIRouter( - tags=["root"], - responses={404: {"description": "Not found"}}, -) - - -@root_router.get("/") -async def root() -> dict: - return {} diff --git a/src/karapace/routers/schemas_router.py b/src/karapace/routers/schemas_router.py deleted file mode 100644 index c06cd4a48..000000000 --- a/src/karapace/routers/schemas_router.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep, CurrentUserDep -from karapace.dependencies.controller_dependency import KarapaceSchemaRegistryControllerDep -from karapace.routers.requests import SchemaListingItem, SchemasResponse, SubjectVersion - -schemas_router = APIRouter( - prefix="/schemas", - tags=["schemas"], - responses={404: {"description": "Not found"}}, -) - - -# TODO is this needed? Is this actually the ids/schema/id/schema?? 
-@schemas_router.get("") -async def schemas_get_list( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - deleted: bool = False, - latestOnly: bool = False, -) -> list[SchemaListingItem]: - return await controller.schemas_list( - deleted=deleted, - latest_only=latestOnly, - user=user, - authorizer=authorizer, - ) - - -@schemas_router.get("/ids/{schema_id}", response_model_exclude_none=True) -async def schemas_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - schema_id: str, # TODO: type to actual type - includeSubjects: bool = False, # TODO: include subjects? - fetchMaxId: bool = False, # TODO: fetch max id? - format: str = "", -) -> SchemasResponse: - return await controller.schemas_get( - schema_id=schema_id, - include_subjects=includeSubjects, - fetch_max_id=fetchMaxId, - format_serialized=format, - user=user, - authorizer=authorizer, - ) - - -# @schemas_router.get("/ids/{schema_id}/schema") -# async def schemas_get_only_id( -# controller: KarapaceSchemaRegistryControllerDep, -# ) -> SchemasResponse: -# # TODO retrieve by id only schema -# return await controller.schemas_get() - - -@schemas_router.get("/ids/{schema_id}/versions") -async def schemas_get_versions( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - schema_id: str, - deleted: bool = False, -) -> list[SubjectVersion]: - return await controller.schemas_get_versions( - schema_id=schema_id, - deleted=deleted, - user=user, - authorizer=authorizer, - ) - - -@schemas_router.get("/types") -async def schemas_get_subjects_list( - controller: KarapaceSchemaRegistryControllerDep, -) -> list[str]: - return await controller.schemas_types() diff --git a/src/karapace/routers/subjects_router.py b/src/karapace/routers/subjects_router.py deleted file mode 100644 index 9bde67743..000000000 --- 
a/src/karapace/routers/subjects_router.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -Copyright (c) 2024 Aiven Ltd -See LICENSE for details -""" - -from fastapi import APIRouter, Request -from karapace.auth.auth import Operation -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep, CurrentUserDep -from karapace.dependencies.controller_dependency import KarapaceSchemaRegistryControllerDep -from karapace.dependencies.forward_client_dependency import ForwardClientDep -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDep -from karapace.routers.errors import no_primary_url_error, unauthorized -from karapace.routers.requests import SchemaIdResponse, SchemaRequest, SchemaResponse, SubjectSchemaVersionResponse -from karapace.typing import Subject - -import logging - -LOG = logging.getLogger(__name__) - - -subjects_router = APIRouter( - prefix="/subjects", - tags=["subjects"], - responses={404: {"description": "Not found"}}, -) - - -@subjects_router.get("") -async def subjects_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - deleted: bool = False, -) -> list[str]: - return await controller.subjects_list( - deleted=deleted, - user=user, - authorizer=authorizer, - ) - - -@subjects_router.post("/{subject}", response_model_exclude_none=True) -async def subjects_subject_post( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - schema_request: SchemaRequest, - deleted: bool = False, - normalize: bool = False, -) -> SchemaResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.subjects_schema_post( - subject=subject, - schema_request=schema_request, - deleted=deleted, - normalize=normalize, - ) - - -@subjects_router.delete("/{subject}") -async def subjects_subject_delete( - 
request: Request, - controller: KarapaceSchemaRegistryControllerDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - permanent: bool = False, -) -> list[int]: - if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): - raise unauthorized() - - i_am_primary, primary_url = await schema_registry.get_master() - if i_am_primary: - return await controller.subject_delete(subject=subject, permanent=permanent) - elif not primary_url: - raise no_primary_url_error() - else: - return await forward_client.forward_request_remote(request=request, primary_url=primary_url) - - -@subjects_router.post("/{subject}/versions") -async def subjects_subject_versions_post( - request: Request, - controller: KarapaceSchemaRegistryControllerDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - schema_request: SchemaRequest, - normalize: bool = False, -) -> SchemaIdResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): - raise unauthorized() - - # TODO: split the functionality so primary error and forwarding can be handled here - # and local/primary write is in controller. 
- return await controller.subject_post( - subject=subject, - schema_request=schema_request, - normalize=normalize, - forward_client=forward_client, - request=request, - ) - - -@subjects_router.get("/{subject}/versions") -async def subjects_subject_versions_list( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - deleted: bool = False, -) -> list[int]: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.subject_versions_list(subject=subject, deleted=deleted) - - -@subjects_router.get("/{subject}/versions/{version}", response_model_exclude_none=True) -async def subjects_subject_version_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - version: str, - deleted: bool = False, -) -> SubjectSchemaVersionResponse: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.subject_version_get(subject=subject, version=version, deleted=deleted) - - -@subjects_router.delete("/{subject}/versions/{version}") -async def subjects_subject_version_delete( - request: Request, - controller: KarapaceSchemaRegistryControllerDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - version: str, - permanent: bool = False, -) -> int: - if authorizer and not authorizer.check_authorization(user, Operation.Write, f"Subject:{subject}"): - raise unauthorized() - - i_am_primary, primary_url = await schema_registry.get_master() - if i_am_primary: - return await controller.subject_version_delete(subject=subject, version=version, permanent=permanent) - elif not primary_url: - raise no_primary_url_error() - else: - 
return await forward_client.forward_request_remote(request=request, primary_url=primary_url) - - -@subjects_router.get("/{subject}/versions/{version}/schema") -async def subjects_subject_version_schema_get( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - version: str, -) -> dict: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.subject_version_schema_get(subject=subject, version=version) - - -@subjects_router.get("/{subject}/versions/{version}/referencedby") -async def subjects_subject_version_referenced_by( - controller: KarapaceSchemaRegistryControllerDep, - user: CurrentUserDep, - authorizer: AuthenticatorAndAuthorizerDep, - subject: Subject, - version: str, -) -> list[int]: - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - raise unauthorized() - - return await controller.subject_version_referencedby_get(subject=subject, version=version) diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py deleted file mode 100644 index 44d8bd128..000000000 --- a/src/karapace/schema_registry_apis.py +++ /dev/null @@ -1,982 +0,0 @@ -""" -Copyright (c) 2023 Aiven Ltd -See LICENSE for details -""" -from __future__ import annotations - -from avro.errors import SchemaParseException -from enum import Enum, unique -from fastapi import HTTPException, Request, Response, status -from karapace.auth.auth import Operation, User -from karapace.auth.dependencies import AuthenticatorAndAuthorizerDep -from karapace.compatibility import CompatibilityModes -from karapace.compatibility.jsonschema.checks import is_incompatible -from karapace.compatibility.schema_compatibility import SchemaCompatibility -from karapace.config import Config -from karapace.errors import ( - IncompatibleSchema, - InvalidReferences, - 
InvalidSchema, - InvalidSchemaType, - InvalidVersion, - ReferenceExistsException, - SchemasNotFoundException, - SchemaTooLargeException, - SchemaVersionNotSoftDeletedException, - SchemaVersionSoftDeletedException, - SubjectNotFoundException, - SubjectNotSoftDeletedException, - SubjectSoftDeletedException, - VersionNotFoundException, -) -from karapace.forward_client import ForwardClient -from karapace.protobuf.exception import ProtobufUnresolvedDependencyException -from karapace.routers.errors import no_primary_url_error -from karapace.routers.requests import ( - CompatibilityCheckResponse, - CompatibilityLevelResponse, - CompatibilityRequest, - CompatibilityResponse, - ModeResponse, - SchemaIdResponse, - SchemaListingItem, - SchemaRequest, - SchemaResponse, - SchemasResponse, - SubjectSchemaVersionResponse, - SubjectVersion, -) -from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema, Versioner -from karapace.schema_references import LatestVersionReference, Reference -from karapace.schema_registry import KarapaceSchemaRegistry -from karapace.statsd import StatsClient -from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version -from karapace.utils import JSONDecodeError -from typing import Any, cast - -import json -import logging -import time - -LOG = logging.getLogger(__name__) - - -# TODO Remove, already in router/errors -@unique -class SchemaErrorCodes(Enum): - HTTP_BAD_REQUEST = status.HTTP_400_BAD_REQUEST - HTTP_NOT_FOUND = status.HTTP_404_NOT_FOUND - HTTP_CONFLICT = status.HTTP_409_CONFLICT - HTTP_UNPROCESSABLE_ENTITY = status.HTTP_422_UNPROCESSABLE_ENTITY - HTTP_INTERNAL_SERVER_ERROR = status.HTTP_500_INTERNAL_SERVER_ERROR - SUBJECT_NOT_FOUND = 40401 - VERSION_NOT_FOUND = 40402 - SCHEMA_NOT_FOUND = 40403 - SUBJECT_SOFT_DELETED = 40404 - SUBJECT_NOT_SOFT_DELETED = 40405 - SCHEMAVERSION_SOFT_DELETED = 40406 - SCHEMAVERSION_NOT_SOFT_DELETED = 40407 - 
SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_ERROR_CODE = 40408 - INVALID_VERSION_ID = 42202 - INVALID_COMPATIBILITY_LEVEL = 42203 - INVALID_SCHEMA = 42201 - INVALID_SUBJECT = 42208 - SCHEMA_TOO_LARGE_ERROR_CODE = 42209 - REFERENCES_SUPPORT_NOT_IMPLEMENTED = 44302 - REFERENCE_EXISTS = 42206 - NO_MASTER_ERROR = 50003 - - -@unique -class SchemaErrorMessages(Enum): - SUBJECT_NOT_FOUND_FMT = "Subject '{subject}' not found." - INVALID_COMPATIBILITY_LEVEL = ( - "Invalid compatibility level. Valid values are none, backward, " - "forward, full, backward_transitive, forward_transitive, and " - "full_transitive" - ) - SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_FMT = ( - "Subject '{subject}' does not have subject-level compatibility configured" - ) - REFERENCES_SUPPORT_NOT_IMPLEMENTED = "Schema references are not supported for '{schema_type}' schema type" - - -class KarapaceSchemaRegistryController: - def __init__(self, config: Config, schema_registry: KarapaceSchemaRegistry, stats: StatsClient) -> None: - # super().__init__(config=config, not_ready_handler=self._forward_if_not_ready_to_serve) - - self.config = config - self._process_start_time = time.monotonic() - self.stats = stats - self.schema_registry = schema_registry - - def _add_schema_registry_routes(self) -> None: - pass - - def _subject_get(self, subject: Subject, include_deleted: bool = False) -> dict[Version, SchemaVersion]: - try: - schema_versions = self.schema_registry.subject_get(subject, include_deleted) - except SubjectNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - except SchemasNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": 
SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - return schema_versions - - def _invalid_version(self, version: str | int) -> HTTPException: - """Shall be called when InvalidVersion is raised""" - return HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_VERSION_ID.value, - "message": ( - f"The specified version '{version}' is not a valid version id. " - 'Allowed values are between [1, 2^31-1] and the string "latest"' - ), - }, - ) - - async def compatibility_check( - self, - *, - subject: Subject, - schema_request: SchemaRequest, - version: str, - ) -> CompatibilityCheckResponse: - """Check for schema compatibility""" - try: - compatibility_mode = self.schema_registry.get_compatibility_mode(subject=subject) - except ValueError as ex: - # Using INTERNAL_SERVER_ERROR because the subject and configuration - # should have been validated before. - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, - "message": str(ex), - }, - ) - - new_schema = self.get_new_schema(schema_request=schema_request) - old_schema = self.get_old_schema(subject, Versioner.V(version)) # , content_type) - if compatibility_mode.is_transitive(): - # Ignore the schema version provided in the rest api call (`version`) - # Instead check against all previous versions (including `version` if existing) - result = self.schema_registry.check_schema_compatibility(new_schema, subject) - else: - # Check against the schema version provided in the rest api call (`version`) - result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) - - if is_incompatible(result): - return CompatibilityCheckResponse(is_compatible=False, messages=list(result.messages)) - return CompatibilityCheckResponse(is_compatible=True) - - async def schemas_list( - self, - *, - deleted: bool, - 
latest_only: bool, - user: User | None, - authorizer: AuthenticatorAndAuthorizerDep | None, - ) -> list[SchemaListingItem]: - schemas = await self.schema_registry.schemas_list(include_deleted=deleted, latest_only=latest_only) - response_schemas: list[SchemaListingItem] = [] - for subject, schema_versions in schemas.items(): - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - continue - for schema_version in schema_versions: - references: list[Any] | None = None - if schema_version.references: - references = [r.to_dict() for r in schema_version.references] - response_schemas.append( - SchemaListingItem( - subject=schema_version.subject, - schema=schema_version.schema.schema_str, - version=schema_version.version.value, - id=schema_version.schema_id, - schemaType=schema_version.schema.schema_type, - references=references, - ) - ) - - return response_schemas - - async def schemas_get( - self, - *, - schema_id: str, - fetch_max_id: bool, - include_subjects: bool, - format_serialized: str, - user: User | None, - authorizer: AuthenticatorAndAuthorizerDep, - ) -> SchemasResponse: - try: - parsed_schema_id = SchemaId(int(schema_id)) - except ValueError: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.HTTP_NOT_FOUND.value, - "message": "HTTP 404 Not Found", - }, - ) - - def _has_subject_with_id() -> bool: - # Fast path - if authorizer is None or authorizer.check_authorization(user, Operation.Read, "Subject:*"): - return True - - subjects = self.schema_registry.database.subjects_for_schema(schema_id=parsed_schema_id) - resources = [f"Subject:{subject}" for subject in subjects] - return authorizer.check_authorization_any(user=user, operation=Operation.Read, resources=resources) - - if authorizer: - has_subject = _has_subject_with_id() - if not has_subject: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": 
SchemaErrorCodes.SCHEMA_NOT_FOUND.value, - "message": "Schema not found", - }, - ) - - schema = self.schema_registry.schemas_get(parsed_schema_id, fetch_max_id=fetch_max_id) - if not schema: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, - "message": "Schema not found", - }, - ) - - schema_str = schema.schema_str - if format_serialized and schema.schema_type == SchemaType.PROTOBUF: - parsed_schema = ParsedTypedSchema.parse(schema_type=schema.schema_type, schema_str=schema_str) - schema_str = parsed_schema.serialize() - - subjects: list[Subject] | None = None - schema_type: SchemaType | None = None - references: list[Any] | None = None # TODO: typing - maxId: int | None = None - - if include_subjects: - subjects = self.schema_registry.database.subjects_for_schema(parsed_schema_id) - if schema.schema_type is not SchemaType.AVRO: - schema_type = schema.schema_type - if schema.references: - references = [r.to_dict() for r in schema.references] - if fetch_max_id: - maxId = schema.max_id - - return SchemasResponse( - schema=schema_str, - subjects=subjects, - schemaType=schema_type, - references=references, - maxId=maxId, - ) - - async def schemas_get_versions( - self, - *, - schema_id: str, - deleted: bool, - user: User | None, - authorizer: AuthenticatorAndAuthorizerDep, - ) -> list[SubjectVersion]: - try: - schema_id_int = SchemaId(int(schema_id)) - except ValueError: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.HTTP_NOT_FOUND.value, - "message": "HTTP 404 Not Found", - }, - ) - - subject_versions = [] - for subject_version in self.schema_registry.get_subject_versions_for_schema(schema_id_int, include_deleted=deleted): - subject = subject_version["subject"] - if authorizer and not authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"): - continue - subject_versions.append( - # TODO correct typing - 
SubjectVersion( - subject=subject_version["subject"], - version=subject_version["version"].value, - ), - ) - return subject_versions - - async def schemas_types(self) -> list[str]: - return ["JSON", "AVRO", "PROTOBUF"] - - async def config_get(self) -> CompatibilityLevelResponse: - # Note: The format sent by the user differs from the return value, this - # is for compatibility reasons. - return CompatibilityLevelResponse(compatibilityLevel=self.schema_registry.schema_reader.config.compatibility) - - async def config_set( - self, - *, - compatibility_level_request: CompatibilityRequest, - ) -> CompatibilityResponse: - try: - compatibility_level = CompatibilityModes(compatibility_level_request.compatibility) - except (ValueError, KeyError): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_COMPATIBILITY_LEVEL.value, - "message": SchemaErrorMessages.INVALID_COMPATIBILITY_LEVEL.value, - }, - ) - - self.schema_registry.send_config_message(compatibility_level=compatibility_level, subject=None) - return CompatibilityResponse(compatibility=self.schema_registry.schema_reader.config.compatibility) - - async def config_subject_get( - self, - *, - subject: str, - default_to_global: bool, - ) -> CompatibilityLevelResponse: - # Config for a subject can exist without schemas so no need to check for their existence - assert self.schema_registry.schema_reader, "KarapaceSchemaRegistry not initialized. 
Missing call to _init" - if self.schema_registry.database.find_subject(subject=Subject(subject)) is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - - compatibility = self.schema_registry.database.get_subject_compatibility(subject=Subject(subject)) - if not compatibility and default_to_global: - compatibility = self.schema_registry.compatibility - if compatibility: - # Note: The format sent by the user differs from the return - # value, this is for compatibility reasons. - return CompatibilityLevelResponse(compatibilityLevel=compatibility) - - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_ERROR_CODE.value, - "message": SchemaErrorMessages.SUBJECT_LEVEL_COMPATIBILITY_NOT_CONFIGURED_FMT.value.format(subject=subject), - }, - ) - - async def config_subject_set( - self, - *, - subject: str, - compatibility_level_request: CompatibilityRequest, - ) -> CompatibilityResponse: - try: - compatibility_level = CompatibilityModes(compatibility_level_request.compatibility) - except (ValueError, KeyError): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_COMPATIBILITY_LEVEL.value, - "message": "Invalid compatibility level", - }, - ) - - self.schema_registry.send_config_message(compatibility_level=compatibility_level, subject=Subject(subject)) - return CompatibilityResponse(compatibility=compatibility_level.value) - - async def config_subject_delete( - self, - *, - subject: str, - ) -> CompatibilityResponse: - self.schema_registry.send_config_subject_delete_message(subject=Subject(subject)) - return CompatibilityResponse(compatibility=self.schema_registry.schema_reader.config.compatibility) - - async def subjects_list( - 
self, - deleted: bool, - user: User | None, - authorizer: AuthenticatorAndAuthorizerDep | None, - ) -> list[str]: - subjects = [str(subject) for subject in self.schema_registry.database.find_subjects(include_deleted=deleted)] - if authorizer: - subjects = list( - filter( - lambda subject: authorizer.check_authorization(user, Operation.Read, f"Subject:{subject}"), - subjects, - ) - ) - return subjects - - async def subject_delete( - self, - *, - subject: str, - permanent: bool, - ) -> list[int]: - try: - version_list = await self.schema_registry.subject_delete_local(subject=Subject(subject), permanent=permanent) - return [version.value for version in version_list] - except (SubjectNotFoundException, SchemasNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - except SubjectNotSoftDeletedException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_SOFT_DELETED.value, - "message": f"Subject '{subject}' was not deleted first before being permanently deleted", - }, - ) - except SubjectSoftDeletedException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_SOFT_DELETED.value, - "message": f"Subject '{subject}' was soft deleted.Set permanent=true to delete permanently", - }, - ) - - except ReferenceExistsException as arg: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.REFERENCE_EXISTS.value, - "message": ( - f"One or more references exist to the schema " - f"{{magic=1,keytype=SCHEMA,subject={subject},version={arg.version}}}." 
- ), - }, - ) - - async def subject_version_get( - self, - subject: str, - version: str, - deleted: bool, - ) -> SubjectSchemaVersionResponse: - try: - subject_data = self.schema_registry.subject_version_get( - Subject(subject), Versioner.V(version), include_deleted=deleted - ) - return SubjectSchemaVersionResponse( - subject=subject_data["subject"], - version=subject_data["version"], - id=subject_data["id"], - schema=subject_data["schema"], - references=subject_data.get("references", None), - schemaType=subject_data.get("schemaType", None), - compatibility=None, # Do not return compatibility from this endpoint. - ) - except (SubjectNotFoundException, SchemasNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - except VersionNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - ) - except InvalidVersion: - raise self._invalid_version(version) - - async def subject_version_delete( - self, - *, - subject: str, - version: str, - permanent: bool, - ) -> int: - try: - resolved_version = await self.schema_registry.subject_version_delete_local( - Subject(subject), Versioner.V(version), permanent - ) - return resolved_version.value - except (SubjectNotFoundException, SchemasNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - except VersionNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, 
- ) - except SchemaVersionSoftDeletedException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SCHEMAVERSION_SOFT_DELETED.value, - "message": ( - f"Subject '{subject}' Version {version} was soft deleted. " - "Set permanent=true to delete permanently" - ), - }, - ) - except SchemaVersionNotSoftDeletedException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SCHEMAVERSION_NOT_SOFT_DELETED.value, - "message": ( - f"Subject '{subject}' Version {version} was not deleted " "first before being permanently deleted" - ), - }, - ) - except ReferenceExistsException as arg: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.REFERENCE_EXISTS.value, - "message": ( - f"One or more references exist to the schema " - f"{{magic=1,keytype=SCHEMA,subject={subject},version={arg.version}}}." - ), - }, - ) - except InvalidVersion: - self._invalid_version(version) - - async def subject_version_schema_get( - self, - *, - subject: str, - version: str, - ) -> dict: - try: - subject_data = self.schema_registry.subject_version_get(Subject(subject), Versioner.V(version)) - return json.loads(cast(str, subject_data["schema"])) # TODO typing - except InvalidVersion: - raise self._invalid_version(version) - except VersionNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - ) - except (SchemasNotFoundException, SubjectNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - - async def subject_version_referencedby_get( - self, - *, - subject: str, - version, - ) -> 
list[int]: - referenced_by: list[int] = [] - try: - referenced_by = await self.schema_registry.subject_version_referencedby_get( - Subject(subject), Versioner.V(version) - ) - except (SubjectNotFoundException, SchemasNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - except VersionNotFoundException: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - ) - except InvalidVersion: - raise self._invalid_version(version) - - return referenced_by - - async def subject_versions_list( - self, - *, - subject: str, - deleted: bool, - ) -> list[int]: - try: - schema_versions = self.schema_registry.subject_get(Subject(subject), include_deleted=deleted) - version_list = [version.value for version in schema_versions] - return version_list - except (SubjectNotFoundException, SchemasNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - - def _validate_schema_type(self, data: JsonData) -> SchemaType: - # TODO: simplify the calling code, this functionality should not be required - # for old schemas. 
- if not isinstance(data, dict): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={ - "error_code": SchemaErrorCodes.HTTP_BAD_REQUEST.value, - "message": "Malformed request", - }, - ) - schema_type_unparsed = data.get("schemaType", SchemaType.AVRO.value) - try: - schema_type = SchemaType(schema_type_unparsed) - except ValueError: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, - "message": f"Invalid schemaType {schema_type_unparsed}", - }, - ) - return schema_type - - def _validate_references( - self, - schema_request: SchemaRequest, - ) -> list[Reference | LatestVersionReference] | None: - references = schema_request.references - # Allow passing `null` as value for compatibility - if references is None: - return None - if references and schema_request.schema_type != SchemaType.PROTOBUF: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value, - "message": SchemaErrorMessages.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value.format( - schema_type=schema_request.schema_type.value - ), - }, - ) - - validated_references = [] - for reference in references: - version = Versioner.V(reference.version) - if version.is_latest: - validated_references.append( - LatestVersionReference( - name=reference.name, - subject=Subject(reference.subject), - ) - ) - else: - validated_references.append( - Reference( - name=reference.name, - subject=Subject(reference.subject), - version=version, - ) - ) - if validated_references: - return validated_references - return None - - async def subjects_schema_post( - self, - *, - subject: Subject, - schema_request: SchemaRequest, - deleted: bool, - normalize: bool, - ) -> SchemaResponse: - try: - subject_data = self._subject_get(subject, include_deleted=deleted) - except (SchemasNotFoundException, SubjectNotFoundException): - 
raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - references = None - new_schema_dependencies = None - references = self._validate_references(schema_request) - references, new_schema_dependencies = self.schema_registry.resolve_references(references) - - new_schema: ParsedTypedSchema | None = None - try: - # When checking if schema is already registered, allow unvalidated schema in as - # there might be stored schemas that are non-compliant from the past. - new_schema = ParsedTypedSchema.parse( - schema_type=schema_request.schema_type, - schema_str=schema_request.schema_str, - references=references, - dependencies=new_schema_dependencies, - normalize=normalize, - use_protobuf_formatter=self.config.use_protobuf_formatter, - ) - except InvalidSchema: - LOG.warning("Invalid schema: %r", schema_request.schema_str) - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Error while looking up schema under subject {subject}", - }, - ) - except InvalidReferences: - human_error = "Provided references is not valid" - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_request.schema_type} references. 
Error: {human_error}", - }, - ) - - # Match schemas based on version from latest to oldest - for schema_version in sorted(subject_data.values(), key=lambda item: item.version, reverse=True): - other_references, other_dependencies = self.schema_registry.resolve_references(schema_version.references) - try: - parsed_typed_schema = ParsedTypedSchema.parse( - schema_version.schema.schema_type, - schema_version.schema.schema_str, - references=other_references, - dependencies=other_dependencies, - normalize=normalize, - ) - except InvalidSchema as e: - failed_schema_id = schema_version.schema_id - LOG.exception("Existing schema failed to parse. Id: %s", failed_schema_id) - self.stats.unexpected_exception( - ex=e, where="Matching existing schemas to posted. Failed schema id: {failed_schema_id}" - ) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, - "message": f"Error while looking up schema under subject {subject}", - }, - ) - - if schema_request.schema_type is SchemaType.JSONSCHEMA: - schema_valid = parsed_typed_schema.to_dict() == new_schema.to_dict() - else: - schema_valid = new_schema.match(parsed_typed_schema) - if parsed_typed_schema.schema_type == new_schema.schema_type and schema_valid: - schema_type: SchemaType | None = None - if schema_request.schema_type is not SchemaType.AVRO: - schema_type = schema_request.schema_type - return SchemaResponse( - subject=subject, - version=schema_version.version.value, - id=schema_version.schema_id, - schema=parsed_typed_schema.schema_str, - schemaType=schema_type, - ) - else: - LOG.debug("Schema %r did not match %r", schema_version, parsed_typed_schema) - - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, - "message": "Schema not found", - }, - ) - - async def subject_post( - self, - *, - subject: str, - schema_request: SchemaRequest, - 
normalize: bool, - forward_client: ForwardClient, - request: Request, - ) -> SchemaIdResponse | Response: - LOG.debug("POST with subject: %r, request: %r", subject, schema_request) - - references = self._validate_references(schema_request=schema_request) - - try: - references, resolved_dependencies = self.schema_registry.resolve_references(references) - new_schema = ValidatedTypedSchema.parse( - schema_type=schema_request.schema_type, - schema_str=schema_request.schema_str, - references=references, - dependencies=resolved_dependencies, - normalize=normalize, - use_protobuf_formatter=self.config.use_protobuf_formatter, - ) - except (InvalidReferences, InvalidSchema, InvalidSchemaType) as e: - LOG.warning("Invalid schema: %r", schema_request.schema_str, exc_info=True) - if isinstance(e.__cause__, (SchemaParseException, JSONDecodeError, ProtobufUnresolvedDependencyException)): - human_error = f"{e.__cause__.args[0]}" # pylint: disable=no-member - else: - from_body_schema_str = schema_request.schema_str - human_error = ( - f"Invalid schema {from_body_schema_str} with refs {references} of type {schema_request.schema_type}" - ) - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_request.schema_type.value} schema. 
Error: {human_error}", - }, - ) - - schema_id = self.get_schema_id_if_exists(subject=Subject(subject), schema=new_schema, include_deleted=False) - if schema_id is not None: - return SchemaIdResponse(id=schema_id) - - i_am_primary, primary_url = await self.schema_registry.get_master() - if i_am_primary: - try: - schema_id = await self.schema_registry.write_new_schema_local(Subject(subject), new_schema, references) - return SchemaIdResponse(id=schema_id) - except InvalidSchema as ex: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_request.schema_type.value} schema. Error: {str(ex)}", - }, - ) - except IncompatibleSchema as ex: - raise HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail={ - "error_code": SchemaErrorCodes.HTTP_CONFLICT.value, - "message": str(ex), - }, - ) - except SchemaTooLargeException: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.SCHEMA_TOO_LARGE_ERROR_CODE.value, - "message": "Schema is too large", - }, - ) - except Exception as xx: - raise xx - - elif not primary_url: - raise no_primary_url_error() - else: - return await forward_client.forward_request_remote(request=request, primary_url=primary_url) - - async def get_global_mode(self) -> ModeResponse: - return ModeResponse(mode=str(self.schema_registry.get_global_mode())) - - async def get_subject_mode( - self, - *, - subject: str, - ) -> ModeResponse: - if self.schema_registry.database.find_subject(subject=Subject(subject)) is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.SUBJECT_NOT_FOUND.value, - "message": SchemaErrorMessages.SUBJECT_NOT_FOUND_FMT.value.format(subject=subject), - }, - ) - return ModeResponse(mode=str(self.schema_registry.get_global_mode())) - - def get_schema_id_if_exists(self, *, subject: Subject, 
schema: TypedSchema, include_deleted: bool) -> SchemaId | None: - schema_id = self.schema_registry.database.get_schema_id_if_exists( - subject=subject, schema=schema, include_deleted=include_deleted - ) - return schema_id - - def get_new_schema(self, schema_request: SchemaRequest) -> ValidatedTypedSchema: - references = self._validate_references(schema_request=schema_request) - try: - references, new_schema_dependencies = self.schema_registry.resolve_references(references) - new_schema_dependencies = {} - return ValidatedTypedSchema.parse( - schema_type=schema_request.schema_type, - schema_str=schema_request.schema_str, - references=references, - dependencies=new_schema_dependencies, - use_protobuf_formatter=self.config.use_protobuf_formatter, - ) - except InvalidSchema: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_request.schema_type} schema", - }, - ) - - def get_old_schema(self, subject: Subject, version: Version) -> ParsedTypedSchema: - old: JsonObject | None = None - try: - old = self.schema_registry.subject_version_get(subject=subject, version=version) - except InvalidVersion: - self._invalid_version(version.value) - except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - ) - assert old is not None - old_schema_type = self._validate_schema_type(data=old) - try: - old_references = old.get("references", None) - old_dependencies = None - if old_references: - old_references, old_dependencies = self.schema_registry.resolve_references(old_references) - old_schema = ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) - return old_schema - except InvalidSchema: - raise HTTPException( - 
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Found an invalid {old_schema_type} schema registered", - }, - ) diff --git a/src/karapace/typing.py b/src/karapace/typing.py index 753b234aa..a205ae9de 100644 --- a/src/karapace/typing.py +++ b/src/karapace/typing.py @@ -8,6 +8,7 @@ from collections.abc import Mapping, Sequence from enum import Enum, unique from karapace.errors import InvalidVersion +from pydantic import ValidationInfo from typing import Any, ClassVar, NewType, Union from typing_extensions import TypeAlias @@ -35,11 +36,13 @@ class Subject(str): @classmethod + # TODO[pydantic]: We couldn't refactor `__get_validators__`, please create the `__get_pydantic_core_schema__` manually. + # Check https://docs.pydantic.dev/latest/migration/#defining-custom-types for more information. def __get_validators__(cls): yield cls.validate @classmethod - def validate(cls, subject_str: str) -> str: + def validate(cls, subject_str: str, _: ValidationInfo) -> str: """Subject may not contain control characters.""" if bool([c for c in subject_str if (ord(c) <= 31 or (ord(c) >= 127 and ord(c) <= 159))]): raise ValueError(f"The specified subject '{subject_str}' is not a valid.") diff --git a/src/schema_registry/__main__.py b/src/schema_registry/__main__.py index 0663bf774..7ff513584 100644 --- a/src/schema_registry/__main__.py +++ b/src/schema_registry/__main__.py @@ -2,7 +2,6 @@ Copyright (c) 2024 Aiven Ltd See LICENSE for details """ -from karapace.config import KARAPACE_BASE_CONFIG_YAML_PATH from karapace.container import KarapaceContainer from schema_registry.container import SchemaRegistryContainer from schema_registry.factory import create_karapace_application, karapace_schema_registry_lifespan @@ -11,6 +10,7 @@ import schema_registry.routers.compatibility import schema_registry.routers.config import schema_registry.routers.health +import schema_registry.routers.master_availability import 
schema_registry.routers.metrics import schema_registry.routers.mode import schema_registry.routers.schemas @@ -21,7 +21,6 @@ if __name__ == "__main__": container = KarapaceContainer() - container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) container.wire( modules=[ __name__, @@ -42,6 +41,7 @@ schema_registry.routers.config, schema_registry.routers.compatibility, schema_registry.routers.mode, + schema_registry.routers.master_availability, ] ) diff --git a/src/schema_registry/routers/health.py b/src/schema_registry/routers/health.py index df3a8822f..b02d2f760 100644 --- a/src/schema_registry/routers/health.py +++ b/src/schema_registry/routers/health.py @@ -15,9 +15,9 @@ class HealthStatus(BaseModel): schema_registry_startup_time_sec: float schema_registry_reader_current_offset: int schema_registry_reader_highest_offset: int - schema_registry_is_primary: bool | None + schema_registry_is_primary: bool | None = None schema_registry_is_primary_eligible: bool - schema_registry_primary_url: str | None + schema_registry_primary_url: str | None = None schema_registry_coordinator_running: bool schema_registry_coordinator_generation_id: int @@ -40,13 +40,13 @@ async def health( schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), ) -> HealthCheck: starttime = 0.0 - if schema_registry.schema_reader.ready: + if schema_registry.schema_reader.ready(): starttime = schema_registry.schema_reader.last_check - schema_registry.schema_reader.start_time cs = schema_registry.mc.get_coordinator_status() health_status = HealthStatus( - schema_registry_ready=schema_registry.schema_reader.ready, + schema_registry_ready=schema_registry.schema_reader.ready(), schema_registry_startup_time_sec=starttime, schema_registry_reader_current_offset=schema_registry.schema_reader.offset, schema_registry_reader_highest_offset=schema_registry.schema_reader.highest_offset(), diff --git 
a/src/karapace/routers/master_available_router.py b/src/schema_registry/routers/master_availability.py similarity index 71% rename from src/karapace/routers/master_available_router.py rename to src/schema_registry/routers/master_availability.py index e55389f42..55e792275 100644 --- a/src/karapace/routers/master_available_router.py +++ b/src/schema_registry/routers/master_availability.py @@ -3,13 +3,14 @@ See LICENSE for details """ -from fastapi import APIRouter, HTTPException, Request, Response, status +from dependency_injector.wiring import inject, Provide +from fastapi import APIRouter, Depends, HTTPException, Request, Response, status from fastapi.responses import JSONResponse -from karapace.config import LOG -from karapace.dependencies.config_dependency import ConfigDep -from karapace.dependencies.forward_client_dependency import ForwardClientDep -from karapace.dependencies.schema_registry_dependency import SchemaRegistryDep +from karapace.config import Config +from karapace.forward_client import ForwardClient +from karapace.schema_registry import KarapaceSchemaRegistry from pydantic import BaseModel +from schema_registry.container import SchemaRegistryContainer from typing import Final import json @@ -33,12 +34,13 @@ class MasterAvailabilityResponse(BaseModel): @master_availability_router.get("") -async def master_available( - config: ConfigDep, - schema_registry: SchemaRegistryDep, - forward_client: ForwardClientDep, +@inject +async def master_availability( request: Request, response: Response, + config: Config = Depends(Provide[SchemaRegistryContainer.karapace_container.config]), + forward_client: ForwardClient = Depends(Provide[SchemaRegistryContainer.karapace_container.forward_client]), + schema_registry: KarapaceSchemaRegistry = Depends(Provide[SchemaRegistryContainer.karapace_container.schema_registry]), ) -> MasterAvailabilityResponse: are_we_master, master_url = await schema_registry.get_master() LOG.info("are master %s, master url %s", are_we_master, 
master_url) diff --git a/src/schema_registry/routers/requests.py b/src/schema_registry/routers/requests.py index fb4b51511..3d7f108b3 100644 --- a/src/schema_registry/routers/requests.py +++ b/src/schema_registry/routers/requests.py @@ -5,7 +5,7 @@ from karapace.schema_type import SchemaType from karapace.typing import Subject -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, ConfigDict, Field, field_validator from schema_registry.routers.errors import KarapaceValidationError from typing import Any @@ -20,13 +20,12 @@ class SchemaRequest(BaseModel): schema_str: str = Field(alias="schema") schema_type: SchemaType = Field(alias="schemaType", default=SchemaType.AVRO) references: list[SchemaReference] | None = None - metadata: Any | None - ruleSet: Any | None + metadata: Any | None = None + ruleSet: Any | None = None + model_config = ConfigDict(extra="forbid") - class Config: - extra = "forbid" - - @validator("schema_str") + @field_validator("schema_str") + @classmethod def validate_schema(cls, schema_str: str) -> str: if not schema_str and not schema_str.strip(): raise KarapaceValidationError( @@ -58,7 +57,7 @@ class SchemaListingItem(BaseModel): version: int schema_id: int = Field(alias="id") schema_type: SchemaType | None = Field(alias="schemaType", default=None) - references: list[Any] | None + references: list[Any] | None = None class SchemaIdResponse(BaseModel): diff --git a/src/schema_registry/routers/setup.py b/src/schema_registry/routers/setup.py index fe0b6be9b..663639583 100644 --- a/src/schema_registry/routers/setup.py +++ b/src/schema_registry/routers/setup.py @@ -7,6 +7,7 @@ from schema_registry.routers.compatibility import compatibility_router from schema_registry.routers.config import config_router from schema_registry.routers.health import health_router +from schema_registry.routers.master_availability import master_availability_router from schema_registry.routers.metrics import metrics_router from 
schema_registry.routers.mode import mode_router from schema_registry.routers.root import root_router @@ -23,3 +24,4 @@ def setup_routers(app: FastAPI) -> None: app.include_router(schemas_router) app.include_router(subjects_router) app.include_router(metrics_router) + app.include_router(master_availability_router) diff --git a/src/schema_registry/schema_registry_apis.py b/src/schema_registry/schema_registry_apis.py index cc9a01bb2..13f6bb8f2 100644 --- a/src/schema_registry/schema_registry_apis.py +++ b/src/schema_registry/schema_registry_apis.py @@ -65,9 +65,6 @@ class KarapaceSchemaRegistryController: def __init__(self, config: Config, schema_registry: KarapaceSchemaRegistry, stats: StatsClient) -> None: # super().__init__(config=config, not_ready_handler=self._forward_if_not_ready_to_serve) - print("+++++++++========") - print(schema_registry) - self.config = config self._process_start_time = time.monotonic() self.stats = stats @@ -219,8 +216,6 @@ def _has_subject_with_id() -> bool: ) schema = self.schema_registry.schemas_get(parsed_schema_id, fetch_max_id=fetch_max_id) - print("+++++++++========") - print(schema) if not schema: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, diff --git a/src/schema_registry/user.py b/src/schema_registry/user.py index 16cd55705..b3d6919a2 100644 --- a/src/schema_registry/user.py +++ b/src/schema_registry/user.py @@ -13,24 +13,17 @@ @inject async def get_current_user( - credentials: Annotated[HTTPBasicCredentials, Depends(HTTPBasic())], + credentials: Annotated[HTTPBasicCredentials, Depends(HTTPBasic(auto_error=False))], authorizer: AuthenticatorAndAuthorizer = Depends(Provide[SchemaRegistryContainer.karapace_container.authorizer]), ) -> User: - import logging - - logging.info("get_current_user ++++++++++++=============") - logging.info(f"credentials: {credentials}") - logging.info(f"authorizer: {authorizer}") - if authorizer and not credentials: + if authorizer.MUST_AUTHENTICATE and not credentials: raise 
HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail={"message": "Unauthorized"}, headers={"WWW-Authenticate": 'Basic realm="Karapace Schema Registry"'}, ) - assert authorizer is not None - assert credentials is not None - username: str = credentials.username - password: str = credentials.password + username: str = credentials.username if credentials else "" + password: str = credentials.password if credentials else "" try: return authorizer.authenticate(username=username, password=password) except AuthenticationError as exc: diff --git a/tests/conftest.py b/tests/conftest.py index 91fb0b02d..f6776d924 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ See LICENSE for details """ from avro.compatibility import SchemaCompatibilityResult -from karapace.config import KARAPACE_BASE_CONFIG_YAML_PATH from karapace.container import KarapaceContainer from pathlib import Path from schema_registry.container import SchemaRegistryContainer @@ -186,9 +185,7 @@ def fixture_tmp_file(): @pytest.fixture(name="karapace_container", scope="session") def fixture_karapace_container() -> KarapaceContainer: - container = KarapaceContainer() - container.base_config.from_yaml(KARAPACE_BASE_CONFIG_YAML_PATH, envs_required=True, required=True) - return container + return KarapaceContainer() @pytest.fixture diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 000000000..f53be7121 --- /dev/null +++ b/tests/e2e/__init__.py @@ -0,0 +1,4 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py new file mode 100644 index 000000000..8e1d23d64 --- /dev/null +++ b/tests/e2e/conftest.py @@ -0,0 +1,128 @@ +""" +karapace - conftest + +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from _pytest.fixtures import SubRequest +from aiohttp import BasicAuth +from collections.abc import AsyncGenerator, Iterator +from 
confluent_kafka.admin import NewTopic +from karapace.client import Client +from karapace.container import KarapaceContainer +from karapace.kafka.admin import KafkaAdminClient +from karapace.kafka.consumer import AsyncKafkaConsumer, KafkaConsumer +from karapace.kafka.producer import AsyncKafkaProducer, KafkaProducer +from tests.integration.utils.cluster import RegistryDescription, RegistryEndpoint +from tests.integration.utils.kafka_server import KafkaServers + +import asyncio +import pytest +import secrets + + +@pytest.fixture(scope="session", name="basic_auth") +def fixture_basic_auth() -> BasicAuth: + return BasicAuth("test", "test") + + +@pytest.fixture(name="karapace_container", scope="session") +def fixture_karapace_container() -> KarapaceContainer: + return KarapaceContainer() + + +@pytest.fixture(scope="session", name="kafka_servers") +def fixture_kafka_server(karapace_container: KarapaceContainer) -> Iterator[KafkaServers]: + yield KafkaServers([karapace_container.config().bootstrap_uri]) + + +@pytest.fixture(scope="function", name="producer") +def fixture_producer(kafka_servers: KafkaServers) -> Iterator[KafkaProducer]: + yield KafkaProducer(bootstrap_servers=kafka_servers.bootstrap_servers) + + +@pytest.fixture(scope="function", name="admin_client") +def fixture_admin(kafka_servers: KafkaServers) -> Iterator[KafkaAdminClient]: + yield KafkaAdminClient(bootstrap_servers=kafka_servers.bootstrap_servers) + + +@pytest.fixture(scope="function", name="consumer") +def fixture_consumer( + kafka_servers: KafkaServers, +) -> Iterator[KafkaConsumer]: + consumer = KafkaConsumer( + bootstrap_servers=kafka_servers.bootstrap_servers, + auto_offset_reset="earliest", + enable_auto_commit=False, + topic_metadata_refresh_interval_ms=200, # Speed things up for consumer tests to discover topics, etc. 
+ ) + try: + yield consumer + finally: + consumer.close() + + +@pytest.fixture(scope="function", name="asyncproducer") +async def fixture_asyncproducer( + kafka_servers: KafkaServers, + loop: asyncio.AbstractEventLoop, +) -> AsyncGenerator[AsyncKafkaProducer, None]: + asyncproducer = AsyncKafkaProducer(bootstrap_servers=kafka_servers.bootstrap_servers, loop=loop) + await asyncproducer.start() + yield asyncproducer + await asyncproducer.stop() + + +@pytest.fixture(scope="function", name="asyncconsumer") +async def fixture_asyncconsumer( + kafka_servers: KafkaServers, + loop: asyncio.AbstractEventLoop, +) -> AsyncGenerator[AsyncKafkaConsumer, None]: + asyncconsumer = AsyncKafkaConsumer( + bootstrap_servers=kafka_servers.bootstrap_servers, + loop=loop, + auto_offset_reset="earliest", + enable_auto_commit=False, + topic_metadata_refresh_interval_ms=200, # Speed things up for consumer tests to discover topics, etc. + ) + await asyncconsumer.start() + yield asyncconsumer + await asyncconsumer.stop() + + +@pytest.fixture(scope="function", name="registry_cluster") +async def fixture_registry_cluster( + karapace_container: KarapaceContainer, + loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument +) -> RegistryDescription: + protocol = "http" + endpoint = RegistryEndpoint( + protocol, karapace_container.config().registry_host, karapace_container.config().registry_port + ) + return RegistryDescription(endpoint, karapace_container.config().topic_name) + + +@pytest.fixture(scope="function", name="registry_async_client") +async def fixture_registry_async_client( + request: SubRequest, + basic_auth: BasicAuth, + registry_cluster: RegistryDescription, + loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument +) -> AsyncGenerator[Client, None]: + client = Client( + server_uri=registry_cluster.endpoint.to_url(), + server_ca=request.config.getoption("server_ca"), + session_auth=basic_auth, + ) + try: + yield client + finally: + await client.close() + + 
+@pytest.fixture(scope="function", name="new_topic") +def fixture_new_topic(admin_client: KafkaAdminClient) -> NewTopic: + topic_name = secrets.token_hex(4) + return admin_client.new_topic(topic_name, num_partitions=1, replication_factor=1) diff --git a/src/karapace/routers/__init__.py b/tests/e2e/instrumentation/__init__.py similarity index 100% rename from src/karapace/routers/__init__.py rename to tests/e2e/instrumentation/__init__.py diff --git a/tests/integration/instrumentation/test_prometheus.py b/tests/e2e/instrumentation/test_prometheus.py similarity index 100% rename from tests/integration/instrumentation/test_prometheus.py rename to tests/e2e/instrumentation/test_prometheus.py diff --git a/tests/integration/instrumentation/__init__.py b/tests/e2e/kafka/__init__.py similarity index 100% rename from tests/integration/instrumentation/__init__.py rename to tests/e2e/kafka/__init__.py diff --git a/tests/integration/kafka/test_admin.py b/tests/e2e/kafka/test_admin.py similarity index 100% rename from tests/integration/kafka/test_admin.py rename to tests/e2e/kafka/test_admin.py diff --git a/tests/integration/kafka/test_consumer.py b/tests/e2e/kafka/test_consumer.py similarity index 100% rename from tests/integration/kafka/test_consumer.py rename to tests/e2e/kafka/test_consumer.py diff --git a/tests/integration/kafka/test_producer.py b/tests/e2e/kafka/test_producer.py similarity index 100% rename from tests/integration/kafka/test_producer.py rename to tests/e2e/kafka/test_producer.py diff --git a/tests/integration/kafka/__init__.py b/tests/e2e/schema_registry/__init__.py similarity index 100% rename from tests/integration/kafka/__init__.py rename to tests/e2e/schema_registry/__init__.py diff --git a/tests/integration/schema_registry/test_jsonschema.py b/tests/e2e/schema_registry/test_jsonschema.py similarity index 100% rename from tests/integration/schema_registry/test_jsonschema.py rename to tests/e2e/schema_registry/test_jsonschema.py diff --git 
a/tests/e2e/test_karapace.py b/tests/e2e/test_karapace.py new file mode 100644 index 000000000..ccf4bde1e --- /dev/null +++ b/tests/e2e/test_karapace.py @@ -0,0 +1,64 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from collections.abc import Iterator +from contextlib import closing, contextmanager, ExitStack +from pathlib import Path +from tests.integration.utils.kafka_server import KafkaServers +from tests.integration.utils.process import stop_process +from tests.utils import popen_karapace_all + +import socket + + +@contextmanager +def allocate_port_no_reuse() -> Iterator[int]: + """Allocate random free port and do not allow reuse.""" + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.bind(("127.0.0.1", 0)) + yield sock.getsockname()[1] + + +def test_regression_server_must_exit_on_exception( + tmp_path: Path, + kafka_servers: Iterator[KafkaServers], +) -> None: + """Regression test for Karapace properly exiting. + + Karapace was not closing all its background threads, so when an exception + was raised an reached the top-level, the webserver created by asyncio would + be stopped but the threads would keep the server running. + Karapace exit on exception is done by setting a reserved port as server port. 
+ """ + with ExitStack() as stack: + karapace_rest_proxy_port = stack.enter_context(allocate_port_no_reuse()) + karapace_schema_registry_port = stack.enter_context(allocate_port_no_reuse()) + logfile = stack.enter_context((tmp_path / "karapace.log").open("w")) + errfile = stack.enter_context((tmp_path / "karapace.err").open("w")) + + karapace_rest_proxy_env = { + "KARAPACE_BOOTSTRAP_URI": kafka_servers.bootstrap_servers[0], + "KARAPACE_PORT": str(karapace_rest_proxy_port), + "KARAPACE_REGISTRY_HOST": "127.0.0.1", + "KARAPACE_REGISTRY_PORT": str(karapace_schema_registry_port), + "KARAPACE_KARAPACE_REST": "true", + } + karapace_rest_proxy = popen_karapace_all( + module="karapace.karapace_all", env=karapace_rest_proxy_env, stdout=logfile, stderr=errfile + ) + stack.callback(stop_process, karapace_rest_proxy) # make sure to stop the process if the test fails + assert karapace_rest_proxy.wait(timeout=10) != 0, "Process should have exited with an error, port is already is use" + + karapace_schema_registry_env = { + "KARAPACE_BOOTSTRAP_URI": kafka_servers.bootstrap_servers[0], + "KARAPACE_PORT": str(karapace_schema_registry_port), + "KARAPACE_KARAPACE_REGISTRY": "true", + } + karapace_schema_registry = popen_karapace_all( + module="schema_registry", env=karapace_schema_registry_env, stdout=logfile, stderr=errfile + ) + stack.callback(stop_process, karapace_schema_registry) # make sure to stop the process if the test fails + assert ( + karapace_schema_registry.wait(timeout=10) != 0 + ), "Process should have exited with an error, port is already is use" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 84be44595..39a0a3a3c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -15,14 +15,19 @@ from dataclasses import asdict from filelock import FileLock from karapace.client import Client -from karapace.config import Config, write_config +from karapace.config import Config from karapace.kafka.admin import 
KafkaAdminClient from karapace.kafka.consumer import AsyncKafkaConsumer, KafkaConsumer from karapace.kafka.producer import AsyncKafkaProducer, KafkaProducer from karapace.kafka_rest_apis import KafkaRest from pathlib import Path from tests.conftest import KAFKA_VERSION -from tests.integration.utils.cluster import RegistryDescription, RegistryEndpoint, start_schema_registry_cluster +from tests.integration.utils.cluster import ( + after_master_is_available, + RegistryDescription, + RegistryEndpoint, + start_schema_registry_cluster, +) from tests.integration.utils.config import KafkaConfig, KafkaDescription, ZKConfig from tests.integration.utils.kafka_server import ( configure_and_start_kafka, @@ -39,7 +44,6 @@ from urllib.parse import urlparse import asyncio -import contextlib import json import os import pathlib @@ -149,12 +153,12 @@ def create_kafka_server( stack.callback(stop_process, zk_proc) # Make sure zookeeper is running before trying to start Kafka - wait_for_port_subprocess(zk_config.client_port, zk_proc, wait_time=20) + wait_for_port_subprocess(zk_config.client_port, zk_proc, wait_time=KAFKA_WAIT_TIMEOUT) data_dir = session_datadir / "kafka" log_dir = session_logdir / "kafka" - data_dir.mkdir(parents=True) - log_dir.mkdir(parents=True) + data_dir.mkdir(parents=True, exist_ok=True) + log_dir.mkdir(parents=True, exist_ok=True) kafka_config = KafkaConfig( datadir=str(data_dir), logdir=str(log_dir), @@ -262,7 +266,6 @@ async def fixture_asyncconsumer( async def fixture_rest_async( request: SubRequest, loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument - tmp_path: Path, kafka_servers: KafkaServers, registry_async_client: Client, ) -> AsyncIterator[KafkaRest | None]: @@ -275,15 +278,12 @@ async def fixture_rest_async( yield None return - config_path = tmp_path / "karapace_config.json" - config = Config() config.admin_metadata_max_age = 2 config.bootstrap_uri = kafka_servers.bootstrap_servers[0] # Use non-default max request size for REST 
producer. config.producer_max_request_size = REST_PRODUCER_MAX_REQUEST_BYTES config.waiting_time_before_acting_as_master_ms = 300 - write_config(config_path, config) rest = KafkaRest(config=config) assert rest.serializer.registry_client @@ -333,7 +333,6 @@ async def get_client(**kwargs) -> TestClient: # pylint: disable=unused-argument async def fixture_rest_async_novalidation( request: SubRequest, loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument - tmp_path: Path, kafka_servers: KafkaServers, registry_async_client: Client, ) -> AsyncIterator[KafkaRest | None]: @@ -346,8 +345,6 @@ async def fixture_rest_async_novalidation( yield None return - config_path = tmp_path / "karapace_config.json" - config = Config() config.admin_metadata_max_age = 2 config.bootstrap_uri = kafka_servers.bootstrap_servers[0] @@ -355,7 +352,6 @@ async def fixture_rest_async_novalidation( config.producer_max_request_size = REST_PRODUCER_MAX_REQUEST_BYTES config.name_strategy_validation = False # This should be only difference from rest_async config.waiting_time_before_acting_as_master_ms = 300 - write_config(config_path, config) rest = KafkaRest(config=config) assert rest.serializer.registry_client @@ -690,21 +686,6 @@ async def fixture_registry_async_client_auth( await client.close() -@contextlib.asynccontextmanager -async def after_master_is_available( - registry_instances: list[RegistryDescription], server_ca: str | None -) -> AsyncIterator[None]: - client = Client( - server_uri=registry_instances[0].endpoint.to_url(), - server_ca=server_ca, - ) - try: - await repeat_until_master_is_available(client) - yield - finally: - await client.close() - - @pytest.fixture(scope="function", name="registry_async_retry_client_auth") async def fixture_registry_async_retry_client_auth(registry_async_client_auth: Client) -> RetryRestClient: return RetryRestClient(registry_async_client_auth) diff --git a/tests/integration/schema_registry/__init__.py 
b/tests/integration/schema_registry/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/test_dependencies_compatibility_protobuf.py b/tests/integration/test_dependencies_compatibility_protobuf.py index c52a49141..dcbe64f86 100644 --- a/tests/integration/test_dependencies_compatibility_protobuf.py +++ b/tests/integration/test_dependencies_compatibility_protobuf.py @@ -507,7 +507,7 @@ async def test_protobuf_schema_references_rejected_values(registry_async_client: ) assert res.status_code == 422 assert res.json()["message"] == [ - {"loc": ["body", "references"], "msg": "value is not a valid list", "type": "type_error.list"} + {"type": "list_type", "loc": ["body", "references"], "msg": "Input should be a valid list", "input": 1} ] res = await registry_async_client.post( @@ -515,7 +515,7 @@ async def test_protobuf_schema_references_rejected_values(registry_async_client: ) assert res.status_code == 422 assert res.json()["message"] == [ - {"loc": ["body", "references"], "msg": "value is not a valid list", "type": "type_error.list"} + {"type": "list_type", "loc": ["body", "references"], "msg": "Input should be a valid list", "input": "foo"} ] res = await registry_async_client.post( @@ -523,7 +523,7 @@ async def test_protobuf_schema_references_rejected_values(registry_async_client: ) assert res.status_code == 422 assert res.json()["message"] == [ - {"loc": ["body", "references"], "msg": "value is not a valid list", "type": "type_error.list"} + {"type": "list_type", "loc": ["body", "references"], "msg": "Input should be a valid list", "input": False} ] res = await registry_async_client.post( @@ -532,7 +532,12 @@ async def test_protobuf_schema_references_rejected_values(registry_async_client: ) assert res.status_code == 422 assert res.json()["message"] == [ - {"loc": ["body", "references"], "msg": "value is not a valid list", "type": "type_error.list"} + { + "type": "list_type", + "loc": ["body", "references"], + "msg": "Input 
should be a valid list", + "input": {"this_is_object": True}, + } ] diff --git a/tests/integration/test_karapace.py b/tests/integration/test_karapace.py deleted file mode 100644 index 043e3e21d..000000000 --- a/tests/integration/test_karapace.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -Copyright (c) 2023 Aiven Ltd -See LICENSE for details -""" -from collections.abc import Iterator -from contextlib import closing, contextmanager, ExitStack -from karapace.config import Config, write_env_file -from pathlib import Path -from tests.integration.utils.kafka_server import KafkaServers -from tests.integration.utils.process import stop_process -from tests.utils import popen_karapace_all - -import socket - - -@contextmanager -def allocate_port_no_reuse() -> Iterator[int]: - """Allocate random free port and do not allow reuse.""" - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - sock.bind(("127.0.0.1", 0)) - yield sock.getsockname()[1] - - -def test_regression_server_must_exit_on_exception( - tmp_path: Path, - kafka_servers: Iterator[KafkaServers], -) -> None: - """Regression test for Karapace properly exiting. - - Karapace was not closing all its background threads, so when an exception - was raised an reached the top-level, the webserver created by asyncio would - be stopped but the threads would keep the server running. - Karapace exit on exception is done by setting a reserved port as server port. 
- """ - with ExitStack() as stack: - port = stack.enter_context(allocate_port_no_reuse()) - - config = Config() - config.bootstrap_uri = kafka_servers.bootstrap_servers[0] - config.port = port - config.karapace_registry = True - - env_path = tmp_path / "karapace.env" - - print(f"{tmp_path}/karapace.log") - print(f"{tmp_path}/karapace.err") - - logfile = stack.enter_context((tmp_path / "karapace.log").open("w")) - errfile = stack.enter_context((tmp_path / "karapace.err").open("w")) - - write_env_file(dot_env_path=env_path, config=config) - process = popen_karapace_all(env_path=env_path, stdout=logfile, stderr=errfile) - stack.callback(stop_process, process) # make sure to stop the process if the test fails - assert process.wait(timeout=10) != 0, "Process should have exited with an error, port is already is use" diff --git a/tests/integration/test_master_coordinator.py b/tests/integration/test_master_coordinator.py index 876a91d66..b4fe8088f 100644 --- a/tests/integration/test_master_coordinator.py +++ b/tests/integration/test_master_coordinator.py @@ -190,6 +190,7 @@ async def test_no_eligible_master(kafka_servers: KafkaServers) -> None: await mc.close() +@pytest.mark.skip(reason="requires master forwarding to be implemented") async def test_schema_request_forwarding( registry_async_pair, registry_async_retry_client: RetryRestClient, diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 9a9eb31e7..2f6a7097a 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -9,9 +9,9 @@ from karapace.client import Client from karapace.kafka.producer import KafkaProducer from karapace.rapu import is_success -from karapace.schema_registry_apis import SchemaErrorMessages from karapace.schema_type import SchemaType from karapace.utils import json_encode +from schema_registry.schema_registry_apis import SchemaErrorMessages from tests.base_testcase import BaseTestCase from tests.integration.utils.cluster import 
RegistryDescription from tests.integration.utils.kafka_server import KafkaServers @@ -1428,7 +1428,7 @@ async def test_schema_missing_body(registry_async_client: Client) -> None: ) assert res.status_code == 422 assert res.json()["error_code"] == 422 - assert res.json()["message"] == [{"loc": ["body", "schema"], "msg": "field required", "type": "value_error.missing"}] + assert res.json()["message"] == [{"type": "missing", "loc": ["body", "schema"], "msg": "Field required", "input": {}}] async def test_schema_missing_schema_body_ok(registry_async_client: Client) -> None: @@ -1539,7 +1539,7 @@ async def test_schema_subject_post_invalid(registry_async_client: Client) -> Non res = await registry_async_client.post(f"subjects/{subject_1}", json={}) assert res.status_code == 422 assert res.json()["error_code"] == 422 - assert res.json()["message"] == [{"loc": ["body", "schema"], "msg": "field required", "type": "value_error.missing"}] + assert res.json()["message"] == [{"type": "missing", "loc": ["body", "schema"], "msg": "Field required", "input": {}}] # Schema not included in the request body for subject that does not exist subject_3 = subject_name_factory() @@ -1549,7 +1549,7 @@ async def test_schema_subject_post_invalid(registry_async_client: Client) -> Non ) assert res.status_code == 422 assert res.json()["error_code"] == 422 - assert res.json()["message"] == [{"loc": ["body", "schema"], "msg": "field required", "type": "value_error.missing"}] + assert res.json()["message"] == [{"type": "missing", "loc": ["body", "schema"], "msg": "Field required", "input": {}}] async def test_schema_lifecycle(registry_async_client: Client) -> None: @@ -2306,8 +2306,18 @@ async def test_schema_body_validation(registry_async_client: Client) -> None: assert res.status_code == 422 assert res.json()["error_code"] == 422 assert res.json()["message"] == [ - {"loc": ["body", "schema"], "msg": "field required", "type": "value_error.missing"}, - {"loc": ["body", "invalid_field"], "msg": "extra 
fields not permitted", "type": "value_error.extra"}, + { + "type": "missing", + "loc": ["body", "schema"], + "msg": "Field required", + "input": {"invalid_field": "invalid_value"}, + }, + { + "type": "extra_forbidden", + "loc": ["body", "invalid_field"], + "msg": "Extra inputs are not permitted", + "input": "invalid_value", + }, ] # Additional field res = await registry_async_client.post( @@ -2316,13 +2326,25 @@ async def test_schema_body_validation(registry_async_client: Client) -> None: assert res.status_code == 422 assert res.json()["error_code"] == 422 assert res.json()["message"] == [ - {"loc": ["body", "invalid_field"], "msg": "extra fields not permitted", "type": "value_error.extra"} + { + "type": "extra_forbidden", + "loc": ["body", "invalid_field"], + "msg": "Extra inputs are not permitted", + "input": "invalid_value", + }, ] # Invalid body type res = await registry_async_client.post(endpoint, json="invalid") assert res.status_code == 422 assert res.json()["error_code"] == 422 - assert res.json()["message"] == [{"loc": ["body"], "msg": "value is not a valid dict", "type": "type_error.dict"}] + assert res.json()["message"] == [ + { + "type": "model_attributes_type", + "loc": ["body"], + "msg": "Input should be a valid dictionary or object to extract fields from", + "input": "invalid", + } + ] async def test_version_number_validation(registry_async_client: Client) -> None: diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 55825fb92..53d55fc62 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -4,16 +4,22 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from __future__ import annotations + from dataclasses import dataclass from karapace.client import Client +from karapace.config import Config from karapace.errors import InvalidTest from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.schema_type import SchemaType from 
karapace.typing import JsonData, SchemaMetadata, SchemaRuleSet +from pathlib import Path from tests.base_testcase import BaseTestCase +from tests.integration.utils.cluster import after_master_is_available, start_schema_registry_cluster +from tests.integration.utils.kafka_server import KafkaServers from tests.utils import create_subject_name_factory -from typing import Optional, Union +import asyncio import logging import pytest @@ -472,10 +478,10 @@ class TestCaseSchema: schema_type: SchemaType schema_str: str subject: str - references: Optional[list[JsonData]] = None + references: list[JsonData] | None = None expected: int = 200 expected_msg: str = "" - expected_error_code: Optional[int] = None + expected_error_code: int | None = None TestCaseSchema.__test__ = False @@ -488,7 +494,7 @@ class TestCaseDeleteSchema: schema_id: int expected: int = 200 expected_msg: str = "" - expected_error_code: Optional[int] = None + expected_error_code: int | None = None TestCaseDeleteSchema.__test__ = False @@ -501,7 +507,7 @@ class TestCaseHardDeleteSchema(TestCaseDeleteSchema): @dataclass class ReferenceTestCase(BaseTestCase): - schemas: list[Union[TestCaseSchema, TestCaseDeleteSchema]] + schemas: list[TestCaseSchema | TestCaseDeleteSchema] # Base case @@ -1326,30 +1332,43 @@ async def test_protobuf_update_ordering(registry_async_client: Client) -> None: """ -@pytest.mark.parametrize( - "registry_cluster, status", - [({"config": {}}, 404), ({"config": {"use_protobuf_formatter": True}}, 200)], - indirect=["registry_cluster"], -) -async def test_registering_normalized_schema(registry_async_client: Client, status: int) -> None: +async def test_registering_normalized_schema(session_logdir: Path, kafka_servers: KafkaServers) -> None: subject = create_subject_name_factory("test_protobuf_normalization")() - body = {"schemaType": "PROTOBUF", "schema": SCHEMA_WITH_OPTION_ORDERED} - res = await registry_async_client.post(f"subjects/{subject}/versions?normalize=true", json=body) + config1 
= Config() + config1.bootstrap_uri = kafka_servers.bootstrap_servers[0] - assert res.status_code == 200 - assert "id" in res.json() - original_schema_id = res.json()["id"] + config2 = Config() + config2.bootstrap_uri = kafka_servers.bootstrap_servers[0] + config2.use_protobuf_formatter = True - body = {"schemaType": "PROTOBUF", "schema": SCHEMA_WITH_OPTION_UNORDERDERED} - res = await registry_async_client.post(f"subjects/{subject}", json=body) - assert res.status_code == status + async with start_schema_registry_cluster( + config_templates=[config1, config2], + data_dir=session_logdir / subject, + ) as endpoints: + async with after_master_is_available(endpoints, None): + servers = [server.endpoint.to_url() for server in endpoints] + client1 = Client(server_uri=servers[0], server_ca=None) + client2 = Client(server_uri=servers[1], server_ca=None) - res = await registry_async_client.post(f"subjects/{subject}?normalize=true", json=body) + await asyncio.sleep(10) - assert res.status_code == 200 - assert "id" in res.json() - assert original_schema_id == res.json()["id"] + body = {"schemaType": "PROTOBUF", "schema": SCHEMA_WITH_OPTION_ORDERED} + res = await client1.post(f"subjects/{subject}/versions?normalize=true", json=body) + + assert res.status_code == 200 + assert "id" in res.json() + original_schema_id = res.json()["id"] + + body = {"schemaType": "PROTOBUF", "schema": SCHEMA_WITH_OPTION_UNORDERDERED} + res = await client1.post(f"subjects/{subject}", json=body) + assert res.status_code == 404 + + res = await client2.post(f"subjects/{subject}?normalize=true", json=body) + + assert res.status_code == 200 + assert "id" in res.json() + assert original_schema_id == res.json()["id"] @pytest.mark.parametrize("registry_cluster", [{"config": {}}, {"config": {"use_protobuf_formatter": True}}], indirect=True) diff --git a/tests/integration/test_schema_registry_auth.py b/tests/integration/test_schema_registry_auth.py index 89832355f..7624fffb6 100644 --- 
a/tests/integration/test_schema_registry_auth.py +++ b/tests/integration/test_schema_registry_auth.py @@ -19,6 +19,7 @@ import aiohttp import asyncio +import pytest NEW_TOPIC_TIMEOUT = 10 @@ -203,6 +204,7 @@ async def test_sr_ids(registry_async_retry_client_auth: RetryRestClient) -> None assert res.status_code == 200 +@pytest.mark.skip(reason="requires master forwarding to be implemented") async def test_sr_auth_forwarding( registry_async_auth_pair: list[str], registry_async_retry_client_auth: RetryRestClient ) -> None: diff --git a/tests/integration/utils/cluster.py b/tests/integration/utils/cluster.py index 66df9335a..67b2c97ea 100644 --- a/tests/integration/utils/cluster.py +++ b/tests/integration/utils/cluster.py @@ -7,11 +7,12 @@ from collections.abc import AsyncIterator from contextlib import asynccontextmanager, ExitStack from dataclasses import dataclass -from karapace.config import Config, write_env_file +from karapace.client import Client +from karapace.config import Config from pathlib import Path from tests.integration.utils.network import allocate_port from tests.integration.utils.process import stop_process, wait_for_port_subprocess -from tests.utils import new_random_name, popen_karapace_all +from tests.utils import new_random_name, popen_karapace_all, repeat_until_master_is_available @dataclass(frozen=True) @@ -52,41 +53,57 @@ async def start_schema_registry_cluster( all_registries = [] with ExitStack() as stack: for pos, config in enumerate(config_templates): - # For testing we don't want to expose the hostname, usually the loopback interface is - # used (127.0.0.1), and the name resolution would instead return the machine's network - # address, (e.g. 
192.168.0.1), which would cause connect failures - host = config.host - config.advertised_hostname = host - config.topic_name = schemas_topic - config.karapace_registry = True - config.log_level = "DEBUG" - config.log_format = "%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s" - actual_group_id = config.group_id = group_id - - port = config.port = stack.enter_context(allocate_port()) + port = stack.enter_context(allocate_port()) assert isinstance(port, int), "Port must be an integer" - group_dir = data_dir / str(actual_group_id) + group_dir = data_dir / str(group_id) group_dir.mkdir(parents=True, exist_ok=True) - env_path = group_dir / f"{pos}.env" log_path = group_dir / f"{pos}.log" error_path = group_dir / f"{pos}.error" - # config = set_config_defaults(config) - write_env_file(env_path, config) - logfile = stack.enter_context(open(log_path, "w")) errfile = stack.enter_context(open(error_path, "w")) - process = popen_karapace_all(env_path=env_path, stdout=logfile, stderr=errfile) + + env = { + "KARAPACE_HOST": config.host, + "KARAPACE_PORT": str(port), + "KARAPACE_GROUP_ID": group_id, + "KARAPACE_ADVERTISED_HOSTNAME": config.host, + "KARAPACE_BOOTSTRAP_URI": config.bootstrap_uri, + "KARAPACE_TOPIC_NAME": schemas_topic, + "KARAPACE_LOG_LEVEL": "DEBUG", + "KARAPACE_LOG_FORMAT": "%(asctime)s [%(threadName)s] %(filename)s:%(funcName)s:%(lineno)d %(message)s", + "KARAPACE_KARAPACE_REGISTRY": "true", + "KARAPACE_REGISTRY_AUTHFILE": config.registry_authfile if config.registry_authfile else "", + "KARAPACE_SERVER_TLS_CERTFILE": config.server_tls_certfile if config.server_tls_certfile else "", + "KARAPACE_SERVER_TLS_KEYFILE": config.server_tls_keyfile if config.server_tls_keyfile else "", + "KARAPACE_USE_PROTOBUF_FORMATTER": "true" if config.use_protobuf_formatter else "false", + } + process = popen_karapace_all(module="schema_registry", env=env, stdout=logfile, stderr=errfile) stack.callback(stop_process, process) - 
all_processes.append(process) + all_processes.append((process, port)) protocol = "http" if config.server_tls_keyfile is None else "https" - endpoint = RegistryEndpoint(protocol, host, port) + endpoint = RegistryEndpoint(protocol, config.host, port) description = RegistryDescription(endpoint, schemas_topic) all_registries.append(description) - for process in all_processes: - wait_for_port_subprocess(port, process, hostname=host) + for process, port in all_processes: + wait_for_port_subprocess(port, process, hostname=config.host, wait_time=120) yield all_registries + + +@asynccontextmanager +async def after_master_is_available( + registry_instances: list[RegistryDescription], server_ca: str | None +) -> AsyncIterator[None]: + client = Client( + server_uri=registry_instances[0].endpoint.to_url(), + server_ca=server_ca, + ) + try: + await repeat_until_master_is_available(client) + yield + finally: + await client.close() diff --git a/tests/integration/utils/zookeeper.py b/tests/integration/utils/zookeeper.py index 5dffcfeca..1ffb798db 100644 --- a/tests/integration/utils/zookeeper.py +++ b/tests/integration/utils/zookeeper.py @@ -25,7 +25,7 @@ def configure_and_start_zk(config: ZKConfig, kafka_description: KafkaDescription zk_dir = Path(config.path) cfg_path = zk_dir / "zoo.cfg" logs_dir = zk_dir / "logs" - logs_dir.mkdir(parents=True) + logs_dir.mkdir(parents=True, exist_ok=True) zoo_cfg = { # Number of milliseconds of each tick diff --git a/tests/utils.py b/tests/utils.py index e36093dc0..ecddea84e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -10,7 +10,7 @@ from karapace.utils import Expiration from pathlib import Path from subprocess import Popen -from typing import Any, Callable, IO, Union +from typing import Any, Callable, IO from urllib.parse import quote import asyncio @@ -344,12 +344,12 @@ def python_exe() -> str: return python -def popen_karapace_all(*, env_path: Union[Path, str], stdout: IO, stderr: IO, **kwargs) -> Popen: +def popen_karapace_all(*, 
module: str, env: dict[str, str], stdout: IO, stderr: IO, **kwargs) -> Popen: kwargs["stdout"] = stdout kwargs["stderr"] = stderr return Popen( - [python_exe(), "-m", "karapace.karapace_all"], - env={"KARAPACE_DOTENV": str(env_path)}, + [python_exe(), "-m", module], + env=env, **kwargs, )