diff --git a/docs/deployment/configuration.mdx b/docs/deployment/configuration.mdx index 61a7a501d..30b0584a7 100644 --- a/docs/deployment/configuration.mdx +++ b/docs/deployment/configuration.mdx @@ -220,6 +220,24 @@ ARQ (Asynchronous Task Queue) configuration controls Keep's background task proc | **ARQ_EXPIRES** | Default job expiration time (in seconds) | No | 3600 | Positive integer | | **ARQ_EXPIRES_AI** | AI job expiration time (in seconds) | No | 3600000 | Positive integer | +### Rate Limiting + +Rate limiting configuration controls how many requests can be made to Keep's API endpoints within a specified time period. This helps prevent abuse and ensures system stability. + + +| Env var | Purpose | Required | Default Value | Valid options | +|:-------------------:|:-------:|:----------:|:-------------:|:-------------:| +| **KEEP_USE_LIMITER** | Enables or disables rate limiting | No | "false" | "true" or "false" | +| **KEEP_LIMIT_CONCURRENCY** | Sets the rate limit for API endpoints | No | "100/minute" | Format: "{number}/{interval}" where interval can be "second", "minute", "hour", "day" | + + +Currently, rate limiting is applied to the following endpoints: +- POST `/alerts/event` - Generic event ingestion endpoint +- POST `/alerts/{provider_type}` - Provider-specific event ingestion endpoints + +These endpoints are rate-limited according to the `KEEP_LIMIT_CONCURRENCY` setting when `KEEP_USE_LIMITER` is enabled. + + ## Frontend Environment Variables Frontend configuration variables control the behavior and features of Keep's user interface. These settings are crucial for customizing the frontend's appearance, functionality, and integration with the backend services. 
diff --git a/keep/api/api.py b/keep/api/api.py index eadddd138..7be28833a 100644 --- a/keep/api/api.py +++ b/keep/api/api.py @@ -10,6 +10,9 @@ from fastapi import FastAPI, Request from fastapi.middleware.gzip import GZipMiddleware from fastapi.responses import JSONResponse +from prometheus_fastapi_instrumentator import Instrumentator +from slowapi import _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded from starlette.middleware.cors import CORSMiddleware from starlette_context import plugins from starlette_context.middleware import RawContextMiddleware @@ -28,6 +31,7 @@ from keep.api.core.config import config from keep.api.core.db import dispose_session from keep.api.core.dependencies import SINGLE_TENANT_UUID +from keep.api.core.limiter import limiter from keep.api.logging import CONFIG as logging_config from keep.api.middlewares import LoggingMiddleware from keep.api.routes import ( @@ -190,6 +194,7 @@ async def lifespan(app: FastAPI): This runs for every worker on startup and shutdown. 
Read more about lifespan here: https://fastapi.tiangolo.com/advanced/events/#lifespan """ + app.state.limiter = limiter # create a set of background tasks background_tasks = set() # if debug tasks are enabled, create a task to check for pending tasks @@ -241,6 +246,7 @@ async def root(): return {"message": app.description, "version": KEEP_VERSION} app.add_middleware(RawContextMiddleware, plugins=(plugins.RequestIdPlugin(),)) + app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) app.add_middleware( GZipMiddleware, minimum_size=30 * 1024 * 1024 ) # Approximately 30 MiB, https://cloud.google.com/run/quotas @@ -313,6 +319,8 @@ async def catch_exception(request: Request, exc: Exception): app.add_middleware(LoggingMiddleware) + if config("KEEP_METRICS", default="true", cast=bool): + Instrumentator().instrument(app=app, metric_namespace="keep") keep.api.observability.setup(app) return app @@ -325,16 +333,13 @@ def run(app: FastAPI): keep.api.config.on_starting() - # run the server - workers = config("KEEP_WORKERS", default=None, cast=int) - if workers: - uvicorn.run( - "keep.api.api:get_app", - host=HOST, - port=PORT, - log_config=logging_config, - lifespan="on", - workers=workers, - ) - else: - uvicorn.run(app, host=HOST, port=PORT, log_config=logging_config, lifespan="on") + uvicorn.run( + "keep.api.api:get_app", + host=HOST, + port=PORT, + log_config=logging_config, + lifespan="on", + workers=config("KEEP_WORKERS", default=None, cast=int), + # uvicorn's connection cap is an integer and gets its own variable: KEEP_LIMIT_CONCURRENCY carries the slowapi rate string (e.g. "100/minute"), which cast=int would reject + limit_concurrency=config("KEEP_UVICORN_LIMIT_CONCURRENCY", default=None, cast=int), + ) diff --git a/keep/api/core/dependencies.py b/keep/api/core/dependencies.py index 9d61505c8..5e57cf171 100644 --- a/keep/api/core/dependencies.py +++ b/keep/api/core/dependencies.py @@ -12,6 +12,7 @@ SINGLE_TENANT_UUID = "keep" SINGLE_TENANT_EMAIL = "admin@keephq" + async def extract_generic_body(request: Request) -> dict | bytes | FormData: """ Extracts the body of the request based on the content type. 
diff --git a/keep/api/core/limiter.py b/keep/api/core/limiter.py new file mode 100644 index 000000000..ddce7888b --- /dev/null +++ b/keep/api/core/limiter.py @@ -0,0 +1,8 @@ +# https://slowapi.readthedocs.io/en/latest/#fastapi +from slowapi import Limiter +from slowapi.util import get_remote_address + +from keep.api.core.config import config + +limiter_enabled = config("KEEP_USE_LIMITER", default="false", cast=bool) +limiter = Limiter(key_func=get_remote_address, enabled=limiter_enabled) diff --git a/keep/api/core/metrics.py b/keep/api/core/metrics.py index 39c5d6177..8ff667855 100644 --- a/keep/api/core/metrics.py +++ b/keep/api/core/metrics.py @@ -1,51 +1,39 @@ import os -from prometheus_client import CollectorRegistry, Counter, Gauge, Summary, multiprocess +from prometheus_client import Counter, Gauge, Summary PROMETHEUS_MULTIPROC_DIR = os.environ.get("PROMETHEUS_MULTIPROC_DIR", "/tmp/prometheus") os.makedirs(PROMETHEUS_MULTIPROC_DIR, exist_ok=True) METRIC_PREFIX = "keep_" -# Create a single registry for all metrics -registry = CollectorRegistry() -multiprocess.MultiProcessCollector(registry, path=PROMETHEUS_MULTIPROC_DIR) - # Process event metrics events_in_counter = Counter( f"{METRIC_PREFIX}events_in_total", "Total number of events received", - registry=registry, ) events_out_counter = Counter( f"{METRIC_PREFIX}events_processed_total", "Total number of events processed", - registry=registry, ) events_error_counter = Counter( f"{METRIC_PREFIX}events_error_total", "Total number of events with error", - registry=registry, ) processing_time_summary = Summary( f"{METRIC_PREFIX}processing_time_seconds", "Average time spent processing events", - registry=registry, ) -# Running tasks metrics running_tasks_gauge = Gauge( f"{METRIC_PREFIX}running_tasks_current", "Current number of running tasks", - registry=registry, multiprocess_mode="livesum", ) -# Per-process running tasks metrics running_tasks_by_process_gauge = Gauge( f"{METRIC_PREFIX}running_tasks_by_process", 
"Current number of running tasks per process", labelnames=["pid"], - registry=registry, multiprocess_mode="livesum", ) diff --git a/keep/api/routes/alerts.py b/keep/api/routes/alerts.py index 8f3870328..dbc411a58 100644 --- a/keep/api/routes/alerts.py +++ b/keep/api/routes/alerts.py @@ -28,6 +28,7 @@ ) from keep.api.core.dependencies import extract_generic_body, get_pusher_client from keep.api.core.elastic import ElasticClient +from keep.api.core.limiter import limiter from keep.api.core.metrics import running_tasks_by_process_gauge, running_tasks_gauge from keep.api.models.alert import ( AlertDto, @@ -344,6 +345,7 @@ def create_process_event_task( response_model=AlertDto | list[AlertDto], status_code=202, ) +@limiter.limit(config("KEEP_LIMIT_CONCURRENCY", default="100/minute", cast=str)) async def receive_generic_event( event: AlertDto | list[AlertDto] | dict, bg_tasks: BackgroundTasks, @@ -432,6 +434,7 @@ async def webhook_challenge(): description="Receive an alert event from a provider", status_code=202, ) +@limiter.limit(config("KEEP_LIMIT_CONCURRENCY", default="100/minute", cast=str)) async def receive_event( provider_type: str, bg_tasks: BackgroundTasks, diff --git a/keep/api/routes/metrics.py b/keep/api/routes/metrics.py index 4f7f121c3..71444c2e5 100644 --- a/keep/api/routes/metrics.py +++ b/keep/api/routes/metrics.py @@ -2,14 +2,21 @@ import chevron from fastapi import APIRouter, Depends, Query, Request, Response -from prometheus_client import CONTENT_TYPE_LATEST, generate_latest +from fastapi.responses import JSONResponse +from prometheus_client import ( + CONTENT_TYPE_LATEST, + CollectorRegistry, + generate_latest, + multiprocess, +) +from keep.api.core.config import config from keep.api.core.db import ( get_last_alerts_for_incidents, get_last_incidents, get_workflow_executions_count, ) -from keep.api.core.metrics import registry +from keep.api.core.limiter import limiter from keep.api.models.alert import AlertDto from 
keep.identitymanager.authenticatedentity import AuthenticatedEntity from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory @@ -20,8 +27,14 @@ @router.get("/processing", include_in_schema=False) -async def get_processing_metrics(request: Request): - # Generate all metrics from the single registry +async def get_processing_metrics( + request: Request, + authenticated_entity: AuthenticatedEntity = Depends( + IdentityManagerFactory.get_auth_verifier(["read:metrics"]) + ), +): + registry = CollectorRegistry() + multiprocess.MultiProcessCollector(registry) metrics = generate_latest(registry) return Response(content=metrics, media_type=CONTENT_TYPE_LATEST) @@ -122,3 +135,19 @@ def get_metrics( export += f"workflows_executions_total {{status=\"other\"}} {workflow_execution_counts['other']}\n" return Response(content=export, media_type=CONTENT_TYPE_LATEST) + + +@router.get("/dumb", include_in_schema=False) +@limiter.limit(config("KEEP_LIMIT_CONCURRENCY", default="10/minute", cast=str)) +async def get_dumb(request: Request) -> JSONResponse: + """ + This endpoint is used to test the rate limiting. + + Args: + request (Request): The request object. + + Returns: + JSONResponse: A JSON response with the message "hello world" ({"hello": "world"}). 
+ """ + # await asyncio.sleep(5) + return JSONResponse(content={"hello": "world"}) diff --git a/poetry.lock b/poetry.lock index 6ef1eb261..79d159014 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2379,6 +2379,34 @@ atomic-cache = ["atomicwrites"] nearley = ["js2py"] regex = ["regex"] +[[package]] +name = "limits" +version = "3.14.1" +description = "Rate limiting utilities" +optional = false +python-versions = ">=3.9" +files = [ + {file = "limits-3.14.1-py3-none-any.whl", hash = "sha256:051aca02da56e6932599a25cb8e70543959294f5d587d57bcd7e38df234e697b"}, + {file = "limits-3.14.1.tar.gz", hash = "sha256:cad16a9b3cf3924e27da48e78bdab33ef312ecb7194fdb50e509cc8111c8d0bb"}, +] + +[package.dependencies] +deprecated = ">=1.2" +packaging = ">=21,<25" +typing-extensions = "*" + +[package.extras] +all = ["aetcd", "coredis (>=3.4.0,<5)", "emcache (>=0.6.1)", "emcache (>=1)", "etcd3", "motor (>=3,<4)", "pymemcache (>3,<5.0.0)", "pymongo (>4.1,<5)", "redis (>3,!=4.5.2,!=4.5.3,<6.0.0)", "redis (>=4.2.0,!=4.5.2,!=4.5.3)"] +async-etcd = ["aetcd"] +async-memcached = ["emcache (>=0.6.1)", "emcache (>=1)"] +async-mongodb = ["motor (>=3,<4)"] +async-redis = ["coredis (>=3.4.0,<5)"] +etcd = ["etcd3"] +memcached = ["pymemcache (>3,<5.0.0)"] +mongodb = ["pymongo (>4.1,<5)"] +redis = ["redis (>3,!=4.5.2,!=4.5.3,<6.0.0)"] +rediscluster = ["redis (>=4.2.0,!=4.5.2,!=4.5.3)"] + [[package]] name = "logmine" version = "0.4.1" @@ -3307,6 +3335,21 @@ files = [ [package.extras] twisted = ["twisted"] +[[package]] +name = "prometheus-fastapi-instrumentator" +version = "7.0.0" +description = "Instrument your FastAPI with Prometheus metrics." 
+optional = false +python-versions = ">=3.8.1,<4.0.0" +files = [ + {file = "prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl", hash = "sha256:96030c43c776ee938a3dae58485ec24caed7e05bfc60fe067161e0d5b5757052"}, + {file = "prometheus_fastapi_instrumentator-7.0.0.tar.gz", hash = "sha256:5ba67c9212719f244ad7942d75ded80693b26331ee5dfc1e7571e4794a9ccbed"}, +] + +[package.dependencies] +prometheus-client = ">=0.8.0,<1.0.0" +starlette = ">=0.30.0,<1.0.0" + [[package]] name = "propcache" version = "0.2.1" @@ -4490,7 +4533,6 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"}, - {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"}, @@ -4499,7 +4541,6 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"}, - {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"}, @@ -4508,7 +4549,6 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"}, - {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"}, @@ -4517,7 +4557,6 @@ files = [ 
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"}, - {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"}, @@ -4526,7 +4565,6 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"}, - {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"}, {file = 
"ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"}, {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, @@ -4686,6 +4724,23 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "slowapi" +version = "0.1.9" +description = "A rate limiting extension for Starlette and Fastapi" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "slowapi-0.1.9-py3-none-any.whl", hash = "sha256:cfad116cfb84ad9d763ee155c1e5c5cbf00b0d47399a769b227865f5df576e36"}, + {file = "slowapi-0.1.9.tar.gz", hash = "sha256:639192d0f1ca01b1c6d95bf6c71d794c3a9ee189855337b4821f7f457dddad77"}, +] + +[package.dependencies] +limits = ">=2.3" + +[package.extras] +redis = ["redis (>=3.4.1,<4.0.0)"] + [[package]] name = "sniffio" version = "1.3.1" @@ -5408,4 +5463,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "8e764f85116828d2a0f772cc96ae127707ac1ca6ee061b42823f99d79975dc73" +content-hash = "d1ecb84ec2278190d29b2131ef67b077971af74f076c0b4055c475073f36ad10" diff --git a/pyproject.toml b/pyproject.toml index f2f5d6475..0fef9c044 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "keep" -version = "0.32.8" +version = "0.33.0" description = "Alerting. for developers, by developers." authors = ["Keep Alerting LTD"] packages = [{include = "keep"}] @@ -89,6 +89,8 @@ psycopg = "^3.2.3" prometheus-client = "^0.21.1" psycopg2-binary = "^2.9.10" +prometheus-fastapi-instrumentator = "^7.0.0" +slowapi = "^0.1.9" [tool.poetry.group.dev.dependencies] pre-commit = "^3.0.4" pre-commit-hooks = "^4.4.0"