(proxy perf improvement) - use uvloop for higher RPS (10%-20% higher RPS) (BerriAI#7662)

* uvicorn use uvloop
* fix uvloop==0.21.0
* add uvloop to pyproject
* test_completion_response_ratelimit_headers
rajatvig · Jan 15, 2025 · e34ec8a · e34ec8a
1 parent 7d07db9
commit e34ec8a
Show file tree

Hide file tree

Showing 7 changed files with 261 additions and 197 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -867,6 +867,7 @@ jobs:
             pip install "boto3==1.34.34"
             pip install jinja2
             pip install tokenizers=="0.20.0"
+            pip install uvloop==0.21.0
             pip install jsonschema
       - run:
           name: Run tests

diff --git a/.circleci/requirements.txt b/.circleci/requirements.txt
@@ -10,3 +10,4 @@ orjson==3.9.15
 pydantic==2.7.1
 google-cloud-aiplatform==1.43.0
 fastapi-sso==0.10.0
+uvloop==0.21.0
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
@@ -671,7 +671,10 @@ def _make_openai_completion():
                 )
                 uvicorn_args["ssl_keyfile"] = ssl_keyfile_path
                 uvicorn_args["ssl_certfile"] = ssl_certfile_path
-            uvicorn.run(**uvicorn_args)
+            uvicorn.run(
+                **uvicorn_args,
+                loop="uvloop",
+            )
         elif run_gunicorn is True:
             # Gunicorn Application Class
             class StandaloneApplication(gunicorn.app.base.BaseApplication):

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -33,6 +33,7 @@ pydantic = "^2.0.0"
 jsonschema = "^4.22.0"
 
 uvicorn = {version = "^0.22.0", optional = true}
+uvloop = {version = "^0.21.0", optional = true}
 gunicorn = {version = "^22.0.0", optional = true}
 fastapi = {version = "^0.115.5", optional = true}
 backoff = {version = "*", optional = true}

diff --git a/requirements.txt b/requirements.txt
@@ -7,6 +7,7 @@ backoff==2.2.1 # server dep
 pyyaml==6.0.2 # server dep
 uvicorn==0.29.0 # server dep
 gunicorn==22.0.0 # server dep
+uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
 boto3==1.34.34 # aws bedrock/sagemaker calls
 redis==5.0.0 # caching
 numpy==2.1.1 # semantic caching

diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
@@ -4285,7 +4285,7 @@ async def test_completion_ai21_chat():
 
 @pytest.mark.parametrize(
     "model",
-    ["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
+    ["gpt-4o", "azure/chatgpt-v-2"],
 )
 @pytest.mark.parametrize(
     "stream",