Skip to content

Commit

Permalink
(proxy perf improvement) - use uvloop for higher RPS (10%-20% higher RPS) (BerriAI#7662)
Browse files Browse the repository at this point in the history

* uvicorn use uvloop

* fix uvloop==0.21.0

* add uvloop to pyproject

* test_completion_response_ratelimit_headers
  • Loading branch information
ishaan-jaff authored and rajatvig committed Jan 15, 2025
1 parent 7d07db9 commit e34ec8a
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 197 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,7 @@ jobs:
pip install "boto3==1.34.34"
pip install jinja2
pip install tokenizers=="0.20.0"
pip install uvloop==0.21.0
pip install jsonschema
- run:
name: Run tests
Expand Down
1 change: 1 addition & 0 deletions .circleci/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ orjson==3.9.15
pydantic==2.7.1
google-cloud-aiplatform==1.43.0
fastapi-sso==0.10.0
uvloop==0.21.0
5 changes: 4 additions & 1 deletion litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,10 @@ def _make_openai_completion():
)
uvicorn_args["ssl_keyfile"] = ssl_keyfile_path
uvicorn_args["ssl_certfile"] = ssl_certfile_path
uvicorn.run(**uvicorn_args)
uvicorn.run(
**uvicorn_args,
loop="uvloop",
)
elif run_gunicorn is True:
# Gunicorn Application Class
class StandaloneApplication(gunicorn.app.base.BaseApplication):
Expand Down
447 changes: 252 additions & 195 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pydantic = "^2.0.0"
jsonschema = "^4.22.0"

uvicorn = {version = "^0.22.0", optional = true}
uvloop = {version = "^0.21.0", optional = true}
gunicorn = {version = "^22.0.0", optional = true}
fastapi = {version = "^0.115.5", optional = true}
backoff = {version = "*", optional = true}
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ backoff==2.2.1 # server dep
pyyaml==6.0.2 # server dep
uvicorn==0.29.0 # server dep
gunicorn==22.0.0 # server dep
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching
numpy==2.1.1 # semantic caching
Expand Down
2 changes: 1 addition & 1 deletion tests/local_testing/test_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4285,7 +4285,7 @@ async def test_completion_ai21_chat():

@pytest.mark.parametrize(
"model",
["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
["gpt-4o", "azure/chatgpt-v-2"],
)
@pytest.mark.parametrize(
"stream",
Expand Down

0 comments on commit e34ec8a

Please sign in to comment.