Add vllm #28931

Draft
wants to merge 16 commits into main
14 changes: 14 additions & 0 deletions .azure-pipelines/azure-pipelines-linux.yml
@@ -77,6 +77,13 @@ jobs:
      done
    displayName: Manage disk space

  - script: |
      sudo fallocate -l 10GiB /swapfile || true
      sudo chmod 600 /swapfile || true
      sudo mkswap /swapfile || true
      sudo swapon /swapfile || true
    displayName: Create swap file

  - script: |
      # sudo pip install --upgrade pip
      sudo pip install setuptools shyaml
@@ -121,6 +128,13 @@ jobs:
      done
    displayName: Manage disk space

  - script: |
      sudo fallocate -l 10GiB /swapfile || true
      sudo chmod 600 /swapfile || true
      sudo mkswap /swapfile || true
      sudo swapon /swapfile || true
    displayName: Create swap file

  - script: |
      # sudo pip install --upgrade pip
      sudo pip install setuptools shyaml
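Each swap command above ends in "|| true", so a failed fallocate or swapon is silent and the job simply continues without swap. A follow-up step along these lines (a sketch, not part of this PR) would make the outcome visible in the build log:

    - script: |
        # Lists active swap devices; prints nothing if swapon failed above.
        swapon --show
        # SwapTotal is reported in kB; 10 GiB is roughly 10485760 kB.
        grep SwapTotal /proc/meminfo
      displayName: Report swap status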
3 changes: 3 additions & 0 deletions conda-forge.yml
@@ -3,3 +3,6 @@ azure:
  settings_win:
    pool:
      vmImage: windows-2022
  free_disk_space: true
  settings_linux:
    swapfile_size: 10GiB
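For context, free_disk_space and settings_linux.swapfile_size are conda-forge.yml keys that conda-smithy writes into the generated CI configuration. Assuming the usual feedstock flow, they take effect after a rerender (the direct .azure-pipelines edit above covers this repository's own CI in the meantime):

    # Regenerate the CI files so the new conda-forge.yml keys are applied
    # (requires conda-smithy; run from the repository root).
    conda smithy rerender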
108 changes: 108 additions & 0 deletions recipes/vllm/recipe.yaml
@@ -0,0 +1,108 @@
context:
  version: 0.7.0

package:
  name: vllm
  version: ${{ version }}

source:
  - url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz
    sha256: 91521ca3e7629e2a40a4fceea3290cbde34c3ea6cb291ee9f84ad2ce752e65be

build:
  number: 0
  script: |
    python use_existing_torch.py
    VERBOSE=1 ${{ PYTHON }} -m pip install . --no-build-isolation
  python:
    entry_points:
      - vllm = vllm.scripts:main
  skip: cuda_compiler_version == "None"
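  # Note on the build script: use_existing_torch.py ships in the vllm sdist
  # and strips vllm's hard torch version pins from its requirements files, so
  # the pip install builds against the conda-provided pytorch from host
  # rather than pulling PyPI wheels (a paraphrase of upstream behavior).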

requirements:
  host:
    - python
    - pip
    - setuptools
    - setuptools-scm
    - ninja
    - packaging
    - wheel
    - jinja2
    - pytorch * cuda*
    - cmake
    - git
    - ${{ stdlib('c') }}
    - ${{ compiler('c') }}
    - ${{ compiler('cxx') }}
    - ${{ compiler('cuda') }}
  run:
    - python
    - psutil
    - sentencepiece
    - numpy <2.0.0
    - requests >=2.26.0
    - tqdm
    - blake3
    - py-cpuinfo
    - transformers >=4.45.2
    - tokenizers >=0.19.1
    - protobuf
    - fastapi !=0.113.*,!=0.114.0,>=0.107.0
    - aiohttp
    - openai >=1.52.0
    - uvicorn
    - pydantic >=2.9
    - prometheus_client >=0.18.0
    - pillow
    - prometheus-fastapi-instrumentator >=7.0.0
    - tiktoken >=0.6.0
    - lm-format-enforcer <0.11,>=0.10.9
    - outlines ==0.1.11
    - lark ==1.2.2
    # - xgrammar >=0.1.6 # platform_machine == "x86_64"
    - typing_extensions >=4.10
    - filelock >=3.16.1
    - partial-json-parser
    - pyzmq
    - msgspec
    - gguf ==0.10.0
    - importlib-metadata
    - mistral_common >=1.5.0
    - pyyaml
    # - six >=1.16.0 # python_version > "3.11"
    # - setuptools >=74.1.1 # python_version > "3.11"
    - einops
    - compressed-tensors ==0.8.1
    - depyf ==0.18.0
    - cloudpickle
    - ray-default >=2.9
    - nvidia-ml-py >=12.560.30
    - torch ==2.5.1
    - torchvision ==0.20.1
    # - xformers ==0.0.28.post3 # platform_system == "Linux" and platform_machine == "x86_64"
    # - tensorizer >=2.9.0 # extra == "tensorizer"
    # - runai-model-streamer # extra == "runai"
    # - runai-model-streamer-s3 # extra == "runai"
    # - boto3 # extra == "runai"
    # - librosa # extra == "audio"
    # - soundfile # extra == "audio"
    # - decord # extra == "video"

tests:
  - python:
      imports:
        - vllm
      pip_check: true
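  # Beyond the import check, the vllm entry point declared under build could
  # be exercised with a script test such as "vllm --help" (a suggestion, not
  # part of this recipe); any test that loads a model would need a GPU runner.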

about:
  homepage: https://github.com/vllm-project/vllm
  summary: A high-throughput and memory-efficient inference and serving engine for LLMs
  description: Easy, fast, and cheap LLM serving for everyone
  license: Apache-2.0
  license_file: LICENSE
  documentation: https://vllm.readthedocs.io/en/latest/

extra:
  recipe-maintainers:
    - maresb
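For local iteration before CI, an invocation along these lines should exercise the recipe (a sketch: it assumes rattler-build is installed and a CUDA toolchain is available, since the recipe skips builds when cuda_compiler_version is "None"):

    # Build the v1-format recipe against conda-forge from the repository root.
    rattler-build build --recipe recipes/vllm/recipe.yaml -c conda-forge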