clean up

neuralmagic · Jun 7, 2024 · 8d35d93 · 8d35d93 · github-actions · Jun 7, 2024
1 parent 7ce0fc7
commit 8d35d93
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 12 deletions.
diff --git a/.github/actions/nm-build-docker/action.yml b/.github/actions/nm-build-docker/action.yml
@@ -5,6 +5,10 @@ inputs:
     description: "tag to be used for the docker image"
     type: string
     required: true
+  additional_tag:
+    description: "additional tag for the docker image"
+    type: string
+    required: true
   build_type:
     description: "type of nm-vllm to install for the docker image: NIGHTLY (default) or RELEASE"
     type: string
@@ -28,13 +32,9 @@ runs:
              --build-arg build_type=${{ inputs.build_type }} \
              --build-arg build_version=${{ inputs.build_version }} \
              --target vllm-openai . || status=$?
-      tag=nightly
-      if [[ "${build_type}" = "RELEASE" ]]; then
-          tag=latest
-      fi
       if [ ${status} -eq 0 ]; then
+          echo "Add tag ${{ inputs.additional_tag }} for ${{ inputs.build_type }} build too"  # inputs are not shell variables in a composite action; interpolate them
+          docker image tag ghcr.io/neuralmagic/nm-vllm-openai:${{ inputs.docker_tag }} ghcr.io/neuralmagic/nm-vllm-openai:${{ inputs.additional_tag }} || ((status+=$?))
       fi
       docker image ls -a
       echo "status=${status}" >> $GITHUB_OUTPUT

diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml
@@ -49,20 +49,21 @@ jobs:
             id: setup
             uses: ./.github/actions/nm-setup-nvidia-container-toolkit/
 
+          - name: Get image additional tag
+            id: tag
+            uses: ./.github/actions/nm-get-tag/
+            with:
+              build_type: ${{ inputs.build_type }}
+
           - name: Build image
             id: build
             uses: ./.github/actions/nm-build-docker/
             with:
               docker_tag: ${{ inputs.docker_tag }}
+              additional_tag: ${{ steps.tag.outputs.tag }}
               build_type: ${{ inputs.build_type }}
               build_version: ${{ inputs.build_version }}
 
-          - name: Get image additional tag
-            id: tag
-            uses: ./.github/actions/nm-get-tag/
-            with:
-              build_type: ${{ inputs.build_type }}
-
           - name: Push image
             uses: docker/build-push-action@v5
             if: ${{ inputs.push_to_repository == 'yes' && steps.build.outputs.status == 0 }}
Benchmark suite	Current: `8d35d93`	Previous: `87571b8`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:29:40) \n[GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`3.7926169052128205` prompts/s	`3.7927348938373964` prompts/s	`1.00`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:29:40) \n[GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`1456.364891601723` tokens/s	`1456.4101992335602` tokens/s	`1.00`
Benchmark suite	Current: `8d35d93`	Previous: `87571b8`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:38:31) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`3.7949229904166253` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:38:31) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`1457.2504283199842` tokens/s
Benchmark suite	Current: `8d35d93`	Previous: `87571b8`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:39) \n[GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`3.792616141310529` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:39) \n[GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`1456.3645982632431` tokens/s
Benchmark suite	Current: `8d35d93`	Previous: `87571b8`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 10:57:56) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`3.8491447416740576` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA A10G x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 10:57:56) [GCC 9.4.0]", "torch_version": "2.3.0+cu121"}`	`1478.071580802838` tokens/s