upload RELEASE wheel to pypi.org

Pass the workflow category and the wheel file name to the nm-upload-assets-to-gcp.yml workflow so that they can be used by the publish_whl action.
neuralmagic · Jul 3, 2024 · 6218ac3 · 6218ac3 · github-actions · Jul 3, 2024
1 parent 53347d5
commit 6218ac3
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 1 deletion.
diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml
@@ -169,6 +169,8 @@ jobs:
             label: gcp-k8s-util
             timeout: ${{ inputs.build_timeout }}
             gitref: ${{ github.ref }}
+            wf_category: ${{ inputs.wf_category }}
+            whl: ${{ needs.BUILD.outputs.whl }}
         secrets: inherit
 
     # update docker

diff --git a/.github/workflows/nm-upload-assets-to-gcp.yml b/.github/workflows/nm-upload-assets-to-gcp.yml
@@ -16,7 +16,14 @@ on:
                 description: 'git commit hash or branch name'
                 type: string
                 required: true
-
+            wf_category:
+                description: "workflow category: REMOTE, NIGHTLY, RELEASE"
+                type: string
+                default: "REMOTE"
+            whl:
+                description: "wheel file path"
+                type: string
+                required: true
 jobs:
 
     PUBLISH:
@@ -53,6 +60,16 @@ jobs:
               with:
                   path: assets
 
+            - name: push wheel to pypi.org
+              # This workflow is only run if push-to-pypi is True, and we only
+              # want to push RELEASE wheels to the external pypi.org.
+              #
+              # The whole comparison must live inside the expression. Writing
+              # `${{ inputs.wf_category }} == "RELEASE"` only substitutes the
+              # input and leaves a non-empty string behind, which is always
+              # truthy, so REMOTE and NIGHTLY wheels would be published too.
+              # String literals inside expressions use single quotes.
+              if: ${{ inputs.wf_category == 'RELEASE' }}
+              # `uses` takes the directory that contains the action metadata
+              # file, not the path of action.yml itself.
+              uses: neuralmagic/nm-actions/actions/publish_whl@main
+              with:
+                username: ${{ secrets.PYPI_PUBLIC_USER }}
+                password: ${{ secrets.PYPI_PUBLIC_AUTH }}
+                whl: ${{ inputs.whl }}
+
             # GCP
             - name: 'Authenticate to Google Cloud'
               id: auth
@@ -70,3 +87,4 @@ jobs:
             - name: cp assets
               id: cp-assets
               uses: ./.github/actions/nm-cp-assets/
+
Benchmark suite	Current: `6218ac3`	Previous: `53347d5`	Ratio
`{"name": "mean_ttft_ms", "description": "VLLM Serving - Dense\nmodel - facebook/opt-350m\nmax-model-len - 2048\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`24.74136927666071` ms	`23.707702909998716` ms	`1.04`
`{"name": "mean_tpot_ms", "description": "VLLM Serving - Dense\nmodel - facebook/opt-350m\nmax-model-len - 2048\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`6.183022304156892` ms	`5.927541407824298` ms	`1.04`
`{"name": "mean_ttft_ms", "description": "VLLM Serving - Dense\nmodel - meta-llama/Meta-Llama-3-8B-Instruct\nmax-model-len - 4096\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`187.70898154333813` ms	`189.56996849333186` ms	`0.99`
`{"name": "mean_tpot_ms", "description": "VLLM Serving - Dense\nmodel - meta-llama/Meta-Llama-3-8B-Instruct\nmax-model-len - 4096\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`85.07850605865046` ms	`84.82286798803653` ms	`1.00`