# nm-benchmark.yml
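#
# Reusable benchmark workflow: provisions the requested runner, installs the
# nm-vllm wheel, runs the configured benchmark suite, and publishes the
# results as workflow artifacts, to the shared Filestore, and (optionally) to
# the nm-gh-pages benchmark UI.
#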
name: benchmark
on:
  # makes workflow reusable
  workflow_call:
    inputs:
      label:
        description: "requested runner label (specifies instance)"
        type: string
        required: true
      benchmark_config_list_file:
        description: "path to a file containing the list of benchmark configs to run; for reference, see .github/data/nm_benchmark_configs_list.txt"
        type: string
        required: true
      timeout:
        description: "maximum time, in minutes, that the runner will be up"
        type: string
        required: true
      gitref:
        description: "git commit hash or branch name"
        type: string
        required: true
      python:
        description: "python version, e.g. 3.10.12"
        type: string
        required: true
      whl:
        description: "whl to test (the variable appears to be late-binding, so it is unusable outside 'download artifact')"
        type: string
        required: true
      push_benchmark_results_to_gh_pages:
        description: "When set to true, the workflow pushes all benchmarking results to the gh-pages UI"
        type: string
        required: true
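
  # NOTE: workflow_dispatch cannot reuse the workflow_call input definitions,
  # so the same inputs are redeclared below; only
  # push_benchmark_results_to_gh_pages differs (a choice with a default).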
  # makes workflow manually callable
  workflow_dispatch:
    inputs:
      label:
        description: "requested runner label (specifies instance)"
        type: string
        required: true
      benchmark_config_list_file:
        description: "path to a file containing the list of benchmark configs to run; for reference, see .github/data/nm_benchmark_configs_list.txt"
        type: string
        required: true
      timeout:
        description: "maximum time, in minutes, that the runner will be up"
        type: string
        required: true
      gitref:
        description: "git commit hash or branch name"
        type: string
        required: true
      python:
        description: "python version, e.g. 3.10.12"
        type: string
        required: true
      whl:
        description: "whl to test (the variable appears to be late-binding, so it is unusable outside 'download artifact')"
        type: string
        required: true
      push_benchmark_results_to_gh_pages:
        description: "When set to true, the workflow pushes all benchmarking results to the gh-pages UI"
        type: choice
        options:
          - 'true'
          - 'false'
        default: 'false'

env:
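  # Default destination for benchmark results on the shared Filestore cache.
  # The "mount EFS" step below overrides this to /EFS/benchmark_results when
  # the job runs on an AWS-labeled runner.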
  BENCHMARK_RESULTS: /model-cache/benchmark_results

jobs:
  BENCHMARK:
    runs-on: ${{ inputs.label }}
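    # the timeout input arrives as a string; fromJSON converts it to the
    # number that timeout-minutes expects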
    timeout-minutes: ${{ fromJSON(inputs.timeout) }}
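    # export the benchmark-JSON artifact name so the BENCHMARK_REPORT job can
    # download it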
    outputs:
      gh_action_benchmark_input_artifact_name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name }}
    steps:
      - name: set python
        id: set_python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python }}

      - name: install automation components
        run: |
          sudo apt-get update --fix-missing
          sudo apt-get install -y git-all
          sudo apt-get install -y curl

      - name: checkout repository code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ inputs.gitref }}
          submodules: recursive

      - name: setenv
        id: setenv
        uses: ./.github/actions/nm-set-env/
        with:
          hf_token: ${{ secrets.NM_HF_TOKEN }}
          Gi_per_thread: 1
          nvcc_threads: 0

      - name: hf cache
        id: hf_cache
        uses: ./.github/actions/nm-hf-cache/
        with:
          fs_cache: ${{ secrets.HF_FS_CACHE }}
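
      # The artifact-based wheel install below is kept for reference but is
      # currently disabled; the wheel is installed from the Neural Magic PyPI
      # index in the "install whl" step instead.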
      # - name: download whl
      #   id: download
      #   uses: actions/download-artifact@v4
      #   with:
      #     name: ${{ inputs.whl }}
      #     path: ${{ inputs.whl }}
      # - name: install whl
      #   id: install_whl
      #   uses: ./.github/actions/nm-install-whl/
      #   with:
      #     python: ${{ inputs.python }}
      #     venv:
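
      # Install the published nm-vllm wheel, then rename the in-tree vllm/ and
      # csrc/ directories (presumably so benchmarks import the installed
      # package rather than the source checkout).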
      - name: install whl
        run: |
          pip install --extra-index-url https://pypi.neuralmagic.com/simple nm-vllm
          mv vllm vllm-ignore
          mv csrc csrc-ignore

      - name: run benchmarks
        uses: ./.github/actions/nm-benchmark/
        with:
          benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }}
          output_directory: benchmark-results
          python: ${{ inputs.python }}
          venv:

      - name: take a look
        run: |
          ls -al benchmark-results

      - name: store benchmark result artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}
          path: benchmark-results
          retention-days: 2
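
      # On AWS runners, mount the EFS share and point BENCHMARK_RESULTS at it
      # so results land on the shared filesystem.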
      - name: mount EFS
        run: |
          if [[ ${{ inputs.label }} == *aws* ]]; then
            sudo mkdir -m 777 -p /EFS
            sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${{ secrets.HF_FS_CACHE }}:/ /EFS
            sudo chown -R $(whoami):$(whoami) /EFS
            # update results target
            echo "BENCHMARK_RESULTS=/EFS/benchmark_results" >> $GITHUB_ENV
          fi

      - name: copy benchmark results to Filestore
        if: success()
        uses: ./.github/actions/nm-copy-benchmark-data-to-fs
        with:
          label: ${{ inputs.label }}
          python: ${{ inputs.python }}
          src: benchmark-results
          fs_dst: ${{ env.BENCHMARK_RESULTS }}
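
      # The GHA benchmark JSONs produced below are consumed by
      # github-action-benchmark in the BENCHMARK_REPORT job.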
      # Produce GHA benchmark JSONs
      - name: make github-action-benchmark JSONs
        uses: ./.github/actions/nm-produce-gha-benchmark-json
        with:
          vllm_benchmark_jsons_path: benchmark-results
          # Metrics that are "better" when the value is greater are stored here
          bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json
          # Metrics that are "better" when the value is smaller are stored here
          smaller_is_better_output_file_path: gh-action-benchmark-jsons/smaller_is_better.json
          # Metrics that we only want to observe are stored here
          observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
          python: ${{ inputs.python }}
          venv:
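
      # The artifact name encodes the run id, runner label, and python version;
      # it is exported via the job output declared above.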
      - name: set gh action benchmark input artifact name
        id: set_gh_action_benchmark_input_artifact_name
        run: |
          GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME=`echo "gh_action_benchmark_jsons-${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}"`
          echo "gh_action_benchmark_input_artifact_name=$GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME" >> $GITHUB_OUTPUT

      - name: store gh action benchmark input artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name }}
          path: gh-action-benchmark-jsons
          retention-days: 1

      - name: copy gh action benchmark JSONs to Filestore
        if: success()
        uses: ./.github/actions/nm-copy-benchmark-data-to-fs
        with:
          label: ${{ inputs.label }}
          python: ${{ inputs.python }}
          src: gh-action-benchmark-jsons
          fs_dst: ${{ env.BENCHMARK_RESULTS }}
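
  # Publish the benchmark JSONs produced above to the nm-gh-pages branch via
  # github-action-benchmark (push is gated on push_benchmark_results_to_gh_pages).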
  BENCHMARK_REPORT:
    needs: [BENCHMARK]
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      # Permissions required to be able to push to the nm-gh-pages branch
      contents: write
    steps:
      - name: checkout repository code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ inputs.gitref }}
          submodules: recursive

      - name: download benchmark results artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ needs.BENCHMARK.outputs.gh_action_benchmark_input_artifact_name }}
          path: downloads

      - name: display structure of downloaded files
        run: ls -R ./downloads
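
      # Each reporting step below runs even if an earlier one failed
      # ((success() || failure())), but only when its JSON file was produced.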
      - name: nm-github-action-benchmark(bigger_is_better.json)
        # Absence of the file indicates that there were no "bigger_is_better" metrics
        if: (success() || failure()) && (hashFiles('downloads/bigger_is_better.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "bigger_is_better"
          gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
          gh_action_benchmark_tool: "customBiggerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "true"
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: nm-github-action-benchmark(smaller_is_better.json)
        # Absence of the file indicates that there were no "smaller_is_better" metrics
        if: (success() || failure()) && (hashFiles('downloads/smaller_is_better.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "smaller_is_better"
          gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
          gh_action_benchmark_tool: "customSmallerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "true"
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: nm-github-action-benchmark(observation_metrics.json)
        # Absence of the file indicates that there were no "observation" metrics
        if: (success() || failure()) && (hashFiles('downloads/observation_metrics.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "observation_metrics"
          gh_action_benchmark_json_file_path: "downloads/observation_metrics.json"
          # `github-action-benchmark` expects a tool name that is either
          # "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
          # work around that. Since we mark the action to not report failures, this
          # is fine.
          gh_action_benchmark_tool: "customBiggerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "false"
          github_token: ${{ secrets.GITHUB_TOKEN }}