Skip to content

Commit

Permalink
#49: GitHub CI Benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
henryleberre committed Dec 15, 2023
1 parent 371c51a commit e7604c3
Show file tree
Hide file tree
Showing 13 changed files with 251 additions and 107 deletions.
67 changes: 67 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Benchmarks the checked-out revision and upstream master on Georgia Tech's
# Phoenix cluster (one job per device type) and, for pull requests, posts a
# comparison of the two result sets as a comment.
name: 'Benchmark'

on:
  push:
    paths:
      - '**.f90'
      - '**.fpp'
      - '**.py'
      - '**.yml'
      - 'mfc.sh'
      - 'CMakeLists.txt'
      - 'requirements.txt'

  pull_request:

  workflow_dispatch:

jobs:
  self:
    name: Georgia Tech | Phoenix (NVHPC)
    if: github.repository == 'MFlowCode/MFC'
    strategy:
      matrix:
        device: ['cpu', 'gpu']
    runs-on:
      group: phoenix
      labels: self-hosted
    steps:
      - name: Clone
        uses: actions/checkout@v3

      # submit.sh passes `-W` to sbatch, so it already blocks until the job has
      # finished; no polling of the queue is needed (and `watch` never exits).
      # The job writes its log to bench-<device>.out and the benchmark results
      # to bench-<device>.yaml; both are renamed so the master run below does
      # not overwrite them.
      - name: Bench - PR
        run: |
          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
          mv bench-${{ matrix.device }}.out  bench-${{ matrix.device }}-pr.out
          mv bench-${{ matrix.device }}.yaml bench-${{ matrix.device }}-pr.yaml

      - name: Bench - Master
        run: |
          # The workspace of a self-hosted runner may be reused between runs,
          # in which case the remote already exists.
          git remote get-url upstream > /dev/null 2>&1 || git remote add upstream https://github.com/MFlowCode/MFC.git
          git fetch upstream
          git checkout upstream/master
          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
          mv bench-${{ matrix.device }}.out  bench-${{ matrix.device }}-master.out
          mv bench-${{ matrix.device }}.yaml bench-${{ matrix.device }}-master.yaml

      # Multi-line values must be written to $GITHUB_ENV with the
      # NAME<<DELIMITER ... DELIMITER form. compare.py reads the YAML result
      # files, not the Slurm logs.
      - name: Generate Comment
        run: |
          {
            echo 'BENCH_COMMENT<<BENCH_COMMENT_EOF'
            python3 .github/workflows/phoenix/compare.py bench-${{ matrix.device }}-master.yaml bench-${{ matrix.device }}-pr.yaml
            echo 'BENCH_COMMENT_EOF'
          } >> "$GITHUB_ENV"

      # Only pull_request events carry an issue number to comment on.
      - name: Post Comment
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: process.env.BENCH_COMMENT
            })

      - name: Print
        if: always()
        run: |
          cat bench-${{ matrix.device }}-master.out || true
          cat bench-${{ matrix.device }}-pr.out || true
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build & Publish thereto
uses: docker/build-push-action@v4
uses: docker/build-push-action@v3
with:
file: toolchain/Dockerfile
push: true
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/phoenix/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Benchmark payload. submit.sh inlines this file's contents into the Slurm
# batch script it generates and defines $job_slug just before them, so the
# results are written to "<job_slug>.yaml".
./mfc.sh bench "$job_slug.yaml" -j $(nproc) -b mpirun
53 changes: 53 additions & 0 deletions .github/workflows/phoenix/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""Compare two benchmark result files and print a Markdown summary.

Usage: compare.py MASTER PR

Both files are YAML documents with a top-level ``cases`` list whose entries
carry a ``name`` plus ``pre_process`` and ``simulation`` timings in seconds.
The Markdown summary is the only thing written to stdout, so that it can be
captured verbatim; diagnostics go to stderr.
"""

import argparse
import sys

TARGETS = ("pre_process", "simulation")


def load_cases(filepath):
    """Return the cases stored in *filepath* as a dict keyed by case name."""
    # Imported here so the comparison helpers below can be used (and tested)
    # without PyYAML being installed.
    import yaml

    with open(filepath, encoding="utf-8") as f:
        return {case["name"]: case for case in yaml.safe_load(f)["cases"]}


def compute_speedups(master, pr):
    """Return per-target speedups of *pr* over *master* for the shared cases.

    A speedup is ``master_time / pr_time``: values above 1 mean the PR is
    faster than master, values below 1 mean it is slower.
    """
    return {
        name: {
            target: master[name][target] / pr[name][target]
            for target in TARGETS
        }
        for name in sorted(master.keys() & pr.keys())
    }


def build_message(master, pr, speedups):
    """Format *speedups* as a Markdown summary line followed by a table."""
    if not speedups:
        # Nothing to average over; avoid dividing by zero.
        return ("**[Benchmark Results]** Master and this PR have no "
                "benchmark cases in common.")

    avg_speedup = sum(s["simulation"] for s in speedups.values()) / len(speedups)

    lines = [
        "**[Benchmark Results]** Compared to Master, this PR's `simulation` "
        f"is on average **~{avg_speedup:0.2f}x faster**.",
        "",  # a blank line keeps the table from being parsed as paragraph text
        "| **Case** | **Master** | **PR** | **Speedup** |",
        "| -------- | ---------- | ------ | ----------- |",
    ]

    for name in sorted(speedups):
        lines.append(
            f"| {name} "
            f"| {master[name]['simulation']:0.2f}s "
            f"| {pr[name]['simulation']:0.2f}s "
            f"| {speedups[name]['simulation']:0.2f}x |"
        )

    return "\n".join(lines)


def main():
    """Parse the command line, compare the two files and print the summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument('master', metavar="MASTER", type=str)
    parser.add_argument('pr', metavar="PR", type=str)

    args = parser.parse_args()

    print(f"Comparing {args.master} to {args.pr}...", file=sys.stderr)

    master, pr = load_cases(args.master), load_cases(args.pr)

    # Cases present on only one side cannot be compared.
    for name in sorted(master.keys() ^ pr.keys()):
        print(f"Warning: Case {name} is missing from master or PR.",
              file=sys.stderr)

    print(build_message(master, pr, compute_speedups(master, pr)))


if __name__ == "__main__":
    main()
58 changes: 58 additions & 0 deletions .github/workflows/phoenix/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
#
# Wrap a payload script in a Slurm batch script for the requested device type
# and submit it with sbatch. Because of `#SBATCH -W`, sbatch does not return
# until the job has terminated.
#
# Usage: submit.sh [script.sh] [cpu|gpu]
#
# The job's combined output is written to "<job_slug>.out", where <job_slug>
# is the payload's file name without ".sh" (non-alphanumerics replaced by "-")
# followed by "-<device>". The payload sees $job_slug and $job_device.

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]"
}

if [ -z "$1" ]; then
    usage
    exit 1
fi

# The payload is inlined into the batch script below, so it has to be readable
# now; otherwise an empty job would be submitted.
if [ ! -r "$1" ]; then
    echo "Error: cannot read script '$1'." >&2
    usage
    exit 1
fi

sbatch_script_contents=$(cat "$1")

sbatch_cpu_opts="\
#SBATCH --ntasks-per-node=12 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core\
"

sbatch_gpu_opts="\
#SBATCH -CV100-16GB
#SBATCH -G2\
"

case "$2" in
    cpu) sbatch_device_opts="$sbatch_cpu_opts" ;;
    gpu) sbatch_device_opts="$sbatch_gpu_opts" ;;
    *)
        usage
        exit 1
        ;;
esac

job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2"

# The here-document is unquoted on purpose: $job_slug, $2, the device options
# and the payload are substituted at submission time, while everything
# escaped with a backslash is evaluated later, inside the job.
sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug # Job name
#SBATCH --account=gts-sbryngelson3 # charge account
#SBATCH -N1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 04:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q embers # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.
set -x
cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"
job_slug="$job_slug"
job_device="$2"
. ./mfc.sh load -c p -m $2
$sbatch_script_contents
EOT
9 changes: 9 additions & 0 deletions .github/workflows/phoenix/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Test payload. submit.sh inlines this file's contents into the Slurm batch
# script it generates and sets $job_device (its cpu|gpu argument) just before
# them.
if [ "$job_device" == "gpu" ]; then
    gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node
    gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # space-separated IDs: 0 1 2 ... gpu_count-1
    device_opts="--gpu -g $gpu_ids"
fi

# $device_opts is only assigned in the GPU branch above and is left unquoted
# so that it splits into separate arguments.
./mfc.sh test -a -b mpirun -j $(nproc) $device_opts
9 changes: 2 additions & 7 deletions .github/workflows/ci.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,8 @@ jobs:
./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
- name: Test
run: |
. ./mfc.sh load -c p -m gpu
mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
sbatch run-phoenix-release-${{ matrix.device }}.sh
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Print
if: always()
run: |
cat test.out
run: cat test-${{ matrix.device }}.out
16 changes: 0 additions & 16 deletions misc/run-phoenix-release-cpu.sh

This file was deleted.

24 changes: 0 additions & 24 deletions misc/run-phoenix-release-gpu.sh

This file was deleted.

7 changes: 7 additions & 0 deletions toolchain/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: 1D_bubblescreen
path: examples/1D_bubblescreen/case.py
args: []

- name: 1D_kapilashocktube
path: examples/1D_kapilashocktube/case.py
args: []
4 changes: 3 additions & 1 deletion toolchain/mfc/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")

# === BENCH ===
add_common_arguments(bench, "t")
add_common_arguments(bench, "tjgn")
bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")

# === COUNT ===
add_common_arguments(count, "g")
Expand Down
95 changes: 43 additions & 52 deletions toolchain/mfc/bench.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,53 @@
import os, json, time, typing, datetime, subprocess

import rich.table
import sys, time, subprocess, dataclasses

from .printer import cons
from .state import ARG
from .state import ARG, CFG
from .build import PRE_PROCESS, SIMULATION, build_targets
from .common import system, MFC_SUBDIR
from .common import system, MFC_BENCH_FILEPATH, file_load_yaml, file_dump_yaml
from . import sched


@dataclasses.dataclass
class BenchCase:
    """One benchmark case, built from an entry of the benchmark YAML file."""
    # Label of the case; stored alongside its timings in the results.
    name: str
    # Path of the case file handed to `./mfc.sh build` and `./mfc.sh run`.
    path: str
    # Extra command-line arguments appended to the `./mfc.sh run` invocation.
    args: list[str]


def bench():
    """Time the pre_process and simulation targets for every benchmark case.

    The cases are read from MFC_BENCH_FILEPATH, and the arguments given in
    ARG("forwarded") are appended to each case's own arguments. Every case is
    built with --case-optimization and each target is then run once. The
    elapsed times in seconds are written, together with the invocation and
    configuration metadata, to the YAML file named by ARG("output").
    """
    build_targets([PRE_PROCESS, SIMULATION])

    cons.print()
    cons.print("[bold]Benchmarking [magenta]simulation[/magenta]:[/bold]")
    cons.indent()

    cases = [BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH)]

    for case in cases:
        case.args = case.args + ARG("forwarded")

    cons.print(f"Found [magenta]{len(cases)}[/magenta] cases.")

    results = {
        "metadata": {
            "invocation": sys.argv[1:],
            "lock": dataclasses.asdict(CFG())
        },
        "cases": [],
    }

    # Zero-pad the running index to as many digits as the total has.
    width = len(str(len(cases)))

    for i, case in enumerate(cases, start=1):
        cons.print(f"{str(i).zfill(width)}/{len(cases)}: {case.name} @ [bold]{case.path}[/bold]")
        system(["./mfc.sh", "build", "--targets", "pre_process", "simulation", "--case-optimization", "--input", case.path], stdout=subprocess.DEVNULL)

        case_results = dataclasses.asdict(case)

        for target in [PRE_PROCESS, SIMULATION]:
            # perf_counter() is not affected by system clock adjustments,
            # unlike time.time().
            start = time.perf_counter()
            system(["./mfc.sh", "run", case.path, "--targets", target.name, "--case-optimization", *case.args], stdout=subprocess.DEVNULL)
            case_results[target.name] = time.perf_counter() - start

        results["cases"].append(case_results)

    file_dump_yaml(ARG("output"), results)

    cons.unindent()
Loading

0 comments on commit e7604c3

Please sign in to comment.