Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Caliperize hpcg #295

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:

- name: Upload Workspace Archive as CI Artifact
if: always()
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b
with:
name: workspace-archive
path: './workspace/saxpy/openmp/nosite-x86_64/workspace/archive/**'
Expand Down
14 changes: 12 additions & 2 deletions bin/benchpark
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def benchpark_setup_handler(args):
# Always exclude files that start with "."
if fname.startswith("."):
return False
if fname.endswith(".yaml") or fname.endswith(".tpl"):
if fname.endswith(".yaml"):
return True
return False

Expand All @@ -411,8 +411,18 @@ def benchpark_setup_handler(args):
ramble_spack_experiment_configs_dir,
include_fn,
)

template_name = "execute_experiment.tpl"
experiment_template_options = [
configs_src_dir / template_name,
experiment_src_dir / template_name,
source_dir / "common-resources" / template_name,
]
for choice_template in experiment_template_options:
if os.path.exists(choice_template):
break
os.symlink(
source_dir / "common-resources" / "execute_experiment.tpl",
choice_template,
ramble_configs_dir / "execute_experiment.tpl",
)

Expand Down
2 changes: 1 addition & 1 deletion checkout-versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@

versions:
ramble: cc7a83e708d22a482ead437e2f71260e59c0ee8f # develop on 6/5/2024 (a few commits past 0.5.0 release).
spack: cd741c368ca3f0df4297d46092107a49e7698828 # develop before 6/5/2024 (a few commits past 0.22.0 release)
spack: 99405e6 # develop-2024-06-30 (newer than 0.22.0 release)
2 changes: 2 additions & 0 deletions common-resources/execute_experiment.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@

cd {experiment_run_dir}

{pre_exec}
{command}
{post_exec}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ variables:
timeout: "120"
scheduler: "flux"
sys_cores_per_node: "64"
sys_gpus_per_node: "4"
sys_gpus_per_node: "8"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
Expand Down
8 changes: 7 additions & 1 deletion configs/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ variables:
scheduler: "pjm"
sys_cores_per_node: "48"
sys_mem_per_node: "32"
extra_cmd_opts: |
-std-proc fjmpioutdir/bmexe
extra_batch_opts: |
-x PJM_LLIO_GFSCACHE="/vol0002:/vol0003:/vol0004:/vol0005:/vol0006"
post_exec_cmds: |
for F in $(ls -1v fjmpioutdir/bmexe.*); do cat $F >> {log_file}; done
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
#sys_arch: 'arch=linux-rhel8-a64fx'
#sys_arch: 'arch=linux-rhel8-a64fx'
2 changes: 2 additions & 0 deletions configs/nosite-x86_64/variables.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ variables:
scheduler: "mpi"
sys_cores_per_node: "1"
# sys_gpus_per_node unset
extra_cmd_opts: |
--oversubscribe
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
Expand Down
19 changes: 19 additions & 0 deletions configs/test-extra-batch-opts/spack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

spack:
packages:
default-compiler:
spack_spec: gcc
compiler-gcc:
spack_spec: gcc
compiler-intel:
spack_spec: intel
default-mpi:
spack_spec: openmpi
blas:
spack_spec: blas
lapack:
spack_spec: lapack
22 changes: 22 additions & 0 deletions configs/test-extra-batch-opts/variables.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

variables:
timeout: "120"
scheduler: "pjm"
sys_cores_per_node: "48"
sys_mem_per_node: "32"
extra_cmd_opts: "-z value3"
extra_batch_opts: |
-x PJM_LLIO_GFSCACHE="/vol0002:/vol0003:/vol0004:/vol0005:/vol0006"
-y value2
post_exec_cmds: |
for F in $(ls -1v fjmpioutdir/bmexe.*); do cat $F >> {log_file}; done
echo "done"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
4 changes: 3 additions & 1 deletion experiments/hpcg/openmp/ramble.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ramble:
include:
- ./configs/spack.yaml
- ./configs/variables.yaml
- ./configs/modifier.yaml

config:
deprecated: true
Expand Down Expand Up @@ -40,10 +41,11 @@ ramble:
spack:
packages:
hpcg-omp:
spack_spec: '[email protected] +openmp'
spack_spec: [email protected] +openmp{modifier_spack_variant}
compiler: default-compiler
environments:
hpcg-omp:
packages:
- default-mpi
- hpcg-omp
- '{modifier_package_name}'
59 changes: 46 additions & 13 deletions modifiers/allocation/modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,23 @@ class AllocOpt(Enum):
MAX_REQUEST = 202
QUEUE = 203

# Exec customization for inserting arbitrary options and commands,
# inserted verbatim
EXTRA_BATCH_OPTS = 300
EXTRA_CMD_OPTS = 301
POST_EXEC_CMDS = 302
PRE_EXEC_CMDS = 303

@staticmethod
def as_type(enumval, input):
    """Coerce a raw variable value to the type implied by its option.

    Scheduler/queue names and the free-form exec-customization options
    are kept as strings; every other allocation option is numeric.
    """
    string_valued = (
        AllocOpt.SCHEDULER,
        AllocOpt.QUEUE,
        AllocOpt.EXTRA_BATCH_OPTS,
        AllocOpt.EXTRA_CMD_OPTS,
        AllocOpt.POST_EXEC_CMDS,
        AllocOpt.PRE_EXEC_CMDS,
    )
    return str(input) if enumval in string_valued else int(input)
Expand Down Expand Up @@ -290,8 +304,7 @@ def determine_allocation(self, v):
raise ValueError(f"Request exceeds maximum: {var}/{val}/{max_request}")

def slurm_instructions(self, v):
srun_opts = []
sbatch_opts = [] # opts just for the sbatch script
sbatch_opts, srun_opts = Allocation._init_batch_and_cmd_opts(v)

if v.n_ranks:
srun_opts.append(f"-n {v.n_ranks}")
Expand Down Expand Up @@ -331,9 +344,30 @@ def gpus_as_gpus_per_rank(self, v):
else:
raise ValueError(err_msg)

@staticmethod
def _init_batch_and_cmd_opts(v):
"""System/experiment may have universal options they want to apply
for all batch allocations or exec calls.
"""
batch_opts, cmd_opts = [], []
if v.extra_batch_opts:
batch_opts.extend(v.extra_batch_opts.strip().split("\n"))
if v.extra_cmd_opts:
cmd_opts.extend(v.extra_cmd_opts.strip().split("\n"))

if v.pre_exec_cmds:
v.pre_exec = v.pre_exec_cmds
else:
v.pre_exec = ""
if v.post_exec_cmds:
v.post_exec = v.post_exec_cmds
else:
v.post_exec = ""

return batch_opts, cmd_opts

def flux_instructions(self, v):
cmd_opts = []
batch_opts = []
batch_opts, cmd_opts = Allocation._init_batch_and_cmd_opts(v)

if v.n_ranks:
cmd_opts.append(f"-n {v.n_ranks}")
Expand All @@ -354,7 +388,10 @@ def flux_instructions(self, v):
v.allocation_directives = "\n".join(batch_directives)

def mpi_instructions(self, v):
    """Fill in launch variables for scheduler-less ("mpi") systems."""
    # NOTE(review): this line looks like removed-diff text merged into the
    # capture; its value is immediately overwritten below and has no effect.
    v.mpi_command = f"mpirun -n {v.n_ranks} --oversubscribe"
    # Start from user-supplied extras, then add the rank count.
    # batch_opts is unused here: a bare mpirun has no batch directives.
    batch_opts, cmd_opts = Allocation._init_batch_and_cmd_opts(v)
    cmd_opts.extend([f"-n {v.n_ranks}"])

    v.mpi_command = "mpirun " + " ".join(cmd_opts)
    # No scheduler: the generated experiment script is executed directly.
    v.batch_submit = "{execute_experiment}"
    v.allocation_directives = ""

Expand All @@ -364,8 +401,7 @@ def lsf_instructions(self, v):
machines (there is not currently a method for generating jsrun
invocations).
"""
cmd_opts = []
batch_opts = []
batch_opts, cmd_opts = Allocation._init_batch_and_cmd_opts(v)

if v.n_ranks:
cmd_opts.append(f"-n {v.n_ranks}")
Expand All @@ -390,21 +426,18 @@ def lsf_instructions(self, v):
v.allocation_directives = "\n".join(batch_directives)

def pjm_instructions(self, v):
    """Fill in batch directives and launch variables for the PJM
    (Fujitsu) scheduler."""
    # NOTE(review): this capture appears to merge removed and added diff
    # lines: "batch_opts = []" is immediately overwritten below, and the
    # hard-coded PJM_LLIO_GFSCACHE append plus the bare "mpiexec"
    # assignment look like leftover removed lines (the Fugaku cache paths
    # are now supplied via extra_batch_opts in the site configs) — confirm
    # against the merged file.
    batch_opts = []
    batch_opts, cmd_opts = Allocation._init_batch_and_cmd_opts(v)

    if v.n_ranks:
        batch_opts.append(f"--mpi proc={v.n_ranks}")
    if v.n_nodes:
        batch_opts.append(f'-L "node={v.n_nodes}"')
    if v.timeout:
        batch_opts.append(f'-L "elapse={TimeFormat.as_hhmmss(v.timeout)}"')
    batch_opts.append(
        '-x PJM_LLIO_GFSCACHE="/vol0001:/vol0002:/vol0003:/vol0004:/vol0005:/vol0006"'
    )

    # Each option becomes one "#PJM ..." directive in the batch script.
    batch_directives = list(f"#PJM {x}" for x in batch_opts)

    v.mpi_command = "mpiexec"
    v.mpi_command = "mpiexec " + " ".join(cmd_opts)
    v.batch_submit = "pjsub {execute_experiment}"
    v.allocation_directives = "\n".join(batch_directives)

Expand Down
22 changes: 22 additions & 0 deletions repo/hpcc/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import sys

from ramble.appkit import *
from ramble.app.builtin.hpcc import Hpcc as HpccBase


class Hpcc(HpccBase):
    """Benchpark wrapper around Ramble's built-in HPCC application.

    Extends the upstream definition only with Benchpark's descriptive tags.
    """

    tags = [
        "synthetic",
        "blas", "solver", "dense-linear-algebra", "fft",
        "large-scale",
        "high-fp", "high-memory-bandwidth",
        "regular-memory-access", "irregular-memory-access",
        "mpi", "network-collectives", "network-point-to-point",
        "network-bandwidth-bound", "network-bisection-bandwidth-bound",
        "network-latency-bound",
        "c", "fortran", "openmp",
    ]
18 changes: 18 additions & 0 deletions repo/hpcg/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import sys

from ramble.appkit import *
from ramble.app.builtin.hpcg import Hpcg as HpcgBase


class Hpcg(HpcgBase):
    """Benchpark wrapper around Ramble's built-in HPCG application.

    Extends the upstream definition only with Benchpark's descriptive tags.
    """

    tags = [
        "synthetic",
        "conjugate-gradient", "solver", "sparse-linear-algebra",
        "large-scale",
        "mpi", "network-point-to-point",
        "c++", "openmp",
    ]
48 changes: 48 additions & 0 deletions repo/hpcg/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

from spack.package import *


# NOTE(review): class renamed HPCG -> Hpcg.  Spack derives the expected
# class name from the package directory name ("hpcg" -> "Hpcg"), so the
# all-caps spelling prevents the repo from loading this package.
class Hpcg(CMakePackage):
    """HPCG is a software package that performs a fixed number of multigrid
    preconditioned (using a symmetric Gauss-Seidel smoother) conjugate gradient
    (PCG) iterations using double precision (64 bit) floating point values.
    """

    tags = ["benchmark"]

    # homepage previously pointed at the tarball; the download URL
    # belongs in "url" so Spack can derive versioned fetch URLs.
    homepage = "https://www.hpcg-benchmark.org"
    url = "https://www.hpcg-benchmark.org/downloads/hpcg-3.1.tar.gz"
    git = "https://github.com/daboehme/hpcg.git"

    # Valid SPDX identifier for the 3-clause BSD license
    # ("BSD-3" is not a recognized SPDX id).
    license("BSD-3-Clause")

    version("3.1", sha256="33a434e716b79e59e745f77ff72639c32623e7f928eeb7977655ffcaade0f4a4")
    version("3.1-caliper", git="https://github.com/daboehme/hpcg.git",
            branch="caliper-support", preferred=False)

    variant("mpi", default=True, description="Enable MPI support")
    variant("openmp", default=False, description="Enable OpenMP support")
    variant("caliper", default=False, description="Enable Caliper monitoring")

    depends_on("mpi", when="+mpi")
    depends_on("caliper", when="+caliper")
    depends_on("adiak", when="+caliper")

    # The Caliper instrumentation only exists on the caliper-support branch.
    requires("@3.1-caliper", when="+caliper")

    def cmake_args(self):
        """Map each variant onto the corresponding HPCG CMake option."""
        return [
            self.define_from_variant("HPCG_ENABLE_MPI", "mpi"),
            self.define_from_variant("HPCG_ENABLE_OPENMP", "openmp"),
            self.define_from_variant("HPCG_ENABLE_CALIPER", "caliper"),
        ]

    def install(self, spec, prefix):
        """Copy the benchmark binary into place.

        HPCG's build system provides no install target, so we copy the
        executable manually.
        """
        mkdirp(prefix.bin)
        # Fix: the executable is named "xhpcg"; the previous "xhpgc" typo
        # made every install fail with a missing-file error.
        install(join_path(self.build_directory, "xhpcg"), prefix.bin)
18 changes: 18 additions & 0 deletions repo/hpl/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import sys

from ramble.appkit import *
from ramble.app.builtin.hpl import Hpl as HplBase


class Hpl(HplBase):
    """Benchpark wrapper around Ramble's built-in HPL application.

    Extends the upstream definition only with Benchpark's descriptive tags.
    """

    tags = [
        "synthetic",
        "blas", "solver", "dense-linear-algebra",
        "large-scale",
        "mpi", "network-collectives", "network-point-to-point",
        "c",
    ]
14 changes: 14 additions & 0 deletions repo/md-test/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import sys

from ramble.appkit import *
from ramble.app.builtin.md_test import MdTest as MdTestBase


class MdTest(MdTestBase):
    """Benchpark wrapper around Ramble's built-in MdTest application.

    Extends the upstream definition only with Benchpark's descriptive tags.
    """

    tags = ["synthetic", "i-o", "large-scale", "mpi", "c"]
Loading
Loading