Skip to content

Commit

Permalink
Merge pull request #309 from LLNL/feature/performance_tests
Browse files Browse the repository at this point in the history
CI Overhaul
  • Loading branch information
ldowen authored Nov 26, 2024
2 parents 4e27343 + 1673bb2 commit e282594
Show file tree
Hide file tree
Showing 32 changed files with 831 additions and 1,342 deletions.
20 changes: 11 additions & 9 deletions .gitlab/machines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,27 @@
.on_ruby:
tags:
- ruby
- shell
- batch
variables:
SCHEDULER_ACTION: allocate
SCHEDULER_PARAMETERS: "--res=ci --exclusive -N 2 -t 120"
NPROC: 112
HOSTNAME: 'ruby'
PARTITION: pdebug
BUILD_ALLOC: srun -N 1 -c 36 -p pdebug -t 60
TEST_ALLOC: ''
CLEAN_ALLOC: srun -n 20
timeout: 120 minutes
extends: [.on_toss_4_x86]

.on_lassen:
tags:
- lassen
- shell
- batch
variables:
SCHEDULER_PARAMETERS: "-nnodes 1 -W 150 -q pci -alloc_flags atsdisable"
LSB_JOB_STARTER: "ENVIRONMENT=BATCH /usr/tcetmp/bin/bsub_job_starter %USRCMD"
NPROC: 40
ENVIRONMENT: "BATCH"
HOSTNAME: 'lassen'
BUILD_ALLOC: lalloc 1 -W 60
TEST_ALLOC: $BUILD_ALLOC
CLEAN_ALLOC: lalloc 1 lrun -n 20
LC_MODULES: "cuda/11.1.0"
timeout: 150 minutes
extends: [.on_blueos_3_ppc64]

# ------------------------------------------------------------------------------
Expand Down
22 changes: 12 additions & 10 deletions .gitlab/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
- cd $CI_BUILD_DIR
- echo $SPEC

- $BUILD_ALLOC ./$SCRIPT_DIR/gitlab/build_and_install.py --spec="$SPEC" --tpls-only
- ./$SCRIPT_DIR/gitlab/build_and_install.py --spec="$SPEC" --tpls-only
artifacts:
paths:
- ci-dir.txt
Expand All @@ -27,7 +27,7 @@
script:
- CI_BUILD_DIR=$(cat ci-dir.txt)
- cd $CI_BUILD_DIR && cat job-name.txt
- $BUILD_ALLOC ./$SCRIPT_DIR/devtools/host-config-build.py --host-config gitlab.cmake --build $EXTRA_CMAKE_ARGS
- ./$SCRIPT_DIR/devtools/host-config-build.py --no-clean --build --nprocs $NPROC --host-config gitlab.cmake $EXTRA_CMAKE_ARGS
artifacts:
paths:
- ci-dir.txt
Expand Down Expand Up @@ -55,7 +55,7 @@
- CI_BUILD_DIR=$(cat ci-dir.txt)
- cd $CI_BUILD_DIR && cat job-name.txt

- ./build_gitlab/install/spheral $SCRIPT_DIR/gitlab/run_ats.py --test-alloc "$TEST_ALLOC" --ats-file $ATS_FILE --ci-build-dir $CI_BUILD_DIR || exit_code=$?
- ./build_gitlab/install/spheral-ats --ciRun ./build_gitlab/install/$ATS_FILE || exit_code=$?
- cp -r test-logs $CI_PROJECT_DIR
- exit $exit_code
artifacts:
Expand All @@ -76,7 +76,7 @@

- ml load mpifileutils
- cd $SPHERAL_BUILDS_DIR
- $CLEAN_ALLOC drm $CI_BUILD_DIR/..
- drm $CI_BUILD_DIR/..

# ------------------------------------------------------------------------------
# Shared TPL scripts.
Expand All @@ -85,15 +85,15 @@
.update_tpls:
stage: update_tpls
script:
- $BUILD_ALLOC ./$SCRIPT_DIR/devtools/tpl-manager.py --spec-list="$SCRIPT_DIR/devtools/spec-list.json" --spheral-spack-dir=$UPSTREAM_DIR
- ./$SCRIPT_DIR/devtools/tpl-manager.py --no-upstream --spec-list="$SCRIPT_DIR/devtools/spec-list.json" --spheral-spack-dir=$UPSTREAM_DIR

.toss_update_permissions:
stage: update_permissions
variables:
GIT_STRATEGY: none
script:
- ml load mpifileutils
- srun -N 1 -p $PARTITION -n 20 -t 10 dchmod --mode go+rx $UPSTREAM_DIR
- dchmod --mode go+rx $UPSTREAM_DIR

# ------------------------------------------------------------------------------
# Production Installation scripts
Expand All @@ -117,7 +117,7 @@
- INSTALL_DIR=/usr/gapps/Spheral/$SYS_TYPE/spheral-$SPHERAL_REV_STR
- DEV_PKG_NAME=$SYS_TYPE-spheral-dev-pkg-$SPHERAL_REV_STR

- env SPHERAL_REV_STR=$SPHERAL_REV_STR INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="$BUILD_ALLOC" SCRIPT_DIR=$SCRIPT_DIR
- env SPHERAL_REV_STR=$SPHERAL_REV_STR INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME SCRIPT_DIR=$SCRIPT_DIR
bash ./$SCRIPT_DIR/lc/generate-buildcache.sh

- echo $INSTALL_DIR &> install-dir.txt
Expand All @@ -131,6 +131,8 @@

.install_dev_pkg:
stage: install_production
variables:
GIT_STRATEGY: none
script:
- INSTALL_DIR=$(cat install-dir.txt)
- DEV_PKG_NAME=$(cat dev-pkg-name.txt)
Expand All @@ -139,7 +141,7 @@
- tar -xzf $DEV_PKG_NAME.tar.gz
- cd $DEV_PKG_NAME

- env INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="$BUILD_ALLOC" SCRIPT_DIR=$SCRIPT_DIR
- env INSTALL_DIR=$INSTALL_DIR SPEC=$SPEC SPACK_PKG_NAME=$SPACK_PKG_NAME BUILD_ALLOC="" SCRIPT_DIR=$SCRIPT_DIR
bash ./$SCRIPT_DIR/lc/install-from-dev-pkg.sh

artifacts:
Expand All @@ -158,7 +160,7 @@
- chmod go+r /usr/gapps/Spheral/modulefiles/Spheral/"$ALIAS".lua

- ml load mpifileutils
- srun -N 1 -p $PARTITION -n 20 -t 10 dchmod --mode go+rx $INSTALL_DIR
- dchmod --mode go+rx $INSTALL_DIR
- ln -sfn $INSTALL_DIR /usr/gapps/Spheral/$SYS_TYPE/$ALIAS


Expand All @@ -181,7 +183,7 @@
- echo $DIR_LIST

- ml load mpifileutils
- if [[ $DIR_LIST ]]; then $CLEAN_ALLOC drm $DIR_LIST; else echo "No directories to remove at this time."; fi
- if [[ $DIR_LIST ]]; then drm $DIR_LIST; else echo "No directories to remove at this time."; fi
when: always

.merge_pr_rule:
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ include(cmake/SpheralVersion.cmake)
project(spheral LANGUAGES C CXX Fortran VERSION ${SPHERAL_VERSION})

set(SPHERAL_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Path to Spheral source directory")
set(SPHERAL_TEST_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
set(SPHERAL_TEST_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/tests)

include(cmake/SetupSpheral.cmake)

Expand Down
10 changes: 9 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,13 @@ RUN make install
# Run ATS testing suite.
WORKDIR ../install
ENV MPLBACKEND=agg
RUN ./spheral-atstest --filter="level<100" tests/integration.ats

# ATS currently does not allow us to run in parallel for regular linux machines
# If it did, we would need some of the following commands
#RUN export OMP_NUM_THREADS=1
#RUN export MACHINE_TYPE="winParallel"
#RUN ./spheral-ats --level 99 --mpiexe mpiexec --npMax $JCXX tests/integration.ats

# Instead, we will just run it normally
RUN ./spheral-ats --level 99 tests/integration.ats
# -----------------------------------------------------------------------------
8 changes: 6 additions & 2 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ Notable changes include:
* Physics::postStateUpdate now returns a bool indicating if boundary conditions should be enforced again.
* Physics packages can now have Physics sub-packages, which can be run before or after the main package. The SpheralController
now checks for these packages and adds them to the physics package list as needed.
* Physics packages can indicate if they require Voronoi cell information be available. If so, a new package which computes and
* Physics packages can indicate if they require Voronoi cell information be available. If so, a new package which computes and
updates the Voronoi information is automatically added to the package list by the SpheralController (similar to how the
Reproducing Kernel corrections are handled).
* Command line options are now consistent. Default values of a string "None" are no longer allowed and any input through the command line of "None" will become the python NoneType None.
* Cleaned up use of std::any in State objects using a visitor pattern to be rigorous ensuring all state entries are handled properly
during assignment, equality, and cloning operations. This is intended to help ensure our Physics advance during time integration
is correct.
Expand All @@ -43,13 +44,16 @@ Notable changes include:
* ENABLE\_DEV\_BUILD can now export targets properly.
* Added a GCC flag to prevent building variable tracking symbols when building PYB11 modules. This is unnecessary, and
on some platforms trying to build such symbols is very expensive and in some cases fails.
* Consolidates lcatstest.in and run\_ats.py into a single spheral\_ats.py script.
* SPHERAL\_TEST\_INSTALL\_PREFIX now includes the tests directory.
* Removed most configured files and added a SpheralConfigs.py file to use at runtime instead.
* Bug Fixes / improvements:
* Wrappers for MPI calls are simplified and improved.
* Time step estimate due to velocity divergence in RZ space has been fixed.
* Fixed tolerances for ANEOS equation of state temperature lookup
* Clang C++ warnings have been eliminated, so the Clang CI tests have been updated to treat warnings as errors.
* Fix for installing libraries when building individual package WITH ENABLE_DEV_BUILD=On.
* Fix for installing libraries when building individual package with ENABLE\_DEV\_BUILD=On.
* Bugfix for RZ solid CRKSPH with compatible energy.
* Parsing of None string now always becomes None python type. Tests have been updated accordingly.
* IO for checkpoints and visualization can now be properly turned off through SpheralController input options.
Expand Down
41 changes: 8 additions & 33 deletions cmake/SetupSpheral.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -153,40 +153,15 @@ endif()
# Build C++ tests and install tests to install directory
#-------------------------------------------------------------------------------
if (ENABLE_TESTS)
install(DIRECTORY ${SPHERAL_ROOT_DIR}/tests/
USE_SOURCE_PERMISSIONS
DESTINATION "${SPHERAL_TEST_INSTALL_PREFIX}"
PATTERN "*CMakeLists.txt*" EXCLUDE
PATTERN "*.cmake" EXCLUDE
PATTERN "*.in" EXCLUDE
PATTERN "*.pyc" EXCLUDE
PATTERN "*~" EXCLUDE)
add_subdirectory(${SPHERAL_ROOT_DIR}/tests/unit)

# A macro to preserve directory structure when installing files
macro(install_with_directory)
set(optionsArgs "")
set(oneValueArgs SOURCE DESTINATION)
set(multiValueArgs FILES)
cmake_parse_arguments(CAS "${optionsArgs}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
foreach(FILE ${CAS_FILES})
get_filename_component(DIR ${FILE} DIRECTORY)
INSTALL(FILES ${CAS_SOURCE}/${FILE} DESTINATION ${CAS_DESTINATION}/${DIR})
endforeach()
endmacro(install_with_directory)

# Find the test files we want to install
set(test_files1 "")
if (EXISTS "${CMAKE_SOURCE_DIR}/.git")
execute_process(
COMMAND git ls-files tests
WORKING_DIRECTORY ${SPHERAL_ROOT_DIR}
OUTPUT_VARIABLE test_files1)
else()
execute_process(
COMMAND find tests -type f
WORKING_DIRECTORY ${SPHERAL_ROOT_DIR}
OUTPUT_VARIABLE test_files1)
endif()
string(REPLACE "\n" " " test_files ${test_files1})
separate_arguments(test_files)
list(REMOVE_ITEM test_files tests/unit/CXXTests/runCXXTests.ats)
install_with_directory(
FILES ${test_files}
SOURCE ${SPHERAL_ROOT_DIR}
DESTINATION ${SPHERAL_TEST_INSTALL_PREFIX})
endif()

include(${SPHERAL_ROOT_DIR}/cmake/SpheralConfig.cmake)
102 changes: 63 additions & 39 deletions docs/developer/dev/diagnostic_tools.rst
Original file line number Diff line number Diff line change
@@ -1,95 +1,119 @@
Code Performance Diagnostics
############################
Code Debugging and Diagnostics
##############################

Spheral uses Caliper to perform code diagnostics, such as timing. To enable this functionality in the code, Spheral needs to be configured with ``ENABLE_TIMER=ON``. Otherwise, the timing regions are no-ops for improved performance.
Valgrind
========

We advise using Valgrind to check memory leaks when doing development on Spheral.
When using Valgrind to check Spheral, be sure to use the provided suppression file
::

./scripts/devtools/host-config-build.py <sys_type>-<spec>.cmake -DENABLE_TIMER=ON
valgrind --suppressions=./scripts/devtools/valgrind_python_suppression ./spheral


Querying using Caliper
======================
Using Caliper
=============

Spheral uses Caliper to perform code diagnostics, such as timing. To enable this functionality in the code, Spheral needs to be configured with ``ENABLE_TIMER=ON``. Otherwise, the timing regions are no-ops for improved performance.
::

./scripts/devtools/host-config-build.py <sys_type>-<spec>.cmake -DENABLE_TIMER=ON

Caliper is configured and started through the ``cali::ConfigManager``.
The ``cali::ConfigManager`` is wrapped in a ``TimerMgr`` singleton class, which has a python interface.

.. note::
``TimerMgr`` is initialized and started during ``commandLine()`` in ``src/SimulationControl/SpheralOptionParser.py``. This is because ``commandLine()`` is almost always invoked directly near the start of a problem. However, if ``commandLine()`` is not called, the timers would need to be configured and started directly using the ``TimerMgr`` class. See :ref:`below <manual_caliper>` for more details.
``TimerMgr`` is initialized in ``src/SimulationControl/SpheralTimingParser.py`` which is called during ``commandLine()`` in ``src/SimulationControl/SpheralOptionParser.py``. This is because ``commandLine()`` is almost always invoked directly near the start of a problem. However, if ``commandLine()`` is not called, the timer manager would need to be configured and started directly using the ``TimerMgr`` class. See :ref:`below <manual_caliper>` for more details.

By default, the Caliper configuration is set to ``spot`` and outputs Caliper files (``.cali``).
For the default configuration, the Caliper files are named based on what file is being run, for example:
::

python Noh-cylindrical-2d.py
There are many different Caliper configurations to view various information. Here are some extra links for those who want to read or experiment with other features in Caliper that can be incorporated into Spheral:

will produce a timing file called ``Noh-cylindrical-2d_YEAR_MONTH_DATE_TIME.cali`` where the file name includes the current date and time.
* `Configuration basics <https://software.llnl.gov/Caliper/CaliperBasics.html#more-on-configurations>`_
* `Builtin Configuration <https://software.llnl.gov/Caliper/BuiltinConfigurations.html>`_
* `Manual Configuration <https://software.llnl.gov/Caliper/configuration.html>`_
* `Output Format <https://software.llnl.gov/Caliper/OutputFormats.html>`_

The Caliper file name can be specified using the command line
::
Caliper and Adiak Options
-------------------------

python Noh-cylindrical-2d.py --caliperFilename 'new_test_name.cali'
.. option:: --caliperFilename

Different Caliper configurations can be set at the command line using ``--caliperConfig`` like so
::
Name of Caliper timing file. Should include file extensions. Optional, default: ``name_of_file_YEAR_MONTH_DATE_TIME.cali``.

python Noh-cylindrical-2d.py --caliperConfig 'runtime-report(output=time.txt),calc.inclusive,region.count'
.. option:: --caliperConfig CONFIG_STR

.. note::
The above configuration produces timing results similar to the previous ``Spheral::Timer`` method. This results in a file named ``time.txt`` with cumulative times for the nested regions as well as a count of how many times each region ran.
Specify a built-in Caliper configuration or turn off timers with ``none``. Optional, default: ``spot``.

Similarly, a non-default Caliper configuration can be read in from a JSON file using ``--caliperConfigJSON`` and providing the file name.
Lastly, Caliper timers can be turned off using ``--caliperConfig none``.
**Example**:
::

There are many different Caliper configurations to view various information. Here are some extra links for those who want to read or experiment with other features in Caliper that can be incorporated into Spheral:
./spheral ex_prog.py --caliperConfig 'runtime-report(output=time.txt),calc.inclusive,region.count'

* `Configuration basics <https://software.llnl.gov/Caliper/CaliperBasics.html#more-on-configurations>`_
* `Builtin Configuration <https://software.llnl.gov/Caliper/BuiltinConfigurations.html>`_
* `Manual Configuration <https://software.llnl.gov/Caliper/configuration.html>`_
* `Output Format <https://software.llnl.gov/Caliper/OutputFormats.html>`_
.. note::
The configuration in the example above produces timing results similar to the previous ``Spheral::Timer`` method. This results in a file named ``time.txt`` with cumulative times for the nested regions as well as a count of how many times each region ran.

.. option:: --caliperConfigJSON JSON_FILE

Adding Region Timers in C++
===========================
Specify a JSON file containing a non-default Caliper configuration. Optional.

So far there are two different types of regions in Spheral, using the following macros:
::
.. option:: --adiakData ADIAK_DATA_STR

TIME_FUNCTION
Specify any Adiak data directly in the command line. Must be a string in key:value format, separated by commas. Optional.

or
**Example**:
::

::
./spheral ex_prog.py --adiakData "test_name: the_cheat, test_num:10"

TIME_BEGIN("timer_name")
TIME_END("timer_name")
.. note::
By default, all ``commandLine()`` inputs are added as Adiak metadata. ``--adiakData`` is for metadata that does not come through Spheral command line arguments. Adiak metadata can also be added through the python interface. See :ref:`below <python_adiak>` for more details.


Adding Region Timers in C++
---------------------------

The following macros are used to create timing regions in the Spheral C++ interface:

- ``TIME_FUNCTION`` can be added to the very beginning of a function and creates a region for the entire function using the function's name. ``TIME_FUNCTION`` uses just the function name and no class or parameter information, so be careful when using this method with functions that could share names.

- ``TIME_BEGIN("timer_name")`` and ``TIME_END("timer_name")`` create a region between the two different calls and use the string (in this case ``timer_name``) as the name.


Adding Region Timers in Python
==============================
------------------------------

Region timers can be added inside the python code using the following function calls:
::

from SpheralUtilities import TimerMgr
TimerMgr.timer_start("timer_name")
some_function_call()
TimerMgr.timer_end("timer_name")

.. note::
IMPORTANT: All timers must have both a start and end call. Otherwise, memory issues will occur.
All timers must have both a start and end call. Otherwise, memory issues will occur.

.. _python_adiak:

Adding Adiak Metadata in Python
-------------------------------

Adiak metadata can be added inside python code using the following function calls:

.. code-block:: python

   adiak_values("value_name", value)

.. _manual_caliper:

Starting Caliper Manually
========================
=========================

As mentioned above, Caliper (not an individual Caliper timer) is normally configured and started in ``commandLine()`` python routine. However, Caliper can be directly configured and started through the python interface, if desired. This can be done by putting the following into the python file:
As mentioned above, the Caliper timing manager is normally configured and started in the ``commandLine()`` routine. However, Caliper can be directly configured and started through the python interface, if desired. This can be done by putting the following into the python file:
::

from SpheralUtilities import TimerMgr
caliper_config = "some_configuration(output=some_filename.txt)"
TimerMgr.add(caliper_config)
TimerMgr.start()
Loading

0 comments on commit e282594

Please sign in to comment.