[bisect-benchmark]
# This is the configuration file for bisecting benchmark jobs in the CI.
# Usage:
# - Create a temporary branch based on the main branch (or the bad commit)
# - Fill in this configuration file, preferably using the automated script
# graal-enterprise/graalpython-enterprise/scripts/create-bisect-config
# - Commit and push the file
# - The push command output should give you a link to create a PR. Open it, but
# don't create a PR. Instead, you should execute the job on your commit using
# "Actions->Shcedule CI jobs" in the commit list. You may need to wait a bit
# for the job enumerator to run before it shows the job list. Run the
# "bisect-benchmark" job.
# - Wait for the jobs to finish. You should get an email when it's done.
# If you're unsure about the config and would like to see the first iteration,
# or you want to debug a failing build, you can run the configuration locally
# using:
# BISECT_BENCHMARK_CONFIG=bisect-benchmark.ini mx bisect-benchmark
# The command to build the particular configuration. You can copy-paste this
# from the benchmark job log. Don't worry about sforceimports, the script takes
# care of that.
build_command = mx --dy /compiler build
# The command to run the particular benchmark on the configuration. You can
# copy-paste this from the benchmark job log. Make sure you replace the '*'
# wildcard with a single benchmark; the script only supports bisecting one
# benchmark at a time. Note that the command typically ends with "--"; the
# "-XX ..." list that follows is part of the output, not part of the command.
benchmark_command = mx --dy /compiler benchmark micro:try-except-simple
# If the benchmark command runs multiple benchmarks, the results file will
# contain all their results and the tool will not know which one to choose.
# In particular, this is the case for some of the pyperformance benchmarks.
# Use the following option to filter the results by benchmark name:
#
# benchmark_name = scimark_lu
# Once the good and bad commits are identified, the tool will run all the
# following benchmark commands on both the good and the bad revision. This can
# be used to rerun the benchmarks with some extra logging, IGV dumping, etc.
#
# Individual commands can be separated by ';'. Trailing whitespace including
# newlines is removed from the commands.
#
# The example commands below are good defaults that should provide good insight
# into the regression at hand:
# * "--checkup" runs the benchmark with extra iterations, and extra compilation
# and GC logging
# * "--cpusampler" with delay of 10000ms (adjust if the whole benchmark command
# runs for less than that)
# * "--cputracer" and IGV dumping: they filter the roots with name "*measure*",
# adjust that to the benchmark main function(s)
rerun_with_commands =
mx --dy /compiler benchmark micro:try-except-simple -- --checkup ;
mx --dy /compiler benchmark micro:try-except-simple -- --cpusampler --cpusampler.Delay=10000 ;
mx --dy /compiler benchmark micro:try-except-simple -- --cputracer --cputracer.TraceStatements --cputracer.FilterRootName=measure ;
mx --dy /compiler benchmark micro:try-except-simple -- --vm.Dgraal.Dump=Truffle:2 --vm.Dgraal.MethodFilter="*measure*"
# The first known "bad" merge commit for bisection. Try to use long commit
# SHAs, the CI cache has higher probability of short SHAs being ambiguous
bad = 1234deadbeef
# The last known "good" merge commit for bisection
good = 1234cafebabe
# Whether to checkout graal-enterprise
enterprise = true
# Which metric should be used for comparisons; corresponds to "metric.name" in
# the benchmark data. If there are multiple entries of the given metric, then
# the last one is used.
# A special value "WORKS" can be used to bisect a benchmark that fails to
# execute to find when it started to fail.
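# For example, to bisect a benchmark that started failing rather than
# regressing, one could set (illustrative alternative to the value below):
#
# benchmark_metric = WORKS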
benchmark_metric = time