Skip to content
This repository has been archived by the owner on May 5, 2023. It is now read-only.

Commit

Permalink
Better job control logic
Browse files Browse the repository at this point in the history
  • Loading branch information
joergboe committed Aug 7, 2019
1 parent 523c452 commit 8fd6d94
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 62 deletions.
21 changes: 18 additions & 3 deletions bin/case.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ set -o errexit; set -o errtrace; set -o nounset; set -o pipefail
shopt -s globstar nullglob

# Shutdown and interrupt vars and functions
declare -r TTTI_commandname="${0##*/}" #required in coreutils
declare TTTI_interruptReceived="" #required in coreutils
declare -r TTTI_commandname="${0##*/}" #not used here but required in coreutils
declare TTTI_interruptReceived="" #not used here but required in coreutils

# Function errorTrapFunc
# global error exit function - prints the caller stack
Expand All @@ -32,6 +32,15 @@ function errorTrapFunc {
}
trap errorTrapFunc ERR

# Function abortTrapFunc
# exits test case
#abortTrapFunc() {
# printError "Abort trap received stop test case"
# caseFinalization
# errorExit
#}
#trap abortTrapFunc SIGABRT

#includes
source "${TTRO_scriptDir}/defs.sh"
source "${TTRO_scriptDir}/util.sh"
Expand Down Expand Up @@ -76,6 +85,7 @@ export TTXX_searchPath

#test finalization function
function caseFinalization {
isDebug && printDebug "$FUNCNAME"
if [[ $TTTT_executionState == 'initializing' ]]; then
return 0
fi
Expand Down Expand Up @@ -134,13 +144,18 @@ function caseFinalization {
}

# Function caseExitFunction
# handler for the EXIT trap of the test case script (see the 'trap ... EXIT' line below)
# traces its own name and calls caseFinalization unless TTTF_isSkip returns success
function caseExitFunction {
isDebug && printDebug "$FUNCNAME"
printInfo "$FUNCNAME"
if ! TTTF_isSkip; then
caseFinalization
fi
}
trap caseExitFunction EXIT

#handleSigUsr1() {
# echo "SIGUSR1"
#}
#trap handleSigUsr1 USR1
#trap -p
#
# success exit / failure exit and error exit
# do not use these functions directly
Expand Down
145 changes: 89 additions & 56 deletions bin/suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ unset -f command
#more settings to be safe
set -o posix;
set -o errexit; set -o errtrace; set -o nounset; set -o pipefail
#set -o monitor #enables job monitor -> synchronous (endless) commands started from cases are terminated with this option
#but it has consequences for read commands issued from suite -> return 149 means it receives SIGTTIN
shopt -s globstar nullglob

#-----------------------------------------------------
# Shutdown and interrupt vars and functions
declare -i TTTI_interruptReceived=0
declare -r TTTI_commandname="${0##*/}"
#declare caseExecutionLoopRunning=''
declare -r TTTI_sigspec='TERM'
#start time
declare -r TTTT_suiteStartTime=$(date -u +%s)
#state
Expand All @@ -30,9 +32,9 @@ declare TTTT_executionState='initializing'
handleSigint() {
TTTI_interruptReceived=$((TTTI_interruptReceived+1))
if [[ $TTTI_interruptReceived -eq 1 ]]; then
printWarning "SIGINT: Test Suite will be stopped. To interrupt running test cases press ^C again"
printWarning "SIGINT #1: Test Suite will be stopped. To interrupt running test cases press ^C again"
elif [[ $TTTI_interruptReceived -eq 2 ]]; then
printWarning "SIGINT: Test cases will be stopped"
printWarning "SIGINT #2: Test cases will be stopped"
elif [[ $TTTI_interruptReceived -gt 3 ]]; then
printWarning "SIGINT: Abort Suite"
exit $errSigint
Expand Down Expand Up @@ -346,6 +348,7 @@ TTTF_fixPropsVars
TTTT_executionState='execution'

# check for duplicate jobspec in running jobs list
# set the jobspec of the existing list entry to 'delete' if a duplicate was started
function checkDuplicateJobspec {
local i
local js
Expand All @@ -355,8 +358,8 @@ function checkDuplicateJobspec {
#echo "Check index $i list entry $js - value to be inserted $1"
if [[ -n $js ]]; then #and as a jobspec assigned
if [[ $js -eq $1 ]]; then
printWarning "Jobspec $1 is already in running jobs list at index i=$i ! Delete the jobspec"
TTTI_tjobid[$i]=''
printError "Jobspec $1 is already in running jobs list at index i=$i ! Delete the jobspec"
TTTI_tjobid[$i]='delete'
fi
fi
fi
Expand Down Expand Up @@ -423,9 +426,9 @@ checkJobTimeouts() {
else
tempjobspec="%${TTTI_tjobid[$i]}"
fi
printWarning "Timeout Kill i=${i} jobspec=${tempjobspec} with SIGTERM case=${TTTI_tcase[$i]} variant=${TTTI_tvariant[$i]} pid=${TTTI_tpid[$i]}"
printWarning "Timeout Kill jobspec=${tempjobspec} with SIG${TTTI_sigspec} i=${i} pid=${TTTI_tpid[$i]} case=${TTTI_tcase[$i]} variant=${TTTI_tvariant[$i]}"
#SIGINT and SIGHUP seems not to work can not install handler for both signals in case.sh
if kill "${tempjobspec}"; then
if kill -s $TTTI_sigspec "${tempjobspec}"; then
echo "timeout" > "${TTTI_tcaseWorkDir[$i]}/TIMEOUT"
else
printWarning "Can not kill i=${i} jobspec=${tempjobspec} Gone?"
Expand All @@ -441,9 +444,9 @@ checkJobTimeouts() {
else
tempjobspec="%${TTTI_tjobid[$i]}"
fi
printError "Forced Kill i=${i} jobspec=${tempjobspec} case=${TTTI_tcase[$i]} variant=${TTTI_tvariant[$i]} pid=${TTTI_tpid[$i]}"
printError "Forced kill -s KILL i=${i} jobspec=${tempjobspec} case=${TTTI_tcase[$i]} variant=${TTTI_tvariant[$i]} pid=${TTTI_tpid[$i]}"
if ! kill -9 "${tempjobspec}"; then
printWarning "Can not force kill i=${i} jobspec=${tempjobspec} pid=${TTTI_tpid[$i]} Gone?"
printWarning "Can not force kill -s SIGKILL i=${i} jobspec=${tempjobspec} pid=${TTTI_tpid[$i]} Gone?"
fi
fi
fi
Expand All @@ -466,43 +469,52 @@ handleJobEnd() {
if [[ -n $pid ]]; then
isDebug && printDebug "check wether job is still running i=$i pid=$pid jobspec=%$jobid"
local thisJobRuns='true'
if [[ -z $jobid ]]; then
local jobState=''
if [[ ( "$jobid" == "error" ) || ( "$jobid" == "delete" ) ]]; then
printWarning "Check (expired) job running i=$i jobid=$jobid pid=$pid"
if ps --pid "$pid"; then
printErrorAndExit "Check (expired) job running i=$i jobid=$jobid pid=$pid true"
fi
thisJobRuns=''
echo "JOB Gone jobid is re-used $jobid"
isDebug && printDebug "Job is Gone jobid is re-used $jobid"
jobState="$jobid"
printInfo "Job (expired) is gone i=$i jobid=$jobid pid=$pid"
else
#if ps --pid "$pid" &> /dev/null; then
#if jobsOutput=$(LC_ALL=en_US jobs "%$jobid" 2>/dev/null); then ... this does not work in rhel 6 (bash 4.1.2)
local jobsOutput
if jobsOutput=$(export LC_ALL='en_US.UTF-8'; jobs "%$jobid" 2>/dev/null); then
#echo "***** $jobsOutput"
local tmp1=$(cut -d ' ' -f1 <<< $jobsOutput)
local tmp2=$(cut -d ' ' -f2 <<< $jobsOutput)
if [[ $tmp1 =~ \[(.*)\] ]]; then
local tmp3="${BASH_REMATCH[1]}"
if [[ $tmp2 == 'Done' ]]; then
thisJobRuns=''
#echo "JOB DONE $tmp3"
isDebug && printDebug "Job is Done $tmp3"
elif [[ $tmp2 == 'Running' ]]; then
#echo "JOB RUNS $tmp3"
isDebug && printDebug "Job is Running $tmp3"
else
printError "Invalid job state $tmp2 jobspec=%$tmp3"
thisJobRuns=''
fi
local jobsOutput=''
local psres="$errSigint"
while [[ $psres -eq $errSigint ]]; do
psres=0
jobsOutput=$(export LC_ALL='en_US.UTF-8'; jobs "%$jobid" 2>/dev/null) || psres="$?"
done
if [[ ( $psres -eq 0 ) && ( -n $jobsOutput ) ]]; then
local part1="${jobsOutput%%[[:space:]]*}"
local rest1="${jobsOutput#*[[:space:]]}"
local TTTT_trim
trim "$rest1"
local tmp2="${TTTT_trim%%[[:space:]]*}"
[[ $part1 =~ \[(.*)\] ]] || printErrorAndExit "Wrong output of jobs command $jobsOutput"
local tmp3="${BASH_REMATCH[1]}"
[[ $tmp3 == $jobid ]] || printErrorAndExit "jobid from command jobs $tmp3 is not eq jobid $jobid"
if [[ $tmp2 == Done* ]]; then
thisJobRuns=''
jobState="$tmp2"
isDebug && printDebug "Job is Done $tmp3"
elif [[ $tmp2 == 'Running' ]]; then
isDebug && printDebug "Job is Running $tmp3"
else
printError "Wrong output of jobs command $jobsOutput"
printError "Invalid job state $tmp2 jobspec=%$tmp3"
thisJobRuns=''
jobState="$tmp2"
fi
else
local psres=$?
if [[ $psres -eq $errSigint ]]; then
isDebug && printDebug "SIGINT: during jobs"
thisJobRuns=''
if [[ $psres -eq 0 ]]; then
jobState="''"
else
thisJobRuns=''
#echo "JOB Gone $jobid"
isDebug && printDebug "Job is Gone $jobid"
jobState="exit $psres"
fi
isDebug && printDebug "Job is Gone $jobid"
fi
fi
if [[ -z $thisJobRuns ]]; then
Expand All @@ -528,7 +540,7 @@ handleJobEnd() {
fi
fi
echo "$tmpCaseAndVariant : $caseElapsedTime" >> "${TTRO_workDirSuite}/CASE_EXECUTE"
printInfon "END: case=${tmpCase} variant='${tmpVariant}' i=$i running=$TTTI_numberJobsRunning systemLoad=$TTTT_systemLoad maxJobs=$TTTI_currentParralelJobsEffective jobspec=%$jobid pid=$pid"
printInfon "END: case=${tmpCase} variant='${tmpVariant}' i=$i running=$TTTI_numberJobsRunning systemLoad=$TTTT_systemLoad maxJobs=$TTTI_currentParralelJobsEffective jobspec=%$jobid pid=$pid state=$jobState"
TTTI_tpid[$i]=""
TTTI_tjobid[$i]=""
TTTI_texclusiveExecution=''
Expand Down Expand Up @@ -642,37 +654,59 @@ startNewJobs() {
TTTI_numberJobsRunning=$((TTTI_numberJobsRunning+1))
printInfon "START: case=$caseName variant='$caseVariant' jobIndex=$TTTI_nextJobIndexToStart i=$freeSlot running=$TTTI_numberJobsRunning systemLoad=$TTTT_systemLoad maxJobs=$TTTI_currentParralelJobsEffective"
#Start job connect output to stdout in single thread case
local commandString
if [[ "$TTRO_noParallelCases" -eq 1 ]]; then
commandString="$cmd $casePath $cworkdir $caseVariant $cpreamblError 2>&1 | tee -i ${cworkdir}/${TEST_LOG}"
$cmd "$casePath" "$cworkdir" "$caseVariant" "$cpreamblError" 2>&1 | tee -i "${cworkdir}/${TEST_LOG}" &
local newPid=$!
else
commandString="$cmd $casePath $cworkdir $caseVariant $cpreamblError &> ${cworkdir}/${TEST_LOG}"
$cmd "$casePath" "$cworkdir" "$caseVariant" "$cpreamblError" &> "${cworkdir}/${TEST_LOG}" &
local newPid=$!
fi
#jobsOutput=$(LC_ALL=en_US jobs %+) ... this does not work in rhel 6 (bash 4.1.2)
local jobsOutput=$(export LC_ALL='en_US.UTF-8'; jobs %+)
echo "$jobsOutput" > "$cworkdir/JOBS"
echo "Full Job list" >> "$cworkdir/JOBS"
LC_ALL='en_US.UTF-8' jobs -l >> "$cworkdir/JOBS"
isDebug && printDebug "jobspec:$jobsOutput"
local tmp1=$(cut -d ' ' -f1 <<< $jobsOutput)
local jobState=$(cut -d ' ' -f2 <<< $jobsOutput)
if [[ $tmp1 =~ \[(.*)\]\+ ]]; then
local tmp5="${BASH_REMATCH[1]}"
echo " jobspec=%$tmp5 pid=$newPid state=$jobState"
checkDuplicateJobspec "$tmp5"
local jobsOutput=''
local newPidLead=''
local jobState=''
local thisJobspec=''
local psres="$errSigint"
while [[ $psres -eq $errSigint ]]; do
psres=0;
jobsOutput=$(export LC_ALL='en_US.UTF-8'; jobs -l %+) || psres=$?
done
if [[ ( $psres -eq 0 ) && ( -n "$jobsOutput" ) ]]; then
echo "$jobsOutput" > "$cworkdir/JOBS"
echo "Full Job list" >> "$cworkdir/JOBS"
LC_ALL='en_US.UTF-8' jobs -l >> "$cworkdir/JOBS"
isDebug && printDebug "jobspec:$jobsOutput"
local part1="${jobsOutput%%[[:space:]]*}"
local rest1="${jobsOutput#*[[:space:]]}"
local TTTT_trim
trim "$rest1"
newPidLead="${TTTT_trim%%[[:space:]]*}"
rest1="${TTTT_trim#*[[:space:]]}"
trim "$rest1"
jobState="${TTTT_trim%%[[:space:]]*}"
if [[ $part1 =~ \[(.*)\]\+ ]]; then
thisJobspec="${BASH_REMATCH[1]}"
echo " jobspec=%$thisJobspec pid=$newPidLead state=$jobState"
checkDuplicateJobspec "$thisJobspec"
else
echo
printErrorAndExit "No jobindex extract from jobs output '$jobsOutput'" $errRt
fi
else
echo
TTTI_tjobid[$freeSlot]=""
printErrorAndExit "No jobindex extract from jobs output '$jobsOutput'" $errRt
newPidLead="$newPid"
thisJobspec='error'
fi
TTTI_tpid[$freeSlot]="$newPid"
TTTI_tjobid[$freeSlot]="$tmp5"
TTTI_tpid[$freeSlot]="$newPidLead"
TTTI_tjobid[$freeSlot]="$thisJobspec"
TTTI_tcase[$freeSlot]="$caseName"
TTTI_tvariant[$freeSlot]="$caseVariant"
TTTI_tcasePath[$freeSlot]="$casePath"
TTTI_tcaseWorkDir[$freeSlot]="$cworkdir"
TTTI_tkilled[$freeSlot]=""
isDebug && printDebug "Enter tjobid[$freeSlot]=${TTTI_tjobid[$freeSlot]} state=$jobState tpid[${freeSlot}]=$newPid time=${TTTI_now} state=$jobState"
isDebug && printDebug "Enter free slot=$freeSlot tjobid[$freeSlot]=${thisJobspec} tpid[${freeSlot}]=$newPidLead time=${TTTI_now} state=$jobState"
TTTI_tstartTime[$freeSlot]="$TTTI_now"
local jobTimeout=${TTTI_caseTimeout[$TTTI_jobIndex]}
if [[ $jobTimeout -lt $TTTT_casesTimeout ]]; then
Expand All @@ -681,7 +715,6 @@ startNewJobs() {
isVerbose && printVerbose "Job timeout $jobTimeout"
TTTI_tendTime[$freeSlot]=$((TTTI_now+jobTimeout))
TTTI_ttimeout[$freeSlot]="$jobTimeout"
TTTI_tcaseWorkDir[$freeSlot]="$cworkdir"
TTTI_texclusiveExecution="$caseExclusiveExecution"
TTTI_jobIndex=$((TTTI_jobIndex+1))
if [[ ( $TTTI_interruptReceived -gt 0 ) || ( $TTTI_jobIndex -ge $TTTI_noCaseVariants ) ]]; then
Expand Down
9 changes: 7 additions & 2 deletions bin/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1724,7 +1724,12 @@ TTRO_help_getElapsedTime='
# TTTT_elapsedTime'
function getElapsedTime {
if [[ $# -ne 1 ]]; then printErrorAndExit "$FUNCNAME : wrong no of arguments $#" $errRt; fi
local now=$(date -u +%s)
local psres="$errSigint"
local now=''
while [[ $psres -eq $errSigint ]]; do
psres=0
now=$(date -u +%s) || psres="$?"
done
local diff=$((now-$1))
timeFromSeconds "$diff"
TTTT_elapsedTime="$TTTT_timeFromSeconds"
Expand Down Expand Up @@ -2094,7 +2099,7 @@ arrayDelete() {
readonly -f arrayDelete

TTRO_help_trim='
# Function trim removes leading trailing whitespace characters
# Function trim removes leading and trailing whitespace characters
# $1 the input string
# returns the result string in TTTT_trim'
function trim {
Expand Down
1 change: 1 addition & 0 deletions test/sequencetest/Suite1/TestCase0/TestCase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#--variantCountx=16
1 change: 1 addition & 0 deletions test/sequencetest/Suite1/TestCase2/TestCase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#--variantCountx=16
1 change: 1 addition & 0 deletions test/sequencetest/Suite1/TestCase3/TestCase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#--variantCountx=16
1 change: 1 addition & 0 deletions test/sequencetest/Suite1/TestCase4/TestCase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#--variantCountx=16
1 change: 1 addition & 0 deletions test/sequencetest/Suite1/TestCase5/TestCase.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#--variantCountx=16
8 changes: 8 additions & 0 deletions test/sequencetest/Suite1/TestSuite.sh
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
import " testutils.sh"

PREPS=myPrep

# Suite preparation step: prints the result summary that is expected from this
# suite run and then calls promptYesNo (defined elsewhere; presumably asks the
# operator for confirmation - verify against testutils.sh).
# The exit status is that of promptYesNo.
myPrep() {
  printf '%s\n' \
    "expected result is" \
    "***** cases executed=21 failures=0 errors=5 skipped=0"
  promptYesNo
}
2 changes: 1 addition & 1 deletion test/toolkitTest/Runtests/TestSuite.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##-----------the required tools ---------------------
import "$TTRO_scriptDir/streamsutils.sh"

setVar 'TTPR_timeout' 240
setVar 'TTPR_timeout' 60

#Make sure instance and domain is running
PREPS='cleanUpInstAndDomainAtStart mkDomain startDomain mkInst startInst'
Expand Down

0 comments on commit 8fd6d94

Please sign in to comment.