diff --git a/ci/scripts/utils/launch_java_agent.sh b/ci/scripts/utils/launch_java_agent.sh index a4e664724b..81dbe002b6 100755 --- a/ci/scripts/utils/launch_java_agent.sh +++ b/ci/scripts/utils/launch_java_agent.sh @@ -1,8 +1,70 @@ #!/bin/env bash + set -e +# ============================================================================== +# Script Name: launch_java_agent.sh +# +# Description: +# This script automates the process of launching a Jenkins agent +# on a specified machine. It ensures that the necessary +# prerequisites are met, such as the availability of JAVA_HOME, +# the Jenkins agent launch directory, and proper authentication +# with GitHub. +# +# It then proceeds to check if the Jenkins node is online and +# decides whether to launch the Jenkins agent based on the node's +# status. The agent is launched in the background, +# and its PID is logged for reference. +# +# Prerequisites: +# JAVA_HOME must be set to a valid JDK installation. +# Jenkins agent launch directory must exist and be specified. +# GitHub CLI (gh) must be installed and authenticated for messaging +# from the Jenkins controller to GitHub PR via shell commands. +# Jenkins agent launch directory must exist and be specified. +# TODO: Must use GitHub CLI v2.25.1 (newer versions have issues) +# https://github.com/cli/cli/releases/download/v2.25.1/gh_2.25.1_linux_amd64.tar.gz +# Jenkins controller URL and authentication token must be provided. +# jenkins-secret-file: +# Must be present in the Jenkins agent launch directory. +# This file contains the secret key for the Jenkins agent +# established by the Jenkins administrator for each Node. +# jenkins_token: +# Must be present in the Jenkins agent launch directory. +# This file contains the user authentication token for the Jenkins controller +# to use the Remote API. This token can be generated by the user +# on the Jenkins controller. +# controller_user: +# Must be set to the Jenkins controller username corresponding to the jenkins_token. 
+# +# Usage: ./launch_java_agent.sh [-n] [-f] +# The optional -n (now) flag makes the script launch the Jenkins +# agent without waiting before trying again. +# The optional -f (force) flag makes the script launch the Jenkins agent regardless of the node status. +# +# ============================================================================== + +force_launch="False" +skip_wait="False" +while getopts ":fnh" flag; do + case "${flag}" in + f) force_launch="True";; + n) skip_wait="True";; + h) echo "Usage: ./launch_java_agent.sh [now] [force] +Two mutually exclusive optional arguments: + -n (now) causes the script to launch the Jenkins agent without waiting before trying again. + -f (force) forces the script to launch the Jenkins regarless of its connection status." + exit 0 ;; + *) echo "Unknown flag: ${flag}" + exit 1;; + esac +done + controller_url="https://jenkins.epic.oarcloud.noaa.gov" -controller_user="terry.mcguinness" +controller_user=${controller_user:-"terry.mcguinness"} +controller_user_auth_token="jenkins_token" + HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )" host=$(hostname) @@ -13,12 +75,10 @@ host=$(hostname) source "${HOMEgfs}/ush/detect_machine.sh" case ${MACHINE_ID} in hera | orion | hercules | wcoss2) - echo "Launch Jenkins Java Controler on ${MACHINE_ID}" - ;; + echo "Launch Jenkins Java Controler on ${MACHINE_ID}";; *) echo "Unsupported platform. Exiting with error." 
- exit 1 - ;; + exit 1;; esac LOG=lanuched_agent-$(date +%Y%m%d%M).log @@ -43,9 +103,16 @@ echo "JAVA VERSION: " ${JAVA} -version export GH="${HOME}/bin/gh" -command -v "${GH}" +[[ -f "${GH}" ]] || echo "gh is not installed in ${HOME}/bin" ${GH} --version +check_mark=$(gh auth status -t 2>&1 | grep "Token:" | awk '{print $1}') || true +if [[ "${check_mark}" != "✓" ]]; then + echo "gh not authenticating with emcbot token" + exit 1 +fi +echo "gh authenticating with emcbot TOKEN ok" + if [[ -d "${JENKINS_AGENT_LANUCH_DIR}" ]]; then echo "Jenkins Agent Lanuch Directory: ${JENKINS_AGENT_LANUCH_DIR}" else @@ -56,22 +123,62 @@ cd "${JENKINS_AGENT_LANUCH_DIR}" if ! [[ -f agent.jar ]]; then curl -sO "${controller_url}/jnlpJars/agent.jar" + echo "Updated agent.jar downloaded" +fi + +if [[ ! -f "${controller_user_auth_token}" ]]; then + echo "User Jenkins authetication TOKEN to the controller for using the Remote API does not exist" + exit 1 fi +JENKINS_TOKEN=$(cat "${controller_user_auth_token}") + +cat << EOF > parse.py +#!/usr/bin/env python3 +import json,sys +with open(sys.argv[1], 'r') as file: + data = json.load(file) +print(data.get('offline','True')) +EOF +chmod u+x parse.py -JENKINS_TOKEN=$(cat jenkins_token) +check_node_online() { + rm -f curl_response + curl_response=$(curl --silent -u "${controller_user}:${JENKINS_TOKEN}" "${controller_url}/computer/${MACHINE_ID^}-EMC/api/json?pretty=true") || true + if [[ "${curl_response}" == "" ]]; then + echo "ERROR: Jenkins controller not reachable. Exiting with error." + exit 1 + fi + echo -n "${curl_response}" > curl_response + ./parse.py curl_response +} + +lauch_agent () { + echo "Launching Jenkins Agent on ${host}" + command="nohup ${JAVA} -jar agent.jar -jnlpUrl ${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp -secret @jenkins-secret-file -workDir ${JENKINS_WORK_DIR}" + echo -e "Launching Jenkins Agent on ${host} with the command:\n${command}" >& "${LOG}" + ${command} >> "${LOG}" 2>&1 & + nohup_PID=$! 
+ echo "Java agent running on PID: ${nohup_PID}" >> "${LOG}" 2>&1 +} + +if [[ "${force_launch}" == "True" ]]; then + lauch_agent + exit +fi -# -offline=$(curl --silent -u "${controller_user}:${JENKINS_TOKEN}" "${controller_url}/computer/${MACHINE_ID^}-EMC/api/json?pretty=true" | grep '\"offline\"' | awk '{gsub(/,/,"");print $3}') || true -echo "Jenkins Agent offline setting: ${offline}" +offline=$(set -e; check_node_online) -if [[ "${offline}" == "true" ]]; then - echo "Jenkins Agent is offline. Lanuching Jenkins Agent on ${host}" - command="nohup ${JAVA} -jar agent.jar -jnlpUrl ${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp -secret @jenkins-secret-file -workDir ${JENKINS_WORK_DIR}" - echo -e "Lanuching Jenkins Agent on ${host} with the command:\n${command}" >& "${LOG}" - ${command} >> "${LOG}" 2>&1 & - nohup_PID=$! - echo "Java agent running on PID: ${nohup_PID}" >> "${LOG}" 2>&1 - echo "Java agent running on PID: ${nohup_PID}" +if [[ "${offline}" != "False" ]]; then + if [[ "${skip_wait}" != "True" ]]; then + echo "Jenkins Agent is offline. Waiting 5 more minutes to check again in the event it is a temp network issue" + sleep 300 + offline=$(set -e; check_node_online) + fi + if [[ "${offline}" != "False" ]]; then + lauch_agent + else + echo "Jenkins Agent is online (nothing done)" + fi else echo "Jenkins Agent is online (nothing done)" fi