From d142677e0e95ade43e8fa7bdbf219a96981a05f4 Mon Sep 17 00:00:00 2001 From: Jesse Nelson Date: Thu, 31 Oct 2024 10:04:08 -0500 Subject: [PATCH] Add Various Citus Runbooks (#9398) * Add citus runbooks --------- Signed-off-by: Jesse Nelson --- .../templates/zfs/configmap-init.yaml | 56 +-- .../change-citus-node-pool-machine-type.md | 20 + .../create-disk-snapshot-for-citus-cluster.md | 19 + docs/runbook/increase-zfs-disksize.md | 77 ++++ .../restore-citus-from-disk-snapshot.md | 26 ++ docs/runbook/scripts/change-machine-type.sh | 70 +++ .../scripts/restore-volume-snapshot.sh | 405 ++++++++++++++++++ .../scripts/upgrade-k8s-version-citus.sh | 72 ++++ docs/runbook/scripts/utils.sh | 213 +++++++++ docs/runbook/scripts/volume-snapshot.sh | 81 ++++ .../upgrade-k8s-version-citus-nodepool.md | 20 + 11 files changed, 1025 insertions(+), 34 deletions(-) create mode 100644 docs/runbook/change-citus-node-pool-machine-type.md create mode 100644 docs/runbook/create-disk-snapshot-for-citus-cluster.md create mode 100644 docs/runbook/increase-zfs-disksize.md create mode 100644 docs/runbook/restore-citus-from-disk-snapshot.md create mode 100755 docs/runbook/scripts/change-machine-type.sh create mode 100755 docs/runbook/scripts/restore-volume-snapshot.sh create mode 100755 docs/runbook/scripts/upgrade-k8s-version-citus.sh create mode 100755 docs/runbook/scripts/utils.sh create mode 100755 docs/runbook/scripts/volume-snapshot.sh create mode 100644 docs/runbook/upgrade-k8s-version-citus-nodepool.md diff --git a/charts/hedera-mirror-common/templates/zfs/configmap-init.yaml b/charts/hedera-mirror-common/templates/zfs/configmap-init.yaml index c064900fe0a..16b95a83021 100644 --- a/charts/hedera-mirror-common/templates/zfs/configmap-init.yaml +++ b/charts/hedera-mirror-common/templates/zfs/configmap-init.yaml @@ -10,22 +10,21 @@ data: label-wait.sh: | #!/usr/bin/env bash set -euxo pipefail - + ROOT_MOUNT_DIR="${ROOT_MOUNT_DIR:-/node}" ZFS_INITIALIZED="${ZFS_INITIALIZED:-}" until [ "${ZFS_INITIALIZED}" != "" ]; do echo "Waiting for label "; sleep 10; source "${ROOT_MOUNT_DIR}/etc/environment"; done entrypoint.sh: | #!/usr/bin/env bash - + set -euxo pipefail - + # Constants NODE_ID_LABEL="openebs.io/nodeid" - DISK_SIZE_LABEL="pool-disk-size" ROLE_LABEL="citus-role" ENV_FILE="${ROOT_MOUNT_DIR}/etc/environment" - + # Configurable environment variables CONFIG_MAP_NAME="{{ include "hedera-mirror-common.fullname" . }}-zfs-node-status" CONFIG_MAP_KEY="zfs-node-status.json" @@ -44,6 +43,12 @@ data: NODE_LABELS="$(kubectl get node ${NODE_NAME} -o jsonpath='{.metadata.labels}')" CITUS_ROLE="$(echo "${NODE_LABELS}" | jq --arg ROLE_LABEL "${ROLE_LABEL}" -r '.[$ROLE_LABEL]')" + if [[ "$CITUS_ROLE" == *"worker"* ]]; then + DISK_SIZE="${DEFAULT_DISK_SIZE_WORKER}" + else + DISK_SIZE="${DEFAULT_DISK_SIZE_COORDINATOR}" + fi + ATTEMPT_COUNT=1 while [[ "${CITUS_ROLE}" == "null" ]] do @@ -72,17 +77,15 @@ data: ZONE_CONFIG="$(echo "${MODIFIED_CONFIG_MAP}" |jq --arg KEY "${CONFIG_MAP_KEY}" --arg ZONE "${ZONE}" --arg CITUS_ROLE "${CITUS_ROLE}" '.data[$KEY][$CITUS_ROLE][$ZONE] // []')" NODE_LIST="$(kubectl get nodes -o jsonpath='{.items[*].metadata.labels}' -lcsi-type=zfs,topology.kubernetes.io/zone=${ZONE},${ROLE_LABEL}=${CITUS_ROLE} | jq -s)" NODE_NAMES="$(echo "${NODE_LIST}" | jq '. 
| map({(.["kubernetes.io/hostname"]): {isUpgrade: (.["operation.gke.io/type"] == "drain")}}) | add')" - + ## This will find all nodes that have been initialized by this script in the past ## and generate its own version of the config map configuration to ZFS_ZONE_NODES=$(echo "${NODE_LIST}" | jq \ --arg NODE_ID_LABEL "${NODE_ID_LABEL}" \ - --arg DISK_SIZE_LABEL "${DISK_SIZE_LABEL}" \ '. | map(select(.["generated-node-id"] == "true")) | map({ "nodeName": (.["kubernetes.io/hostname"]), "nodeId": (.[$NODE_ID_LABEL]), - "diskSize": (.[$DISK_SIZE_LABEL]), "index": (.[$NODE_ID_LABEL] | match("\\d*$").string | tonumber) }) | sort_by(.index) | @@ -124,21 +127,16 @@ data: THIS_NODE_CONFIG="$(echo "${MERGED_ZONE_CONFIG}" |jq --arg NODE_NAME "${NODE_NAME}" --arg ZONE "${ZONE}" '.[]?|select(.nodeName == $NODE_NAME)')" # If there is no entry in the config map, and the node wasn't previously labeled, it is a new node. Create the config. if [[ ! $THIS_NODE_CONFIG ]] - then - if [[ "$CITUS_ROLE" == *"worker"* ]]; then - DISK_SIZE="${DEFAULT_DISK_SIZE_WORKER}" - else - DISK_SIZE="${DEFAULT_DISK_SIZE_COORDINATOR}" - fi + then INDEX="$(echo "${MERGED_ZONE_CONFIG}" | jq '. | map(. == null) |index(true) // . | length')" NODE_ID="${CITUS_ROLE}-${ZONE}-${INDEX}" - THIS_NODE_CONFIG="{\"nodeName\": \"$NODE_NAME\", \"nodeId\": \"$NODE_ID\", \"diskSize\": \"$DISK_SIZE\"}" + THIS_NODE_CONFIG="{\"nodeName\": \"$NODE_NAME\", \"nodeId\": \"$NODE_ID\"}" MERGED_ZONE_CONFIG="$(echo "${MERGED_ZONE_CONFIG}" | jq --arg INDEX $INDEX --arg THIS_NODE_CONFIG "${THIS_NODE_CONFIG}" '.[$INDEX |tonumber] = ($THIS_NODE_CONFIG|fromjson)')" fi - + MODIFIED_CONFIG_MAP="$(echo "${MODIFIED_CONFIG_MAP}" | jq --arg KEY "${CONFIG_MAP_KEY}" --arg CITUS_ROLE "${CITUS_ROLE}" --arg ZONE "${ZONE}" --arg ZONE_CONFIG "${MERGED_ZONE_CONFIG}" '.data[$KEY][$CITUS_ROLE][$ZONE]=($ZONE_CONFIG|fromjson)')" MODIFIED_CONFIG_MAP="$(echo "${MODIFIED_CONFIG_MAP}" | jq --arg KEY "${CONFIG_MAP_KEY}" '.data[$KEY]=(.data[$KEY]|tojson)')" - + echo "patching $CONFIG_MAP_NAME with $MODIFIED_CONFIG_MAP" if (kubectl patch configmap $CONFIG_MAP_NAME -n $K8S_NAMESPACE --type merge -p "${MODIFIED_CONFIG_MAP}") then @@ -149,18 +147,15 @@ data: echo "Conflict patching config map. Retrying ..." fi done - + NODE_ID="$(echo "${THIS_NODE_CONFIG}" | jq -r '.nodeId')" if [[ "${NODE_ID}" == "null" ]] then echo "Unable to determine correct node id. Exiting ..." exit 1 fi - + DISK_NAME="${DISK_PREFIX}-${NODE_ID}-zfs" - DISK_SIZE="$(echo "${THIS_NODE_CONFIG}" | jq -r '.diskSize')" - echo "Setting up disk ${DISK_NAME} for ${CITUS_ROLE} on zfs node ${NODE_ID} with size ${DISK_SIZE}" - ACTUAL_SIZE=$(gcloud compute disks list --filter="name:${DISK_NAME}" --format="value(sizeGb)") if [[ -z "${ACTUAL_SIZE}" ]]; then echo "Creating ${DISK_NAME} for ${CITUS_ROLE} with size ${DISK_SIZE}" @@ -209,7 +204,7 @@ data: echo "Unable to create pool. Manual intervention necessary" exit 1 fi - + ATTACHED_CACHE_DEVICE="$(zpool status "${POOL}" |grep -A1 -E '^\s*cache\s*$' | grep -v cache | awk '{print $1;}')" if [[ -n "${L2_ARC_NVME_DEVICE_ID}" ]]; then NVME_DEVICE_PATH="$(readlink -f /dev/disk/by-id/google-local-ssd-block${L2_ARC_NVME_DEVICE_ID})" @@ -239,7 +234,7 @@ data: else echo "No L2 cache device specified. Skipping ..." 
fi - + ARC_SIZE_GB="${ARC_SIZE_GB:-2}" echo "Configuring arc to ${ARC_SIZE_GB}GB" ARC_SIZE="$((ARC_SIZE_GB*1073741824))" @@ -250,25 +245,18 @@ data: zpool online -e "${POOL}" /dev/sdb zfs list EOF - + CURRENT_NODE_ID="$(echo "${NODE_LABELS}" | jq --arg NODE_ID_LABEL "${NODE_ID_LABEL}" -r '.[$NODE_ID_LABEL]')" if [[ "${CURRENT_NODE_ID}" != "${NODE_ID}" ]] then echo "Labeling node ${NODE_NAME} with ${NODE_ID_LABEL}=${NODE_ID}" kubectl label node "${NODE_NAME}" "${NODE_ID_LABEL}=${NODE_ID}" generated-node-id=true --overwrite fi - - CURRENT_DISK_SIZE="$(echo "${NODE_LABELS}" | jq --arg DISK_SIZE_LABEL "${DISK_SIZE_LABEL}" -r '.[$DISK_SIZE_LABEL]')" - if [[ "${CURRENT_DISK_SIZE}" != "${DISK_SIZE}" || -z "${ACTUAL_SIZE}" ]]; - then - echo "Labeling node ${NODE_NAME} with ${DISK_SIZE_LABEL}=${DISK_SIZE}" - kubectl label node "${NODE_NAME}" "${DISK_SIZE_LABEL}=${DISK_SIZE}" --overwrite - fi - + source "${ENV_FILE}" ZFS_INITIALIZED="${ZFS_INITIALIZED:-}" if [[ "${ZFS_INITIALIZED}" == "" ]]; then echo "ZFS_INITIALIZED=\"true\"" > "${ENV_FILE}" fi -{{- end -}} \ No newline at end of file + {{- end -}} \ No newline at end of file diff --git a/docs/runbook/change-citus-node-pool-machine-type.md b/docs/runbook/change-citus-node-pool-machine-type.md new file mode 100644 index 00000000000..8e3cb62e635 --- /dev/null +++ b/docs/runbook/change-citus-node-pool-machine-type.md @@ -0,0 +1,20 @@ +# Change Machine Type for Citus Node Pool(s) + +## Problem + +Need to Change Machine Type for Citus Node Pool(s) + +## Prerequisites + +- Have `jq` installed +- kubectl is pointing to the cluster you want to change the machine type for +- All bash commands assume your working directory is `docs/runbook/scripts` + +## Solution + +1. Follow the steps to [create a disk snapshot for Citus cluster](./create-disk-snapshot-for-citus-cluster.md) + to backup the current cluster data +2. Run + ```bash + ./change-machine-type.sh + ``` diff --git a/docs/runbook/create-disk-snapshot-for-citus-cluster.md b/docs/runbook/create-disk-snapshot-for-citus-cluster.md new file mode 100644 index 00000000000..4c759413ee3 --- /dev/null +++ b/docs/runbook/create-disk-snapshot-for-citus-cluster.md @@ -0,0 +1,19 @@ +# Create Disk Snapshot for Citus Cluster + +## Problem + +Need to create disk snapshots for Citus cluster(s) + +## Prerequisites + +- Have access to a running Citus cluster deployed by the `hedera-mirror` chart +- Have `jq` installed +- All bash commands assume your working directory is `docs/runbook/scripts` +- The kubectl context is set to the cluster you want to create snapshots from + +## Solution + +Run script and follow along with all prompts +```bash +./volume-snapshot.sh +``` diff --git a/docs/runbook/increase-zfs-disksize.md b/docs/runbook/increase-zfs-disksize.md new file mode 100644 index 00000000000..3aa65d5463f --- /dev/null +++ b/docs/runbook/increase-zfs-disksize.md @@ -0,0 +1,77 @@ +## Problem + +The pvc for a shard is running out of space and needs to be increased beyond current capacity of the disk. + +## Prerequisites + +- Have `jq` installed +- The kubectl context is set to the cluster containing the disks you want to resize + +## Solution + +1. 
Identify the worker (and/or coordinator) pvc(s) that needs to be resized + ```bash + kubectl get pv -o \ + custom-columns='PVC_NAME:.spec.claimRef.name,PV_NAME:.metadata.name,CAPACITY:..spec.capacity.storage,NODE_ID:.spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0]' \ + --sort-by=.spec.capacity.storage + ``` + Example output + ```text + PVC_NAME PV_NAME CAPACITY NODE_ID + sentinel-data-mirror-redis-node-1 pvc-9d9da6c6-f6e4-45a3-91cf-61d47e50dcd9 1Gi us-central1-f + sentinel-data-mirror-redis-node-2 pvc-4b25a1b7-c34a-4d1f-8af3-bfcfe908bd99 1Gi us-central1-c + sentinel-data-mirror-redis-node-0 pvc-531e97b6-d4d6-4023-a2dc-847a2fac75dd 1Gi us-central1-b + redis-data-mirror-redis-node-0 pvc-7638c7ba-2ffe-4bb7-a09d-995e4d09b3a4 8Gi us-central1-b + redis-data-mirror-redis-node-1 pvc-edc9ed5a-03b4-48eb-86b0-49def5c1af1f 8Gi us-central1-f + redis-data-mirror-redis-node-2 pvc-638cab0b-ed6c-49b0-a61b-6893a5f3415f 8Gi us-central1-c + prometheus-mirror-prometheus-prometheus-db-prometheus-mirror-prometheus-prometheus-0 pvc-4745d425-fb1d-4af3-85c6-272cff98dcb8 100Gi us-central1-b + storage-mirror-loki-0 pvc-768215f5-30e3-4253-95ea-a82fb733207e 250Gi us-central1-b + mirror-citus-coord-data-mirror-citus-coord-0 pvc-6501aa41-f238-447b-b21b-7d91a36b8f02 256Gi coordinator-us-central1-c-0 + mirror-citus-coord-data-mirror-citus-coord-1 pvc-78ef76d9-ea31-49b3-a9b6-559a3ec5cd9f 256Gi coordinator-us-central1-b-0 + mirror-citus-shard2-data-mirror-citus-shard2-0 pvc-49d46894-51a0-4a97-b2da-e9c003e382f2 3200Gi worker-us-central1-b-0 + mirror-citus-shard0-data-mirror-citus-shard0-0 pvc-5dd58b07-db59-4c3a-882f-dcd7467dfd49 10000Gi worker-us-central1-c-0 + mirror-citus-shard1-data-mirror-citus-shard1-0 pvc-f9b980a9-0771-4222-9034-bd44279ddde8 12000Gi worker-us-central1-f-0 + ``` +2. Using the `nodeId` from the previous step, increase the disk size for all disks needed + ```text + diskPrefix - value of zfs.init.diskPrefix in values.yaml + diskName - {diskPrefix}-{nodeId}-zfs + zone - extracted from the `nodeId` + diskSize - the new size of the disk in Gb. + ``` + ```bash + gcloud compute disks resize "{diskName}" --size="{diskSize}" --zone="{zone}" + ``` +3. Restart the zfs init pods + ```bash + kubectl rollout restart daemonset -n common mirror-zfs-init + ``` +4. Verify the pool size has been increased + ```bash + kubectl get pods -n common -l component=openebs-zfs-node -o json | + jq -r '.items[].metadata.name' | + xargs -I % kubectl exec -c openebs-zfs-plugin -n common % -- zfs list + ``` +5. Update the `hedera-mirror` chart's `values.yaml` to reflect the new disk size + ```yaml + stackgres: + coordinator: + persistentVolume: + size: 256Gi + worker: + overrides: + - index: 0 + pods: + persistentVolume: + size: 12000Gi + - index: 1 + pods: + persistentVolume: + size: 14000Gi + - index: 2 + pods: + persistentVolume: + size: 3200Gi + ``` +6. Deploy the changes. 
Be sure to leave wiggle room for zfs rounding + see [here](https://github.com/openebs/zfs-localpv/blob/develop/docs/faq.md#7-why-the-zfs-volume-size-is-different-than-the-reqeusted-size-in-pvc) diff --git a/docs/runbook/restore-citus-from-disk-snapshot.md b/docs/runbook/restore-citus-from-disk-snapshot.md new file mode 100644 index 00000000000..ec3770a5b9e --- /dev/null +++ b/docs/runbook/restore-citus-from-disk-snapshot.md @@ -0,0 +1,26 @@ +# Restore Citus Data From Disk Snapshots + +## Problem + +Need to restore Citus cluster from disk snapshots + +## Prerequisites + +- Snapshots of disks were created by following the [create snapshot](create-disk-snapshot-for-citus-cluster.md) runbook +- Have `jq` and `ksd`(kubernetes secret decrypter) installed +- The snapshots are from a compatible version of `postgres` +- The `target cluster` has a running Citus cluster deployed with `hedera-mirror` chart +- The `target cluster` you are restoring to doesn't have any pvcs with a size larger than the size of the pvc in the + snapshot. You can't decrease the size of a pvc. If needed, you can delete the existing cluster in the `target cluster` + and redeploy the `hedera-mirror` chart with the default disk sizes. +- If you have multiple Citus clusters in the `target cluster`, you will need to restore all of them +- All bash commands assume your working directory is `docs/runbook/scripts` +- Only a single citus cluster is installed per namespace +- The kubectl context is set to the cluster you want to restore snapshots to + +## Steps + +Run script and follow along with all prompts +```bash +./restore-volume-snapshot.sh +``` diff --git a/docs/runbook/scripts/change-machine-type.sh b/docs/runbook/scripts/change-machine-type.sh new file mode 100755 index 00000000000..93268e66b11 --- /dev/null +++ b/docs/runbook/scripts/change-machine-type.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source ./utils.sh + +GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")" +if [[ -z "${GCP_PROJECT}" ]]; then + log "GCP_PROJECT is not set and is required. Exiting" + exit 1 +else + gcloud projects describe "${GCP_PROJECT}" > /dev/null +fi + +GCP_K8S_CLUSTER_REGION="$(readUserInput "Enter target cluster region: ")" +if [[ -z "${GCP_K8S_CLUSTER_REGION}" ]]; then + log "GCP_K8S_CLUSTER_REGION is not set and is required. Exiting" + exit 1 +else + gcloud compute regions describe "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" > /dev/null +fi + +GCP_K8S_CLUSTER_NAME="$(readUserInput "Enter target cluster name: ")" +if [[ -z "${GCP_K8S_CLUSTER_NAME}" ]]; then + log "GCP_K8S_CLUSTER_NAME is not set and is required. Exiting" + exit 1 +else + gcloud container clusters describe --project "${GCP_PROJECT}" \ + --region="${GCP_K8S_CLUSTER_REGION}" \ + "${GCP_K8S_CLUSTER_NAME}" > /dev/null +fi + +MACHINE_TYPE="$(readUserInput "Enter new machine type: ")" +if [[ -z "${MACHINE_TYPE}" ]]; then + log "MACHINE_TYPE is not set and is required. Exiting" + exit 1 +fi + +AVAILABLE_POOLS="$(gcloud container node-pools list --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json(name)"| jq -r '.[].name' | tr '\n' ' ')" +POOLS_TO_UPDATE_INPUT="$(readUserInput "Enter the node pools(${AVAILABLE_POOLS}) to update (space-separated): ")" +if [[ -z "${POOLS_TO_UPDATE_INPUT}" ]]; then + log "POOLS_TO_UPDATE_INPUT is not set and is required. 
Exiting" + exit 1 +else + IFS=', ' read -r -a POOLS_TO_UPDATE <<< "${POOLS_TO_UPDATE_INPUT}" + for pool in "${POOLS_TO_UPDATE[@]}"; do + POOL_LOCATIONS=($(gcloud container node-pools describe "${pool}" --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json" | jq -r '.locations[]')) + for location in "${POOL_LOCATIONS[@]}"; do + gcloud compute machine-types describe "${MACHINE_TYPE}" --project="${GCP_PROJECT}" --zone="${location}" > /dev/null + done + done +fi + +NAMESPACES=($(kubectl get sgshardedclusters.stackgres.io -A -o jsonpath='{.items[*].metadata.namespace}')) +for namespace in "${NAMESPACES[@]}" +do + unrouteTraffic "${namespace}" + pauseCitus "${namespace}" +done +resizeCitusNodePools 0 +for pool in "${POOLS_TO_UPDATE[@]}" +do +gcloud container node-pools update "${pool}" --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --location="${GCP_K8S_CLUSTER_REGION}" --machine-type="${MACHINE_TYPE}" +done +resizeCitusNodePools 1 +for namespace in "${NAMESPACES[@]}" +do + unpauseCitus "${namespace}" + routeTraffic "${namespace}" +done \ No newline at end of file diff --git a/docs/runbook/scripts/restore-volume-snapshot.sh b/docs/runbook/scripts/restore-volume-snapshot.sh new file mode 100755 index 00000000000..73b9484c31f --- /dev/null +++ b/docs/runbook/scripts/restore-volume-snapshot.sh @@ -0,0 +1,405 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source ./utils.sh + +REPLACE_DISKS="${REPLACE_DISKS:-true}" +ZFS_POOL_NAME="${ZFS_POOL_NAME:-zfspv-pool}" + +function configureAndValidate() { + CURRENT_CONTEXT=$(kubectl config current-context) + GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")" + if [[ -z "${GCP_PROJECT}" ]]; then + log "GCP_PROJECT is not set and is required. Exiting" + exit 1 + else + gcloud projects describe "${GCP_PROJECT}" > /dev/null + fi + + GCP_SNAPSHOT_PROJECT="$(readUserInput "Enter GCP Project for snapshot source: ")" + if [[ -z "${GCP_SNAPSHOT_PROJECT}" ]]; then + log "GCP_SNAPSHOT_PROJECT is not set and is required. Exiting" + exit 1 + else + gcloud projects describe "${GCP_SNAPSHOT_PROJECT}" > /dev/null + fi + + GCP_K8S_CLUSTER_REGION="$(readUserInput "Enter target cluster region: ")" + if [[ -z "${GCP_K8S_CLUSTER_REGION}" ]]; then + log "GCP_K8S_CLUSTER_REGION is not set and is required. Exiting" + exit 1 + else + gcloud compute regions describe "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" > /dev/null + fi + + GCP_K8S_CLUSTER_NAME="$(readUserInput "Enter target cluster name: ")" + if [[ -z "${GCP_K8S_CLUSTER_NAME}" ]]; then + log "GCP_K8S_CLUSTER_NAME is not set and is required. Exiting" + exit 1 + else + gcloud container clusters describe --project "${GCP_PROJECT}" \ + --region="${GCP_K8S_CLUSTER_REGION}" \ + "${GCP_K8S_CLUSTER_NAME}" > /dev/null + fi + + log "Listing snapshots in project ${GCP_SNAPSHOT_PROJECT}" + gcloud compute snapshots list --project "${GCP_SNAPSHOT_PROJECT}" --format="table(name, diskSizeGb, sourceDisk, description, creationTimestamp)" --filter="name~.*[0-9]{10,}$" --sort-by="~creationTimestamp" + + SNAPSHOT_ID="$(readUserInput "Enter snapshot id (the epoch suffix of the snapshot group): ")" + if [[ -z "${SNAPSHOT_ID}" ]]; then + log "SNAPSHOT_ID is not set and is required. Please provide an identifier that is unique across all snapshots. 
Exiting" + exit 1 + else + SNAPSHOTS_TO_RESTORE=$(gcloud compute snapshots list --project "${GCP_SNAPSHOT_PROJECT}" \ + --filter="name~.*${SNAPSHOT_ID}$" \ + --format="json(name, description)" | + jq -r 'map(select(.description != null) | {name: .name, description: (.description|fromjson|sort_by(.volumeName))})') + if [[ -z "${SNAPSHOTS_TO_RESTORE}" ]]; then + log "No snapshots found for snapshot id ${SNAPSHOT_ID} in project ${GCP_SNAPSHOT_PROJECT}. Exiting" + exit 1 + else + log "Found snapshots to restore: ${SNAPSHOTS_TO_RESTORE}" + doContinue + fi + fi + + DISK_PREFIX="$(readUserInput "Enter the disk prefix of target cluster (value of zfs.init.diskPrefix in values.yaml): ")" + if [[ -z "${DISK_PREFIX}" ]]; then + log "DISK_PREFIX can not be empty. Exiting" + exit 1 + fi + + if [[ -z "${ZFS_POOL_NAME}" ]]; then + log "Unable to find zfs pool name. set ZFS_POOL_NAME to value of zfs.paramaters.poolname in common values.yaml" + exit 1 + fi + + ZFS_VOLUMES=$(getZFSVolumes) + NAMESPACES=($(echo $ZFS_VOLUMES | jq -r '.[].namespace' | tr ' ' '\n' | sort -u | tr '\n' ' ')) + NODE_ID_MAP=$(echo -e "${SNAPSHOTS_TO_RESTORE}\n${ZFS_VOLUMES}" | + jq -s '.[0] as $snapshots | + .[1] as $volumes | + $volumes | group_by(.nodeId) | + map((.[0].nodeId) as $nodeId | + map(.)|sort_by(.volumeName) as $pvcs | + $pvcs | + map( + { + pvcName: .pvcName, + namespace: .namespace + } + ) as $pvcMatchData| + { + ($nodeId) : { + pvcs: ($pvcs), + snapshot: ($snapshots | map(select(.description|contains($pvcMatchData)))) + } + } + )|add') + + UNIQUE_NODE_IDS=($(echo "${NODE_ID_MAP}" | jq -r 'keys[]')) + for nodeId in "${UNIQUE_NODE_IDS[@]}"; do + local diskName="${DISK_PREFIX}-${nodeId}-zfs" + local diskZone="$(echo "${nodeId}" | cut -d '-' -f 2-4)" + + if ! gcloud compute disks describe "${diskName}" --project "${GCP_PROJECT}" --zone "${diskZone}"> /dev/null; then + log "Disk ${diskName} does not exist in project ${GCP_PROJECT} Please confirm the input for disk prefix. Exiting" + exit 1 + fi + local nodeInfo=$(echo "${NODE_ID_MAP}" | jq -r --arg NODE_ID "${nodeId}" '.[$NODE_ID]') + HAS_VALID_SNAPSHOT=$(echo "${nodeInfo}" | jq -r '.snapshot|length == 1') + if [[ "${HAS_VALID_SNAPSHOT}" == "false" ]]; then + log "Unable to find valid snapshot for node id ${nodeId} in snapshot id ${SNAPSHOT_ID}. + Please verify snapshots contain same namespace, pvc name and postgres version. 
+ ${nodeInfo}" + exit 1 + else + log "Snapshot contains all pvcs for node ${nodeId}" + fi + done +} + +function prepareDiskReplacement() { + log "Will spin down importer and citus in the namespaces (${NAMESPACES[*]}) for context ${CURRENT_CONTEXT}" + doContinue + + for namespace in "${NAMESPACES[@]}"; do + + # Unroute traffic + unrouteTraffic "${namespace}" + + # Pause Citus + pauseCitus "${namespace}" + done + + # Spin down existing citus node pools + resizeCitusNodePools 0 +} + +function renameZfsVolumes() { + log "Waiting for zfs pods to be ready" + kubectl wait --for=condition=Ready pod -n "${COMMON_NAMESPACE}" -l 'component=openebs-zfs-node' --timeout=-1s + + local zfsNodePods=$(kubectl get pods -A -o wide -o json -l 'component=openebs-zfs-node' | jq -r '.items|map({node: (.spec.nodeName), podName: (.metadata.name)})') + local zfsNodes=$(kubectl get zfsnodes.zfs.openebs.io -A -o json | jq -r '.items|map({nodeId: .metadata.name, node: .metadata.ownerReferences[0].name})') + local nodeIdToPodMap=$(echo -e "${zfsNodePods}\n${zfsNodes}" | + jq -s '.[0] as $zfsPods | + .[1] | + map(.node as $nodeName | + { + (.nodeId) : ($zfsPods[] | select(.node == $nodeName).podName) + } + )| + add') + + log "Renaming zfs datasets" + for nodeId in "${UNIQUE_NODE_IDS[@]}"; do + local podInfo=$(echo "${nodeIdToPodMap}" | jq -r --arg NODE_ID "${nodeId}" '.[$NODE_ID]') + local nodeData=$(echo "${NODE_ID_MAP}" | jq -r --arg NODE_ID "${nodeId}" '.[$NODE_ID]') + local pvcsCount=$(echo "${nodeData}" | jq -r '.pvcs|length') + for pvcIndex in $(seq 0 $((pvcsCount - 1))); do + local pvcVolumeName=$(echo "${nodeData}" | jq -r --arg PVC_INDEX "${pvcIndex}" '.pvcs[$PVC_INDEX|tonumber].volumeName') + local snapshotPvcVolumeName=$(echo "${nodeData}" | jq -r --arg PVC_INDEX "${pvcIndex}" '.snapshot[0].description[$PVC_INDEX|tonumber].volumeName') + + if [[ "${pvcVolumeName}" != "${snapshotPvcVolumeName}" ]]; then + log "Renaming snapshot pvc ${ZFS_POOL_NAME}/${snapshotPvcVolumeName} to ${ZFS_POOL_NAME}/${pvcVolumeName}" + kubectl exec -n "${COMMON_NAMESPACE}" "${podInfo}" -c openebs-zfs-plugin -- zfs rename "${ZFS_POOL_NAME}/${snapshotPvcVolumeName}" "${ZFS_POOL_NAME}/${pvcVolumeName}" + else + log "Snapshot pvc ${ZFS_POOL_NAME}/${snapshotPvcVolumeName} already matches pvc ${ZFS_POOL_NAME}/${pvcVolumeName}" + fi + done + kubectl exec -n "${COMMON_NAMESPACE}" "${podInfo}" -c openebs-zfs-plugin -- zfs list + done + log "ZFS datasets renamed" +} + +function replaceDisks() { + log "Will delete disks ${DISK_PREFIX}-(${UNIQUE_NODE_IDS[*]})-zfs in project ${GCP_PROJECT}" + doContinue + + prepareDiskReplacement + + for nodeId in "${UNIQUE_NODE_IDS[@]}"; do + local nodeInfo=$(echo "${NODE_ID_MAP}" | jq -r --arg NODE_ID "${nodeId}" '.[$NODE_ID]') + local diskName="${DISK_PREFIX}-${nodeId}-zfs" + local diskZone=$(echo "${nodeId}" | cut -d '-' -f 2-4) + local snapshotName=$(echo "${nodeInfo}" | jq -r '.snapshot[0].name') + local snapshotFullName="projects/${GCP_SNAPSHOT_PROJECT}/global/snapshots/${snapshotName}" + log "Recreating disk ${diskName} in ${GCP_PROJECT} with snapshot ${snapshotName}" + gcloud compute disks delete "${diskName}" --project "${GCP_PROJECT}" --zone "${diskZone}" --quiet + gcloud compute disks create "${diskName}" --project "${GCP_PROJECT}" --zone "${diskZone}" --source-snapshot "${snapshotFullName}" --type=pd-balanced --quiet & + done + + log "Waiting for disks to be created" + wait + + resizeCitusNodePools 1 + renameZfsVolumes +} + +function configureShardedClusterResource() { + local pvcsInNamespace="${1}" + 
local shardedClusterName="${2}" + local namespace="${3}" + + local coordinatorPvcSize=$(echo "${pvcsInNamespace}" | + jq -r 'map(select(.snapshotPrimary and .citusCluster.isCoordinator))| + map(.snapshotPvcSize)|first') + local workerPvcOverrides=$(echo "${pvcsInNamespace}" | + jq -r 'map(select(.snapshotPrimary and .citusCluster.isCoordinator == false))| + sort_by(.citusCluster.citusGroup, .citusCluster.podName)| + to_entries| + map({index: .key, pods: {persistentVolume: {size: .value.snapshotPvcSize}}})') + local shardedClusterPatch=$(echo "${workerPvcOverrides}" | + jq -r --arg coordinatorPvcSize "${coordinatorPvcSize}" \ + '{ + spec: { + coordinator: { + pods: { + persistentVolume: { + size: $coordinatorPvcSize + } + } + }, + shards: { + overrides: (.) + } + } + }') + log "Patching sharded cluster ${shardedClusterName} in namespace ${namespace} with ${shardedClusterPatch}" + kubectl patch sgshardedclusters.stackgres.io -n "${namespace}" "${shardedClusterName}" --type merge -p "${shardedClusterPatch}" + log " + **** IMPORTANT **** + Please configure your helm values.yaml for namespace ${namespace} to have the following values: + + stackgres.coordinator.pods.persistentVolume.size=${coordinatorPvcSize} + + stackgres.worker.overrides=${workerPvcOverrides} + " + log "Continue to acknowledge config change is saved (do not need to apply the config change yet)" + doContinue +} + +function markAndConfigurePrimaries() { + local pvcsInNamespace="${1}" + local shardedClusterName="${2}" + + # Stackgres Passwords + local primaryCoordinator=$(echo "${pvcsInNamespace}" | + jq -r 'map(select(.snapshotPrimary and .citusCluster.isCoordinator))|first') + local sgPasswordsSecretName=$(echo "${primaryCoordinator}" | jq -r '.citusCluster.clusterName') + local sgPasswords=$(kubectl get secret -n "${namespace}" "${sgPasswordsSecretName}" -o json | + ksd | + jq -r '.stringData') + local superuserUsername=$(echo "${sgPasswords}" | jq -r '.["superuser-username"]') + local superuserPassword=$(echo "${sgPasswords}" | jq -r '.["superuser-password"]') + local replicationUsername=$(echo "${sgPasswords}" | jq -r '.["replication-username"]') + local replicationPassword=$(echo "${sgPasswords}" | jq -r '.["replication-password"]') + local authenticatorUsername=$(echo "${sgPasswords}" | jq -r '.["authenticator-username"]') + local authenticatorPassword=$(echo "${sgPasswords}" | jq -r '.["authenticator-password"]') + + # Mirror Node Passwords + local mirrorNodePasswords=$(kubectl get secret -n "${namespace}" "${HELM_RELEASE_NAME}-passwords" -o json | + ksd | + jq -r '.stringData') + local graphqlUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRAPHQL_DB_USERNAME') + local graphqlPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRAPHQL_DB_PASSWORD') + local grpcUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRPC_DB_USERNAME') + local grpcPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_GRPC_DB_PASSWORD') + local importerUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_USERNAME') + local importerPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_PASSWORD') + local ownerUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_OWNER') + local ownerPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_OWNERPASSWORD') + local restUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_REST_DB_USERNAME') + local restPassword=$(echo "${mirrorNodePasswords}" | jq 
-r '.HEDERA_MIRROR_REST_DB_PASSWORD') + local restJavaUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_RESTJAVA_DB_USERNAME') + local restJavaPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_RESTJAVA_DB_PASSWORD') + local rosettaUsername=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_ROSETTA_DB_USERNAME') + local rosettaPassword=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_ROSETTA_DB_PASSWORD') + local web3Username=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_WEB3_DB_USERNAME') + local web3Password=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_WEB3_DB_PASSWORD') + local dbName=$(echo "${mirrorNodePasswords}" | jq -r '.HEDERA_MIRROR_IMPORTER_DB_NAME') + local sqlCommands=( + "alter user ${superuserUsername} with password '${superuserPassword}';" + "alter user ${graphqlUsername} with password '${graphqlPassword}';" + "alter user ${grpcUsername} with password '${grpcPassword}';" + "alter user ${importerUsername} with password '${importerPassword}';" + "alter user ${ownerUsername} with password '${ownerPassword}';" + "alter user ${restUsername} with password '${restPassword}';" + "alter user ${restJavaUsername} with password '${restJavaPassword}';" + "alter user ${rosettaUsername} with password '${rosettaPassword}';" + "alter user ${web3Username} with password '${web3Password}';" + "alter user ${replicationUsername} with password '${replicationPassword}';" + "alter user ${authenticatorUsername} with password '${authenticatorPassword}';") + + local clusterGroups=$(echo "${pvcsInNamespace}" | + jq -r 'group_by(.citusCluster.clusterName)| + map({ + (.[0].citusCluster.clusterName): + .|sort_by(.citusCluster.podName)| + map( + { + group: .citusCluster.citusGroup, + isCoordinator: .citusCluster.isCoordinator, + name: .citusCluster.podName, + primary: .snapshotPrimary, + shardedClusterName: .citusCluster.shardedClusterName + } + ) + } + )|add') + local clusterNames=($(echo "${clusterGroups}" | jq -r 'keys[]')) + + for clusterName in "${clusterNames[@]}"; do + local groupPods=$(echo "${clusterGroups}" | jq -r --arg clusterName "${clusterName}" '.[$clusterName]') + local clusterPatch=$(echo "${groupPods}" | + jq -r '{status: {podStatuses: map({name: .name, primary: .primary})}}') + local citusGroup=$(echo "${groupPods}" | jq -r '.[0].group') + local primaryPod=$(echo "${groupPods}" | jq -r 'map(select(.primary))|first|.name') + local endpointName="${HELM_RELEASE_NAME}-citus-${citusGroup}" + log "Marking primary on endpoint ${endpointName}" + kubectl annotate endpoints "${endpointName}" -n "${namespace}" leader="${primaryPod}" --overwrite + log "Waiting for patroni to mark primary" + sleep 10 + local patroniClusterStatus=$(kubectl exec -n "${namespace}" -c patroni "${primaryPod}" \ + -- patronictl list --group "${citusGroup}" -f json | jq -r 'map({primary: (.Role == "Leader"), name: .Member})') + local patroniPrimaryPod=$(echo "${patroniClusterStatus}" | jq -r 'map(select(.primary))|first|.name') + if [[ "${patroniPrimaryPod}" != "${primaryPod}" ]]; then + log "Primary pod ${primaryPod} is not the patroni primary ${patroniPrimaryPod} for ${shardedClusterName} +group ${citusGroup}. 
Will failover" + kubectl exec -n "${namespace}" "${primaryPod}" -c patroni \ + -- patronictl failover "${shardedClusterName}" --group "${citusGroup}" --candidate "${primaryPod}" --force + patroniPrimaryPod=$(echo "${patroniClusterStatus}" | jq -r 'map(select(.primary))|first|.name') + while [[ "${patroniPrimaryPod}" != "${primaryPod}" ]]; do + log "Waiting for failover to complete expecting ${primaryPod} to be primary but got ${patroniPrimaryPod}" + sleep 10 + patroniClusterStatus=$(kubectl exec -n "${namespace}" -c patroni "${primaryPod}" \ + -- patronictl list --group "${citusGroup}" -f json | jq -r 'map({primary: (.Role == "Leader"), name: .Member})') + patroniPrimaryPod=$(echo "${patroniClusterStatus}" | jq -r 'map(select(.primary))|first|.name') + done + fi + log "Patching cluster ${clusterName} in namespace ${namespace} with ${clusterPatch}" + kubectl patch sgclusters.stackgres.io -n "${namespace}" "${clusterName}" --type merge -p "${clusterPatch}" + kubectl exec -n "${namespace}" "${primaryPod}" -c postgres-util \ + -- psql -U "${superuserUsername}" \ + -c "ALTER USER ${superuserUsername} WITH PASSWORD '${superuserPassword}';" + for sql in "${sqlCommands[@]}"; do + log "Executing sql command for cluster ${clusterName}: ${sql}" + kubectl exec -n "${namespace}" "${primaryPod}" -c postgres-util \ + -- psql -U "${superuserUsername}" -c "${sql}" + done + + kubectl exec -n "${namespace}" "${primaryPod}" -c postgres-util \ + -- psql -U "${superuserUsername}" -d "${dbName}" -c \ + "insert into pg_dist_authinfo(nodeid, rolename, authinfo) + values (0, '${superuserUsername}', 'password=${superuserPassword}'), + (0, '${graphqlUsername}', 'password=${graphqlPassword}'), + (0, '${grpcUsername}', 'password=${grpcPassword}'), + (0, '${importerUsername}', 'password=${importerPassword}'), + (0, '${ownerUsername}', 'password=${ownerPassword}'), + (0, '${restUsername}', 'password=${restPassword}'), + (0, '${restJavaUsername}', 'password=${restJavaPassword}'), + (0, '${rosettaUsername}', 'password=${rosettaPassword}'), + (0, '${web3Username}', 'password=${web3Password}') on conflict (nodeid, rolename) + do + update set authinfo = excluded.authinfo;" + done +} + +function patchCitusClusters() { + log "Patching Citus clusters in namespaces ${NAMESPACES[*]}" + local pvcsByNamespace=$(echo -e "${SNAPSHOTS_TO_RESTORE}\n${ZFS_VOLUMES}" | + jq -s '(.[0] | map(.description)| flatten) as $snapshots| + .[1] as $volumes| + $volumes | group_by(.namespace)| + map((.[0].namespace) as $namespace | + { + ($namespace): + map(. as $pvc | + $snapshots[]| + select(.pvcName == $pvc.pvcName and .namespace == $pvc.namespace) as $snapshotPvc| + $pvc + {snapshotPvcSize: $snapshotPvc.pvcSize, snapshotPrimary: $snapshotPvc.primary}) + } + )| + add') + for namespace in "${NAMESPACES[@]}"; do + local pvcsInNamespace=$(echo "${pvcsByNamespace}" | jq -r --arg namespace "${namespace}" '.[$namespace]') + local shardedClusterName=$(echo "${pvcsInNamespace}" | jq -r '.[0].citusCluster.shardedClusterName') + + configureShardedClusterResource "${pvcsInNamespace}" "${shardedClusterName}" "${namespace}" + unpauseCitus "${namespace}" "true" + markAndConfigurePrimaries "${pvcsInNamespace}" "${shardedClusterName}" + routeTraffic "${namespace}" + done +} + +configureAndValidate + +if [[ "${REPLACE_DISKS}" == "true" ]]; then + replaceDisks +else + log "REPLACE_DISKS is set to false. 
Skipping disk replacement" +fi + +patchCitusClusters diff --git a/docs/runbook/scripts/upgrade-k8s-version-citus.sh b/docs/runbook/scripts/upgrade-k8s-version-citus.sh new file mode 100755 index 00000000000..33543321c55 --- /dev/null +++ b/docs/runbook/scripts/upgrade-k8s-version-citus.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source ./utils.sh + +NAMESPACES=($(kubectl get sgshardedclusters.stackgres.io -A -o jsonpath='{.items[*].metadata.namespace}')) + +GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")" +if [[ -z "${GCP_PROJECT}" ]]; then + log "GCP_PROJECT is not set and is required. Exiting" + exit 1 +else + gcloud projects describe "${GCP_PROJECT}" > /dev/null +fi + +GCP_K8S_CLUSTER_REGION="$(readUserInput "Enter target cluster region: ")" +if [[ -z "${GCP_K8S_CLUSTER_REGION}" ]]; then + log "GCP_K8S_CLUSTER_REGION is not set and is required. Exiting" + exit 1 +else + gcloud compute regions describe "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" > /dev/null +fi + +GCP_K8S_CLUSTER_NAME="$(readUserInput "Enter target cluster name: ")" +if [[ -z "${GCP_K8S_CLUSTER_NAME}" ]]; then + log "GCP_K8S_CLUSTER_NAME is not set and is required. Exiting" + exit 1 +else + gcloud container clusters describe --project "${GCP_PROJECT}" \ + --region="${GCP_K8S_CLUSTER_REGION}" \ + "${GCP_K8S_CLUSTER_NAME}" > /dev/null +fi + +VERSION="$(readUserInput "Enter the new Kubernetes version: ")" +if [[ -z "${VERSION}" ]]; then + log "VERSION is not set and is required. Exiting" + exit 1 +else + HAS_VERSION="$(gcloud container get-server-config --location="${GCP_K8S_CLUSTER_REGION}" --project="${GCP_PROJECT}" --format="json(validNodeVersions)" | jq -r --arg VERSION "${VERSION}" 'any(.validNodeVersions[]; . == $VERSION)')" + if [[ "${HAS_VERSION}" != "true" ]]; then + log "Version ${VERSION} is not valid. Exiting" + exit 1 + fi +fi + +AVAILABLE_POOLS="$(gcloud container node-pools list --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json(name)"| jq -r '.[].name' | tr '\n' ' ')" +POOLS_TO_UPDATE_INPUT="$(readUserInput "Enter the node pools(${AVAILABLE_POOLS}) to update (space-separated): ")" +if [[ -z "${POOLS_TO_UPDATE_INPUT}" ]]; then + log "POOLS_TO_UPDATE_INPUT is not set and is required. Exiting" + exit 1 +else + IFS=', ' read -r -a POOLS_TO_UPDATE <<< "${POOLS_TO_UPDATE_INPUT}" + for pool in "${POOLS_TO_UPDATE[@]}"; do + gcloud container node-pools describe "${pool}" --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" > /dev/null + done +fi + +for namespace in "${NAMESPACES[@]}" +do + unrouteTraffic "${namespace}" + pauseCitus "${namespace}" +done +for pool in "${POOLS_TO_UPDATE[@]}" +do +gcloud container clusters upgrade "${GCP_K8S_CLUSTER_NAME}" --node-pool="${pool}" --cluster-version="${VERSION}" --location="${GCP_K8S_CLUSTER_REGION}" --project="${GCP_PROJECT}" +done +for namespace in "${NAMESPACES[@]}" +do + unpauseCitus "${namespace}" + routeTraffic "${namespace}" +done \ No newline at end of file diff --git a/docs/runbook/scripts/utils.sh b/docs/runbook/scripts/utils.sh new file mode 100755 index 00000000000..fa542894c74 --- /dev/null +++ b/docs/runbook/scripts/utils.sh @@ -0,0 +1,213 @@ +#!/usr/bin/env bash +set -euo pipefail + +function doContinue() { + read -p "Continue? 
(Y/N): " confirm && [[ $confirm == [yY] || $confirm == [yY][eE][sS] ]] || exit 1 +} + +function log() { + echo "$(date -u +"%Y-%m-%dT%H:%M:%SZ") ${1}" +} + +function readUserInput() { + read -p "${1}" input + echo "${input}" +} + +function scaleDeployment() { + local namespace="${1}" + local replicas="${2}" + local deploymentLabel="${3}" + + if [[ "${replicas}" -gt 0 ]]; then # scale up + kubectl scale deployment -n "${namespace}" -l "${deploymentLabel}" --replicas="${replicas}" + log "Waiting for pods with label ${deploymentLabel} to be ready" + kubectl wait --for=condition=Ready pod -n "${namespace}" -l "${deploymentLabel}" --timeout=-1s + else # scale down + local deploymentPods=$(kubectl get pods -n "${namespace}" -l "${deploymentLabel}" -o 'jsonpath={.items[*].metadata.name}') + if [[ -z "${deploymentPods}" ]]; then + log "No pods found for deployment ${deploymentLabel} in namespace ${namespace}" + return + else + log "Removing pods ${deploymentPods} in ${namespace} for ${CURRENT_CONTEXT}" + doContinue + kubectl scale deployment -n "${namespace}" -l "${deploymentLabel}" --replicas="${replicas}" + log "Waiting for pods with label ${deploymentLabel} to be deleted" + kubectl wait --for=delete pod -n "${namespace}" -l "${deploymentLabel}" --timeout=-1s + fi + fi +} + +function unrouteTraffic() { + local namespace="${1}" + if [[ "${AUTO_UNROUTE}" == "true" ]]; then + log "Unrouting traffic to cluster in namespace ${namespace}" + if kubectl get helmrelease -n "${namespace}" "${HELM_RELEASE_NAME}" > /dev/null; then + log "Suspending helm release ${HELM_RELEASE_NAME} in namespace ${namespace}" + doContinue + flux suspend helmrelease -n "${namespace}" "${HELM_RELEASE_NAME}" + else + log "No helm release found in namespace ${namespace}. Skipping suspend" + fi + + scaleDeployment "${namespace}" 0 "app.kubernetes.io/component=monitor" + fi + scaleDeployment "${namespace}" 0 "app.kubernetes.io/component=importer" +} + +function routeTraffic() { + local namespace="${1}" + + log "Running test queries" + kubectl exec -it -n "${namespace}" "${HELM_RELEASE_NAME}-citus-coord-0" -c postgres-util -- psql -U mirror_rest -d mirror_node -c "select * from transaction limit 10" + kubectl exec -it -n "${namespace}" "${HELM_RELEASE_NAME}-citus-coord-0" -c postgres-util -- psql -U mirror_node -d mirror_node -c "select * from transaction limit 10" + doContinue + scaleDeployment "${namespace}" 1 "app.kubernetes.io/component=importer" + while true; do + local statusQuery="select $(date +%s) - (max(consensus_end) / 1000000000) from record_file" + local status=$(kubectl exec -n "${namespace}" "${HELM_RELEASE_NAME}-citus-coord-0" -c postgres-util -- psql -q --csv -t -U mirror_rest -d mirror_node -c "select $(date +%s) - (max(consensus_end) / 1000000000) from record_file" | tail -n 1) + if [[ "${status}" -lt 10 ]]; then + log "Importer is caught up with the source" + break + else + log "Waiting for importer to catch up with the source. Current lag: ${status} seconds" + sleep 10 + fi + done + if [[ "${AUTO_UNROUTE}" == "true" ]]; then + if kubectl get helmrelease -n "${namespace}" "${HELM_RELEASE_NAME}" > /dev/null; then + log "Resuming helm release ${HELM_RELEASE_NAME} in namespace ${namespace}. + Be sure to configure values.yaml with any changes before continuing" + doContinue + flux resume helmrelease -n "${namespace}" "${HELM_RELEASE_NAME}" --timeout 30m + else + log "No helm release found in namespace ${namespace}. 
Skipping suspend" + fi + scaleDeployment "${namespace}" 1 "app.kubernetes.io/component=monitor" + fi +} + +function pauseCitus() { + local namespace="${1}" + local citusPods=$(kubectl get pods -n "${namespace}" -l 'stackgres.io/cluster=true' -o 'jsonpath={.items[*].metadata.name}') + if [[ -z "${citusPods}" ]]; then + log "Citus is not currently running" + else + log "Removing pods (${citusPods}) in ${namespace} for ${CURRENT_CONTEXT}" + doContinue + kubectl annotate sgclusters.stackgres.io -n "${namespace}" --all stackgres.io/reconciliation-pause="true" --overwrite + sleep 5 + kubectl scale sts -n "${namespace}" -l 'stackgres.io/cluster=true' --replicas=0 + log "Waiting for citus pods to terminate" + kubectl wait --for=delete pod -l 'stackgres.io/cluster=true' -n "${namespace}" --timeout=-1s + fi +} + +function unpauseCitus() { + local namespace="${1}" + local reinitializeCitus="${2:-false}" + + local citusPods=$(kubectl get pods -n "${namespace}" -l 'stackgres.io/cluster=true' -o 'jsonpath={.items[*].metadata.name}') + if [[ -z "${citusPods}" ]]; then + log "Starting citus cluster in namespace ${namespace}" + if [[ "${reinitializeCitus}" == "true" ]]; then + kubectl annotate endpoints -n "${namespace}" -l 'stackgres.io/cluster=true' initialize- --overwrite + fi + kubectl annotate sgclusters.stackgres.io -n "${namespace}" --all stackgres.io/reconciliation-pause- --overwrite + log "Waiting for citus pods to be ready" + sleep 5 + kubectl wait --for=condition=Ready pod -l 'stackgres.io/cluster=true' -n "${namespace}" --timeout=-1s + log "Waiting for citus replica pods to be ready" + sleep 30 # Wait again as replicas will not spin up until the primary is started + kubectl wait --for=condition=Ready pod -l 'stackgres.io/cluster=true' -n "${namespace}" --timeout=-1s + else + log "Citus is already running in namespace ${namespace}. 
Skipping" + fi +} + +function getCitusClusters() { + kubectl get sgclusters.stackgres.io -A -o json | + jq -r '.items| + map( + .metadata as $metadata| + .spec.postgres.version as $pgVersion| + ((.metadata.labels["stackgres.io/coordinator"] // "false")| test("true")) as $isCoordinator | + .spec.configurations.patroni.initialConfig.citus.group as $citusGroup| + .status.podStatuses[]| + { + citusGroup: $citusGroup, + clusterName: $metadata.name, + isCoordinator: $isCoordinator, + namespace: $metadata.namespace, + pgVersion: $pgVersion, + podName: .name, + pvcName: "\($metadata.name)-data-\(.name)", + primary: .primary, + shardedClusterName: $metadata.ownerReferences[0].name + } + )' +} + +function getZFSVolumes () { + kubectl get pv -o json | + jq -r --arg CITUS_CLUSTERS "${CITUS_CLUSTERS}" \ + '.items| + map(select(.metadata.annotations."pv.kubernetes.io/provisioned-by"=="zfs.csi.openebs.io" and + .status.phase == "Bound")| + (.spec.claimRef.name) as $pvcName | + (.spec.claimRef.namespace) as $pvcNamespace | + { + namespace: ($pvcNamespace), + volumeName: (.metadata.name), + pvcName: ($pvcName), + pvcSize: (.spec.capacity.storage), + nodeId: (.spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0]), + citusCluster: ($CITUS_CLUSTERS | fromjson | map(select(.pvcName == $pvcName and + .namespace == $pvcNamespace))|first) + } + )' +} + +function resizeCitusNodePools() { + local numNodes="${1}" + + log "Scaling nodepool ${GCP_COORDINATOR_POOL_NAME} and ${GCP_WORKER_POOL_NAME} in cluster ${GCP_K8S_CLUSTER_NAME} + for project ${GCP_PROJECT} to ${numNodes} nodes per zone" + + if [[ "${numNodes}" -gt 0 ]]; then + gcloud container clusters resize "${GCP_K8S_CLUSTER_NAME}" --node-pool "${GCP_COORDINATOR_POOL_NAME}" --num-nodes "${numNodes}" --location "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" --quiet & + gcloud container clusters resize "${GCP_K8S_CLUSTER_NAME}" --node-pool "${GCP_WORKER_POOL_NAME}" --num-nodes "${numNodes}" --location "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" --quiet & + log "Waiting for nodes to be ready" + wait + kubectl wait --for=condition=Ready node -l'citus-role=coordinator' --timeout=-1s + kubectl wait --for=condition=Ready node -l'citus-role=worker' --timeout=-1s + else + local coordinatorNodes=$(kubectl get nodes -l'citus-role=coordinator' -o 'jsonpath={.items[*].metadata.name}') + if [[ -z "${coordinatorNodes}" ]]; then + log "No coordinator nodes found" + else + log "Scaling down coordinator nodes ${coordinatorNodes}" + gcloud container clusters resize "${GCP_K8S_CLUSTER_NAME}" --node-pool "${GCP_COORDINATOR_POOL_NAME}" --num-nodes 0 --location "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" --quiet & + fi + + local workerNodes=$(kubectl get nodes -l'citus-role=worker' -o 'jsonpath={.items[*].metadata.name}') + if [[ -z "${workerNodes}" ]]; then + log "No worker nodes found" + else + log "Scaling down worker nodes ${workerNodes}" + gcloud container clusters resize "${GCP_K8S_CLUSTER_NAME}" --node-pool "${GCP_WORKER_POOL_NAME}" --num-nodes 0 --location "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" --quiet & + fi + log "Waiting for nodes to be deleted" + wait + kubectl wait --for=delete node -l'citus-role=coordinator' --timeout=-1s + kubectl wait --for=delete node -l'citus-role=worker' --timeout=-1s + fi +} + +COMMON_NAMESPACE="${COMMON_NAMESPACE:-common}" +HELM_RELEASE_NAME="${HELM_RELEASE_NAME:-mirror}" +CURRENT_CONTEXT="$(kubectl config current-context)" +CITUS_CLUSTERS="$(getCitusClusters)" 
+AUTO_UNROUTE="${AUTO_UNROUTE:-true}" +GCP_COORDINATOR_POOL_NAME="${GCP_COORDINATOR_POOL_NAME:-citus-coordinator}" +GCP_WORKER_POOL_NAME="${GCP_WORKER_POOL_NAME:-citus-worker}" \ No newline at end of file diff --git a/docs/runbook/scripts/volume-snapshot.sh b/docs/runbook/scripts/volume-snapshot.sh new file mode 100755 index 00000000000..f2e69cce31c --- /dev/null +++ b/docs/runbook/scripts/volume-snapshot.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source ./utils.sh + +GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")" +if [[ -z "${GCP_PROJECT}" ]]; then + log "GCP_PROJECT is not set and is required. Exiting" + exit 1 +else + gcloud projects describe "${GCP_PROJECT}" > /dev/null +fi + +DISK_PREFIX="$(readUserInput "Enter the disk prefix of target cluster (value of zfs.init.diskPrefix in values.yaml): ")" +if [[ -z "${DISK_PREFIX}" ]]; then + log "DISK_PREFIX can not be empty. Exiting" + exit 1 +fi + +log "Finding disks with prefix ${DISK_PREFIX}" +DISKS_TO_SNAPSHOT=$(gcloud compute disks list --project "${GCP_PROJECT}" --filter="name~${DISK_PREFIX}.*-zfs" --format="json(name, sizeGb, users, zone)") +if [[ "${DISKS_TO_SNAPSHOT}" == "[]" ]]; then + log "No disks found for prefix. Exiting" + exit 1 +fi + +DISK_NAMES=($(echo $DISKS_TO_SNAPSHOT | jq -r '.[].name')) +log "Will snapshot disks ${DISK_NAMES[*]}" +doContinue + +ZFS_VOLUMES=$(getZFSVolumes) + +NAMESPACES=($(echo $ZFS_VOLUMES | jq -r '.[].namespace'| tr ' ' '\n' | sort -u | tr '\n' ' ')) + +log "Will spin down importer and citus in the namespaces (${NAMESPACES[*]}) for context ${CURRENT_CONTEXT}" +doContinue + +for namespace in "${NAMESPACES[@]}" +do + unrouteTraffic "${namespace}" + pauseCitus "${namespace}" +done + +EPOCH_SECONDS=$(date +%s) +for diskName in "${DISK_NAMES[@]}" +do + DISK_NODE_ID=${diskName#"$DISK_PREFIX"-} + DISK_NODE_ID=${DISK_NODE_ID%"-zfs"} + NODE_VOLUMES=$(echo "${ZFS_VOLUMES}" | + jq -r --arg NODE_ID "${DISK_NODE_ID}" 'map(select(.nodeId == $NODE_ID))') + SNAPSHOT_DESCRIPTION=$(echo -e "${CITUS_CLUSTERS}\n${NODE_VOLUMES}" | + jq -r -s --arg NODE_ID "${DISK_NODE_ID}" '.[0] as $clusters | + .[1] as $volumes | + $volumes | + map(. 
as $volume| + $clusters[]| + select(.pvcName == $volume.pvcName and .namespace == $volume.namespace)| + { + pvcName: $volume.pvcName, + volumeName: $volume.volumeName, + pvcSize: $volume.pvcSize, + namespace: $volume.namespace, + primary: .primary, + pgVersion: .pgVersion + })') + SNAPSHOT_NAME="${diskName}-${EPOCH_SECONDS}" + SNAPSHOT_REGION=$(echo "${DISK_NODE_ID}" | cut -d '-' -f 2-3) + DISK_ZONE=$(echo "${DISK_NODE_ID}" | cut -d '-' -f 2-4) + log "Creating snapshot ${SNAPSHOT_NAME} for ${diskName} with ${SNAPSHOT_DESCRIPTION} in ${SNAPSHOT_REGION}" + gcloud compute snapshots create "${SNAPSHOT_NAME}" --project="${GCP_PROJECT}" --source-disk="${diskName}" --source-disk-zone="${DISK_ZONE}" --storage-location="${SNAPSHOT_REGION}" --description="${SNAPSHOT_DESCRIPTION}" & +done +log "Waiting for snapshots to finish" +wait +log "Snapshots finished" + +for namespace in "${NAMESPACES[@]}" +do + unpauseCitus "${namespace}" + routeTraffic "${namespace}" +done \ No newline at end of file diff --git a/docs/runbook/upgrade-k8s-version-citus-nodepool.md b/docs/runbook/upgrade-k8s-version-citus-nodepool.md new file mode 100644 index 00000000000..897c939524d --- /dev/null +++ b/docs/runbook/upgrade-k8s-version-citus-nodepool.md @@ -0,0 +1,20 @@ +# Upgrade K8s Version for Citus Node Pool(s) + +## Problem + +Need to update k8s version for Citus Node Pool(s) + +## Prerequisites + +- Have `jq` installed +- The kubectl context is set to the cluster you want to upgrade +- All bash commands assume your working directory is `docs/runbook/scripts` + +## Solution + +1. Follow the steps to [create a disk snapshot for Citus cluster](./create-disk-snapshot-for-citus-cluster.md) + to backup the current cluster data +2. Run + ```bash + ./upgrade-k8s-version-citus.sh + ```
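+3. Optionally verify the upgrade. The command below is a simple sanity check (not part of the
+   script): it lists each node with its node pool label and kubelet version so you can confirm
+   the Citus pools report the new Kubernetes version:
+   ```bash
+   kubectl get nodes -L cloud.google.com/gke-nodepool
+   ```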