Add Various Citus Runbooks (#9398)
* Add citus runbooks 

---------

Signed-off-by: Jesse Nelson <[email protected]>
jnels124 authored Oct 31, 2024
1 parent 613b069 commit d142677
Showing 11 changed files with 1,025 additions and 34 deletions.
56 changes: 22 additions & 34 deletions charts/hedera-mirror-common/templates/zfs/configmap-init.yaml
@@ -10,22 +10,21 @@ data:
label-wait.sh: |
#!/usr/bin/env bash
set -euxo pipefail
ROOT_MOUNT_DIR="${ROOT_MOUNT_DIR:-/node}"
ZFS_INITIALIZED="${ZFS_INITIALIZED:-}"
until [ "${ZFS_INITIALIZED}" != "" ]; do echo "Waiting for label "; sleep 10; source "${ROOT_MOUNT_DIR}/etc/environment"; done
entrypoint.sh: |
#!/usr/bin/env bash
set -euxo pipefail
# Constants
NODE_ID_LABEL="openebs.io/nodeid"
DISK_SIZE_LABEL="pool-disk-size"
ROLE_LABEL="citus-role"
ENV_FILE="${ROOT_MOUNT_DIR}/etc/environment"
# Configurable environment variables
CONFIG_MAP_NAME="{{ include "hedera-mirror-common.fullname" . }}-zfs-node-status"
CONFIG_MAP_KEY="zfs-node-status.json"
@@ -44,6 +43,12 @@ data:
NODE_LABELS="$(kubectl get node ${NODE_NAME} -o jsonpath='{.metadata.labels}')"
CITUS_ROLE="$(echo "${NODE_LABELS}" | jq --arg ROLE_LABEL "${ROLE_LABEL}" -r '.[$ROLE_LABEL]')"
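# Pick the default disk size based on the node's Citus role (worker vs. coordinator)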
if [[ "$CITUS_ROLE" == *"worker"* ]]; then
DISK_SIZE="${DEFAULT_DISK_SIZE_WORKER}"
else
DISK_SIZE="${DEFAULT_DISK_SIZE_COORDINATOR}"
fi
ATTEMPT_COUNT=1
while [[ "${CITUS_ROLE}" == "null" ]]
do
@@ -72,17 +77,15 @@ data:
ZONE_CONFIG="$(echo "${MODIFIED_CONFIG_MAP}" |jq --arg KEY "${CONFIG_MAP_KEY}" --arg ZONE "${ZONE}" --arg CITUS_ROLE "${CITUS_ROLE}" '.data[$KEY][$CITUS_ROLE][$ZONE] // []')"
NODE_LIST="$(kubectl get nodes -o jsonpath='{.items[*].metadata.labels}' -lcsi-type=zfs,topology.kubernetes.io/zone=${ZONE},${ROLE_LABEL}=${CITUS_ROLE} | jq -s)"
NODE_NAMES="$(echo "${NODE_LIST}" | jq '. | map({(.["kubernetes.io/hostname"]): {isUpgrade: (.["operation.gke.io/type"] == "drain")}}) | add')"
## This will find all nodes that have been initialized by this script in the past
## and generate its own version of the config map configuration to merge with the stored config map
ZFS_ZONE_NODES=$(echo "${NODE_LIST}" | jq \
--arg NODE_ID_LABEL "${NODE_ID_LABEL}" \
--arg DISK_SIZE_LABEL "${DISK_SIZE_LABEL}" \
'. | map(select(.["generated-node-id"] == "true")) |
map({
"nodeName": (.["kubernetes.io/hostname"]),
"nodeId": (.[$NODE_ID_LABEL]),
"diskSize": (.[$DISK_SIZE_LABEL]),
"index": (.[$NODE_ID_LABEL] | match("\\d*$").string | tonumber)
}) |
sort_by(.index) |
@@ -124,21 +127,16 @@ data:
THIS_NODE_CONFIG="$(echo "${MERGED_ZONE_CONFIG}" |jq --arg NODE_NAME "${NODE_NAME}" --arg ZONE "${ZONE}" '.[]?|select(.nodeName == $NODE_NAME)')"
# If there is no entry in the config map, and the node wasn't previously labeled, it is a new node. Create the config.
if [[ ! $THIS_NODE_CONFIG ]]
then
if [[ "$CITUS_ROLE" == *"worker"* ]]; then
DISK_SIZE="${DEFAULT_DISK_SIZE_WORKER}"
else
DISK_SIZE="${DEFAULT_DISK_SIZE_COORDINATOR}"
fi
then
INDEX="$(echo "${MERGED_ZONE_CONFIG}" | jq '. | map(. == null) |index(true) // . | length')"
NODE_ID="${CITUS_ROLE}-${ZONE}-${INDEX}"
THIS_NODE_CONFIG="{\"nodeName\": \"$NODE_NAME\", \"nodeId\": \"$NODE_ID\", \"diskSize\": \"$DISK_SIZE\"}"
THIS_NODE_CONFIG="{\"nodeName\": \"$NODE_NAME\", \"nodeId\": \"$NODE_ID\"}"
MERGED_ZONE_CONFIG="$(echo "${MERGED_ZONE_CONFIG}" | jq --arg INDEX $INDEX --arg THIS_NODE_CONFIG "${THIS_NODE_CONFIG}" '.[$INDEX |tonumber] = ($THIS_NODE_CONFIG|fromjson)')"
fi
MODIFIED_CONFIG_MAP="$(echo "${MODIFIED_CONFIG_MAP}" | jq --arg KEY "${CONFIG_MAP_KEY}" --arg CITUS_ROLE "${CITUS_ROLE}" --arg ZONE "${ZONE}" --arg ZONE_CONFIG "${MERGED_ZONE_CONFIG}" '.data[$KEY][$CITUS_ROLE][$ZONE]=($ZONE_CONFIG|fromjson)')"
MODIFIED_CONFIG_MAP="$(echo "${MODIFIED_CONFIG_MAP}" | jq --arg KEY "${CONFIG_MAP_KEY}" '.data[$KEY]=(.data[$KEY]|tojson)')"
echo "patching $CONFIG_MAP_NAME with $MODIFIED_CONFIG_MAP"
if (kubectl patch configmap $CONFIG_MAP_NAME -n $K8S_NAMESPACE --type merge -p "${MODIFIED_CONFIG_MAP}")
then
@@ -149,18 +147,15 @@ data:
echo "Conflict patching config map. Retrying ..."
fi
done
NODE_ID="$(echo "${THIS_NODE_CONFIG}" | jq -r '.nodeId')"
if [[ "${NODE_ID}" == "null" ]]
then
echo "Unable to determine correct node id. Exiting ..."
exit 1
fi
DISK_NAME="${DISK_PREFIX}-${NODE_ID}-zfs"
DISK_SIZE="$(echo "${THIS_NODE_CONFIG}" | jq -r '.diskSize')"
echo "Setting up disk ${DISK_NAME} for ${CITUS_ROLE} on zfs node ${NODE_ID} with size ${DISK_SIZE}"
ACTUAL_SIZE=$(gcloud compute disks list --filter="name:${DISK_NAME}" --format="value(sizeGb)")
if [[ -z "${ACTUAL_SIZE}" ]]; then
echo "Creating ${DISK_NAME} for ${CITUS_ROLE} with size ${DISK_SIZE}"
@@ -209,7 +204,7 @@ data:
echo "Unable to create pool. Manual intervention necessary"
exit 1
fi
ATTACHED_CACHE_DEVICE="$(zpool status "${POOL}" |grep -A1 -E '^\s*cache\s*$' | grep -v cache | awk '{print $1;}')"
if [[ -n "${L2_ARC_NVME_DEVICE_ID}" ]]; then
NVME_DEVICE_PATH="$(readlink -f /dev/disk/by-id/google-local-ssd-block${L2_ARC_NVME_DEVICE_ID})"
@@ -239,7 +234,7 @@ data:
else
echo "No L2 cache device specified. Skipping ..."
fi
ARC_SIZE_GB="${ARC_SIZE_GB:-2}"
echo "Configuring arc to ${ARC_SIZE_GB}GB"
ARC_SIZE="$((ARC_SIZE_GB*1073741824))"
@@ -250,25 +245,18 @@ data:
zpool online -e "${POOL}" /dev/sdb
zfs list
EOF
CURRENT_NODE_ID="$(echo "${NODE_LABELS}" | jq --arg NODE_ID_LABEL "${NODE_ID_LABEL}" -r '.[$NODE_ID_LABEL]')"
if [[ "${CURRENT_NODE_ID}" != "${NODE_ID}" ]]
then
echo "Labeling node ${NODE_NAME} with ${NODE_ID_LABEL}=${NODE_ID}"
kubectl label node "${NODE_NAME}" "${NODE_ID_LABEL}=${NODE_ID}" generated-node-id=true --overwrite
fi
CURRENT_DISK_SIZE="$(echo "${NODE_LABELS}" | jq --arg DISK_SIZE_LABEL "${DISK_SIZE_LABEL}" -r '.[$DISK_SIZE_LABEL]')"
if [[ "${CURRENT_DISK_SIZE}" != "${DISK_SIZE}" || -z "${ACTUAL_SIZE}" ]];
then
echo "Labeling node ${NODE_NAME} with ${DISK_SIZE_LABEL}=${DISK_SIZE}"
kubectl label node "${NODE_NAME}" "${DISK_SIZE_LABEL}=${DISK_SIZE}" --overwrite
fi
source "${ENV_FILE}"
ZFS_INITIALIZED="${ZFS_INITIALIZED:-}"
if [[ "${ZFS_INITIALIZED}" == "" ]]; then
echo "ZFS_INITIALIZED=\"true\"" > "${ENV_FILE}"
fi
{{- end -}}
{{- end -}}
20 changes: 20 additions & 0 deletions docs/runbook/change-citus-node-pool-machine-type.md
@@ -0,0 +1,20 @@
# Change Machine Type for Citus Node Pool(s)

## Problem

Need to change the machine type for one or more Citus node pools.

## Prerequisites

- Have `jq` installed
- The kubectl context is set to the cluster whose node pools you want to change (see the check after this list)
- All bash commands assume your working directory is `docs/runbook/scripts`
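
A minimal sketch for confirming the kubectl and gcloud targets before running anything (assumes both CLIs are
already authenticated; not part of the original runbook):

```bash
# Show the cluster kubectl currently points at and the active gcloud project
kubectl config current-context
gcloud config get-value project
```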

## Solution

1. Follow the steps to [create a disk snapshot for the Citus cluster](./create-disk-snapshot-for-citus-cluster.md)
   to back up the current cluster data
2. Run
```bash
./change-machine-type.sh
```
19 changes: 19 additions & 0 deletions docs/runbook/create-disk-snapshot-for-citus-cluster.md
@@ -0,0 +1,19 @@
# Create Disk Snapshot for Citus Cluster

## Problem

Need to create disk snapshots for Citus cluster(s)

## Prerequisites

- Have access to a running Citus cluster deployed by the `hedera-mirror` chart
- Have `jq` installed
- All bash commands assume your working directory is `docs/runbook/scripts`
- The kubectl context is set to the cluster you want to create snapshots from

## Solution

Run the script and follow the prompts:
```bash
./volume-snapshot.sh
```
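
Once the script completes, one hedged way to sanity-check that the snapshots were created (the snapshot names depend
on the answers given to the prompts, so no name filter is assumed here):

```bash
# List the most recently created disk snapshots in the active project
gcloud compute snapshots list --sort-by=~creationTimestamp --limit=10
```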
77 changes: 77 additions & 0 deletions docs/runbook/increase-zfs-disksize.md
@@ -0,0 +1,77 @@
# Increase ZFS Disk Size

## Problem

The PVC for a shard is running out of space and needs to be increased beyond the current capacity of the disk.

## Prerequisites

- Have `jq` installed
- The kubectl context is set to the cluster containing the disks you want to resize

## Solution

1. Identify the worker (and/or coordinator) PVC(s) that need to be resized
```bash
kubectl get pv -o \
custom-columns='PVC_NAME:.spec.claimRef.name,PV_NAME:.metadata.name,CAPACITY:..spec.capacity.storage,NODE_ID:.spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0]' \
--sort-by=.spec.capacity.storage
```
Example output
```text
PVC_NAME PV_NAME CAPACITY NODE_ID
sentinel-data-mirror-redis-node-1 pvc-9d9da6c6-f6e4-45a3-91cf-61d47e50dcd9 1Gi us-central1-f
sentinel-data-mirror-redis-node-2 pvc-4b25a1b7-c34a-4d1f-8af3-bfcfe908bd99 1Gi us-central1-c
sentinel-data-mirror-redis-node-0 pvc-531e97b6-d4d6-4023-a2dc-847a2fac75dd 1Gi us-central1-b
redis-data-mirror-redis-node-0 pvc-7638c7ba-2ffe-4bb7-a09d-995e4d09b3a4 8Gi us-central1-b
redis-data-mirror-redis-node-1 pvc-edc9ed5a-03b4-48eb-86b0-49def5c1af1f 8Gi us-central1-f
redis-data-mirror-redis-node-2 pvc-638cab0b-ed6c-49b0-a61b-6893a5f3415f 8Gi us-central1-c
prometheus-mirror-prometheus-prometheus-db-prometheus-mirror-prometheus-prometheus-0 pvc-4745d425-fb1d-4af3-85c6-272cff98dcb8 100Gi us-central1-b
storage-mirror-loki-0 pvc-768215f5-30e3-4253-95ea-a82fb733207e 250Gi us-central1-b
mirror-citus-coord-data-mirror-citus-coord-0 pvc-6501aa41-f238-447b-b21b-7d91a36b8f02 256Gi coordinator-us-central1-c-0
mirror-citus-coord-data-mirror-citus-coord-1 pvc-78ef76d9-ea31-49b3-a9b6-559a3ec5cd9f 256Gi coordinator-us-central1-b-0
mirror-citus-shard2-data-mirror-citus-shard2-0 pvc-49d46894-51a0-4a97-b2da-e9c003e382f2 3200Gi worker-us-central1-b-0
mirror-citus-shard0-data-mirror-citus-shard0-0 pvc-5dd58b07-db59-4c3a-882f-dcd7467dfd49 10000Gi worker-us-central1-c-0
mirror-citus-shard1-data-mirror-citus-shard1-0 pvc-f9b980a9-0771-4222-9034-bd44279ddde8 12000Gi worker-us-central1-f-0
```
2. Using the `NODE_ID` from the previous step, increase the disk size for each disk that needs it (a worked example follows the command below)
```text
diskPrefix - value of zfs.init.diskPrefix in values.yaml
diskName - {diskPrefix}-{nodeId}-zfs
zone - extracted from the `nodeId`
diskSize - the new size of the disk in GB
```
```bash
gcloud compute disks resize "{diskName}" --size="{diskSize}" --zone="{zone}"
```
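
For example, assuming `zfs.init.diskPrefix` is `mirror-citus` (an assumed value; use whatever your values.yaml sets)
and the target from the output above is `worker-us-central1-f-0`, growing its disk to 14000 GB would look like:

```bash
# Hypothetical values: diskPrefix=mirror-citus, nodeId=worker-us-central1-f-0, new size 14000 GB
gcloud compute disks resize "mirror-citus-worker-us-central1-f-0-zfs" --size=14000 --zone="us-central1-f"
```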
3. Restart the zfs init pods
```bash
kubectl rollout restart daemonset -n common mirror-zfs-init
```
4. Verify the pool size has been increased
```bash
kubectl get pods -n common -l component=openebs-zfs-node -o json |
jq -r '.items[].metadata.name' |
xargs -I % kubectl exec -c openebs-zfs-plugin -n common % -- zfs list
```
5. Update the `hedera-mirror` chart's `values.yaml` to reflect the new disk size
```yaml
stackgres:
coordinator:
persistentVolume:
size: 256Gi
worker:
overrides:
- index: 0
pods:
persistentVolume:
size: 12000Gi
- index: 1
pods:
persistentVolume:
size: 14000Gi
- index: 2
pods:
persistentVolume:
size: 3200Gi
```
6. Deploy the changes. Be sure to leave wiggle room for ZFS rounding; see the
   [zfs-localpv FAQ](https://github.com/openebs/zfs-localpv/blob/develop/docs/faq.md#7-why-the-zfs-volume-size-is-different-than-the-reqeusted-size-in-pvc) for details.
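
If the chart is managed directly with Helm (your environment may instead deploy through its usual CI/CD pipeline),
the deploy step might look roughly like the following; the release name, namespace, and values file are assumptions:

```bash
# Hypothetical release name, namespace, and chart path; adjust to your deployment
helm upgrade mirror charts/hedera-mirror -n mirror -f updated-values.yaml
```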
26 changes: 26 additions & 0 deletions docs/runbook/restore-citus-from-disk-snapshot.md
@@ -0,0 +1,26 @@
# Restore Citus Data From Disk Snapshots

## Problem

Need to restore Citus cluster from disk snapshots

## Prerequisites

- Snapshots of disks were created by following the [create snapshot](create-disk-snapshot-for-citus-cluster.md) runbook
- Have `jq` and `ksd` (Kubernetes Secret Decoder) installed
- The snapshots are from a compatible version of `postgres`
- The `target cluster` has a running Citus cluster deployed with `hedera-mirror` chart
- The `target cluster` you are restoring to doesn't have any PVCs with a size larger than the size of the PVC in the
  snapshot (you can't decrease the size of a PVC). If needed, you can delete the existing cluster in the `target cluster`
  and redeploy the `hedera-mirror` chart with the default disk sizes.
- If you have multiple Citus clusters in the `target cluster`, you will need to restore all of them
- All bash commands assume your working directory is `docs/runbook/scripts`
- Only a single Citus cluster is installed per namespace
- The kubectl context is set to the cluster you want to restore snapshots to (a quick check follows this list)
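
A quick, hedged way to verify the kubectl context and that disk snapshots are available before starting (a sketch
only; the snapshot names depend on how the snapshot runbook was run):

```bash
# Confirm which cluster kubectl points at and that a StackGres sharded cluster exists in it
kubectl config current-context
kubectl get sgshardedclusters.stackgres.io -A

# List recent disk snapshots in the project
gcloud compute snapshots list --sort-by=~creationTimestamp --limit=20
```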

## Steps

Run the script and follow the prompts:
```bash
./restore-volume-snapshot.sh
```
70 changes: 70 additions & 0 deletions docs/runbook/scripts/change-machine-type.sh
@@ -0,0 +1,70 @@
#!/usr/bin/env bash

set -euo pipefail

source ./utils.sh
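# utils.sh is expected to provide the helpers used below: log, readUserInput,
# unrouteTraffic, pauseCitus, resizeCitusNodePools, unpauseCitus and routeTraffic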

GCP_PROJECT="$(readUserInput "Enter GCP Project for target: ")"
if [[ -z "${GCP_PROJECT}" ]]; then
log "GCP_PROJECT is not set and is required. Exiting"
exit 1
else
gcloud projects describe "${GCP_PROJECT}" > /dev/null
fi

GCP_K8S_CLUSTER_REGION="$(readUserInput "Enter target cluster region: ")"
if [[ -z "${GCP_K8S_CLUSTER_REGION}" ]]; then
log "GCP_K8S_CLUSTER_REGION is not set and is required. Exiting"
exit 1
else
gcloud compute regions describe "${GCP_K8S_CLUSTER_REGION}" --project "${GCP_PROJECT}" > /dev/null
fi

GCP_K8S_CLUSTER_NAME="$(readUserInput "Enter target cluster name: ")"
if [[ -z "${GCP_K8S_CLUSTER_NAME}" ]]; then
log "GCP_K8S_CLUSTER_NAME is not set and is required. Exiting"
exit 1
else
gcloud container clusters describe --project "${GCP_PROJECT}" \
--region="${GCP_K8S_CLUSTER_REGION}" \
"${GCP_K8S_CLUSTER_NAME}" > /dev/null
fi

MACHINE_TYPE="$(readUserInput "Enter new machine type: ")"
if [[ -z "${MACHINE_TYPE}" ]]; then
log "MACHINE_TYPE is not set and is required. Exiting"
exit 1
fi

AVAILABLE_POOLS="$(gcloud container node-pools list --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json(name)"| jq -r '.[].name' | tr '\n' ' ')"
POOLS_TO_UPDATE_INPUT="$(readUserInput "Enter the node pools(${AVAILABLE_POOLS}) to update (space-separated): ")"
if [[ -z "${POOLS_TO_UPDATE_INPUT}" ]]; then
log "POOLS_TO_UPDATE_INPUT is not set and is required. Exiting"
exit 1
else
IFS=', ' read -r -a POOLS_TO_UPDATE <<< "${POOLS_TO_UPDATE_INPUT}"
for pool in "${POOLS_TO_UPDATE[@]}"; do
POOL_LOCATIONS=($(gcloud container node-pools describe "${pool}" --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --region="${GCP_K8S_CLUSTER_REGION}" --format="json" | jq -r '.locations[]'))
for location in "${POOL_LOCATIONS[@]}"; do
gcloud compute machine-types describe "${MACHINE_TYPE}" --project="${GCP_PROJECT}" --zone="${location}" > /dev/null
done
done
fi

# Quiesce every namespace running a StackGres sharded (Citus) cluster before modifying the node pools
NAMESPACES=($(kubectl get sgshardedclusters.stackgres.io -A -o jsonpath='{.items[*].metadata.namespace}'))
for namespace in "${NAMESPACES[@]}"
do
  unrouteTraffic "${namespace}"
  pauseCitus "${namespace}"
done
# Scale the Citus node pools down to zero (helper from utils.sh) so the machine type can be changed
resizeCitusNodePools 0
# Apply the new machine type to each selected node pool
for pool in "${POOLS_TO_UPDATE[@]}"
do
  gcloud container node-pools update "${pool}" --project="${GCP_PROJECT}" --cluster="${GCP_K8S_CLUSTER_NAME}" --location="${GCP_K8S_CLUSTER_REGION}" --machine-type="${MACHINE_TYPE}"
done
# Scale the node pools back up so the Citus pods can be rescheduled
resizeCitusNodePools 1
# Resume Citus and restore traffic routing in each namespace
for namespace in "${NAMESPACES[@]}"
do
  unpauseCitus "${namespace}"
  routeTraffic "${namespace}"
done