Skip to content

Commit

Permalink
Merge pull request #1300 from arooshap/mongodb
Browse files Browse the repository at this point in the history
MongoDBAsAService helm chart and docker file.
  • Loading branch information
muhammadimranfarooqi authored Jan 26, 2023
2 parents a179b3b + 766f776 commit 39b1471
Show file tree
Hide file tree
Showing 20 changed files with 1,482 additions and 0 deletions.
5 changes: 5 additions & 0 deletions docker/mongodb/source/backup/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Backup job image: CERN cloud client tooling (openstack CLI) plus the
# volume-snapshot script executed by the Kubernetes CronJob (cron.yaml).
FROM registry.cern.ch/cloud/ciadm:v0.4.0

WORKDIR /root

# COPY sources are relative to the build context — drop the misleading
# leading slash. Ensure the script is executable, since the CronJob
# invokes it directly as /root/backup.sh.
COPY backup.sh /root
RUN chmod +x /root/backup.sh
5 changes: 5 additions & 0 deletions docker/mongodb/source/backup/backup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Create an OpenStack snapshot of the MongoDB data volume.
# Expects VOLUME_NAME and SNAPSHOT_NAME in the environment (set by the
# CronJob spec) and OpenStack credentials in /sec/cmsweb-openrc.sh
# (mounted from the openstack-secrets Secret).

# Fail fast on command errors, unset variables and failed pipe stages;
# with set -u a missing VOLUME_NAME/SNAPSHOT_NAME aborts immediately
# instead of silently creating a bogus snapshot.
set -euo pipefail

source /sec/cmsweb-openrc.sh

# Sanity check that the sourced credentials actually work before
# attempting the snapshot.
openstack coe cluster list

# --force allows snapshotting a volume that is still attached/in-use.
openstack volume snapshot create --volume "$VOLUME_NAME" "$SNAPSHOT_NAME" --force
30 changes: 30 additions & 0 deletions docker/mongodb/source/backup/cron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CronJob that snapshots the MongoDB data volume at minute 5 of every hour
# by running /root/backup.sh (see docker/mongodb/source/backup/).
apiVersion: batch/v1
kind: CronJob
metadata:
  name: backup-cron
spec:
  schedule: "5 * * * *"
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: mongo-backup
            # NOTE(review): no tag means :latest is pulled implicitly;
            # pin a tag so backups run a reproducible image.
            image: registry.cern.ch/cmsweb/mongo-backup
            env:
            # Volume to snapshot; "pvc-name" is a placeholder to override
            # per deployment.
            - name: VOLUME_NAME
              value: pvc-name
            - name: SNAPSHOT_NAME
              value: mongodb_prod_snapshot
            args:
            - /bin/sh
            - -c
            - /root/backup.sh
            volumeMounts:
            # backup.sh sources /sec/cmsweb-openrc.sh from this mount.
            - name: openstack-secrets
              mountPath: /sec
          restartPolicy: Never
          volumes:
          - name: openstack-secrets
            secret:
              secretName: openstack-secrets
10 changes: 10 additions & 0 deletions docker/mongodb/source/dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# MongoDB replica-set member image. MONGODB_ID selects which startup
# script runs (startup-mongo-0.sh / -1.sh / -2.sh), so one image serves
# all three members.

# NOTE(review): the original untagged FROM pulls mongo:latest; the ARG
# keeps that default behavior while letting CI pin a version at build
# time (--build-arg MONGO_TAG=x.y).
ARG MONGO_TAG=latest
FROM mongo:${MONGO_TAG}

WORKDIR /root

# Which member's startup script to execute; overridden per pod.
ENV MONGODB_ID=mongo-0

# iproute2 provides `ip`, used by the startup scripts to discover the
# pod IP. update+install combined in one layer (stale-cache bug
# otherwise) with --no-install-recommends and list cleanup to keep the
# image small; apt-get instead of apt (apt has no stable CLI).
RUN apt-get update \
    && apt-get install -y --no-install-recommends iproute2 \
    && rm -rf /var/lib/apt/lists/*

COPY startup-script-mongo /root

# A shell is required to expand $MONGODB_ID; `exec` makes mongod's
# wrapper script replace the shell so signals from `docker stop` reach
# the process tree.
CMD ["/bin/sh", "-c", "exec ./startup-$MONGODB_ID.sh"]
18 changes: 18 additions & 0 deletions docker/mongodb/source/startup-script-mongo/initialize-mongo-rs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# Initiate the MongoDB replica set with three members reached via
# NodePorts 32001-32003 on the same host.
# Expects NODE_HOSTNAME and RS_NAME in the environment.

echo "Executing initialize-mongo-rs.sh"

# --eval pre-defines `mongodb` (member addresses) and `rsname` as shell
# variables inside the mongo session; --shell keeps the session open so
# the heredoc below executes in that same session. Member 0 gets the
# highest election priority (1 > 0.9 > 0.5).
mongo --eval "mongodb = ['$NODE_HOSTNAME:32001', '$NODE_HOSTNAME:32002', '$NODE_HOSTNAME:32003'], rsname = '$RS_NAME'" --shell << EOL
cfg = {
_id: rsname,
members:
[
{_id : 0, host : mongodb[0], priority : 1},
{_id : 1, host : mongodb[1], priority : 0.9},
{_id : 2, host : mongodb[2], priority : 0.5}
]
}
rs.initiate(cfg)
EOL

# Create the admin users in the background once a primary is elected.
/root/initialize-users.sh &
34 changes: 34 additions & 0 deletions docker/mongodb/source/startup-script-mongo/initialize-users.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Create the MongoDB admin users (usersAdmin, clusterAdmin) once the
# replica set has elected a primary. Runs on every member; only the
# node that is currently primary actually creates the users.
# Expects MONGODB_ADMIN_PASSWORD in the environment.

echo "Executing initialize-users.sh"

# Wait until rs.status() reports ok == 1 AND at least one of the three
# members is in state 1 (PRIMARY).
# NOTE(review): this loop has no retry cap, so it spins forever if the
# replica set never comes up — confirm that is intended (contrast with
# the capped loop in reconfig-mongo-rs.sh).
while [[ ( "$(mongo --quiet --eval "rs.status().ok")" != "1" ) || ! ( "$(mongo --quiet --eval "rs.status().members[0].state")" == "1" || "$(mongo --quiet --eval "rs.status().members[1].state")" == "1" || "$(mongo --quiet --eval "rs.status().members[2].state")" == "1" ) ]]
do
echo "MongoDB not ready for user creation, retrying in 5 seconds..."
sleep 5
done

# Users may only be created on the primary; secondaries just log and exit.
if [[ "$(mongo --quiet --eval "db.isMaster().ismaster")" == "true" ]]
then
echo "Primary node found, creating users"
# `adminpass` is injected into the mongo session via --eval and used as
# the password for both users created in the heredoc below; the second
# user is created after authenticating as the first.
mongo --eval "adminpass = '$MONGODB_ADMIN_PASSWORD'" --shell << EOL
use admin
db.createUser(
{
user: "usersAdmin",
pwd: adminpass,
roles: [ { role: "userAdminAnyDatabase", db: "admin" } ]
}
)
db.auth("usersAdmin", adminpass)
db.getSiblingDB("admin").createUser(
{
"user" : "clusterAdmin",
"pwd" : adminpass,
roles: [ { "role" : "clusterAdmin", "db" : "admin" } ]
}
)
EOL
else
echo "Replica Set not primary..."
fi
27 changes: 27 additions & 0 deletions docker/mongodb/source/startup-script-mongo/reconfig-mongo-rs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# After a pod (re)start, check whether this node belongs to the expected
# replica set ($RS_NAME); if not, re-run the initialization script.
retryCount=0

echo "Checking MongoDB status:"

# NOTE(review): this polls while rs.status().ok != 0 — i.e. it keeps
# waiting while mongod is unreachable OR already reports an initialized
# set, and proceeds as soon as ok == 0 (no replica set configured yet).
# The 30-retry cap (~150s) lets an already-healthy set fall through to
# the setName check below. Preserved as-is; confirm the inverted-looking
# condition is intentional before changing it.
while [[ "$(mongo --quiet --eval "rs.status().ok")" != "0" ]]
do
    if [ "$retryCount" -gt 30 ]
    then
        echo "Retry count > 30, breaking out of while loop now..."
        break
    fi
    echo "MongoDB not ready for Replica Set configuration, retrying in 5 seconds..."
    sleep 5
    retryCount=$((retryCount+1))
done

# Give mongod a moment to settle before querying the set name.
sleep 5

# Both sides quoted so an empty setName or RS_NAME cannot break the test.
# (Log messages also fix the original "reconfiguratoin" typo.)
if [[ "$(mongo --quiet --eval "db.isMaster().setName")" != "$RS_NAME" ]]
then
    echo "Replica Set reconfiguration failed..."
    echo "Reinitializing Replica Set..."
    /root/initialize-mongo-rs.sh &
else
    echo "Replica Set reconfiguration successful..."
fi
6 changes: 6 additions & 0 deletions docker/mongodb/source/startup-script-mongo/startup-mongo-0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Start replica-set member 0 (the member that drives RS initialization).
# Expects RS_NAME in the environment and the replica-set keyfile mounted
# at /etc/secrets/mongokeyfile.

# Dedicated dbpath per member.
mkdir -p /data/db/rs-0

# Pod IP taken from eth0 (iproute2) so mongod binds the pod-network
# address in addition to localhost.
export POD_IP_ADDRESS=$(ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)

# Background: (re)initialize / re-join the replica set once mongod is up.
/root/reconfig-mongo-rs.sh &

# exec so mongod replaces the shell as the container's main process and
# receives SIGTERM directly on pod shutdown.
exec mongod --replSet "$RS_NAME" --port 27017 --bind_ip localhost,"$POD_IP_ADDRESS" --dbpath /data/db/rs-0 --oplogSize 128 --keyFile /etc/secrets/mongokeyfile
6 changes: 6 additions & 0 deletions docker/mongodb/source/startup-script-mongo/startup-mongo-1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Start replica-set member 1 (secondary). Expects RS_NAME in the
# environment and the replica-set keyfile at /etc/secrets/mongokeyfile.
# Replica-set initialization and user creation are driven from member 0.

# Dedicated dbpath per member.
mkdir -p /data/db/rs-1

# Pod IP taken from eth0 (iproute2) so mongod binds the pod-network
# address in addition to localhost.
export POD_IP_ADDRESS=$(ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)

# exec so mongod replaces the shell as the container's main process and
# receives SIGTERM directly on pod shutdown.
exec mongod --replSet "$RS_NAME" --port 27017 --bind_ip localhost,"$POD_IP_ADDRESS" --dbpath /data/db/rs-1 --oplogSize 128 --keyFile /etc/secrets/mongokeyfile
6 changes: 6 additions & 0 deletions docker/mongodb/source/startup-script-mongo/startup-mongo-2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Start replica-set member 2 (secondary). Expects RS_NAME in the
# environment and the replica-set keyfile at /etc/secrets/mongokeyfile.
# Replica-set initialization and user creation are driven from member 0.

# Dedicated dbpath per member.
mkdir -p /data/db/rs-2

# Pod IP taken from eth0 (iproute2) so mongod binds the pod-network
# address in addition to localhost.
export POD_IP_ADDRESS=$(ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)

# exec so mongod replaces the shell as the container's main process and
# receives SIGTERM directly on pod shutdown.
exec mongod --replSet "$RS_NAME" --port 27017 --bind_ip localhost,"$POD_IP_ADDRESS" --dbpath /data/db/rs-2 --oplogSize 128 --keyFile /etc/secrets/mongokeyfile
23 changes: 23 additions & 0 deletions helm/mongodb/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions helm/mongodb/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: v2
name: mongo-replicaset
description: A Helm chart for launching a MongoDB ReplicaSet with 3 instances
type: application
version: 1.0.0
appVersion: 1.0.0
179 changes: 179 additions & 0 deletions helm/mongodb/files/prometheus-secrets/k8s.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
groups:
- name: k8s
rules:

- alert: KubernetesNodeReady
expr: kube_node_status_condition{condition="Ready",status="true"} == 0
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes Node not ready (instance {{ $labels.instance }})"
description: "Node {{ $labels.node }} has been unready for a long time\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesMemoryPressure
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes memory pressure (instance {{ $labels.instance }})"
description: "{{ $labels.node }} has MemoryPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesDiskPressure
expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes disk pressure (instance {{ $labels.instance }})"
description: "{{ $labels.node }} has DiskPressure condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesOutOfDisk
expr: kube_node_status_condition{condition="OutOfDisk",status="true"} == 1
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes out of disk (instance {{ $labels.instance }})"
description: "{{ $labels.node }} has OutOfDisk condition\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesJobFailed
expr: kube_job_status_failed > 0
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes Job failed (instance {{ $labels.instance }})"
description: "Job {{$labels.namespace}}/{{$labels.exported_job}} failed to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesCronjobSuspended
expr: kube_cronjob_spec_suspend != 0
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes CronJob suspended (instance {{ $labels.instance }})"
description: "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is suspended\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesPersistentvolumeclaimPending
expr: kube_persistentvolumeclaim_status_phase{phase="Pending"} == 1
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes PersistentVolumeClaim pending (instance {{ $labels.instance }})"
description: "PersistentVolumeClaim {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is pending\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesVolumeOutOfDiskSpace
expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 10
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes Volume out of disk space (instance {{ $labels.instance }})"
description: "Volume is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesVolumeFullInFourDays
expr: predict_linear(kubelet_volume_stats_available_bytes[6h], 4 * 24 * 3600) < 0
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes Volume full in four days (instance {{ $labels.instance }})"
description: "{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesPersistentvolumeError
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes PersistentVolume error (instance {{ $labels.instance }})"
description: "Persistent volume is in bad state\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesHpaScalingAbility
expr: kube_hpa_status_condition{condition="AbleToScale", status="false"} == 1
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes HPA scaling ability (instance {{ $labels.instance }})"
description: "Pod is unable to scale\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

# - alert: KubernetesPodNotHealthy
# expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{phase=~"Pending|Unknown|Failed"})[1h:]) > 0
# for: 5m
# labels:
# severity: critical
# tag: mongodb-k8s-cluster
# kind: mongodb-cluster
# annotations:
# summary: "Kubernetes Pod not healthy (instance {{ $labels.instance }})"
# description: "Pod has been in a non-ready state for longer than an hour.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesPodCrashLooping
expr: rate(kube_pod_container_status_restarts_total[15m]) * 60 * 5 > 5
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes pod crash looping (instance {{ $labels.instance }})"
description: "Pod {{ $labels.pod }} is crash looping\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesApiServerErrors
expr: sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[2m])) / sum(rate(apiserver_request_count{job="apiserver"}[2m])) * 100 > 3
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes API server errors (instance {{ $labels.instance }})"
description: "Kubernetes API server is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesApiClientErrors
expr: (sum(rate(rest_client_requests_total{code=~"(4|5).."}[2m])) by (instance, job) / sum(rate(rest_client_requests_total[2m])) by (instance, job)) * 100 > 1
for: 5m
labels:
severity: critical
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes API client errors (instance {{ $labels.instance }})"
description: "Kubernetes API client is experiencing high error rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

- alert: KubernetesApiServerLatency
expr: histogram_quantile(0.99, sum(apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"}) WITHOUT (instance, resource)) / 1e+06 > 1
for: 5m
labels:
severity: warning
tag: mongodb-k8s-cluster
kind: mongodb-cluster
annotations:
summary: "Kubernetes API server latency (instance {{ $labels.instance }})"
description: "Kubernetes API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
Loading

0 comments on commit 39b1471

Please sign in to comment.