diff --git a/deployment-scenarios/cluster-restore.yaml b/deployment-scenarios/cluster-restore.yaml index 7ae63798..88a7b871 100644 --- a/deployment-scenarios/cluster-restore.yaml +++ b/deployment-scenarios/cluster-restore.yaml @@ -1,60 +1,31 @@ -# This scenario creates a cluster -# that automatically restores from a backup with the given -# parameters in the "initContainers" section. -# -# IN ORDER FOR THIS TO WORK - you must have pre-created a secret, and -# you must modify the BUCKET parameter, and make sure backups are in place -# to be restored. See the documentation for more information. +# See the documentation for more information. ############################################################### -restoreSecret: neo4j-service-key - +neo4jPassword: test +plugins: "[\"apoc\"]" core: - standalone: false - numberOfServers: 3 - initContainers: - - name: restore-from-backup - image: gcr.io/neo4j-helm/restore:4.1.0-1 - imagePullPolicy: Always - volumeMounts: - - name: datadir - mountPath: /data - # The statefulset will already have a volume named "creds" when restoreSecret is set. 
- # Make sure to mount this to /auth - - name: creds - mountPath: /auth - env: - - name: BUCKET - value: gs://graph-backup-storage - - name: DATABASE - value: neo4j,system - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /auth/credentials.json - - name: FORCE_OVERWRITE - value: "true" - + numberOfServers: 3 + restore: + enabled: true + image: gcr.io/neo4j-helm/restore + imageTag: 4.1.0-1 + secretName: neo4j-gcp-credentials #neo4j-aws-credentials + database: neo4j,system + cloudProvider: gcp #aws + bucket: gs://test-neo4j #s3://test-neo4j + timestamp: "latest" + forceOverwrite: true + purgeOnComplete: true readReplica: - numberOfServers: 1 - initContainers: - - name: restore-from-backup - image: gcr.io/neo4j-helm/restore:4.1.0-1 - imagePullPolicy: Always - volumeMounts: - # - name: datadir - # mountPath: /data - # The statefulset will already have a volume named "creds" when restoreSecret is set. - # Make sure to mount this to /auth - - name: creds - mountPath: /auth - env: - - name: BUCKET - value: gs://graph-backup-storage - - name: DATABASE - value: neo4j,system - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /auth/credentials.json - - name: FORCE_OVERWRITE - value: "true" - -acceptLicenseAgreement: "yes" -neo4jPassword: other + numberOfServers: 1 + restore: + enabled: true + image: gcr.io/neo4j-helm/restore + imageTag: 4.1.0-1 + secretName: neo4j-gcp-credentials #neo4j-aws-credentials + database: neo4j,system + cloudProvider: gcp #aws + bucket: gs://test-neo4j #s3://test-neo4j + timestamp: "latest" + forceOverwrite: true + purgeOnComplete: true diff --git a/doc/docs/modules/ROOT/pages/backup.adoc b/doc/docs/modules/ROOT/pages/backup.adoc index 5fc00996..77334794 100644 --- a/doc/docs/modules/ROOT/pages/backup.adoc +++ b/doc/docs/modules/ROOT/pages/backup.adoc @@ -50,34 +50,83 @@ In Neo4j 4.0, the system can be multidatabase; most systems have at least 2 DBs, First you want to create a kubernetes secret that contains the content of your account service key. 
This key must have permissions to access the bucket and backup set that you're trying to restore. +**AWS** + +- You must create the credential file and this file should look like this: +```aws-credentials +[default] +region= +aws_access_key_id= +aws_secret_access_key= +``` + +- You have to create a secret for this file +```shell +kubectl create secret generic neo4j-aws-credentials \ + --from-file=credentials=aws-credentials +``` + +**GCP** + +- You must create the credential file and this file should look like this: +- You don't actually need to create this file; this is the natural format of a JSON service key from Google +```gcp-credentials.json +{ + "type": "", + "project_id": "", + "private_key_id": "", + "private_key": "", + "client_email": "", + "client_id": "", + "auth_uri": "", + "token_uri": "", + "auth_provider_x509_cert_url": "", + "client_x509_cert_url": "" +} + +``` + +- You have to create a secret for this file ```shell -MY_SERVICE_ACCOUNT_KEY=$HOME/.google/my-service-key.json -kubectl create secret generic neo4j-service-key \ - --from-file=credentials.json=$MY_SERVICE_ACCOUNT_KEY +kubectl create secret generic neo4j-gcp-credentials \ + --from-file=credentials=gcp-credentials.json ``` -The backup container is going to take this kubernetes secret -(named `neo4j-service-key`) and is going to mount it as a file -inside of the backup container (`/auth/credentials.json`). That -file will then be used to authenticate the storage client that we -need to upload the backupset to cloud storage when it's complete. +[NOTE] +**'--from-file=credentials=' here is important** + ### Running a Backup The backup method is itself a mini-helm chart, and so to run a backup, you just do this as a minimal required example: +**AWS** + +```shell +helm install my-neo4j-backup . 
\ + --set neo4jaddr=my-neo4j.default.svc.cluster.local:6362 \ + --set bucket=s3://my-bucket \ + --set database="neo4j\,system" \ + --set cloudProvider=aws \ + --set secretName=neo4j-aws-credentials \ + --set jobSchedule="0 */12 * * *" +``` + +**GCP** ```shell -helm install my-backup-deployment . \ +helm install my-neo4j-backup . \ --set neo4jaddr=my-neo4j.default.svc.cluster.local:6362 \ - --set bucket=gs://my-bucket/ \ + --set bucket=gs://my-bucket \ --set database="neo4j\,system" \ - --set secretName=neo4j-service-key -``` + --set cloudProvider=gcp \ + --set secretName=neo4j-gcp-credentials \ + --set jobSchedule="0 */12 * * *" +``` + +[NOTE] +**This command must be run in 'https://github.com/neo4j-contrib/neo4j-helm/tree/master/tools/backup'** -from within the tools/backup directory -where the chart resides. You must have first created a `neo4j-service-key` -secret in the same namespace as your Neo4j is running. If all goes well, after a period of time when the Kubernetes Job is complete, you will simply see the backup files appear in the designated bucket. @@ -86,13 +135,23 @@ will simply see the backup files appear in the designated bucket. **If your backup does not appear, consult the job container logs to find out why** + +**If you want to get a hot backup before schedule, you can use this command:** + +```shell +kubectl create job --from=cronjob/my-neo4j-backup-job neo4j-hot-backup +``` + **Required parameters** * `neo4jaddr` pointing to an address where your cluster is running, ideally the discovery address. -* `bucket` where you want the backup copied to. It should be `gs://bucketname`. This parameter may include a relative path (`gs://bucketname/mycluster`) +* `bucket` where you want the backup copied to. It should be `gs://bucketname` or `s3://bucketname`. * `databases` a comma separated list of databases to back up. The default is `neo4j,system`. If your DBMS has many individual databases, you should change this. 
+* `cloudProvider` Which cloud service do you want to keep backups on?(gcp or aws) +* `secretName` the name of the secret you created (neo4j-gcp-credentials|neo4j-aws-credentials) +* `jobSchedule` what intervals do you want to take backup? It should be cron like "0 */12 * * *". You can set your own schedule(https://crontab.guru/#0_*/12_*_*_*) **Optional environment variables** diff --git a/doc/docs/modules/ROOT/pages/restore.adoc b/doc/docs/modules/ROOT/pages/restore.adoc index 317b05a0..88fc6fdf 100644 --- a/doc/docs/modules/ROOT/pages/restore.adoc +++ b/doc/docs/modules/ROOT/pages/restore.adoc @@ -27,29 +27,48 @@ directory structure that come out of archived backups in order to restore proper ### Create a service key secret to access cloud storage -First you want to create a kubernetes secret that contains the content of your account service key. This key must have permissions to access the bucket and backup set that you're trying to restore. +First you want to create a kubernetes secret that contains the content of your account service key. This key must have permissions to access the bucket and backup set that you're trying to restore. -```shell -MY_SERVICE_ACCOUNT_KEY=$HOME/.google/my-service-key.json -kubectl create secret generic neo4j-service-key \ - --from-file=credentials.json=$MY_SERVICE_ACCOUNT_KEY +**AWS** + +- You must create the credential file and this file should look like this: +```aws-credentials +[default] +region= +aws_access_key_id= +aws_secret_access_key= ``` -The restore container is going to take this kubernetes secret -(named `neo4j-service-key`) and is going to mount it as a file -inside of the backup container (`/auth/credentials.json`). That -file will then be used to authenticate the storage client that we -need to upload the backupset to cloud storage when it's complete. 
+- You have to create a secret for this file +```shell +kubectl create secret generic neo4j-aws-credentials \ + --from-file=credentials=aws-credentials +``` -In `values.yaml`, then configure the secret you set here like so: +**GCP** + +- You must create the credential file and this file should look like this: +```gcp-credentials.json +{ + "type": "", + "project_id": "", + "private_key_id": "", + "private_key": "", + "client_email": "", + "client_id": "", + "auth_uri": "", + "token_uri": "", + "auth_provider_x509_cert_url": "", + "client_x509_cert_url": "" +} -```yaml -restoreSecret: neo4j-service-key ``` -This allows the core and read replica nodes to access that service key -as a volume. That volume being present within the containers is necessary for the -next step, and will be mounted as `/auth/credentials.json` inside the container. +- You have to create a secret for this file +```shell +kubectl create secret generic neo4j-gcp-credentials \ + --from-file=credentials=gcp-credentials.json +``` If this service key secret is not in place, the auth information will not be able to be mounted as a volume in the initContainer, and your pods may get stuck/hung at "ContainerCreating" phase. @@ -61,35 +80,50 @@ Refer to the single instance restore deploy scenario to see how the initContaine What you will need to customize and ensure: * Ensure you have created the appropriate secret and set its name * Ensure that the volume mount to /auth matches the secret name you created above. -* Ensure that your BUCKET, and GOOGLE_APPLICATION_CREDENTIALS are -set correctly given the way you created your secret. +* Ensure that your BUCKET, and credentials are set correctly given the way you created your secret. The example scenario above creates the initContainer just for core nodes. It's strongly recommended you do the same for `readReplica.initContainers` if you are using read replicas. 
If you restore only to core nodes and not to read replicas, when they start the core nodes will replicate the data to the read replicas. This will work just fine, but may result in longer startup times and much more bandwidth. ## Restore Environment Variables for the Init Container -### Required - -- `GOOGLE_APPLICATION_CREDENTIALS` - path to a file with a JSON service account key (see credentials below). Defaults to /auth/credentials.json -- `BUCKET` - the storage bucket where backups are located, e.g. `gs://bucketname`. This parameter may include a relative path (`gs://bucketname/mycluster`) -- `DATABASE` - comma-separated list of databases to restore, e.g. neo4j,system -* `TIMESTAMP` - this defaults to "latest". See the backup container's documentation -on the latest pointer. But you may set this to a particular timestamp to restore -that exact moment in time. This timestamp must match the filename in storage. -So if you want to restore the backup set at `neo4j-2020-06-16-12:32:57.tar.gz ` then -the TIMESTAMP would be `2020-06-16-12:32:57`. - -### Optional +- To restore you need to add the necessary parameters to values.yaml and this file should look like this: +```values.yaml +... +core: + ... + restore: + enabled: true + secretName: (neo4j-gcp-credentials|neo4j-aws-credentials) #required + database: neo4j,system #required + cloudProvider: (gcp|aws) #required + bucket: (gs|s3)://test-neo4j #required + timestamp: "latest" #optional #default:"latest" + forceOverwrite: true #optional #default:true + purgeOnComplete: true #optinal #default:true +readReplica: + ... + restore: + enabled: true + secretName: (neo4j-gcp-credentials|neo4j-aws-credentials) #required + database: neo4j,system #required + cloudProvider: (gcp|aws) #required + bucket: (gs|s3)://test-neo4j #required + timestamp: "2020-06-16-12:32:57" #optional #default:"latest" + forceOverwrite: true #optional #default:true + purgeOnComplete: true #optinal #default:true +... 
+``` -- `PURGE_ON_COMPLETE` (defaults to true). If this is set to the value "true", the restore process will remove the restore artifacts from disk. With any other -value, they will be left in place. This is useful for debugging restores, to -see what was copied down from cloud storage and how it was expanded. -- `FORCE_OVERWRITE` if this is the value "true", then the restore process will overwrite and -destroy any existing data that is on the volume. Take care when using this in combination with -persistent volumes. The default is false; if data already exists on the drive, the restore operation will fail but preserve your data. **You must set this to true -if you want restore to work over-top of an existing database**. +- standard neo4j installation +``` +helm install \ + neo4j neo4j/neo4j \ + -f values.yaml \ + --set acceptLicenseAgreement=yes \ + --version 4.1.1-1 +``` -## Warnings +## Warnings A common way you might deploy Neo4j would be restore from last backup when a container initializes. This would be good for a cluster, because it would minimize how much catch-up is needed when a node is launched. 
Any difference between the last backup and the rest of the diff --git a/templates/core-statefulset.yaml b/templates/core-statefulset.yaml index 69aa9268..05f48e79 100644 --- a/templates/core-statefulset.yaml +++ b/templates/core-statefulset.yaml @@ -121,11 +121,6 @@ spec: {{- end }} - name: plugins mountPath: /plugins - {{- if .Values.restoreSecret }} - - name: creds - mountPath: /auth - readOnly: true - {{- end }} {{- if .Values.core.additionalVolumeMounts }} {{ toYaml .Values.core.additionalVolumeMounts | indent 8}} {{- end }} @@ -141,23 +136,47 @@ spec: {{- if .Values.imagePullSecret }} imagePullSecrets: - name: {{ .Values.imagePullSecret }} -{{- end -}} -{{- if .Values.core.initContainers }} +{{- end }} initContainers: + {{ if .Values.core.restore.enabled }} + - name: restore-from-backup + image: "{{ .Values.core.restore.image }}:{{ .Values.core.restore.imageTag }}" + imagePullPolicy: Always + volumeMounts: + - name: datadir + mountPath: /data + - name: credentials + mountPath: /credentials + readOnly: true + env: + - name: DATABASE + value: {{ .Values.core.restore.database }} + - name: CLOUD_PROVIDER + value: {{ .Values.core.restore.cloudProvider }} + - name: BUCKET + value: {{ .Values.core.restore.bucket }} + - name: TIMESTAMP + value: "{{ .Values.core.restore.timestamp }}" + - name: FORCE_OVERWRITE + value: "{{ .Values.core.restore.forceOverwrite }}" + - name: PURGE_ON_COMPLETE + value: "{{ .Values.core.restore.purgeOnComplete }}" + {{ end }} +{{- if .Values.core.initContainers }} {{ toYaml .Values.core.initContainers | indent 6 }} {{- end }} volumes: - name: init-script configMap: name: "{{ .Release.Name }}-init-script" - {{- if .Values.restoreSecret }} - - name: creds + {{ if .Values.core.restore.enabled }} + - name: credentials secret: - secretName: {{ .Values.restoreSecret }} + secretName: {{ .Values.core.restore.secretName }} items: - - key: credentials.json - path: credentials.json - {{- end }} + - key: credentials + path: credentials + {{ end }} {{- 
if not .Values.core.persistentVolume.enabled }} - name: datadir emptyDir: {} diff --git a/templates/readreplicas-statefulset.yaml b/templates/readreplicas-statefulset.yaml index f2d4d1f3..cf485c5f 100644 --- a/templates/readreplicas-statefulset.yaml +++ b/templates/readreplicas-statefulset.yaml @@ -119,11 +119,6 @@ spec: {{- if .Values.readReplica.persistentVolume.subPath }} subPath: {{ .Values.readReplica.persistentVolume.subPath }} {{- end }} - {{- if .Values.restoreSecret }} - - name: creds - mountPath: /auth - readOnly: true - {{- end }} {{- if .Values.readReplica.additionalVolumeMounts }} {{ toYaml .Values.readReplica.additionalVolumeMounts | indent 8}} {{- end }} @@ -136,8 +131,32 @@ spec: {{- if .Values.core.sidecarContainers }} {{ toYaml .Values.core.sidecarContainers | indent 6 }} {{- end }} -{{- if .Values.readReplica.initContainers }} initContainers: + {{ if .Values.readReplica.restore.enabled }} + - name: restore-from-backup + image: "{{ .Values.readReplica.restore.image }}:{{ .Values.readReplica.restore.imageTag }}" + imagePullPolicy: Always + volumeMounts: + - name: datadir + mountPath: /data + - name: credentials + mountPath: /credentials + readOnly: true + env: + - name: DATABASE + value: {{ .Values.readReplica.restore.database }} + - name: CLOUD_PROVIDER + value: {{ .Values.readReplica.restore.cloudProvider }} + - name: BUCKET + value: {{ .Values.readReplica.restore.bucket }} + - name: TIMESTAMP + value: "{{ .Values.readReplica.restore.timestamp }}" + - name: FORCE_OVERWRITE + value: "{{ .Values.readReplica.restore.forceOverwrite }}" + - name: PURGE_ON_COMPLETE + value: "{{ .Values.readReplica.restore.purgeOnComplete }}" + {{ end }} +{{- if .Values.readReplica.initContainers }} {{ toYaml .Values.readReplica.initContainers | indent 6 }} {{- end }} {{- with .Values.nodeSelector }} @@ -156,14 +175,14 @@ spec: - name: init-script configMap: name: "{{ .Release.Name }}-init-script" - {{- if .Values.restoreSecret }} - - name: creds + {{ if 
.Values.readReplica.restore.enabled }} + - name: credentials secret: - secretName: {{ .Values.restoreSecret }} + secretName: {{ .Values.readReplica.restore.secretName }} items: - - key: credentials.json - path: credentials.json - {{- end }} + - key: credentials + path: credentials + {{ end }} {{- if not .Values.readReplica.persistentVolume.enabled }} - name: datadir emptyDir: {} diff --git a/tools/backup/Dockerfile b/tools/backup/Dockerfile index 3a58a371..fa20f9e6 100644 --- a/tools/backup/Dockerfile +++ b/tools/backup/Dockerfile @@ -12,7 +12,10 @@ RUN echo "neo4j-enterprise neo4j/question select I ACCEPT" | debconf-set-selecti RUN echo "neo4j-enterprise neo4j/license note" | debconf-set-selections RUN apt-get update && apt-get install -y neo4j-enterprise=1:4.1.0 -RUN apt-get install -y google-cloud-sdk +RUN apt-get install -y google-cloud-sdk unzip less +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +RUN unzip awscliv2.zip +RUN ./aws/install RUN mkdir /data ADD backup/backup.sh /scripts/backup.sh diff --git a/tools/backup/backup.sh b/tools/backup/backup.sh index 263ef168..183aac2c 100644 --- a/tools/backup/backup.sh +++ b/tools/backup/backup.sh @@ -1,175 +1,195 @@ #!/bin/bash -if [ -z $NEO4J_ADDR ] ; then - echo "You must specify a NEO4J_ADDR env var with port, such as my-neo4j:6362" - exit 1 +if [ -z $NEO4J_ADDR ]; then + echo "You must specify a NEO4J_ADDR env var with port, such as my-neo4j:6362" + exit 1 fi -if [ -z $DATABASE ] ; then - echo "You must specify a DATABASE env var; comma-separated list of databases to backup, such as neo4j,system" - exit 1 +if [ -z $DATABASE ]; then + echo "You must specify a DATABASE env var; comma-separated list of databases to backup, such as neo4j,system" + exit 1 +fi + +if [ -z $CLOUD_PROVIDER ]; then + echo "You must specify a CLOUD_PROVIDER env var" + exit 1 fi if [ -z $BUCKET ]; then - echo "You must specify a BUCKET address such as gs://my-backups/" - exit 1 + echo "You must 
specify a BUCKET address such as (gs|s3)://my-backups" + exit 1 fi -if [ -z $HEAP_SIZE ] ; then - export HEAP_SIZE=2G +if [ -z $HEAP_SIZE ]; then + export HEAP_SIZE=2G fi if [ -z $PAGE_CACHE ]; then - export PAGE_CACHE=2G + export PAGE_CACHE=2G fi -if [ -z $FALLBACK_TO_FULL ] ; then - export FALLBACK_TO_FULL="true" +if [ -z $FALLBACK_TO_FULL ]; then + export FALLBACK_TO_FULL="true" fi -if [ -z $CHECK_CONSISTENCY ] ; then - export CHECK_CONSISTENCY="true" +if [ -z $CHECK_CONSISTENCY ]; then + export CHECK_CONSISTENCY="true" fi -if [ -z $CHECK_INDEXES ] ; then - export CHECK_INDEXES="true" +if [ -z $CHECK_INDEXES ]; then + export CHECK_INDEXES="true" fi -if [ -z $CHECK_GRAPH ] ; then - export CHECK_GRAPH="true" +if [ -z $CHECK_GRAPH ]; then + export CHECK_GRAPH="true" fi -if [ -z $CHECK_LABEL_SCAN_STORE ] ; then - export CHECK_LABEL_SCAN_STORE="true" +if [ -z $CHECK_LABEL_SCAN_STORE ]; then + export CHECK_LABEL_SCAN_STORE="true" fi -if [ -z $CHECK_PROPERTY_OWNERS ] ; then - export CHECK_PROPERTY_OWNERS="false" +if [ -z $CHECK_PROPERTY_OWNERS ]; then + export CHECK_PROPERTY_OWNERS="false" fi -if [ -z $GOOGLE_APPLICATION_CREDENTIALS ] ; then - echo "Setting default google credential location to /auth/credentials.json" - export GOOGLE_APPLICATION_CREDENTIALS=/auth/credentials.json -fi +function cloud_copy() { + backup_path=$1 + database=$2 + + bucket_path="" + if [ "${BUCKET: -1}" = "/" ]; then + bucket_path="${BUCKET%?}/$database/" + else + bucket_path="$BUCKET/$database/" + fi + + echo "Pushing $backup_path -> $bucket_path" + + case $CLOUD_PROVIDER in + aws) + aws s3 cp $backup_path $bucket_path + aws s3 cp $backup_path "${bucket_path}${LATEST_POINTER}" + ;; + gcp) + gsutil cp $backup_path $bucket_path + gsutil cp $backup_path "${bucket_path}${LATEST_POINTER}" + ;; + esac +} -# This function takes a file and -# (a) uploads it to BUCKET -# (b) updates the latest pointer -function cloud_copy { - full_path=$1 - database=$2 - - # Trim trailing slash from BUCKET if it's 
there, because it messes up the - # copy commands if you copy gs://a//foo to gs://a//bar (double slash in path) - # https://stackoverflow.com/a/17542946/2920686 - if [ "${BUCKET: -1}" = "/" ]; then - BUCKET="${BUCKET%?}" - fi - - # Want bucket_and_path *without* a final slash, so we can add it ourselves and - # know what's a file and what's a directory. - bucket_and_path="" - if [ "${BUCKET: -1}" = "/" ]; then - bucket_and_path="${BUCKET%?}" - else - bucket_and_path=$BUCKET - fi - - echo "Pushing $full_path -> $bucket_and_path" - - # Terminating slash is important to create correct filename. - gsutil cp "$full_path" "$bucket_and_path/" - - backup="$bucket_and_path/$BACKUP_SET.tar.gz" - latest="$bucket_and_path/$LATEST_POINTER" - - echo "Updating latest backup pointer $backup -> $latest" - gsutil cp "$backup" "$latest" +function backup_database() { + db=$1 + + export BACKUP_SET="$db-$(date "+%Y-%m-%d-%H:%M:%S")" + export LATEST_POINTER="$db-latest.tar.gz" + + echo "=============== BACKUP $db ===================" + echo "Beginning backup from $NEO4J_ADDR to /data/$BACKUP_SET" + echo "Using heap size $HEAP_SIZE and page cache $PAGE_CACHE" + echo "FALLBACK_TO_FULL=$FALLBACK_TO_FULL, CHECK_CONSISTENCY=$CHECK_CONSISTENCY" + echo "CHECK_GRAPH=$CHECK_GRAPH CHECK_INDEXES=$CHECK_INDEXES" + echo "CHECK_LABEL_SCAN_STORE=$CHECK_LABEL_SCAN_STORE CHECK_PROPERTY_OWNERS=$CHECK_PROPERTY_OWNERS" + echo "To storage bucket $BUCKET using $CLOUD_PROVIDER" + echo "============================================================" + + neo4j-admin backup \ + --from="$NEO4J_ADDR" \ + --backup-dir=/data \ + --database=$db \ + --pagecache=$PAGE_CACHE \ + --fallback-to-full=$FALLBACK_TO_FULL \ + --check-consistency=$CHECK_CONSISTENCY \ + --check-graph=$CHECK_GRAPH \ + --check-indexes=$CHECK_INDEXES \ + --check-label-scan-store=$CHECK_LABEL_SCAN_STORE \ + --check-property-owners=$CHECK_PROPERTY_OWNERS \ + --verbose + + # Docs: see exit codes here: 
https://neo4j.com/docs/operations-manual/current/backup/performing/#backup-performing-command + backup_result=$? + case $backup_result in + 0) echo "Backup succeeded - $db" ;; + 1) echo "Backup FAILED - $db" ;; + 2) echo "Backup succeeded but consistency check failed - $db" ;; + 3) echo "Backup succeeded but consistency check found inconsistencies - $db" ;; + esac + + if [ $backup_result -eq 1 ]; then + echo "Aborting other actions; backup failed" + exit 1 + fi + + echo "Backup size:" + du -hs "/data/$db" + + echo "Final Backupset files" + ls -l "/data/$db" + + echo "Archiving and Compressing -> /data/$BACKUP_SET.tar" + + tar -zcvf "/data/$BACKUP_SET.tar.gz" "/data/$db" --remove-files + + if [ $? -ne 0 ]; then + echo "BACKUP ARCHIVING OF $db FAILED" + exit 1 + fi + + echo "Zipped backup size:" + du -hs "/data/$BACKUP_SET.tar.gz" + + cloud_copy "/data/$BACKUP_SET.tar.gz" $db + + if [ $? -ne 0 ]; then + echo "Storage copy of backup for $db FAILED" + exit 1 + fi } -function backup_database { - db=$1 - - export BACKUP_SET="$db-$(date "+%Y-%m-%d-%H:%M:%S")" - export LATEST_POINTER="$db-latest.tar.gz" - - echo "=============== BACKUP $db ===================" - echo "Beginning backup from $NEO4J_ADDR to /data/$BACKUP_SET" - echo "Using heap size $HEAP_SIZE and page cache $PAGE_CACHE" - echo "FALLBACK_TO_FULL=$FALLBACK_TO_FULL, CHECK_CONSISTENCY=$CHECK_CONSISTENCY" - echo "CHECK_GRAPH=$CHECK_GRAPH CHECK_INDEXES=$CHECK_INDEXES" - echo "CHECK_LABEL_SCAN_STORE=$CHECK_LABEL_SCAN_STORE CHECK_PROPERTY_OWNERS=$CHECK_PROPERTY_OWNERS" - echo "To google storage bucket $BUCKET using credentials located at $GOOGLE_APPLICATION_CREDENTIALS" - echo "============================================================" - - neo4j-admin backup \ - --from="$NEO4J_ADDR" \ - --backup-dir=/data \ - --database=$db \ - --pagecache=$PAGE_CACHE \ - --fallback-to-full=$FALLBACK_TO_FULL \ - --check-consistency=$CHECK_CONSISTENCY \ - --check-graph=$CHECK_GRAPH \ - --check-indexes=$CHECK_INDEXES \ - 
--check-label-scan-store=$CHECK_LABEL_SCAN_STORE \ - --check-property-owners=$CHECK_PROPERTY_OWNERS \ - --verbose - - # Docs: see exit codes here: https://neo4j.com/docs/operations-manual/current/backup/performing/#backup-performing-command - backup_result=$? - case $backup_result in - 0) echo "Backup succeeded - $db" ;; - 1) echo "Backup FAILED - $db" ;; - 2) echo "Backup succeeded but consistency check failed - $db" ;; - 3) echo "Backup succeeded but consistency check found inconsistencies - $db" ;; - esac - - if [ $backup_result -eq 1 ] ; then - echo "Aborting other actions; backup failed" - exit 1 - fi - - echo "Backup size:" - du -hs "/data/$db" - - echo "Final Backupset files" - ls -l "/data/$db" - - echo "Archiving and Compressing -> /data/$BACKUP_SET.tar" - - tar -zcvf "/data/$BACKUP_SET.tar.gz" "/data/$db" --remove-files - - if [ $? -ne 0 ] ; then - echo "BACKUP ARCHIVING OF $db FAILED" - exit 1 - fi - - echo "Zipped backup size:" - du -hs "/data/$BACKUP_SET.tar.gz" - - cloud_copy "/data/$BACKUP_SET.tar.gz" - - if [ $? -ne 0 ] ; then - echo "Storage copy of backup for $db FAILED" - exit 1 - fi +function activate_gcp() { + echo "Activating google credentials before beginning" + gcloud auth activate-service-account --key-file "/credentials/credentials" + + if [ $? -ne 0 ]; then + echo "Credentials failed; no way to copy to google." + exit 1 + fi } -###################################################### +function activate_aws() { + echo "Activating aws credentials before beginning" + mkdir -p /root/.aws/ + cp /credentials/credentials ~/.aws/config -echo "Activating google credentials before beginning" -gcloud auth activate-service-account --key-file "$GOOGLE_APPLICATION_CREDENTIALS" + if [ $? -ne 0 ]; then + echo "Credentials failed; no way to copy to aws." + exit 1 + fi -if [ $? -ne 0 ] ; then - echo "Credentials failed; no way to copy to google." - echo "Ensure GOOGLE_APPLICATION_CREDENTIALS is appropriately set." -fi + aws sts get-caller-identity + if [ $? 
-ne 0 ]; then + echo "Credentials failed; no way to copy to aws." + exit 1 + fi +} + +case $CLOUD_PROVIDER in +aws) + activate_aws + ;; +gcp) + activate_gcp + ;; +*) + echo "You must set CLOUD_PROVIDER to be one of (aws|gcp)" + exit 1 + ;; +esac # Split by comma IFS="," -read -a databases <<< "$DATABASE" -for db in "${databases[@]}"; do - backup_database "$db" +read -a databases <<<"$DATABASE" +for db in "${databases[@]}"; do + backup_database "$db" done echo "All finished" diff --git a/tools/backup/templates/backup.yaml b/tools/backup/templates/backup.yaml index 1784bf88..e6af1373 100644 --- a/tools/backup/templates/backup.yaml +++ b/tools/backup/templates/backup.yaml @@ -1,10 +1,7 @@ -# Sample pod to be applied manually or scheduled when desired -# Executes a backup against the DB when tailored to the right endpoint. -# Copies resulting full backup to $BUCKET -apiVersion: batch/v1 -kind: Job +apiVersion: batch/v1beta1 +kind: CronJob metadata: - name: "{{ .Release.Name }}-neo4j-backup-{{ date "20060102150405" now }}" + name: "{{ .Release.Name }}-job" labels: app.kubernetes.io/managed-by: {{ .Release.Service | quote }} app.kubernetes.io/instance: {{ .Release.Name | quote }} @@ -12,49 +9,52 @@ metadata: app.kubernetes.io/name: "graph-backup" app.kubernetes.io/component: backup spec: - backoffLimit: 3 - template: + schedule: {{.Values.jobSchedule | quote }} + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 + jobTemplate: spec: - restartPolicy: Never - containers: - - name: graph-backup - image: {{ .Values.image }}:{{ .Values.imageTag }} - imagePullPolicy: Always - env: - # Addr must include a host and port, but no scheme. 
See the --from parameter here:
-          # See https://neo4j.com/docs/operations-manual/current/backup/performing/
-          - name: NEO4J_ADDR
-            value: {{ .Values.neo4jaddr }}
-          - name: BUCKET
-            value: {{ .Values.bucket }}
-          - name: DATABASE
-            value: {{ .Values.database }}
-          - name: GOOGLE_APPLICATION_CREDENTIALS
-            value: {{ .Values.credentials }}
-          - name: HEAP_SIZE
-            value: {{ .Values.heapSize }}
-          - name: PAGE_CACHE
-            value: {{ .Values.pageCache }}
-          - name: FALLBACK_TO_FULL
-            value: "{{ .Values.fallbackToFull }}"
-          - name: CHECK_CONSISTENCY
-            value: "{{ .Values.checkConsistency }}"
-          - name: CHECK_INDEXES
-            value: "{{ .Values.checkIndexes }}"
-          - name: CHECK_GRAPH
-            value: "{{ .Values.checkGraph }}"
-          - name: CHECK_LABEL_SCAN_STORE
-            value: "{{ .Values.checkLabelScanStore }}"
-          - name: CHECK_PROPERTY_OWNERS
-            value: "{{ .Values.checkPropertyOwners }}"
-          volumeMounts:
-          - name: creds
-            mountPath: /auth
-            readOnly: true
-          volumes:
-          - name: creds
-            secret:
-              secretName: {{ .Values.secretName }}
-              items:
-              - key: credentials.json
-                path: credentials.json
+  template:
+    spec:
+      restartPolicy: Never
+      containers:
+        - name: graph-backup
+          image: {{ .Values.image }}:{{ .Values.imageTag }}
+          imagePullPolicy: Always
+          env:
+            - name: NEO4J_ADDR
+              value: {{ .Values.neo4jaddr }}
+            - name: DATABASE
+              value: {{ .Values.database }}
+            - name: CLOUD_PROVIDER
+              value: {{ .Values.cloudProvider }}
+            - name: BUCKET
+              value: {{ .Values.bucket }}
+            - name: HEAP_SIZE
+              value: {{ .Values.heapSize }}
+            - name: PAGE_CACHE
+              value: {{ .Values.pageCache }}
+            - name: FALLBACK_TO_FULL
+              value: "{{ .Values.fallbackToFull }}"
+            - name: CHECK_CONSISTENCY
+              value: "{{ .Values.checkConsistency }}"
+            - name: CHECK_INDEXES
+              value: "{{ .Values.checkIndexes }}"
+            - name: CHECK_GRAPH
+              value: "{{ .Values.checkGraph }}"
+            - name: CHECK_LABEL_SCAN_STORE
+              value: "{{ .Values.checkLabelScanStore }}"
+            - name: CHECK_PROPERTY_OWNERS
+              value: "{{ .Values.checkPropertyOwners }}"
+          volumeMounts:
+            - name: credentials
+              mountPath: /credentials
+              readOnly: true
+      volumes:
+        - name: credentials
+          secret:
+            secretName: {{ .Values.secretName }}
+            items:
+              - key: credentials
+                path: credentials
\ No newline at end of file
diff --git a/tools/backup/values.yaml b/tools/backup/values.yaml
index 3a6bcf34..3d2687a2 100644
--- a/tools/backup/values.yaml
+++ b/tools/backup/values.yaml
@@ -1,10 +1,10 @@
 image: gcr.io/neo4j-helm/backup
 imageTag: 4.1.0-1
 neo4jaddr: holder-neo4j.default.svc.cluster.local:6362
-bucket: gs://graph-backup-storage/city-graph/
+bucket: gs://test-neo4j
 database: neo4j,system
-credentials: /auth/credentials.json
-secretName: neo4j-service-key
+cloudProvider: gcp
+secretName: "neo4j-gcp-credentials"
 pageCache: 2G
 heapSize: 2G
 fallbackToFull: "true"
@@ -12,4 +12,5 @@ checkConsistency: "true"
 checkIndexes: "true"
 checkGraph: "true"
 checkLabelScanStore: "true"
-checkPropertyOwners: "false"
\ No newline at end of file
+checkPropertyOwners: "false"
+jobSchedule: "0 */12 * * *"
\ No newline at end of file
diff --git a/tools/restore/Dockerfile b/tools/restore/Dockerfile
index 6ea91b38..2c9fd1e4 100644
--- a/tools/restore/Dockerfile
+++ b/tools/restore/Dockerfile
@@ -12,7 +12,10 @@ RUN echo "neo4j-enterprise neo4j/question select I ACCEPT" | debconf-set-selecti
 RUN echo "neo4j-enterprise neo4j/license note" | debconf-set-selections
 RUN apt-get update && apt-get install -y neo4j-enterprise=1:4.1.0
 
-RUN apt-get install -y google-cloud-sdk unzip
+RUN apt-get install -y google-cloud-sdk unzip less
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+RUN unzip awscliv2.zip
+RUN ./aws/install
 RUN mkdir /data
 
 ADD restore/restore.sh /scripts/restore.sh
diff --git a/tools/restore/restore.sh b/tools/restore/restore.sh
index 89b3000a..e5bac01a 100644
--- a/tools/restore/restore.sh
+++ b/tools/restore/restore.sh
@@ -2,7 +2,7 @@
 
 # Validation of inputs upfront
 if [ -z $BUCKET ] ; then
-    echo "You must specify a bucket, such as gs://my-backups/"
+    echo "You must specify a bucket, such as (gs|s3)://my-backups"
     exit 1
 fi
 
@@ -11,21 +11,45 @@ if [ -z $DATABASE ]; then
     exit 1
 fi
 
+if [ -z $CLOUD_PROVIDER ]; then
+    echo "You must specify a CLOUD_PROVIDER env var"
+    exit 1
+fi
+
 if [ -z $TIMESTAMP ]; then
     echo "No TIMESTAMP was provided, we are using latest"
     TIMESTAMP=latest
 fi
 
-if [ -z $GOOGLE_APPLICATION_CREDENTIALS ] ; then
-    echo "Setting default google credential location to /auth/credentials.json"
-    export GOOGLE_APPLICATION_CREDENTIALS=/auth/credentials.json
-fi
-
 if [ -z $PURGE_ON_COMPLETE ]; then
     echo "Setting PURGE_ON_COMPLETE=true"
     PURGE_ON_COMPLETE=true
 fi
 
+function fetch_backup_from_cloud() {
+    database=$1
+    restore_path=$2
+
+    bucket_path=""
+    if [ "${BUCKET: -1}" = "/" ]; then
+        bucket_path="${BUCKET%?}/$database/"
+    else
+        bucket_path="$BUCKET/$database/"
+    fi
+    backup_path="${bucket_path}$database-$TIMESTAMP.tar.gz"
+
+    echo "Fetching $backup_path -> $restore_path"
+
+    case $CLOUD_PROVIDER in
+    aws)
+        aws s3 cp $backup_path $restore_path
+        ;;
+    gcp)
+        gsutil cp $backup_path $restore_path
+        ;;
+    esac
+}
+
 function restore_database {
 
     db=$1
@@ -60,22 +84,9 @@ function restore_database {
     echo "Making restore directory"
     mkdir -p "$RESTORE_ROOT"
 
-    # Trim trailing slash from BUCKET if it's there, because it messes up the
-    # copy commands if you copy gs://a//foo to gs://a//bar (double slash in path)
-    # https://stackoverflow.com/a/17542946/2920686
-    if [ "${BUCKET: -1}" = "/" ]; then
-        BUCKET="${BUCKET%?}"
-    fi
-
-    REMOTE_BACKUPSET="$BUCKET/$db-$TIMESTAMP.tar.gz"
-    echo "Copying $REMOTE_BACKUPSET -> $RESTORE_ROOT"
-
-    # By copying recursively, the user can specify a dir with an uncompressed
-    # backup if preferred. The -m flag downloads in parallel if possible.
-    gsutil -m cp -r "$REMOTE_BACKUPSET" "$RESTORE_ROOT"
+    fetch_backup_from_cloud $db $RESTORE_ROOT
 
     if [ $? -ne 0 ] ; then
-        echo "Copy remote backupset $REMOTE_BACKUPSET FAILED"
         echo "Cannot restore $db"
         return
     fi
@@ -88,7 +99,7 @@ function restore_database {
     # foo.zip, we need to assume that this unarchives to a directory called
     # foo, as neo4j backup sets are directories. So we'll remove the suffix
    # after unarchiving and use that as the actual backup target.
-    BACKUP_FILENAME=$(basename "$REMOTE_BACKUPSET")
+    BACKUP_FILENAME="$db-$TIMESTAMP.tar.gz"
     RESTORE_FROM=uninitialized
     if [[ $BACKUP_FILENAME =~ \.tar\.gz$ ]] ; then
         echo "Untarring backup file"
@@ -220,10 +231,37 @@ function restore_database {
     echo "RESTORE OF $db COMPLETE"
 }
 
+function activate_gcp() {
+    echo "Activating google credentials before beginning"
+    gcloud auth activate-service-account --key-file "/credentials/credentials"
+
+    if [ $? -ne 0 ]; then
+        echo "Credentials failed; no way to copy to google."
+        exit 1
+    fi
+}
+
+function activate_aws() {
+    echo "Activating aws credentials before beginning"
+    mkdir -p /root/.aws/
+    cp /credentials/credentials ~/.aws/config
+
+    if [ $? -ne 0 ]; then
+        echo "Credentials failed; no way to copy to aws."
+        exit 1
+    fi
+
+    aws sts get-caller-identity
+    if [ $? -ne 0 ]; then
+        echo "Credentials failed; no way to copy to aws."
+        exit 1
+    fi
+}
+
 echo "=============== Restore ==============================="
-echo "GOOGLE_APPLICATION_CREDENTIALS=$GOOGLE_APPLICATION_CREDENTIALS"
-echo "TIMESTAMP=$TIMESTAMP"
+echo "CLOUD_PROVIDER=$CLOUD_PROVIDER"
 echo "BUCKET=$BUCKET"
+echo "TIMESTAMP=$TIMESTAMP"
 echo "FORCE_OVERWRITE=$FORCE_OVERWRITE"
 echo "PURGE_ON_COMPLETE=$PURGE_ON_COMPLETE"
 echo "Starting point database contents: "
@@ -232,14 +270,18 @@ echo "Starting point transactions: "
 ls /data/transactions
 echo "============================================================"
 
-echo "Activating google credentials before beginning"
-ls -l $GOOGLE_APPLICATION_CREDENTIALS
-gcloud auth activate-service-account --key-file "$GOOGLE_APPLICATION_CREDENTIALS"
-
-if [ $? -ne 0 ] ; then
-    echo "Credentials failed; copying from Google will likely fail unless the bucket is public"
-    echo "Ensure GOOGLE_APPLICATION_CREDENTIALS is appropriately set."
-fi
+case $CLOUD_PROVIDER in
+aws)
+    activate_aws
+    ;;
+gcp)
+    activate_gcp
+    ;;
+*)
+    echo "You must set CLOUD_PROVIDER to be one of (aws|gcp)"
+    exit 1
+    ;;
+esac
 
 # See: https://neo4j.com/docs/operations-manual/current/backup/restoring/#backup-restoring-cluster
 echo "Unbinding previous cluster state, if applicable"
diff --git a/values.yaml b/values.yaml
index f848b14e..70a7caf1 100644
--- a/values.yaml
+++ b/values.yaml
@@ -146,6 +146,18 @@ core:
   additionalVolumeMounts: []
   terminationGracePeriodSeconds: 300
 
+  restore:
+    enabled: false
+    image: gcr.io/neo4j-helm/restore
+    imageTag: 4.1.0-1
+    secretName: neo4j-gcp-credentials
+    database: neo4j,system
+    cloudProvider: gcp
+    bucket: gs://test-neo4j
+    timestamp: "latest"
+    forceOverwrite: true
+    purgeOnComplete: true
+
 # Read Replicas
 readReplica:
   # configMap: "my-custom-configmap"
@@ -214,6 +226,18 @@ readReplica:
   additionalVolumeMounts: []
   terminationGracePeriodSeconds: 300
 
+  restore:
+    enabled: false
+    image: gcr.io/neo4j-helm/restore
+    imageTag: 4.1.0-1
+    secretName: neo4j-gcp-credentials
+    database: neo4j,system
+    cloudProvider: gcp
+    bucket: gs://test-neo4j
+    timestamp: "latest"
+    forceOverwrite: true
+    purgeOnComplete: true
+
   resources: {}
   # limits:
   #  cpu: "1000m"