Skip to content

Commit

Permalink
Add prestop hook
Browse files Browse the repository at this point in the history
  • Loading branch information
WalBeh committed Jan 13, 2025
1 parent 7f0dd24 commit a71cfa7
Show file tree
Hide file tree
Showing 12 changed files with 787 additions and 0 deletions.
93 changes: 93 additions & 0 deletions .github/workflows/build-and-release-dc_util.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
name: Build and Release dc_util

on:
workflow_dispatch:
inputs:
release_version:
description: 'Version number for the release (e.g., v1.0.0)'
required: true
default: 'v1.0.0'

jobs:
build-and-release:
runs-on: ubuntu-latest
strategy:
matrix:
# os: [linux, windows, darwin]
os: [linux]
# goarch: [amd64, arm64]
goarch: [amd64]

steps:
- name: Checkout Repository
uses: actions/checkout@v3

- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.23'

- name: Build dc_util
run: |
mkdir -p build
cd utils/dc_util # Navigate to the directory containing dc_util.go
GOOS=${{ matrix.os }} GOARCH=${{ matrix.goarch }} go build -o ../../build/dc_util-${{ matrix.os }}-${{ matrix.goarch }}
- name: Generate SHA256 Checksum
run: |
cd build
if [[ "${{ matrix.os }}" == "windows" ]]; then
sha256sum dc_util-${{ matrix.os }}-${{ matrix.goarch }}.exe > dc_util-${{ matrix.os }}-${{ matrix.goarch }}.exe.sha256
else
sha256sum dc_util-${{ matrix.os }}-${{ matrix.goarch }} > dc_util-${{ matrix.os }}-${{ matrix.goarch }}.sha256
fi
- name: Upload Binary and Checksum Artifacts
uses: actions/upload-artifact@v3
with:
name: dc_util-${{ matrix.os }}-${{ matrix.goarch }}
path: build/dc_util-${{ matrix.os }}-${{ matrix.goarch }}*

- name: Clean Up Build Directory
run: |
rm -rf build
release:
needs: build-and-release
runs-on: ubuntu-latest
strategy:
matrix:
os: [linux, windows, darwin]
goarch: [amd64, arm64]

steps:
- name: Download Binary Artifact
uses: actions/download-artifact@v3
with:
name: dc_util-${{ matrix.os }}-${{ matrix.goarch }}
path: ./release

- name: Create GitHub Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ github.event.inputs.release_version }}
release_name: Release ${{ github.event.inputs.release_version }}
draft: false
prerelease: false

- name: Upload Release Assets
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ steps.create_release.outputs.upload_url }}
asset_path: ./release
asset_name: dc_util-${{ matrix.os }}-${{ matrix.goarch }}-${{ github.event.inputs.release_version }}
asset_content_type: application/octet-stream

- name: Clean Up Release Directory
run: |
rm -rf release
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Changelog

Unreleased
----------
* Add preStop hook to the CrateDB pods to ensure that the CrateDB process is
stopped gracefully.


2.43.1 (2025-01-08)
-------------------
Expand Down
10 changes: 10 additions & 0 deletions crate/operator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@
GRAND_CENTRAL_BACKEND_API_PORT = 5050
GRAND_CENTRAL_PROMETHEUS_PORT = 8000

TERMINATION_GRACE_PERIOD_SECONDS = 900
DECOMMISSION_TIMEOUT = "720s"

DCUTIL_VERSION = "0.0.1"
DCUTIL_BASE_URL = (
f"https://github.com/crate/crate-operator/releases/download/dcutil-{DCUTIL_VERSION}"
)
DCUTIL_BINARY = "dc_util-linux-amd64"
DCUTIL_CHECKSUM = f"{DCUTIL_BINARY}.sha256"


class CloudProvider(str, enum.Enum):
AWS = "aws"
Expand Down
85 changes: 85 additions & 0 deletions crate/operator/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
AppsV1Api,
CoreV1Api,
PolicyV1Api,
RbacAuthorizationV1Api,
RbacV1Subject,
V1Affinity,
V1Capabilities,
V1ConfigMap,
Expand All @@ -41,10 +43,13 @@
V1EmptyDirVolumeSource,
V1EnvVar,
V1EnvVarSource,
V1ExecAction,
V1HTTPGetAction,
V1KeyToPath,
V1LabelSelector,
V1LabelSelectorRequirement,
V1Lifecycle,
V1LifecycleHandler,
V1LocalObjectReference,
V1NodeAffinity,
V1NodeSelector,
Expand All @@ -60,8 +65,12 @@
V1PodDisruptionBudgetSpec,
V1PodSpec,
V1PodTemplateSpec,
V1PolicyRule,
V1Probe,
V1ResourceRequirements,
V1Role,
V1RoleBinding,
V1RoleRef,
V1Secret,
V1SecretKeySelector,
V1SecretVolumeSource,
Expand All @@ -82,6 +91,10 @@
from crate.operator.constants import (
API_GROUP,
DATA_PVC_NAME_PREFIX,
DCUTIL_BASE_URL,
DCUTIL_BINARY,
DCUTIL_CHECKSUM,
DECOMMISSION_TIMEOUT,
LABEL_COMPONENT,
LABEL_MANAGED_BY,
LABEL_NAME,
Expand All @@ -92,6 +105,7 @@
SHARED_NODE_TOLERATION_EFFECT,
SHARED_NODE_TOLERATION_KEY,
SHARED_NODE_TOLERATION_VALUE,
TERMINATION_GRACE_PERIOD_SECONDS,
CloudProvider,
Nodepool,
Port,
Expand Down Expand Up @@ -369,6 +383,26 @@ def get_statefulset_containers(
security_context=V1SecurityContext(
capabilities=V1Capabilities(add=["SYS_CHROOT"])
),
lifecycle=V1Lifecycle(
pre_stop=(
V1LifecycleHandler(
_exec=V1ExecAction(
command=[
"/bin/sh",
"-c",
"curl -sLO "
f"{DCUTIL_BASE_URL}/{DCUTIL_BINARY} && "
"curl -sLO "
f"{DCUTIL_BASE_URL}/{DCUTIL_CHECKSUM} && "
f"sha256sum -c {DCUTIL_CHECKSUM} && "
f"chmod u+x {DCUTIL_BINARY} &&"
f"./{DCUTIL_BINARY} -min-availability PRIMARIES "
f"-timeout {DECOMMISSION_TIMEOUT}",
]
)
)
)
),
),
]

Expand Down Expand Up @@ -822,6 +856,7 @@ def get_statefulset(
init_containers=get_statefulset_init_containers(crate_image),
volumes=get_statefulset_volumes(name, ssl),
tolerations=get_tolerations(name, logger, node_spec),
termination_grace_period_seconds=TERMINATION_GRACE_PERIOD_SECONDS,
),
),
update_strategy=V1StatefulSetUpdateStrategy(type="OnDelete"),
Expand Down Expand Up @@ -917,6 +952,56 @@ async def create_statefulset(
namespace=namespace,
body=pdb,
)
"""
A Role is required to allow the POD to access the
number of replicas in the StatefulSet. This is required for the
pre-stop lifecycle hook to work correctly and detect a scale to 0.
"""
rule = RbacAuthorizationV1Api(api_client)
role = V1Role(
metadata=V1ObjectMeta(
name=f"crate-{name}",
owner_references=owner_references,
),
rules=[
V1PolicyRule(
api_groups=["apps"],
resources=["statefulsets"],
verbs=["get", "list", "watch"],
)
],
)
await call_kubeapi(
rule.create_namespaced_role,
logger,
continue_on_conflict=True,
namespace=namespace,
body=role,
)
role_binding = V1RoleBinding(
metadata=V1ObjectMeta(
name=f"crate-{name}",
owner_references=owner_references,
),
role_ref=V1RoleRef(
api_group="rbac.authorization.k8s.io",
kind="Role",
name=f"crate-{name}",
),
subjects=[
RbacV1Subject(
kind="ServiceAccount",
name="default",
)
],
)
await call_kubeapi(
rule.create_namespaced_role_binding,
logger,
continue_on_conflict=True,
namespace=namespace,
body=role_binding,
)


def get_data_service(
Expand Down
3 changes: 3 additions & 0 deletions deploy/charts/crate-operator/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ rules:
- batch
- policy
- networking.k8s.io
- rbac.authorization.k8s.io
resources:
- configmaps
- cronjobs
Expand All @@ -46,6 +47,8 @@ rules:
- services
- statefulsets
- poddisruptionbudgets
- roles
- rolebindings
verbs:
- create
- delete
Expand Down
3 changes: 3 additions & 0 deletions deploy/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ rules:
- batch
- policy
- networking.k8s.io
- rbac.authorization.k8s.io
resources:
- configmaps
- cronjobs
Expand All @@ -71,6 +72,8 @@ rules:
- services
- statefulsets
- poddisruptionbudgets
- rolebindings
- roles
verbs:
- create
- delete
Expand Down
61 changes: 61 additions & 0 deletions tests/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,30 @@
from typing import Set
from unittest import mock

import aiohttp
import pytest
from kubernetes_asyncio.client import (
AppsV1Api,
CoreV1Api,
CustomObjectsApi,
NetworkingV1Api,
RbacAuthorizationV1Api,
)

from crate.operator.config import config
from crate.operator.constants import (
API_GROUP,
DATA_PVC_NAME_PREFIX,
DCUTIL_BASE_URL,
DCUTIL_BINARY,
GRAND_CENTRAL_PROMETHEUS_PORT,
GRAND_CENTRAL_RESOURCE_PREFIX,
LABEL_COMPONENT,
LABEL_MANAGED_BY,
LABEL_NAME,
LABEL_PART_OF,
RESOURCE_CRATEDB,
TERMINATION_GRACE_PERIOD_SECONDS,
CloudProvider,
)
from crate.operator.create import (
Expand Down Expand Up @@ -1212,6 +1217,54 @@ async def test_create_minimal(self, faker, namespace, kopf_runner, api_client):
{f"crate-data-hot-{name}-0"},
)

async def test_decommission_settings(
self, faker, namespace, kopf_runner, api_client
):
apps = AppsV1Api(api_client)
coapi = CustomObjectsApi(api_client)
core = CoreV1Api(api_client)
rbac = RbacAuthorizationV1Api(api_client)
name = faker.domain_word()

await start_cluster(name, namespace, core, coapi, 1, wait_for_healthy=False)
await assert_wait_for(
True,
self.does_statefulset_exist,
apps,
namespace.metadata.name,
f"crate-data-hot-{name}",
)
await assert_wait_for(
True,
do_pods_exist,
core,
namespace.metadata.name,
{f"crate-data-hot-{name}-0"},
)
statefulset = await apps.read_namespaced_stateful_set(
f"crate-data-hot-{name}", namespace.metadata.name
)
assert (
statefulset.spec.template.spec.termination_grace_period_seconds
== TERMINATION_GRACE_PERIOD_SECONDS
)

role = await rbac.read_namespaced_role(f"crate-{name}", namespace.metadata.name)
assert any(
rule
for rule in role.rules
if "statefulsets" in rule.resources and "list" in rule.verbs
), "Role does not contain the 'list' verb for 'statefulsets'"

rolebinding = await rbac.read_namespaced_role_binding(
f"crate-{name}", namespace.metadata.name
)
assert any(
subject
for subject in rolebinding.subjects
if subject.kind == "ServiceAccount" and subject.name == "default"
), "RoleBinding does not contain the expected ServiceAccount subject"

async def test_create_with_svc_annotations(
self, faker, namespace, kopf_runner, api_client
):
Expand Down Expand Up @@ -1586,3 +1639,11 @@ def test_get_cluster_resource_limits(node_spec, expected_limits_cpu):
get_cluster_resource_limits(node_spec, resource_type="cpu", fallback_key="cpus")
== expected_limits_cpu
)


@pytest.mark.asyncio
async def test_download_dc_util():
url = f"{DCUTIL_BASE_URL}/{DCUTIL_BINARY}"
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
assert response.status == 200, f"Expected status 200, got {response.status}"
Loading

0 comments on commit a71cfa7

Please sign in to comment.