Skip to content

Commit

Permalink
Implement lifecycle events.
Browse files Browse the repository at this point in the history
  • Loading branch information
ruivieira committed Nov 3, 2023
1 parent 1967f28 commit 837d1f2
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 26 deletions.
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# - use environment variables to overwrite this value (e.g export VERSION=0.0.2)
VERSION ?= 0.0.1

BUILD_TOOL ?= podman

# CHANNELS define the bundle channels used in the bundle.
# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable")
# To re-generate a bundle for other specific channels without changing the standard setup, you can:
Expand Down Expand Up @@ -120,11 +122,11 @@ run: manifests generate fmt vet ## Run a controller from your host.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
.PHONY: docker-build
docker-build: test ## Build docker image with the manager.
docker build -t ${IMG} .
$(BUILD_TOOL) build -t ${IMG} .

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
docker push ${IMG}
$(BUILD_TOOL) push ${IMG}

# PLATFORMS defines the target platforms for the manager image be build to provide support to multiple
# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
Expand Down Expand Up @@ -212,7 +214,7 @@ bundle: manifests kustomize ## Generate bundle manifests and metadata, then vali

.PHONY: bundle-build
bundle-build: ## Build the bundle image.
docker build -f bundle.Dockerfile -t $(BUNDLE_IMG) .
$BUILD_TOOL build -f bundle.Dockerfile -t $(BUNDLE_IMG) .

.PHONY: bundle-push
bundle-push: ## Push the bundle image.
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- update
- apiGroups:
- ""
resources:
Expand Down
7 changes: 7 additions & 0 deletions controllers/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,10 @@ const (
StatusReasonRouteNotFound = "RouteNotFound"
StatusReasonRouteFound = "RouteFound"
)

// Event reasons
const (
EventReasonPVCCreated = "PVCCreated"
EventReasonInferenceServiceConfigured = "InferenceServiceConfigured"
EventReasonServiceMonitorCreated = "ServiceMonitorCreated"
)
22 changes: 22 additions & 0 deletions controllers/events.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package controllers

import (
trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
)

func (r *TrustyAIServiceReconciler) eventModelMeshConfigured(instance *trustyaiopendatahubiov1alpha1.TrustyAIService) {
r.EventRecorder.Event(instance, corev1.EventTypeNormal, EventReasonInferenceServiceConfigured, "ModelMesh InferenceService configured")
}

func (r *TrustyAIServiceReconciler) eventKServeConfigured(instance *trustyaiopendatahubiov1alpha1.TrustyAIService) {
r.EventRecorder.Event(instance, corev1.EventTypeNormal, EventReasonInferenceServiceConfigured, "KServe InferenceService configured")
}

func (r *TrustyAIServiceReconciler) eventPVCCreated(instance *trustyaiopendatahubiov1alpha1.TrustyAIService) {
r.EventRecorder.Event(instance, corev1.EventTypeNormal, EventReasonPVCCreated, "PVC created")
}

func (r *TrustyAIServiceReconciler) eventLocalServiceMonitorCreated(instance *trustyaiopendatahubiov1alpha1.TrustyAIService) {
r.EventRecorder.Event(instance, corev1.EventTypeNormal, EventReasonServiceMonitorCreated, "Local ServiceMonitor created")
}
12 changes: 10 additions & 2 deletions controllers/finalizers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@ package controllers

import (
"context"
trustyaiopendatahubiov1alpha1 "github.com/trustyai-explainability/trustyai-service-operator/api/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/log"
)

// deleteExternalDependency removes the payload processor from the ModelMesh deployment
func (r *TrustyAIServiceReconciler) deleteExternalDependency(crName, namespace string, ctx context.Context) error {
func (r *TrustyAIServiceReconciler) deleteExternalDependency(crName string, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, namespace string, ctx context.Context) error {
// Call patchEnvVarsByLabelForDeployments with remove set to true
_, err := r.patchEnvVarsByLabelForDeployments(ctx, namespace, modelMeshLabelKey, modelMeshLabelValue, payloadProcessorName, crName, true)
_, err := r.patchEnvVarsByLabelForDeployments(ctx,
instance,
namespace,
modelMeshLabelKey,
modelMeshLabelValue,
payloadProcessorName,
crName,
true)
if err != nil {
log.FromContext(ctx).Error(err, "Could not remove environment variable from ModelMesh Deployment.")
// Do not return the error to avoid finalizer loop if the namespace or other resources are deleted
Expand Down
18 changes: 11 additions & 7 deletions controllers/inference_services.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"strings"
)

func (r *TrustyAIServiceReconciler) patchEnvVarsForDeployments(ctx context.Context, deployments []appsv1.Deployment, envVarName string, url string, remove bool) (bool, error) {
func (r *TrustyAIServiceReconciler) patchEnvVarsForDeployments(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, deployments []appsv1.Deployment, envVarName string, url string, remove bool) (bool, error) {
// Loop over the Deployments
for _, deployment := range deployments {

Expand Down Expand Up @@ -67,6 +67,7 @@ func (r *TrustyAIServiceReconciler) patchEnvVarsForDeployments(ctx context.Conte
log.FromContext(ctx).Error(err, "Could not update Deployment", "Deployment", deployment.Name)
return false, err
}
r.eventModelMeshConfigured(instance)
log.FromContext(ctx).Info("Updating Deployment " + deployment.Name + ", container spec " + deployment.Spec.Template.Spec.Containers[i].Name + ", env var " + envVarName + " to " + url)
}
}
Expand All @@ -75,7 +76,7 @@ func (r *TrustyAIServiceReconciler) patchEnvVarsForDeployments(ctx context.Conte
return true, nil
}

func (r *TrustyAIServiceReconciler) patchEnvVarsByLabelForDeployments(ctx context.Context, namespace string, labelKey string, labelValue string, envVarName string, crName string, remove bool) (bool, error) {
func (r *TrustyAIServiceReconciler) patchEnvVarsByLabelForDeployments(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, namespace string, labelKey string, labelValue string, envVarName string, crName string, remove bool) (bool, error) {
// Get all Deployments for the label
deployments, err := r.GetDeploymentsByLabel(ctx, namespace, labelKey, labelValue)
if err != nil {
Expand All @@ -87,7 +88,7 @@ func (r *TrustyAIServiceReconciler) patchEnvVarsByLabelForDeployments(ctx contex
url := generateServiceURL(crName, namespace) + "/consumer/kserve/v2"

// Patch environment variables for the Deployments
if shouldContinue, err := r.patchEnvVarsForDeployments(ctx, deployments, envVarName, url, remove); err != nil {
if shouldContinue, err := r.patchEnvVarsForDeployments(ctx, instance, deployments, envVarName, url, remove); err != nil {
log.FromContext(ctx).Error(err, "Could not patch environment variables for Deployments.")
return shouldContinue, err
}
Expand Down Expand Up @@ -149,13 +150,13 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context,
annotations := infService.GetAnnotations()
// Check the annotation "serving.kserve.io/deploymentMode: ModelMesh"
if val, ok := annotations["serving.kserve.io/deploymentMode"]; ok && val == "ModelMesh" {
shouldContinue, err := r.patchEnvVarsByLabelForDeployments(ctx, namespace, labelKey, labelValue, envVarName, crName, remove)
shouldContinue, err := r.patchEnvVarsByLabelForDeployments(ctx, instance, namespace, labelKey, labelValue, envVarName, crName, remove)
if err != nil {
log.FromContext(ctx).Error(err, "Could not patch environment variables for ModelMesh deployments.")
return shouldContinue, err
}
} else {
err := r.patchKServe(ctx, infService, namespace, crName, remove)
err := r.patchKServe(ctx, instance, infService, namespace, crName, remove)
if err != nil {
log.FromContext(ctx).Error(err, "Could not path InferenceLogger for KServe deployment.")
return false, err
Expand All @@ -173,7 +174,7 @@ func (r *TrustyAIServiceReconciler) handleInferenceServices(ctx context.Context,
}

// patchKServe adds a TrustyAI service as an InferenceLogger to a KServe InferenceService
func (r *TrustyAIServiceReconciler) patchKServe(ctx context.Context, infService kservev1beta1.InferenceService, namespace string, crName string, remove bool) error {
func (r *TrustyAIServiceReconciler) patchKServe(ctx context.Context, instance *trustyaiopendatahubiov1alpha1.TrustyAIService, infService kservev1beta1.InferenceService, namespace string, crName string, remove bool) error {

url := generateServiceURL(crName, namespace)

Expand Down Expand Up @@ -207,7 +208,10 @@ func (r *TrustyAIServiceReconciler) patchKServe(ctx context.Context, infService
}

// Update the InferenceService
if err := r.Update(ctx, &infService); err != nil {
err := r.Update(ctx, &infService)
if err != nil {
r.eventKServeConfigured(instance)
} else {
return fmt.Errorf("failed to update InferenceService %s/%s: %v", infService.Namespace, infService.Name, err)
}
return nil
Expand Down
2 changes: 2 additions & 0 deletions controllers/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ func (r *TrustyAIServiceReconciler) ensureLocalServiceMonitor(cr *trustyaiopenda
if err != nil {
log.FromContext(ctx).Error(err, "Failed to create local ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
return err
} else {
r.eventLocalServiceMonitorCreated(cr)
}
} else {
log.FromContext(ctx).Error(err, "Failed to get local ServiceMonitor", "ServiceMonitor.Namespace", serviceMonitor.Namespace, "ServiceMonitor.Name", serviceMonitor.Name)
Expand Down
1 change: 0 additions & 1 deletion controllers/statuses.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ func (r *TrustyAIServiceReconciler) updateStatus(ctx context.Context, original *

func UpdateInferenceServiceNotPresent(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) {
saved.SetStatus(StatusTypeInferenceServicesPresent, StatusReasonInferenceServicesNotFound, "InferenceServices not found", v1.ConditionFalse)
saved.Status.Phase = "Not Ready"
saved.Status.Ready = v1.ConditionFalse

}
Expand Down
9 changes: 8 additions & 1 deletion controllers/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ func (r *TrustyAIServiceReconciler) ensurePVC(ctx context.Context, instance *tru
if apierrors.IsNotFound(err) {
log.FromContext(ctx).Info("PVC not found. Creating.")
// The PVC doesn't exist, so we need to create it
return r.createPVC(ctx, instance)

creationErr := r.createPVC(ctx, instance)
if creationErr == nil {
// Creation successful, emit Event
log.FromContext(ctx).Info("Created PVC " + pvcName + ".")
r.eventPVCCreated(instance)
}
return creationErr
}
return err
}
Expand Down
5 changes: 4 additions & 1 deletion controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,13 @@ var _ = BeforeSuite(func() {
})
Expect(err).ToNot(HaveOccurred())

recorder := k8sManager.GetEventRecorderFor("trustyai-service-operator")

err = (&TrustyAIServiceReconciler{
Client: k8sManager.GetClient(),
//Log: ctrl.Log.WithName("controllers").WithName("YourController"),
Scheme: k8sManager.GetScheme(),
Scheme: k8sManager.GetScheme(),
EventRecorder: recorder,
}).SetupWithManager(k8sManager)
Expect(err).NotTo(HaveOccurred())

Expand Down
16 changes: 8 additions & 8 deletions controllers/trustyaiservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
Expand All @@ -44,8 +45,9 @@ var ErrPVCNotReady = goerrors.New("PVC is not ready")
// TrustyAIServiceReconciler reconciles a TrustyAIService object
type TrustyAIServiceReconciler struct {
client.Client
Scheme *runtime.Scheme
Namespace string
Scheme *runtime.Scheme
Namespace string
EventRecorder record.EventRecorder
}

//+kubebuilder:rbac:groups=trustyai.opendatahub.io.trustyai.opendatahub.io,resources=trustyaiservices,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -63,8 +65,9 @@ type TrustyAIServiceReconciler struct {
//+kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=list;watch;get;create;update;patch;delete
// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices,verbs=list;watch;get;update;patch
// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices/finalizers,verbs=list;watch;get;update;patch;delete
//+kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices,verbs=list;watch;get;update;patch
//+kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices/finalizers,verbs=list;watch;get;update;patch;delete
//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch;update

// getCommonLabels returns the service's common labels
func getCommonLabels(serviceName string) map[string]string {
Expand Down Expand Up @@ -100,7 +103,7 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
// CR is being deleted
if containsString(instance.Finalizers, finalizerName) {
// The finalizer is present, so we handle external dependency deletion
if err := r.deleteExternalDependency(req.Name, req.Namespace, ctx); err != nil {
if err := r.deleteExternalDependency(req.Name, instance, req.Namespace, ctx); err != nil {
// If fail to delete the external dependency here, return with error
// so that it can be retried
return RequeueWithErrorMessage(ctx, err, "Failed to delete external dependencies.")
Expand All @@ -123,8 +126,6 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
}

instance.Status.Ready = corev1.ConditionTrue

// CR found, add or update the URL
// Call the function to patch environment variables for Deployments that match the label
shouldContinue, err := r.handleInferenceServices(ctx, instance, req.Namespace, modelMeshLabelKey, modelMeshLabelValue, payloadProcessorName, req.Name, false)
Expand Down Expand Up @@ -205,7 +206,6 @@ func (r *TrustyAIServiceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
return RequeueWithErrorMessage(ctx, err, "Failed to get or create Route")
}

_, updateErr := r.updateStatus(ctx, instance, func(saved *trustyaiopendatahubiov1alpha1.TrustyAIService) {
// Set Route has available
UpdateRouteAvailable(saved)
Expand Down
9 changes: 6 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,18 @@ func main() {
os.Exit(1)
}

recorder := mgr.GetEventRecorderFor("trustyai-service-operator")

ns, err := controllers.GetNamespace()
if err != nil {
setupLog.Error(err, "unable to operator's namespace")
}

if err = (&controllers.TrustyAIServiceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Namespace: ns,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Namespace: ns,
EventRecorder: recorder,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "TrustyAIService")
os.Exit(1)
Expand Down

0 comments on commit 837d1f2

Please sign in to comment.