diff --git a/scripts/airgapped/get-all-images.sh b/scripts/airgapped/get-all-images.sh
index d9c3ed97..56145b7f 100755
--- a/scripts/airgapped/get-all-images.sh
+++ b/scripts/airgapped/get-all-images.sh
@@ -46,6 +46,9 @@ rm -rf resource-dispatcher
 # manually retrieve pipelines runner image to test pipelines
 IMAGES+=($(echo "charmedkubeflow/pipelines-runner:ckf-1.8"))
 
+# manually retrieve katib experiment image
+IMAGES+=($(echo "docker.io/kubeflowkatib/simple-pbt:v0.16.0"))
+
 # manually retrieve helloworld image to test knative
 IMAGES+=($(echo "ghcr.io/knative/helloworld-go:latest"))
 
diff --git a/tests/airgapped/katib/README.md b/tests/airgapped/katib/README.md
new file mode 100644
index 00000000..1f16b514
--- /dev/null
+++ b/tests/airgapped/katib/README.md
@@ -0,0 +1,22 @@
+# Testing Katib in an airgapped environment
+
+This directory is dedicated to testing Katib in an airgapped environment.
+
+## Prerequisites
+
+Prepare the airgapped environment and deploy CKF by following the steps in [Airgapped test scripts](https://github.com/canonical/bundle-kubeflow/tree/main/tests/airgapped#testing-airgapped-installation).
+
+Once you run the test scripts, the `kubeflowkatib/simple-pbt:v0.16.0` image used in the `simple-pbt` experiment will be included in your airgapped environment. It is added explicitly by the [`get-all-images.sh` script](../../../scripts/airgapped/get-all-images.sh).
+
+## How to test Katib in an airgapped environment
+1. Connect to the dashboard by visiting the IP of your airgapped VM. To get the IP, run:
+   ```bash
+   lxc ls | grep eth0
+   ```
+   Look for the IP of the `airgapped-microk8s` instance.
+
+2. Log in to the dashboard and create a Profile.
+3. Go to the `Experiments (AutoML)` tab in the dashboard sidebar.
+4. Click `New Experiment`, then `Edit and submit YAML`.
+5. Paste the contents of the [`simple-pbt.yaml`](./simple-pbt.yaml) file found in this directory.
+6. Create the Experiment and monitor its status until it reaches `Succeeded`.
\ No newline at end of file
diff --git a/tests/airgapped/katib/simple-pbt.yaml b/tests/airgapped/katib/simple-pbt.yaml
new file mode 100644
index 00000000..61e90e13
--- /dev/null
+++ b/tests/airgapped/katib/simple-pbt.yaml
@@ -0,0 +1,64 @@
+# Source: katib/examples/v1beta1/hp-tuning/simple-pbt.yaml
+# This example is slightly modified from upstream to consume fewer resources,
+# disable the istio sidecar, and pull the image from the 172.17.0.2:5000 registry.
+# There's a `modified` comment where we diverge from upstream.
+# When updating this file, make sure to keep those modifications.
+---
+apiVersion: kubeflow.org/v1beta1
+kind: Experiment
+metadata:
+  name: simple-pbt
+spec:
+  maxTrialCount: 1 # modified
+  parallelTrialCount: 1 # modified
+  maxFailedTrialCount: 1 # modified
+  resumePolicy: FromVolume
+  objective:
+    type: maximize
+    goal: 0.99
+    objectiveMetricName: Validation-accuracy
+  algorithm:
+    algorithmName: pbt
+    algorithmSettings:
+      - name: suggestion_trial_dir
+        value: /var/log/katib/checkpoints/
+      - name: n_population
+        value: '40'
+      - name: truncation_threshold
+        value: '0.2'
+  parameters:
+    - name: lr
+      parameterType: double
+      feasibleSpace:
+        min: '0.0001'
+        max: '0.02'
+        step: '0.0001'
+  trialTemplate:
+    primaryContainerName: training-container
+    trialParameters:
+      - name: learningRate
+        description: Learning rate for training the model
+        reference: lr
+    trialSpec:
+      apiVersion: batch/v1
+      kind: Job
+      spec:
+        template:
+          spec:
+            containers:
+              - name: training-container
+                image: 172.17.0.2:5000/kubeflowkatib/simple-pbt:v0.16.0 # modified: registry host replaces docker.io (presumably the airgapped registry)
+                command:
+                  - "python3"
+                  - "/opt/pbt/pbt_test.py"
+                  - "--epochs=20"
+                  - "--lr=${trialParameters.learningRate}"
+                  - "--checkpoint=/var/log/katib/checkpoints/"
+                resources: # modified
+                  limits: # modified
+                    memory: "2Gi" # modified
+                    cpu: "1" # modified
+            restartPolicy: Never
+          metadata: # modified
+            annotations: # modified
+              sidecar.istio.io/inject: "false" # modified