diff --git a/api/v1alpha1/minicluster_types.go b/api/v1alpha1/minicluster_types.go
index 077dde25..607844e3 100644
--- a/api/v1alpha1/minicluster_types.go
+++ b/api/v1alpha1/minicluster_types.go
@@ -410,8 +410,17 @@ type Bursting struct {
 	//+optional
 	LeadBroker FluxBroker `json:"leadBroker"`
 
+	// Hostlist is a custom hostlist for the broker.toml
+	// that includes the local plus bursted cluster. This
+	// is typically used for bursting to another resource
+	// type, where we can predict the hostnames but they
+	// don't follow the same convention as the Flux Operator
+	//+optional
+	Hostlist string `json:"hostlist"`
+
 	// External clusters to burst to. Each external
 	// cluster must share the same listing to align ranks
+	//+optional
 	Clusters []BurstedCluster `json:"clusters"`
 }
 
@@ -770,6 +779,12 @@ func (f *MiniCluster) Validate() bool {
 		f.Spec.Flux.Bursting.LeadBroker.Port = 8050
 	}
 
+	// A custom hostlist and an explicit list of bursted clusters are mutually exclusive
+	if f.Spec.Flux.Bursting.Hostlist != "" && len(f.Spec.Flux.Bursting.Clusters) > 0 {
+		fmt.Printf("😥️ A custom hostlist cannot be combined with a list of bursted clusters - choose one or the other!\n")
+		return false
+	}
+
 	// Set default port if unset
 	for b, bursted := range f.Spec.Flux.Bursting.Clusters {
diff --git a/api/v1alpha1/swagger.json b/api/v1alpha1/swagger.json
index 5df7ebb8..437d87bb 100644
--- a/api/v1alpha1/swagger.json
+++ b/api/v1alpha1/swagger.json
@@ -25,9 +25,6 @@
     "Bursting": {
       "description": "Bursting Config For simplicity, we internally handle the name of the job (hostnames)",
       "type": "object",
-      "required": [
-        "clusters"
-      ],
       "properties": {
         "clusters": {
           "description": "External clusters to burst to. Each external cluster must share the same listing to align ranks",
           "type": "array",
           "items": {
             "default": {},
             "$ref": "#/definitions/BurstedCluster"
           }
         },
+        "hostlist": {
+          "description": "Hostlist is a custom hostlist for the broker.toml that includes the local plus bursted cluster. This is typically used for bursting to another resource type, where we can predict the hostnames but they don't follow the same convention as the Flux Operator",
+          "type": "string",
+          "default": ""
+        },
         "leadBroker": {
           "description": "The lead broker ip address to join to. E.g., if we burst to cluster 2, this is the address to connect to cluster 1 For the first cluster, this should not be defined",
           "default": {},
diff --git a/api/v1alpha1/zz_generated.openapi.go b/api/v1alpha1/zz_generated.openapi.go
index 6845e53d..f7fbcf30 100644
--- a/api/v1alpha1/zz_generated.openapi.go
+++ b/api/v1alpha1/zz_generated.openapi.go
@@ -92,6 +92,14 @@ func schema__api_v1alpha1__Bursting(ref common.ReferenceCallback) common.OpenAPI
 					Ref: ref("./api/v1alpha1/.FluxBroker"),
 				},
 			},
+			"hostlist": {
+				SchemaProps: spec.SchemaProps{
+					Description: "Hostlist is a custom hostlist for the broker.toml that includes the local plus bursted cluster. This is typically used for bursting to another resource type, where we can predict the hostnames but they don't follow the same convention as the Flux Operator",
+					Default:     "",
+					Type:        []string{"string"},
+					Format:      "",
+				},
+			},
 			"clusters": {
 				SchemaProps: spec.SchemaProps{
 					Description: "External clusters to burst to. Each external cluster must share the same listing to align ranks",
@@ -107,7 +115,6 @@ func schema__api_v1alpha1__Bursting(ref common.ReferenceCallback) common.OpenAPI
 				},
 			},
 		},
-		Required: []string{"clusters"},
 	},
 },
 Dependencies: []string{
diff --git a/chart/templates/minicluster-crd.yaml b/chart/templates/minicluster-crd.yaml
index 607612df..b38ab798 100644
--- a/chart/templates/minicluster-crd.yaml
+++ b/chart/templates/minicluster-crd.yaml
@@ -302,6 +302,13 @@ spec:
                     type: integer
                 type: object
               type: array
+            hostlist:
+              description: Hostlist is a custom hostlist for the broker.toml
+                that includes the local plus bursted cluster. This is typically
+                used for bursting to another resource type, where we can predict
+                the hostnames but they don't follow the same convention as
+                the Flux Operator
+              type: string
             leadBroker:
               description: The lead broker ip address to join to. E.g., if we
                 burst to cluster 2, this is the address to connect to cluster
@@ -329,8 +336,6 @@ spec:
               - name
               - size
             type: object
-          required:
-          - clusters
           type: object
         connectTimeout:
           default: 5s
diff --git a/config/crd/bases/flux-framework.org_miniclusters.yaml b/config/crd/bases/flux-framework.org_miniclusters.yaml
index 8acd7d68..ea0a10eb 100644
--- a/config/crd/bases/flux-framework.org_miniclusters.yaml
+++ b/config/crd/bases/flux-framework.org_miniclusters.yaml
@@ -304,6 +304,13 @@ spec:
                       type: integer
                   type: object
                 type: array
+              hostlist:
+                description: Hostlist is a custom hostlist for the broker.toml
+                  that includes the local plus bursted cluster. This is typically
+                  used for bursting to another resource type, where we can
+                  predict the hostnames but they don't follow the same convention
+                  as the Flux Operator
+                type: string
               leadBroker:
                 description: The lead broker ip address to join to. E.g.,
                   if we burst to cluster 2, this is the address to connect
@@ -331,8 +338,6 @@ spec:
                 - name
                 - size
               type: object
-            required:
-            - clusters
             type: object
           connectTimeout:
             default: 5s
diff --git a/controllers/flux/minicluster.go b/controllers/flux/minicluster.go
index 14e1773b..9f3f8fb1 100644
--- a/controllers/flux/minicluster.go
+++ b/controllers/flux/minicluster.go
@@ -398,9 +398,17 @@ func (r *MiniClusterReconciler) getConfigMap(
 
 // generateHostlist for a specific size given the cluster namespace and a size
 func generateHostlist(cluster *api.MiniCluster, size int32) string {
 
-	// If we don't have a leadbroker address, we are at the root
 	var hosts string
-	if cluster.Spec.Flux.Bursting.LeadBroker.Address == "" {
+	if cluster.Spec.Flux.Bursting.Hostlist != "" {
+
+		// In case 1, we are given a custom hostlist. This is usually the
+		// case when we are bursting to a different resource type, where
+		// the hostnames are not predictable.
+		hosts = cluster.Spec.Flux.Bursting.Hostlist
+
+	} else if cluster.Spec.Flux.Bursting.LeadBroker.Address == "" {
+
+		// If we don't have a leadbroker address, we are at the root
 		hosts = fmt.Sprintf("%s-[%s]", cluster.Name, generateRange(size, 0))
 
 	} else {
@@ -419,12 +427,15 @@ func generateHostlist(cluster *api.MiniCluster, size int32) string {
 		)
 	}
 
-	// Now regardless of where we are, we add the bursted jobs in the same order.
+	// For cases where the Flux Operator determines the hostlist, we need to
+	// add the bursted jobs in the same order.
 	// Any cluster with bursting must share all the bursted hosts across clusters
 	// This ensures that the ranks line up
-	for _, bursted := range cluster.Spec.Flux.Bursting.Clusters {
-		burstedHosts := fmt.Sprintf("%s-[%s]", bursted.Name, generateRange(bursted.Size, 0))
-		hosts = fmt.Sprintf("%s,%s", hosts, burstedHosts)
+	if cluster.Spec.Flux.Bursting.Hostlist == "" {
+		for _, bursted := range cluster.Spec.Flux.Bursting.Clusters {
+			burstedHosts := fmt.Sprintf("%s-[%s]", bursted.Name, generateRange(bursted.Size, 0))
+			hosts = fmt.Sprintf("%s,%s", hosts, burstedHosts)
+		}
 	}
 	return hosts
 }
diff --git a/docs/getting_started/custom-resource-definition.md b/docs/getting_started/custom-resource-definition.md
index faff686d..c6cc5ebc 100644
--- a/docs/getting_started/custom-resource-definition.md
+++ b/docs/getting_started/custom-resource-definition.md
@@ -536,8 +536,27 @@
 Using the above, both the main and bursted to cluster will have almost the same
 and broker.toml (config). The main difference will be that the bursted cluster knows
 about the first one via it's ip address or hostname, and not, for example `flux-sample-0`.
 Also note that when bursting, you don't explicitly give a command to the bursted cluster - the
 jobs are launched on the main cluster and sent
-to these external resources when they come up and are available (and needed). For a full example,
-see [the bursting](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting)
+to these external resources when they come up and are available (and needed).
+
+Finally, for advanced bursting cases where the pattern of hostnames does not match the convention
+deployed by the Flux Operator, we allow the CRD to define a custom hostlist. As an example, here is how
+we might burst to Compute Engine:
+
+```yaml
+  flux:
+    bursting:
+      leadBroker:
+        # This is the name of the first minicluster.yaml spec
+        name: flux-sample
+        # In a cloud environment this would be a NodePort
+        address: 24.123.50.123
+        port: 30093
+      hostlist: "flux-sample-[0-3],gffw-compute-a-[001-003]"
+```
+
+In the above case, the `clusters` list is not used. The bursting plugin you choose determines
+how the hostnames and lead broker address are provided to the remote (second) cluster.
+
+For full examples, see [the bursting](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting)
 examples directory.
diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
index bcf6d00a..c6ed2b0b 100644
--- a/docs/tutorials/index.md
+++ b/docs/tutorials/index.md
@@ -26,6 +26,7 @@ The following tutorials are provided from their respective directories (and are
 - [Bursting to GKE](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting/broker-gke) from a local broker to an external Google Kubernetes Engine cluster.
 - [Bursting to EKS](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting/broker-eks) from a local broker to an external Amazon Elastic Kubernetes Service
+ - [Bursting to Compute Engine](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting/broker-compute-engine) from a GKE broker to an external Compute Engine cluster.
 - [Bursting (nginx service)](https://github.com/flux-framework/flux-operator/tree/main/examples/experimental/bursting/nginx) design to use central router for bursting.
#### Nested
diff --git a/examples/dist/flux-operator-arm.yaml b/examples/dist/flux-operator-arm.yaml
index 552ff3c8..3a38cfbd 100644
--- a/examples/dist/flux-operator-arm.yaml
+++ b/examples/dist/flux-operator-arm.yaml
@@ -310,6 +310,13 @@ spec:
                       type: integer
                   type: object
                 type: array
+              hostlist:
+                description: Hostlist is a custom hostlist for the broker.toml
+                  that includes the local plus bursted cluster. This is typically
+                  used for bursting to another resource type, where we can
+                  predict the hostnames but they don't follow the same convention
+                  as the Flux Operator
+                type: string
               leadBroker:
                 description: The lead broker ip address to join to. E.g.,
                   if we burst to cluster 2, this is the address to connect
@@ -337,8 +344,6 @@ spec:
                 - name
                 - size
               type: object
-            required:
-            - clusters
             type: object
           connectTimeout:
             default: 5s
diff --git a/examples/dist/flux-operator.yaml b/examples/dist/flux-operator.yaml
index 51e380f9..f6d50b1e 100644
--- a/examples/dist/flux-operator.yaml
+++ b/examples/dist/flux-operator.yaml
@@ -310,6 +310,13 @@ spec:
                       type: integer
                   type: object
                 type: array
+              hostlist:
+                description: Hostlist is a custom hostlist for the broker.toml
+                  that includes the local plus bursted cluster. This is typically
+                  used for bursting to another resource type, where we can
+                  predict the hostnames but they don't follow the same convention
+                  as the Flux Operator
+                type: string
               leadBroker:
                 description: The lead broker ip address to join to. E.g.,
                   if we burst to cluster 2, this is the address to connect
@@ -337,8 +344,6 @@ spec:
                 - name
                 - size
               type: object
-            required:
-            - clusters
             type: object
           connectTimeout:
             default: 5s
diff --git a/examples/experimental/bursting/broker-compute-engine/README.md b/examples/experimental/bursting/broker-compute-engine/README.md
new file mode 100755
index 00000000..4d2782f4
--- /dev/null
+++ b/examples/experimental/bursting/broker-compute-engine/README.md
@@ -0,0 +1,348 @@
# Bursting Experiment to Compute Engine

> Experimental setup to burst to Google Cloud Compute Engine

This setup will expose a lead broker (index 0 of the MiniCluster job) as a service,
and then deploy a second cluster that can connect back to the first. However, unlike
the other examples that burst to another Kubernetes cluster, this burst goes to Compute
Engine, and is driven by [flux-burst-compute-engine](https://github.com/converged-computing/flux-burst-compute-engine).
For the overall design, see the top level [README](../README.md). In summary, for this setup:

1. The main cluster will be run on GKE
2. The bursted cluster will be on Compute Engine

This is a more complex setup because it requires not just the terraform configs provided by
flux-burst-compute-engine, but also the image built from [flux-terraform-gcp](https://github.com/converged-computing/flux-terraform-gcp/tree/main/build-images/bursted)
(the repository that hosts the terraform modules).

### What should be the same?

During this setup, we learned that the following must be the same (or be available) for the burst to fully work.
 - The flux user id must match between the two instances (e.g., here we use 1004, built into the VMs and set here)
 - The flux lib directory (e.g., `/usr/lib` and `/usr/lib64`) should match (you'll probably be OK installing on the same OS with the same method)
 - The flux install location should be the same (e.g., mixing `/usr` and `/usr/local` will give an error)


## Build Machine Image with Flux

You can prepare that image as follows:

```bash
git clone https://github.com/converged-computing/flux-terraform-gcp
cd build-images/basic
make bursted
```

Unlike the "basic" setup in that same repository, this is one simple image that includes Flux,
and expects customization to happen via the startup script. This allows for fewer images to maintain,
and less chance of needing to update the base image build.

## Credentials

Since we are interacting with Google from within the MiniCluster, you need to have your default application credentials
shared there. This can probably be scoped to a service account, but for now we are being lazy. You must ABSOLUTELY
be sure you don't add these to git.

```bash
cp $HOME/.config/gcloud/application_default_credentials.json .
```

**DO NOT DO THIS FOR ANYTHING OTHER THAN DEVELOPMENT OR TESTING.**

## Google Cloud Setup

The main cluster runs on GKE, and the bursted Compute Engine cluster will need a public address to
connect back to it. Let's create the main cluster:

```bash
CLUSTER_NAME=flux-cluster
GOOGLE_PROJECT=myproject
```
```bash
$ gcloud container clusters create ${CLUSTER_NAME} --project $GOOGLE_PROJECT \
    --zone us-central1-a --machine-type n2-standard-4 \
    --num-nodes=4 --enable-network-policy --tags=flux-cluster --enable-intra-node-visibility
```

And be sure to activate your credentials!

```bash
$ gcloud container clusters get-credentials ${CLUSTER_NAME}
```

Create the namespace, install the operator (assuming you are using a development version), and create the MiniCluster.
Note that if you aren't using a development version, you can apply `flux-operator.yaml` instead.

```bash
kubectl apply -f ../../../dist/flux-operator-dev.yaml
kubectl create namespace flux-operator
kubectl apply -f minicluster.yaml
# Expose broker pod port 8050 to 30093
kubectl apply -f service/broker-service.yaml
```

We need to open up the firewall to that port - this creates the rule (you only need to do this once):

```bash
gcloud compute firewall-rules create flux-cluster-test-node-port --allow tcp:30093
```

Then figure out which node the service is running from (we are interested in the lead broker, flux-sample-0-*):

```bash
$ kubectl get pods -o wide -n flux-operator
NAME                   READY   STATUS    RESTARTS   AGE     IP           NODE                                          NOMINATED NODE   READINESS GATES
flux-sample-0-kktl7    1/1     Running   0          7m22s   10.116.2.4   gke-flux-cluster-default-pool-4dea9d5c-0b0d
flux-sample-1-s7r69    1/1     Running   0          7m22s   10.116.1.4   gke-flux-cluster-default-pool-4dea9d5c-1h6q
flux-sample-services   1/1     Running   0          7m22s   10.116.0.4   gke-flux-cluster-default-pool-4dea9d5c-lc1h
```

Then (using that node name) get the external ip address of the node running the lead broker:

```bash
$ kubectl get nodes -o wide | grep gke-flux-cluster-default-pool-4dea9d5c-0b0d
gke-flux-cluster-default-pool-4dea9d5c-0b0d   Ready   69m   v1.25.8-gke.500   10.128.0.83   34.171.113.254   Container-Optimized OS from Google   5.15.89+   containerd://1.6.18
```

Take note of this ip address for later - we will need it for running the bursting script.
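If you prefer to script this lookup instead of eyeballing kubectl output, here is a minimal sketch using
the kubernetes Python client (an assumption on our part - it is not part of this example, though the
MiniCluster does install `flux-burst[kubernetes]`):

```python
# Sketch: resolve the lead broker node's external IP. Assumes the kubernetes
# Python client is installed and your kubeconfig points at the GKE cluster.
from kubernetes import client, config

config.load_kube_config()
core = client.CoreV1Api()

# The lead broker pod carries the job-index=0 label (the same label the
# NodePort service in service/broker-service.yaml selects on)
pods = core.list_namespaced_pod("flux-operator", label_selector="job-index=0")
node_name = pods.items[0].spec.node_name

# The node's ExternalIP is the LEAD_HOST the bursted cluster connects back to
node = core.read_node(node_name)
external = [a.address for a in node.status.addresses if a.type == "ExternalIP"]
print(f"Lead broker node {node_name} has external address {external[0]}")
```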
Finally, when the broker index 0 pod is running, copy your scripts and configs over to it:

```bash
# This should be the index 0
POD=$(kubectl get pods -n flux-operator -o json | jq -r .items[0].metadata.name)

# This will copy configs / create directories for it
kubectl cp -n flux-operator ./run-burst.py ${POD}:/tmp/workflow/run-burst.py -c flux-sample
kubectl cp -n flux-operator ./application_default_credentials.json ${POD}:/tmp/workflow/application_default_credentials.json -c flux-sample
kubectl exec -it -n flux-operator ${POD} -- mkdir -p /tmp/workflow/external-config
kubectl cp -n flux-operator ../../../dist/flux-operator-dev.yaml ${POD}:/tmp/workflow/external-config/flux-operator-dev.yaml -c flux-sample
```

## Burstable Job

Now let's create a job that cannot be run because we don't have the resources. The `flux-burst` Python module, using its simple
default, will just look for jobs with `burstable=True` and then look for a place to assign them to burst. Since this is a plugin
framework, in the future we can implement more intelligent algorithms for filtering the queue (e.g., "Which jobs need bursting?")
and for determining whether a burst can be scheduled by a given burst plugin (e.g., GKE). For this simple setup and example,
we ensure the job doesn't run locally because we've asked for more nodes than we have. Shell into your broker pod:

```bash
$ kubectl exec -it -n flux-operator ${POD} bash
```

Connect to the broker socket. If this issues an error, it's likely that the install scripts are still running (you can check
the logs and wait a minute!):

```bash
$ sudo -u flux -E $(env) -E HOME=/home/flux flux proxy local:///run/flux/local bash
```

The libraries we need should be installed via the minicluster.yaml.
You might want to add others for development (e.g., IPython).
What resources do we have available?

```bash
$ flux resource list
     STATE NNODES   NCORES NODELIST
      free      2        8 flux-sample-[0-1]
 allocated      0        0
      down      6       24 flux-sample-[2-3],gffw-compute-a-[001-003]
```

The above shows us that the broker running here can accept the burstable resources (`gffw-compute-a-[001-003]`), and even
can accept the local cluster expanding (`flux-sample-[2-3]`), for a total of 24 additional cores. Because we provided a
custom hostlist, the bursted hosts keep the Compute Engine naming convention instead of a `burst-0-*` prefix generated by
the Flux Operator. And now let's create a burstable job, and ask for more nodes than we have :)

```bash
# Set burstable=1
# this will be for 4 nodes, 8 cores each
$ flux submit -N 4 --cwd /tmp --setattr=burstable hostname
```

You should see that it's scheduled (but not running). Note that if we asked for resources totally unknown
to the cluster (e.g., 4 nodes and 32 tasks) it would just fail. Because of this,
our "mark as burstable" method will need a way to tell Flux not to fail in this case.
Also note that once the job is assigned to a plugin to be bursted, it will lose the burstable attribute
(and not be able to be scheduled again).
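For completeness, the same burstable submission can be done from the Flux Python bindings instead of the
command line - a minimal sketch (the `system.` attribute path mirrors what `--setattr=burstable` does on
recent flux-core, so treat the exact path as an assumption to verify on your version):

```python
# Sketch: submit a burstable job via the Flux Python bindings.
# Run inside the broker pod, within the flux proxy environment.
import flux
from flux.job import JobspecV1, submit

handle = flux.Flux()

# Equivalent of: flux submit -N 4 --cwd /tmp --setattr=burstable hostname
jobspec = JobspecV1.from_command(command=["hostname"], num_nodes=4, num_tasks=4)
jobspec.cwd = "/tmp"
jobspec.setattr("system.burstable", 1)  # the attribute flux-burst filters on

print(f"Submitted burstable job {submit(handle, jobspec)}")
```

Either way, the job lands in the queue in the same state.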
You can see it is scheduled and waiting for resources:

```bash
$ flux jobs -a
       JOBID USER     NAME       ST NTASKS NNODES     TIME INFO
   ƒQURAmBXV fluxuser hostname    S      8      8        -
```
```bash
$ flux job attach $(flux job last)
flux-job: ƒQURAmBXV waiting for resources
```

Make a copy of the munge key that we can read - it's owned by root, so this ensures we can access it as the flux user:

```bash
sudo cp /etc/munge/munge.key ./munge.key
sudo chown $USER munge.key
```

Now we can run our script to find the jobs based on this attribute!

```bash
# Our Google Project name
GOOGLE_PROJECT=myproject

# This is the address of the lead host we discovered above
LEAD_HOST="35.202.211.23"

# Note that the lead host will be added here as a prefix
hostnames="flux-sample-[1-3],gffw-compute-a-[001-003]"

# This is the node port we've exposed on the cluster
LEAD_PORT=30093
python3 run-burst.py --project ${GOOGLE_PROJECT} \
    --lead-host ${LEAD_HOST} --lead-port ${LEAD_PORT} --lead-hostnames ${hostnames} \
    --munge-key ./munge.key --curve-cert /mnt/curve/curve.cert
```

When you do the above you'll see the terraform configs apply, and the second Flux cluster will be launched when they finish.
You'll then be prompted to press ENTER when you want to destroy the burst. This is when you can open another terminal
to see the outcome. Here is how to shell into the cluster from another terminal:

```bash
$ POD=$(kubectl get pods -n flux-operator -o json | jq -r .items[0].metadata.name)
$ kubectl exec -it -n flux-operator ${POD} bash
$ sudo -u flux -E $(env) -E HOME=/home/flux flux proxy local:///run/flux/local bash
```

Resources are now allocated:

```bash
flux@flux-sample-0:/tmp/workflow$ flux resource list
     STATE NNODES   NCORES NODELIST
      free      5       10 flux-sample-[0-1],gffw-compute-a-[001-003]
 allocated      0        0
      down      2        4 flux-sample-[2-3]
```
Our job has run:

```bash
$ flux jobs -a
```
```console
       JOBID USER     NAME       ST NTASKS NNODES     TIME INFO
    ƒCJTuUPR flux     hostname   CD      4      4   0.623s flux-sample-1,gffw-compute-a-[001-003]
```
And we can see the result with hostnames from both the local and bursted cluster.
Note that we get an error about resources (I think) because we haven't done any work to ensure they are correct.
This is probably OK for now, and we will need to tweak further to allow the template to include them.

```bash
flux@flux-sample-0:/tmp/workflow$ flux job attach ƒCJTuUPR
```
```console
1352.660s: flux-shell[3]: WARN: rlimit: nofile exceeds current max, raising value to hard limit
1352.660s: flux-shell[3]: WARN: rlimit: nproc exceeds current max, raising value to hard limit
1352.634s: flux-shell[1]: WARN: rlimit: nofile exceeds current max, raising value to hard limit
1352.635s: flux-shell[1]: WARN: rlimit: nproc exceeds current max, raising value to hard limit
1352.627s: flux-shell[2]: WARN: rlimit: nofile exceeds current max, raising value to hard limit
1352.627s: flux-shell[2]: WARN: rlimit: nproc exceeds current max, raising value to hard limit
flux-sample-1
gffw-compute-a-003
gffw-compute-a-002
gffw-compute-a-001
```

You can also launch a new job to see the output interactively:

```bash
flux@flux-sample-0:/tmp/workflow$ flux run -N 6 --cwd /tmp hostname
...
flux-sample-0
gffw-compute-a-002
gffw-compute-a-003
gffw-compute-a-001
```

Note that without `--cwd` you would see an error that the bursted cluster can't cd to `/tmp/workflow` (it doesn't exist there).
And that's bursting! And WOW did we learn a lot by using different operating systems!
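Because ranks are assigned by position in the hostlist, a quick way to reason about rank alignment across
the two clusters is to expand the bracket ranges yourself. Here is a small self-contained sketch (plain
Python, no flux dependency - the expansion logic here is ours, not the operator's):

```python
# Sketch: expand a bracketed hostlist to see which rank each host should claim
import re

def expand(hostlist):
    """Expand entries like name-[0-3] or name-[001-003], preserving padding."""
    hosts = []
    for part in hostlist.split(","):
        match = re.match(r"(.*)\[(\d+)-(\d+)\]$", part)
        if not match:
            hosts.append(part)
            continue
        prefix, lo, hi = match.groups()
        width = len(lo)  # keeps zero padding such as 001
        hosts.extend(f"{prefix}{i:0{width}d}" for i in range(int(lo), int(hi) + 1))
    return hosts

for rank, host in enumerate(expand("flux-sample-[0-3],gffw-compute-a-[001-003]")):
    print(rank, host)
# ranks 0-3 -> flux-sample-0..3, ranks 4-6 -> gffw-compute-a-001..003
```

Every cluster participating in the burst must see the bursted hosts in the same order, or the ranks will not line up.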
## Debugging

### Checking Startup Script

I had to debug these quite a bit, and I found the following helpful. First, shell into a compute node
(you can usually copy paste this from the Google Cloud console):

```bash
gcloud compute ssh --zone "us-central1-a" "gffw-compute-a-003" --tunnel-through-iap --project "${GOOGLE_PROJECT}"
```

And then to see if the startup script ran:

```bash
# Get logs from journalctl
sudo journalctl -u google-startup-scripts.service

# Try running it again
sudo google_metadata_script_runner startup
```

I would put an IPython.embed right after the unmatched line in the [run-burst.py](run-burst.py)
script, and then you can get the plugin and print the content of the startup script to debug:

```python
plugin = client.plugins["compute_engine"]
print(plugin.params.compute_boot_script)
```

### Checking Service

On the instance, you can check the status of the service (and see where that script lives, too):

```bash
$ sudo systemctl status flux-start.service
```
```console
● flux-start.service - Flux message broker
   Loaded: loaded (/etc/systemd/system/flux-start.service; enabled; vendor preset: disabled)
   Active: active (running) since Mon 2023-07-10 00:13:30 UTC; 34s ago
 Main PID: 5050 (flux-broker-6)
    Tasks: 10 (limit: 100606)
   Memory: 58.2M
   CGroup: /system.slice/flux-start.service
           └─5050 broker --config-path /usr/etc/flux/system/conf.d -Scron.directory=/usr/etc/flux/system/conf.d -Stbon.fanout=256 -Srundir=/run/flux -Sbroker.rc2_no>

Jul 10 00:13:31 gffw-compute-a-003 flux[5050]: broker.debug[6]: insmod resource
Jul 10 00:13:31 gffw-compute-a-003 flux[5050]: broker.debug[6]: insmod job-info
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.debug[6]: insmod job-ingest
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: job-ingest.debug[6]: configuring validator with plugins=(null), args=(null) (enabled)
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: job-ingest.debug[6]: fluid ts=1421183ms
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.info[6]: rc1.0: running /etc/flux/rc1.d/01-sched-fluxion
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.info[6]: rc1.0: running /etc/flux/rc1.d/02-cron
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.info[6]: rc1.0: /etc/flux/rc1 Exited (rc=0) 0.9s
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.info[6]: rc1-success: init->quorum 0.936237s
Jul 10 00:13:32 gffw-compute-a-003 flux[5050]: broker.info[6]: quorum-full: quorum->run 0.063886ms
```

That should be running as the flux user.

### Accidentally Exit

If you exit the run-burst script and haven't cleaned up, you can do so manually:

 - Delete the instances generated, including compute and the ns node.
 - Then search for cloud routers, and there should be one associated with the foundation network
 - Then under networks, find `foundation-net`
 - Delete the subnet first
 - Then delete the entire VPC

### Cleanup
When you are done, clean up:

```bash
kubectl delete -f minicluster.yaml
kubectl delete -f service/broker-service.yaml
gcloud container clusters delete flux-cluster
```
diff --git a/examples/experimental/bursting/broker-compute-engine/minicluster.yaml b/examples/experimental/bursting/broker-compute-engine/minicluster.yaml
new file mode 100755
index 00000000..6a9cd961
--- /dev/null
+++ b/examples/experimental/bursting/broker-compute-engine/minicluster.yaml
@@ -0,0 +1,62 @@
apiVersion: flux-framework.org/v1alpha1
kind: MiniCluster
metadata:
  name: flux-sample
  namespace: flux-operator
spec:
  size: 2
  # If we don't set this, we won't be able to go above two,
  # and the request for 4 nodes will fail instead of scheduling
  maxSize: 4
  interactive: true

  # This allows us to see zeromq and debug logging
  flux:
    logLevel: 7

    # Declare that this cluster will allow for a bursted cluster.
    # We don't define clusters here because we are providing the hostlist
    # verbatim! Note that this is the hostlist provided for the cluster
    # here, anticipating the burst to Compute Engine. The hostlist on the
    # Compute Engine side will need to be (with the lead host prepended):
    # ",flux-sample-[1-3],gffw-compute-a-[001-003]"
    bursting:
      hostlist: "flux-sample-[0-3],gffw-compute-a-[001-003]"

  logging:
    zeromq: true

  # This is a list because a pod can support multiple containers
  containers:
    - image: ghcr.io/flux-framework/flux-restful-api
      workingDir: /tmp/workflow

      # Commands to support installing flux-burst plugins
      # We also need terraform, which is how the plugin works
      commands:
        pre: |
          pip install flux-burst[kubernetes]
          pip install flux-burst-compute-engine
          apt-get update && apt-get install -y unzip
          wget --quiet https://releases.hashicorp.com/terraform/1.3.9/terraform_1.3.9_linux_amd64.zip
          unzip terraform_1.3.9_linux_amd64.zip
          mv terraform /usr/bin
          rm terraform_1.3.9_linux_amd64.zip

      # This should not be done in production
      environment:
        GOOGLE_APPLICATION_CREDENTIALS: /tmp/workflow/application_default_credentials.json

      # The flux user uid must match the name / uid in the VM image
      fluxUser:
        uid: 1004

      # Important! We need to have resource requests for the horizontal autoscaler
      # The Flux Operator doesn't know you want to use it, so it's up to you
      # to provide these if your metric is about CPU
      resources:
        limits:
          cpu: "1"

        requests:
          cpu: "1"
\ No newline at end of file
diff --git a/examples/experimental/bursting/broker-compute-engine/run-burst.py b/examples/experimental/bursting/broker-compute-engine/run-burst.py
new file mode 100755
index 00000000..2f782af8
--- /dev/null
+++ b/examples/experimental/bursting/broker-compute-engine/run-burst.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python3

import argparse
import os
import sys

# How we provide custom parameters to a flux-burst plugin
from fluxburst_compute_engine.plugin import BurstParameters
from fluxburst.client import FluxBurst

# Save data here
here = os.path.dirname(os.path.abspath(__file__))


def get_parser():
    parser = argparse.ArgumentParser(
        description="Experimental Bursting",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("--project", help="Google Cloud project")
    parser.add_argument(
        "--lead-host",
        help="Lead broker service hostname or ip address",
        dest="lead_host",
    )
    parser.add_argument("--lead-hostnames", help="Custom hostnames for cluster")
    parser.add_argument(
        "--lead-port", help="Lead broker service port", dest="lead_port", default=30093
    )
    parser.add_argument(
        "--munge-key",
        help="Path to munge.key",
    )
    parser.add_argument(
        "--curve-cert", dest="curve_cert", default="/mnt/curve/curve.cert"
    )
    return parser


def main():
    """
    Create an external cluster we can burst to, and optionally resize.
    """
    parser = get_parser()

    # If an error occurs while parsing the arguments, the interpreter will exit with value 2
    args, _ = parser.parse_known_args()
    if not args.project:
        sys.exit("Please define your Google Cloud Project with --project")

    # The lead port, lead host, lead size, and existing munge key should be
    # checked / validated by the plugin

    # Create the dataclass for the plugin config
    # We use a dataclass because it does implicit validation of required params, etc.
    params = BurstParameters(
        project=args.project,
        munge_key=args.munge_key,
        curve_cert=args.curve_cert,
        lead_host=args.lead_host,
        lead_port=args.lead_port,
        lead_hostnames=args.lead_hostnames,
        # This is a single VM that has flux installed
        # and the build from terraform-gcp/basic/bursted
        compute_family="flux-fw-bursted-x86-64",
        terraform_plan_name="burst",
        compute_machine_type="n2-standard-4",
    )

    # Create the flux burst client. This can be passed a flux handle (flux.Flux())
    # and will make one otherwise. Note that by default mock=False
    client = FluxBurst()

    # For debugging, here is a way to see plugins available
    # import fluxburst.plugins as plugins
    # print(plugins.burstable_plugins)
    # e.g., {'gke': <module>, 'compute_engine': <module>}

    # Load our plugin and provide the dataclass to it!
    client.load("compute_engine", params)

    # Sanity check that the plugin loaded
    print(f"flux-burst client is loaded with plugins for: {client.choices}")

    # We are using the default algorithms to filter the job queue and select jobs.
    # If we weren't, we would add them via:
    # client.set_ordering()
    # client.set_selector()

    # Here is how we can see the jobs that are contenders to burst!
    # client.select_jobs()
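    # As an illustration only (hypothetical - check the flux-burst
    # documentation for the exact selector signature), a custom selector is
    # just a callable that receives a candidate job and returns True if this
    # plugin should try to burst it:
    #
    # def select_multi_node_jobs(job):
    #     """Only consider jobs that ask for more than one node."""
    #     return job.get("nnodes", 0) > 1
    #
    # client.set_selector(select_multi_node_jobs)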
    # Now let's run the burst! The active plugins will determine if they
    # are able to schedule a job, and if so, will do the work needed to
    # burst. Unmatched jobs (those we weren't able to schedule) are
    # returned, maybe to do something with?
    unmatched = client.run_burst()

    assert not unmatched

    # Get a handle to the plugin so we can clean up!
    plugin = client.plugins["compute_engine"]
    input("Press Enter when you are ready to destroy the burst...")
    plugin.cleanup()


if __name__ == "__main__":
    main()
diff --git a/examples/experimental/bursting/broker-compute-engine/service/broker-service.yaml b/examples/experimental/bursting/broker-compute-engine/service/broker-service.yaml
new file mode 100755
index 00000000..ab0c3ba7
--- /dev/null
+++ b/examples/experimental/bursting/broker-compute-engine/service/broker-service.yaml
@@ -0,0 +1,12 @@
apiVersion: v1
kind: Service
metadata:
  name: lead-broker-service
  namespace: flux-operator
spec:
  type: NodePort
  ports:
    - port: 8050
      nodePort: 30093
  selector:
    job-index: "0"
diff --git a/sdk/python/v1alpha1/docs/Bursting.md b/sdk/python/v1alpha1/docs/Bursting.md
index ef5af518..744f1301 100644
--- a/sdk/python/v1alpha1/docs/Bursting.md
+++ b/sdk/python/v1alpha1/docs/Bursting.md
@@ -5,7 +5,8 @@ Bursting Config For simplicity, we internally handle the name of the job (hostna
 ## Properties
 Name | Type | Description | Notes
 ------------ | ------------- | ------------- | -------------
-**clusters** | [**list[BurstedCluster]**](BurstedCluster.md) | External clusters to burst to. Each external cluster must share the same listing to align ranks |
+**clusters** | [**list[BurstedCluster]**](BurstedCluster.md) | External clusters to burst to. Each external cluster must share the same listing to align ranks | [optional]
+**hostlist** | **str** | Hostlist is a custom hostlist for the broker.toml that includes the local plus bursted cluster. This is typically used for bursting to another resource type, where we can predict the hostnames but they don't follow the same convention as the Flux Operator | [optional] [default to '']
 **lead_broker** | [**FluxBroker**](FluxBroker.md) |  | [optional]
 
 [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
diff --git a/sdk/python/v1alpha1/fluxoperator/models/bursting.py b/sdk/python/v1alpha1/fluxoperator/models/bursting.py
index 9a332f3f..491e1eb1 100644
--- a/sdk/python/v1alpha1/fluxoperator/models/bursting.py
+++ b/sdk/python/v1alpha1/fluxoperator/models/bursting.py
@@ -34,25 +34,31 @@ class Bursting(object):
     """
     openapi_types = {
         'clusters': 'list[BurstedCluster]',
+        'hostlist': 'str',
         'lead_broker': 'FluxBroker'
     }
 
     attribute_map = {
         'clusters': 'clusters',
+        'hostlist': 'hostlist',
         'lead_broker': 'leadBroker'
     }
 
-    def __init__(self, clusters=None, lead_broker=None, local_vars_configuration=None):  # noqa: E501
+    def __init__(self, clusters=None, hostlist='', lead_broker=None, local_vars_configuration=None):  # noqa: E501
         """Bursting - a model defined in OpenAPI"""  # noqa: E501
         if local_vars_configuration is None:
             local_vars_configuration = Configuration.get_default_copy()
         self.local_vars_configuration = local_vars_configuration
 
         self._clusters = None
+        self._hostlist = None
         self._lead_broker = None
         self.discriminator = None
 
-        self.clusters = clusters
+        if clusters is not None:
+            self.clusters = clusters
+        if hostlist is not None:
+            self.hostlist = hostlist
         if lead_broker is not None:
             self.lead_broker = lead_broker
 
@@ -76,11 +82,32 @@ def clusters(self, clusters):
         """Sets the clusters of this Bursting.
 
         :param clusters: The clusters of this Bursting.  # noqa: E501
         :type clusters: list[BurstedCluster]
         """
-        if self.local_vars_configuration.client_side_validation and clusters is None:  # noqa: E501
-            raise ValueError("Invalid value for `clusters`, must not be `None`")  # noqa: E501
 
         self._clusters = clusters
 
+    @property
+    def hostlist(self):
+        """Gets the hostlist of this Bursting.  # noqa: E501
+
+        Hostlist is a custom hostlist for the broker.toml that includes the local plus bursted cluster. This is typically used for bursting to another resource type, where we can predict the hostnames but they don't follow the same convention as the Flux Operator  # noqa: E501
+
+        :return: The hostlist of this Bursting.  # noqa: E501
+        :rtype: str
+        """
+        return self._hostlist
+
+    @hostlist.setter
+    def hostlist(self, hostlist):
+        """Sets the hostlist of this Bursting.
+
+        Hostlist is a custom hostlist for the broker.toml that includes the local plus bursted cluster. This is typically used for bursting to another resource type, where we can predict the hostnames but they don't follow the same convention as the Flux Operator  # noqa: E501
+
+        :param hostlist: The hostlist of this Bursting.  # noqa: E501
+        :type hostlist: str
+        """
+
+        self._hostlist = hostlist
+
     @property
     def lead_broker(self):
         """Gets the lead_broker of this Bursting.  # noqa: E501