From b285666f8870659d8ddd1e9cb8fe301c72795d75 Mon Sep 17 00:00:00 2001 From: prayeole Date: Fri, 19 Apr 2024 08:45:41 -0700 Subject: [PATCH] Adding Sagemaker pipeline sample (#272) Issue #, if available: Description of changes: Adding a Sagemaker pipeline example for creating and executing a pipeline run. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- samples/pipeline/README.md | 71 ++++++++++++++++++++++++ samples/pipeline/my-pipeline.json | 60 ++++++++++++++++++++ samples/pipeline/pipeline-execution.yaml | 10 ++++ samples/pipeline/pipeline.yaml | 71 ++++++++++++++++++++++++ 4 files changed, 212 insertions(+) create mode 100644 samples/pipeline/README.md create mode 100644 samples/pipeline/my-pipeline.json create mode 100644 samples/pipeline/pipeline-execution.yaml create mode 100644 samples/pipeline/pipeline.yaml diff --git a/samples/pipeline/README.md b/samples/pipeline/README.md new file mode 100644 index 00000000..7753a138 --- /dev/null +++ b/samples/pipeline/README.md @@ -0,0 +1,71 @@ +# Pipeline Sample + +This sample demonstrates how to submit a pipeline to SageMaker for execution using your own JSON pipeline definition, using the AWS Controllers for Kubernetes (ACK) service controller for Amazon SageMaker. + +## Prerequisites + +This sample assumes that you have completed the [common prerequisites](/samples/README.md). + +### Updating the Pipeline Specification + +In the `pipeline.yaml` file, replace the placeholder values with those associated with your account. + +## Submitting your Pipeline Specification + +### Modify/Create a JSON pipeline definition + +Create the JSON pipeline definition using the JSON schema documented at https://aws-sagemaker-mlops.github.io/sagemaker-model-building-pipeline-definition-JSON-schema/. In this sample, you are provided a sample pipeline definition with one Training step. 
+ +There are two ways to modify the *.spec.pipelineDefinition* key in the Kubernetes YAML spec. Choose one: + +Option 1: You can pass the JSON pipeline definition inline as a JSON object. An example of this option is included in the `pipeline.yaml` file. + +Option 2: You can convert your JSON pipeline definition into string format. You may use online third-party tools to convert from JSON to string format. + +### Submit pipeline to SageMaker and start an execution + +To submit your prepared pipeline specification, apply the specification to your Kubernetes cluster as follows: +``` +$ kubectl apply -f pipeline.yaml +pipeline.sagemaker.services.k8s.aws/my-kubernetes-pipeline created +``` +To start an execution run of the pipeline: +``` +$ kubectl apply -f pipeline-execution.yaml +pipelineexecution.sagemaker.services.k8s.aws/my-kubernetes-pipeline-execution created +``` + +### List pipelines and pipeline executions + +To list all pipelines created using the ACK controller, use the following command: +``` +$ kubectl get pipeline +``` +To list all pipeline executions created using the ACK controller, use the following command: +``` +$ kubectl get pipelineexecution +``` + +### Describe a pipeline and pipeline execution + +To get more details about the pipeline once it's submitted, like checking the status, errors or parameters of the pipeline, use the following command: +``` +$ kubectl describe pipeline my-kubernetes-pipeline +``` + +To get more details about the pipeline execution, use the following command: +``` +$ kubectl describe pipelineexecution my-kubernetes-pipeline-execution +``` + +### Delete a pipeline and a pipeline execution + +To delete the pipeline, use the following command: +``` +$ kubectl delete pipeline my-kubernetes-pipeline +``` + +To delete the pipeline execution, use the following command: +``` +$ kubectl delete pipelineexecution my-kubernetes-pipeline-execution +``` \ No newline at end of file diff --git a/samples/pipeline/my-pipeline.json 
b/samples/pipeline/my-pipeline.json new file mode 100644 index 00000000..db6b1827 --- /dev/null +++ b/samples/pipeline/my-pipeline.json @@ -0,0 +1,60 @@ +{ + "Version": "2020-12-01", + "Steps": [ + { + "Name": "AbaloneTrain", + "Type": "Training", + "Arguments": { + "RoleArn": "<YOUR_SAGEMAKER_ROLE_ARN>", + "HyperParameters": { + "max_depth": "5", + "gamma": "4", + "eta": "0.2", + "min_child_weight": "6", + "objective": "multi:softmax", + "num_class": "10", + "num_round": "10" + }, + "AlgorithmSpecification": { + "TrainingImage": "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1", + "TrainingInputMode": "File" + }, + "OutputDataConfig": { + "S3OutputPath": "s3://<YOUR_BUCKET_NAME>/sagemaker/" + }, + "ResourceConfig": { + "InstanceCount": 1, + "InstanceType": "ml.m4.xlarge", + "VolumeSizeInGB": 5 + }, + "StoppingCondition": { + "MaxRuntimeInSeconds": 86400 + }, + "InputDataConfig": [ + { + "ChannelName": "train", + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/train/", + "S3DataDistributionType": "FullyReplicated" + } + }, + "ContentType": "text/libsvm" + }, + { + "ChannelName": "validation", + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/validation/", + "S3DataDistributionType": "FullyReplicated" + } + }, + "ContentType": "text/libsvm" + } + ] + } + } + ] +} \ No newline at end of file diff --git a/samples/pipeline/pipeline-execution.yaml b/samples/pipeline/pipeline-execution.yaml new file mode 100644 index 00000000..b87ae0a4 --- /dev/null +++ b/samples/pipeline/pipeline-execution.yaml @@ -0,0 +1,10 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: PipelineExecution +metadata: + name: my-kubernetes-pipeline-execution +spec: + parallelismConfiguration: + maxParallelExecutionSteps: 2 + pipelineExecutionDescription: "My first pipeline execution via Amazon EKS cluster." 
+ pipelineName: my-kubernetes-pipeline + \ No newline at end of file diff --git a/samples/pipeline/pipeline.yaml b/samples/pipeline/pipeline.yaml new file mode 100644 index 00000000..718b5f66 --- /dev/null +++ b/samples/pipeline/pipeline.yaml @@ -0,0 +1,71 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: Pipeline +metadata: + name: my-kubernetes-pipeline +spec: + parallelismConfiguration: + maxParallelExecutionSteps: 2 + pipelineName: my-kubernetes-pipeline + pipelineDefinition: | + { + "Version": "2020-12-01", + "Steps": [ + { + "Name": "AbaloneTrain", + "Type": "Training", + "Arguments": { + "RoleArn": "<YOUR_SAGEMAKER_ROLE_ARN>", + "HyperParameters": { + "max_depth": "5", + "gamma": "4", + "eta": "0.2", + "min_child_weight": "6", + "objective": "multi:softmax", + "num_class": "10", + "num_round": "30" + }, + "AlgorithmSpecification": { + "TrainingImage": "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1", + "TrainingInputMode": "File" + }, + "OutputDataConfig": { + "S3OutputPath": "s3://<YOUR_BUCKET_NAME>/sagemaker/" + }, + "ResourceConfig": { + "InstanceCount": 1, + "InstanceType": "ml.m4.xlarge", + "VolumeSizeInGB": 5 + }, + "StoppingCondition": { + "MaxRuntimeInSeconds": 86400 + }, + "InputDataConfig": [ + { + "ChannelName": "train", + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/train/", + "S3DataDistributionType": "FullyReplicated" + } + }, + "ContentType": "text/libsvm" + }, + { + "ChannelName": "validation", + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": "s3://<YOUR_BUCKET_NAME>/sagemaker/xgboost/validation/", + "S3DataDistributionType": "FullyReplicated" + } + }, + "ContentType": "text/libsvm" + } + ] + } + } + ] + } + pipelineDisplayName: my-kubernetes-pipeline + roleARN: <YOUR_SAGEMAKER_ROLE_ARN> \ No newline at end of file