From 468fb91f5f43a93f95914f62770e5ac4923a5766 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Wed, 16 Aug 2023 16:12:05 +0200 Subject: [PATCH] Add GitHub workflows for OS testing Introduce a callable GitHub workflow that leverages the Terraform modules for OS testing to bring up a k0s cluster on AWS against which Kubernetes conformance tests will be executed. The workflow features parallel test execution to shorten test duration, if desired. Note that test results collected in parallel are most likely not accepted by the Kubernetes conformance test repo. If the tests should be submitted there, it's advised to set the `e2e-concurrency-level` to 1. Moreover, provide a matrix workflow that bundles up the first one into a matrix build, so that multiple OS and network provider configurations may be tested using a single workflow, and a nightly trigger which chooses a single OS/network provider combination to be run on a nightly basis. --- .github/workflows/ostests-e2e.yaml | 227 +++++++++++++++++++++++++ .github/workflows/ostests-matrix.yaml | 73 ++++++++ .github/workflows/ostests-nightly.yaml | 95 +++++++++++ hack/ostests/README.md | 33 ++++ hack/ostests/main.tf | 4 +- hack/ostests/variables.tf | 7 + 6 files changed, 437 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ostests-e2e.yaml create mode 100644 .github/workflows/ostests-matrix.yaml create mode 100644 .github/workflows/ostests-nightly.yaml diff --git a/.github/workflows/ostests-e2e.yaml b/.github/workflows/ostests-e2e.yaml new file mode 100644 index 000000000000..a9d4efb41af1 --- /dev/null +++ b/.github/workflows/ostests-e2e.yaml @@ -0,0 +1,227 @@ +# OS e2e testing workflow. See ../../hack/ostests/README.md for details. + +name: "OS tests :: e2e" + +on: + workflow_call: + inputs: + k0s-version: + type: string + description: The k0s version to test. Will try to download a build artifact if empty. 
+      e2e-concurrency-level:
+        type: number
+        description: The number of tests that may be run concurrently.
+        default: 2 # values above 1 enable the separate parallel test phase
+      e2e-focus:
+        type: string
+        description: The selector for the e2e tests to be run.
+        default: '\[Conformance\]'
+      os:
+        type: string
+        description: The operating system to test.
+        required: true
+      network-provider:
+        type: string
+        description: The k0s network provider to test.
+        required: true
+      terraform-version:
+        type: string
+        description: The Terraform version to use when provisioning test resources.
+        default: '1.4.6'
+      k0sctl-version:
+        type: string
+        description: The k0sctl version to use when bootstrapping the test cluster.
+        default: '0.15.0' # k0sproject/k0sctl#495
+    secrets:
+      aws-access-key-id:
+        description: The AWS access key ID to use when provisioning test resources.
+        required: true
+      aws-secret-access-key:
+        description: The AWS secret access key to use when provisioning test resources.
+        required: true
+      aws-session-token:
+        description: The AWS session token to use when provisioning test resources.
+        required: true
+
+env:
+  # This is hopefully somewhat physically close to where the hosted GitHub runners live.
+  # https://github.com/orgs/community/discussions/24969#discussioncomment-3246032
+  AWS_REGION: us-east-1
+
+jobs:
+  e2e-tests:
+    name: e2e tests
+    runs-on: ubuntu-22.04
+
+    env:
+      E2E_FOCUS: ${{ inputs.e2e-focus }}
+      TF_VAR_os: ${{ inputs.os }}
+      TF_VAR_additional_tags: '{
+        "ostests.k0sproject.io/github-run-id"="${{ github.run_id }}",
+        "ostests.k0sproject.io/github-run-attempt"="${{ github.run_attempt }}",
+        }'
+      TF_VAR_cache_dir: ${{ github.workspace }}/.cache/k0s-ostests
+      TF_VAR_k0s_version: ${{ inputs.k0s-version }}
+      TF_VAR_k0sctl_executable_path: ${{ github.workspace }}/.cache/k0sctl
+      TF_VAR_k0s_network_provider: ${{ inputs.network-provider }}
+
+    defaults:
+      run:
+        working-directory: hack/ostests
+
+    steps:
+      - name: "Workflow run :: Checkout"
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: "Workflow run :: Prepare"
+        working-directory: ./
+        run: |
+          kubernetesVersion="$(./vars.sh kubernetes_version)"
+          echo KUBERNETES_VERSION="$kubernetesVersion" >>"$GITHUB_ENV"
+
+      - name: "Terraform :: Requisites :: Download k0s"
+        if: inputs.k0s-version == ''
+        uses: actions/download-artifact@v3
+        with:
+          name: k0s-linux-amd64
+          path: ${{ github.workspace }}/.cache
+
+      - name: "Terraform :: Requisites :: Prepare"
+        env:
+          K0SCTL_VERSION: ${{ inputs.k0sctl-version }}
+          K0S_VERSION: ${{ inputs.k0s-version }}
+          K0S_EXECUTABLE_PATH: ${{ github.workspace }}/.cache/k0s
+        run: |
+          kubectl version --client
+          jq --version
+
+          mkdir -p "$(dirname -- "$TF_VAR_k0sctl_executable_path")"
+          curl --fail -sSLo "$TF_VAR_k0sctl_executable_path" "https://github.com/k0sproject/k0sctl/releases/download/v${K0SCTL_VERSION}/k0sctl-linux-x64"
+          chmod +x -- "$TF_VAR_k0sctl_executable_path"
+          "$TF_VAR_k0sctl_executable_path" version
+
+          if [ -z "$K0S_VERSION" ]; then
+            chmod +x -- "$K0S_EXECUTABLE_PATH"
+            K0S_VERSION="$("$K0S_EXECUTABLE_PATH" version)"
+            echo TF_VAR_k0s_executable_path="$K0S_EXECUTABLE_PATH" >>"$GITHUB_ENV"
+          fi
+
+          echo TF_VAR_k0s_version="$K0S_VERSION" >>"$GITHUB_ENV"
+
+      - name: "Terraform :: Setup"
+        uses: hashicorp/setup-terraform@v2
+        with:
+          terraform_version: ${{ inputs.terraform-version }}
+          terraform_wrapper: false # NOTE(review): presumably so "terraform output -raw" below yields clean stdout - confirm
+
+      - name: "Terraform :: Init"
+        id: tf-init
+        run: terraform init
+
+      - name: "Terraform :: Apply"
+        id: tf-apply
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.aws-access-key-id }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.aws-secret-access-key }}
+          AWS_SESSION_TOKEN: ${{ secrets.aws-session-token }}
+        run: |
+          terraform apply -auto-approve
+
+          kubeconfigPath="$GITHUB_WORKSPACE/.cache/kubeconfig"
+          terraform output -raw k0s_kubeconfig >"$kubeconfigPath"
+          echo "KUBECONFIG=$kubeconfigPath" >>"$GITHUB_ENV"
+
+      # If concurrency is enabled, tests are executed in two phases. First, all
+      # the regular tests that can run in parallel are run with the given
+      # concurrency level. In a second step, all remaining tests that can't be
+      # run in parallel will be executed serially. This is the option B shown here:
+      # https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md#running-conformance-tests-with-kubetest
+
+      - name: "e2e tests :: Run parallel tests"
+        id: e2e-run-parallel
+        if: inputs.e2e-concurrency-level > 1
+        timeout-minutes: 180 # three hours
+        working-directory: inttest
+        env:
+          E2E_CONCURRENCY_LEVEL: ${{ inputs.e2e-concurrency-level }}
+        run: |
+          make bin/sonobuoy
+          bin/sonobuoy run -p e2e --wait=120 \
+            --kubernetes-version=v"$KUBERNETES_VERSION" \
+            --plugin-env=e2e.E2E_PARALLEL=true \
+            --plugin-env=e2e.E2E_FOCUS="$E2E_FOCUS" \
+            --plugin-env=e2e.E2E_SKIP='\[Serial\]' \
+            --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS="-v --timeout=90m --procs=$E2E_CONCURRENCY_LEVEL"
+
+      - name: "e2e tests :: Retrieve parallel results"
+        id: e2e-retrieve-parallel
+        if: steps.e2e-run-parallel.conclusion != 'skipped'
+        working-directory: inttest
+        run: |
+          sonobuoyTarGz="$(make --silent get-conformance-results)"
+          mv -- "$sonobuoyTarGz" sonobuoy-e2e-parallel.tar.gz
+
+      - name: "e2e tests :: Clear parallel run"
+        if: steps.e2e-run-parallel.conclusion != 'skipped'
+        timeout-minutes: 15
+        working-directory: inttest
+        run: |
+          bin/sonobuoy delete --all --wait=10
+          echo E2E_FOCUS='\[Serial\].*'"$E2E_FOCUS" >>"$GITHUB_ENV" # narrow the following serial run to [Serial] tests
+
+      - name: "e2e tests :: Run serial tests"
+        working-directory: inttest
+        timeout-minutes: 180 # three hours
+        run: |
+          make bin/sonobuoy
+          bin/sonobuoy run -p e2e --wait=120 \
+            --kubernetes-version=v"$KUBERNETES_VERSION" \
+            --plugin-env=e2e.E2E_FOCUS="$E2E_FOCUS" \
+            --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS='-v --timeout=90m'
+
+      - name: "e2e tests :: Retrieve serial results"
+        id: e2e-retrieve-serial
+        working-directory: inttest
+        run: |
+          sonobuoyTarGz="$(make --silent get-conformance-results)"
+          mv -- "$sonobuoyTarGz" sonobuoy-e2e-serial.tar.gz
+
+      - name: "Terraform :: Destroy"
+        if: always() && steps.tf-init.conclusion == 'success' # tear down even after failures, but only once init succeeded
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.aws-access-key-id }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.aws-secret-access-key }}
+          AWS_SESSION_TOKEN: ${{ secrets.aws-session-token }}
+        run: terraform destroy -auto-approve
+
+      - name: "e2e tests :: Upload results"
+        if: always() && (steps.e2e-retrieve-parallel.conclusion == 'success' || steps.e2e-retrieve-serial.conclusion == 'success')
+        uses: actions/upload-artifact@v3
+        with:
+          name: ostests-e2e-${{ inputs.os }}-${{ inputs.network-provider }}-sonobuoy-results
+          path: |
+            inttest/sonobuoy-e2e-parallel.tar.gz
+            inttest/sonobuoy-e2e-serial.tar.gz
+
+      - name: "e2e tests :: Check"
+        working-directory: inttest
+        run: |
+          fail=0
+          for f in sonobuoy-e2e-*.tar.gz; do
+            echo "::group::$f"
+            bin/sonobuoy results "$f"
+            numNotPassedOrSkipped=$(bin/sonobuoy results "$f" -p=e2e --mode=detailed | jq --slurp '[.[] | select(.status != "passed" and .status != "skipped")] | length')
+            echo "Number of tests that didn't pass and weren't skipped: $numNotPassedOrSkipped"
+            echo ::endgroup::
+            [ "$numNotPassedOrSkipped" = 0 ] || fail=1
+          done
+          [ "$fail" = 0 ] || exit 1
+
+      - name: "e2e tests :: Upload k0sctl logs"
+        if: always() && steps.tf-init.conclusion == 'success'
+        uses: actions/upload-artifact@v3
+        with:
+          name: ostests-e2e-${{ inputs.os }}-${{ inputs.network-provider }}-k0sctl-logs
+          path: ~/.cache/k0sctl/k0sctl.log
diff --git a/.github/workflows/ostests-matrix.yaml b/.github/workflows/ostests-matrix.yaml
new file mode 100644
index 000000000000..d5e18620907c
--- /dev/null
+++ b/.github/workflows/ostests-matrix.yaml
@@ -0,0 +1,73 @@
+# OS testing matrix. See ../../hack/ostests/README.md for details.
+
+name: "OS tests :: Matrix"
+
+on:
+  workflow_dispatch:
+    inputs:
+      k0s-version:
+        type: string
+        description: The k0s version to test. Will build k0s from source if empty.
+      e2e-concurrency-level:
+        type: number
+        description: The number of tests that may be run concurrently.
+        default: 2
+      e2e-focus:
+        type: string
+        description: The selector for the e2e tests to be run.
+        default: \[Conformance\]
+      oses:
+        type: string
+        description: The operating systems to test.
+        required: true
+        default: >-
+          [
+            "alpine_3_17",
+            "centos_7", "centos_8", "centos_9",
+            "debian_10", "debian_11", "debian_12",
+            "fcos_38",
+            "fedora_38",
+            "flatcar",
+            "oracle_7_9", "oracle_8_7", "oracle_9_1",
+            "rhel_7", "rhel_8", "rhel_9",
+            "rocky_8", "rocky_9",
+            "ubuntu_2004", "ubuntu_2204", "ubuntu_2304"
+          ]
+      network-providers:
+        type: string
+        description: The k0s network providers to test.
+        required: true
+        default: >-
+          [
+            "kuberouter",
+            "calico"
+          ]
+
+jobs:
+  build:
+    name: Build
+    if: inputs.k0s-version == ''
+    uses: ./.github/workflows/build-k0s.yml
+    with: { target-os: linux, target-arch: amd64 }
+
+  e2e-tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ${{ fromJSON(inputs.oses) }}
+        network-provider: ${{ fromJSON(inputs.network-providers) }}
+
+    name: "${{ matrix.os }} :: ${{ matrix.network-provider }}"
+    needs: build
+    if: always() && (inputs.k0s-version != '' || needs.build.result == 'success') # always(): a skipped build must not skip this job
+    uses: ./.github/workflows/ostests-e2e.yaml
+    with:
+      k0s-version: ${{ inputs.k0s-version }}
+      e2e-concurrency-level: ${{ fromJSON(inputs.e2e-concurrency-level) }} # infamous GH workflows bug that loses type information (actions/runner#2206)
+      e2e-focus: ${{ inputs.e2e-focus }}
+      os: ${{ matrix.os }}
+      network-provider: ${{ matrix.network-provider }}
+    secrets:
+      aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      aws-session-token: ${{ secrets.AWS_SESSION_TOKEN }}
diff --git a/.github/workflows/ostests-nightly.yaml b/.github/workflows/ostests-nightly.yaml
new file mode 100644
index 000000000000..25c0c6696c97
--- /dev/null
+++ b/.github/workflows/ostests-nightly.yaml
@@ -0,0 +1,95 @@
+# Nightly trigger for OS testing. See ../../hack/ostests/README.md for details.
+
+name: "OS tests :: Nightly"
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '30 2 * * *' # every day at 02:30 (GitHub evaluates schedules in UTC)
+
+env:
+  DISTRIBUTIONS: >-
+    [
+      ["alpine_3_17"],
+      ["centos_7", "centos_8", "centos_9"],
+      ["debian_10", "debian_11", "debian_12"],
+      ["fcos_38"],
+      ["fedora_38"],
+      ["flatcar"],
+      ["oracle_7_9", "oracle_8_7", "oracle_9_1"],
+      ["rhel_7", "rhel_8", "rhel_9"],
+      ["rocky_8", "rocky_9"],
+      ["ubuntu_2004", "ubuntu_2204", "ubuntu_2304"]
+    ]
+  NETWORK_PROVIDERS: >-
+    [
+      "kuberouter",
+      "calico"
+    ]
+
+jobs:
+  select:
+    name: Select
+    runs-on: ubuntu-latest
+
+    outputs:
+      os: ${{ steps.select.outputs.os }}
+      network-provider: ${{ steps.select.outputs.network-provider }}
+
+    steps:
+      # Build an interleaved list of every OS/network provider pairing and
+      # pick one entry by the number of days since the Unix epoch. When the
+      # workflow runs once a day, consecutive runs exercise noticeably
+      # different parameters, and every pairing eventually gets its turn
+      # once the list has been cycled through.
+      - name: Select
+        id: select
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const families = JSON.parse(process.env.DISTRIBUTIONS)
+            const providers = JSON.parse(process.env.NETWORK_PROVIDERS)
+
+            const interleaved = []
+            for (let round = 0; ; round += 1) {
+              const sizeBefore = interleaved.length
+              for (const family of families) {
+                if (family.length > round) {
+                  interleaved.push(family[round])
+                }
+              }
+              if (interleaved.length === sizeBefore) {
+                // no family had a release left for this round
+                break
+              }
+            }
+
+            const pairs = []
+            for (let shift = 0; shift < providers.length; shift += 1) {
+              interleaved.forEach((osId, idx) => {
+                pairs.push([osId, providers[(shift + idx) % providers.length]])
+              })
+            }
+
+            const msPerDay = 24 * 60 * 60 * 1000
+            const pick = pairs[Math.floor(Date.now() / msPerDay) % pairs.length]
+            console.log(`Selected ${pick[0]}/${pick[1]}`)
+            core.setOutput('os', pick[0])
+            core.setOutput('network-provider', pick[1])
+
+  build:
+    name: Build
+    uses: ./.github/workflows/build-k0s.yml
+    with: { target-os: linux, target-arch: amd64 }
+
+  e2e-tests:
+    name: "${{ needs.select.outputs.os }} :: ${{ needs.select.outputs.network-provider }}"
+    needs: [select, build]
+    uses: ./.github/workflows/ostests-e2e.yaml
+    with:
+      os: ${{ needs.select.outputs.os }}
+      network-provider: ${{ needs.select.outputs.network-provider }}
+    secrets:
+      aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      aws-session-token: ${{ secrets.AWS_SESSION_TOKEN }}
diff --git a/hack/ostests/README.md b/hack/ostests/README.md
index 66254310c6a6..c49a6a2bfe83 100644
--- a/hack/ostests/README.md
+++ b/hack/ostests/README.md
@@ -106,3 +106,36 @@ This may be a fixed version number, "stable" or "latest".
 * Test it: Be sure to have the requisites ready, as described at the top of this README, then do `terraform apply -var=os=`. When done, don't forget to clean up: `terraform destroy -var=os=`.
+* Update the [nightly trigger] and [matrix workflow] with the new OS ID.
+
+## GitHub Actions workflows
+
+There's a reusable GitHub Actions workflow available in [ostests-e2e.yaml]. It
+will deploy the Terraform resources and perform Kubernetes conformance tests
+against the provisioned test cluster.
+
+[ostests-e2e.yaml]: ../../.github/workflows/ostests-e2e.yaml
+
+### Launch a workflow run
+
+There's a [nightly trigger] for the OS testing workflow. It will select and run
+a single testing parameter combination each day. There's also a [matrix
+workflow] that exposes more knobs and can be triggered manually, e.g. via [gh]:
+
+```console
+$ gh workflow run ostests-matrix.yaml --ref some/experimental/branch \
+  -f oses='["alpine_3_17"]' \
+  -f network-providers='["calico"]'
+✓ Created workflow_dispatch event for ostests-matrix.yaml at some/experimental/branch
+
+To see runs for this workflow, try: gh run list --workflow=ostests-matrix.yaml
+```
+
+[gh]: https://github.com/cli/cli
+
+## TODO
+
+* Figure out the best/canonical way to change host names of the AWS instances
+
+[nightly trigger]: ../../.github/workflows/ostests-nightly.yaml
+[matrix workflow]: ../../.github/workflows/ostests-matrix.yaml
diff --git a/hack/ostests/main.tf b/hack/ostests/main.tf
index c4af5be65adc..e68948b4bbbd 100644
--- a/hack/ostests/main.tf
+++ b/hack/ostests/main.tf
@@ -1,10 +1,10 @@
 provider "aws" {
   default_tags {
-    tags = {
+    tags = merge(var.additional_tags, { # later merge() arguments win, so the fixed tags below override same-named additional tags
       "ostests.k0sproject.io/instance"             = local.resource_name_prefix
       "ostests.k0sproject.io/os"                   = var.os
       "ostests.k0sproject.io/k0s-network-provider" = var.k0s_network_provider
-    }
+    })
   }
 }
 
diff --git a/hack/ostests/variables.tf b/hack/ostests/variables.tf
index bf65ba96dbbd..8a8ae16ca3fd 100644
--- a/hack/ostests/variables.tf
+++ b/hack/ostests/variables.tf
@@ -23,6 +23,13 @@ variable "cache_dir" {
   }
 }
 
+variable "additional_tags" {
+  type        = map(string)
+  description = "Additional tags to be added to resources and metrics."
+  default     = {}
+  nullable    = false
+}
+
 variable "os" {
   type        = string
   description = "The underlying OS for the to-be-provisioned cluster."