Skip to content

Commit

Permalink
Use the Pulsar scheduler in end-to-end tests (#3141)
Browse files Browse the repository at this point in the history
  • Loading branch information
zuqq authored Dec 1, 2023
1 parent 4dee996 commit d47ea3d
Show file tree
Hide file tree
Showing 16 changed files with 44 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/airflow-operator.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
# Manually create folders to ensure perms are correct.
mkdir -p .kube/internal
mkdir -p .kube/external
go run github.com/magefile/[email protected] -v localdev minimal
go run github.com/magefile/[email protected] -v localdev minimal-legacy
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
# Manually create folders to ensure perms are correct.
mkdir -p .kube/internal
mkdir -p .kube/external
go run github.com/magefile/[email protected] -v localdev minimal
go run github.com/magefile/[email protected] -v localdev minimal-pulsar
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ jobs:
cache-prefix: go-integration-tests

- name: Setup Integration Tests
run: go run github.com/magefile/[email protected] -v localdev minimal
run: go run github.com/magefile/[email protected] -v localdev minimal-pulsar

- name: Run Integration Tests
run: go run github.com/magefile/[email protected] -v testsuite
Expand Down
2 changes: 1 addition & 1 deletion developer/env/binoculars.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ARMADA_CORSALLOWEDORIGINS="http://localhost:3000,http://localhost:10000,http://example.com:10000"
ARMADA_HTTP_PORT:8082
ARMADA_HTTP_PORT=8082
File renamed without changes.
2 changes: 1 addition & 1 deletion developer/env/scheduler.env
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ARMADA_HTTP_PORT:8081
ARMADA_HTTP_PORT=8081
2 changes: 2 additions & 0 deletions developer/env/server-legacy.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
EXECUTOR_UPDATE_INTERVAL="1s"
ARMADA_CORSALLOWEDORIGINS="http://localhost:3000,http://localhost:10000,http://example.com:10000"
4 changes: 2 additions & 2 deletions developer/env/server-pulsar.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
EXECUTOR_UPDATE_INTERVAL="1s"
ARMADA_CORSALLOWEDORIGINS=="http://localhost:3000,http://localhost:10000,http://example.com:10000"
ARMADA_CORSALLOWEDORIGINS="http://localhost:3000,http://localhost:10000,http://example.com:10000"
ARMADA_PULSARSCHEDULERENABLED="true"
ARMADA_PROBABILITYOFUSINGPULSARSCHEDULER="1"
ARMADA_PROBABILITYOFUSINGPULSARSCHEDULER="1"
2 changes: 0 additions & 2 deletions developer/env/server.env

This file was deleted.

41 changes: 19 additions & 22 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ services:
#
# Armada services.
#
server:
server-legacy:
container_name: server
image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest}
networks:
Expand All @@ -57,14 +57,14 @@ services:
- 8080:8080
- 4000:4000
volumes:
- ./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml
- "./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
depends_on:
- eventingester
working_dir: /app
env_file:
- ./developer/env/server.env
- ./developer/env/server-legacy.env
command: sh -c "sleep 30 && ./server --config /config/insecure-armada.yaml"

server-pulsar:
Expand All @@ -77,15 +77,15 @@ services:
- 8080:8080
- 4000:4000
volumes:
- ./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml
- "./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
depends_on:
- eventingester
working_dir: /app
env_file:
- ./developer/env/server-pulsar.env
command: ./server --config /config/insecure-armada.yaml
command: sh -c "sleep 30 && ./server --config /config/insecure-armada.yaml"

scheduler:
container_name: scheduler
Expand All @@ -97,18 +97,18 @@ services:
- 8081:8081
- 50052:50052
volumes:
- ./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml
- "./developer/config/insecure-armada.yaml:/config/insecure-armada.yaml"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
depends_on:
- postgresPulsarMigration
- scheduleringester
working_dir: /app
env_file:
- ./developer/env/scheduler.env
command: ./scheduler run --config /config/insecure-armada.yaml

postgresPulsarMigration:
container_name: postgresPulsarMigration
scheduler-migration:
container_name: scheduler-migration
image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest}
networks:
- kind
Expand All @@ -130,10 +130,12 @@ services:
- "gomod-cache:/go/pkg/mod:rw"
env_file:
- ./developer/env/scheduleringester.env
depends_on:
- scheduler-migration
working_dir: /app
command: ./scheduleringester

executor:
executor-legacy:
container_name: executor
image: ${ARMADA_IMAGE:-gresearch/armada-bundle}:${ARMADA_IMAGE_TAG:-latest}
networks:
Expand All @@ -142,13 +144,13 @@ services:
- 9001:9001
- 4001:4000
volumes:
- ./.kube/internal:/.kube
- "./.kube/internal:/.kube"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
environment:
- KUBECONFIG=/.kube/config
env_file:
- ./developer/env/executor.env
- ./developer/env/executor-legacy.env
working_dir: /app
command: ./executor

Expand All @@ -160,10 +162,8 @@ services:
ports:
- 9001:9001
- 4001:4000
depends_on:
- server-pulsar
volumes:
- ./.kube/internal:/.kube
- "./.kube/internal:/.kube"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
environment:
Expand All @@ -181,10 +181,8 @@ services:
ports:
- 8082:8080
- 4002:4000
depends_on:
- server
volumes:
- ./.kube/internal:/.kube
- "./.kube/internal:/.kube"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
environment:
Expand Down Expand Up @@ -216,15 +214,14 @@ services:
- "10000:10000"
- "4005:4000"
depends_on:
- server
- lookoutingesterv2
volumes:
- ./internal/lookout/ui/build:/app/internal/lookout/ui/build
- "./internal/lookout/ui/build:/app/internal/lookout/ui/build"
- "go-cache:/root/.cache/go-build:rw"
- "gomod-cache:/go/pkg/mod:rw"
env_file:
- ./developer/env/lookoutv2.env
working_dir: "/app"
working_dir: /app
entrypoint: sh -c "./lookoutv2 --migrateDatabase && ./lookoutv2"

lookoutingesterv2:
Expand Down Expand Up @@ -254,7 +251,7 @@ services:
- 60003:60003
- 4008:4000
depends_on:
- server
- server-legacy
env_file:
- ./developer/env/jobservice.env
working_dir: /app
Expand Down
12 changes: 6 additions & 6 deletions docs/developer.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ This document is intended for developers who want to contribute to the project.
Want to quickly get Armada running and test it? Install the [Pre-requisites](#pre-requisites) and then run:

```bash
mage localdev minimal testsuite
mage localdev minimal-pulsar testsuite
```

To get the UI running, run:
Expand Down Expand Up @@ -74,11 +74,11 @@ LocalDev provides a reliable and extendable way to install Armada as a developer

It has the following options to customize further steps:

* `mage localdev full` - Installs all components of Armada, including the UI.
* `mage localdev minimal` - Installs only the core components of Armada, the server, executor and eventingester.
* `mage localdev no-build` - skips the build step. Assumes that a separate image has been set from `ARMADA_IMAGE` and `ARMADA_TAG` environment variables or it has already been built.
* `mage localdev full` - Runs all components of Armada, including the Lookout UI.
* `mage localdev minimal-pulsar` - Runs only the core components of Armada (such as the API server and an executor).
* `mage localdev no-build` - Skips the build step; set `ARMADA_IMAGE` and `ARMADA_IMAGE_TAG` to choose the Docker image to use.

`mage localdev minimal` is what is used to test the CI pipeline, and is the recommended way to test changes to the core components of Armada.
`mage localdev minimal-pulsar` is what the CI pipeline uses to set up the integration tests, and is the recommended way to test changes to the core components of Armada.

### Testing if LocalDev is working

Expand Down Expand Up @@ -108,7 +108,7 @@ For more information see the [UI Developer Guide](./developer/ui.md).
You can set the `ARMADA_COMPONENTS` environment variable to choose which components to run. It is a comma separated list of components to run. For example, to run only the server and executor, you can run:

```bash
export ARMADA_COMPONENTS="server,executor"
export ARMADA_COMPONENTS="server-legacy,executor-legacy"
```

### Running Pulsar backed scheduler with LocalDev
Expand Down
2 changes: 1 addition & 1 deletion docs/developer/manual-localdev.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ mage StartDependencies && mage checkForPulsarRunning
# Start the Armada server and executor.
# Alternatively, run the Armada server and executor directly on the host,
# e.g., through your IDE; see below for details.
docker compose up -d server executor
docker compose up -d server-legacy executor-legacy

# Wait for Armada to come online
mage checkForArmadaRunning
Expand Down
2 changes: 1 addition & 1 deletion internal/executor/service/job_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func (jobLeaseService *JobLeaseService) requestJobLeases(leaseRequest *api.Strea
}

// Goroutine receiving jobs from the server.
// Also recevies ack confirmations from the server.
// Also receives ack confirmations from the server.
// Send leases on ch to another goroutine responsible for sending back acks.
// Give the channel a small buffer to allow for some asynchronicity.
var numServerAcks uint32
Expand Down
2 changes: 1 addition & 1 deletion internal/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ func (s *Scheduler) ensureDbUpToDate(ctx *armadacontext.Context, pollInterval ti
ctx.Infof("Successfully ensured that database state is up to date")
return nil
}
ctx.Infof("Recevied %d partitions, still waiting on %d", numReceived, numSent-numReceived)
ctx.Infof("Received %d partitions, still waiting on %d", numReceived, numSent-numReceived)
s.clock.Sleep(pollInterval)
}
}
Expand Down
12 changes: 5 additions & 7 deletions magefiles/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ func LocalDev(arg string) error {
os.Setenv("ARMADA_SCHEDULING_EXECUTORUPDATEFREQUENCY", "1s")

switch arg {
case "minimal":
case "minimal-legacy":
timeTaken := time.Now()
os.Setenv("PULSAR_BACKED", "")
mg.Deps(mg.F(goreleaserMinimalRelease, "bundle"), Kind, downloadDependencyImages)
Expand All @@ -183,21 +183,19 @@ func LocalDev(arg string) error {
case "no-build", "debug":
mg.Deps(Kind, downloadDependencyImages)
default:
return fmt.Errorf("invalid argument: %s Please enter one the following argument: minimal, minimal-pulsar, full, no-build, debug ", arg)
return fmt.Errorf("invalid localdev mode: %s; valid modes are: minimal-legacy, minimal-pulsar, full, no-build, debug", arg)
}

mg.Deps(StartDependencies)
fmt.Println("Waiting for dependencies to start...")
mg.Deps(CheckForPulsarRunning)

switch arg {
case "minimal":
os.Setenv("ARMADA_COMPONENTS", "executor,server")
case "minimal-legacy":
os.Setenv("ARMADA_COMPONENTS", "executor-legacy,server-legacy")
mg.Deps(StartComponents)
case "minimal-pulsar":
// This 20s sleep is to remedy an issue caused by pods coming up too fast after pulsar
// TODO: Deal with this internally somehow?
os.Setenv("ARMADA_COMPONENTS", "executor-pulsar,server-pulsar,scheduler,scheduleringester")
os.Setenv("ARMADA_COMPONENTS", "executor-pulsar,server-pulsar,scheduler")
mg.Deps(StartComponents)
case "debug", "no-build":
fmt.Println("Dependencies started, ending localdev...")
Expand Down
4 changes: 2 additions & 2 deletions testsuite/testcases/basic/failure_oom_1x1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ jobs:
memory: 10Mi
cpu: 100m
---
timeout: "150s"
timeout: "300s"
expectedEvents:
- submitted:
- failed:
# OOMKilled reason isn't set reliably.
# reason: "Container oom failed with exit code 137 because OOMKilled: \n"
# reason: "Container oom failed with exit code 137 because OOMKilled: \n"

0 comments on commit d47ea3d

Please sign in to comment.