Skip to content

Commit

Permalink
Don't create a new Azure Container Apps Job (ACAJ) every time
Browse files Browse the repository at this point in the history
It turns out that the way Azure intends you to use Container Apps Jobs is to create a single 'job' and then invoke it (optionally changing the command/image/etc).

'Jobs' are more like 'templates', and are fairly heavy to create. This PR changes our config to just run things against an existing ACAJ, which makes everything much faster.
  • Loading branch information
bcspragu committed Jan 11, 2024
1 parent f3e9149 commit 0071ca5
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 207 deletions.
157 changes: 28 additions & 129 deletions azure/aztask/aztask.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"context"
"errors"
"fmt"
"math/rand"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
Expand All @@ -23,106 +22,55 @@ type Runner struct {
}

type Config struct {
// Location is the location to run the runner, like centralus
Location string

// Identity is the account the runner should act as.
Identity *RunnerIdentity

Rand *rand.Rand
}

func (c *Config) validate() error {
if c.Location == "" {
return errors.New("no container location given")
}

if err := c.Identity.validate(); err != nil {
return fmt.Errorf("invalid identity config: %w", err)
}

if c.Rand == nil {
return errors.New("no random number generator given")
}

return nil
}

type RunnerIdentity struct {
// Like runner-local
Name string
// Like aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
// The Azure Subscription to issue API calls against, like aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
SubscriptionID string
// Like rmi-pacta-{local,dev}
// The resource group where our Container Apps Job is located, like rmi-pacta-{local,dev}
ResourceGroup string
// Like ffffffff-0000-1111-2222-333333333333
ClientID string
// Like pacta-{local,dev}, the name of the Container Apps Environment
ManagedEnvironment string
// The Azure client ID of the managed identity that the job should run as, like ffffffff-0000-1111-2222-333333333333
ManagedIdentityClientID string
// The name of the Container Apps Job to start an execution of, like pacta-runner
JobName string
}

func (ri *RunnerIdentity) validate() error {
if ri.Name == "" {
return errors.New("no identity name given")
}
if ri.SubscriptionID == "" {
func (c *Config) validate() error {
if c.SubscriptionID == "" {
return errors.New("no identity subscription ID given")
}
if ri.ResourceGroup == "" {
if c.ResourceGroup == "" {
return errors.New("no identity resource group given")
}
if ri.ClientID == "" {
if c.ManagedIdentityClientID == "" {
return errors.New("no identity client ID given")
}
return nil
}

func (r *RunnerIdentity) String() string {
tmpl := "/subscriptions/%s/resourcegroups/%s/providers/Microsoft.ManagedIdentity/userAssignedIdentities/%s"
return fmt.Sprintf(tmpl, r.SubscriptionID, r.ResourceGroup, r.Name)
}

func (r *RunnerIdentity) EnvironmentID() string {
tmpl := "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.App/managedEnvironments/%s"
return fmt.Sprintf(tmpl, r.SubscriptionID, r.ResourceGroup, r.ManagedEnvironment)
return nil
}

func NewRunner(creds azcore.TokenCredential, cfg *Config) (*Runner, error) {
if err := cfg.validate(); err != nil {
return nil, fmt.Errorf("invalid task runner config: %w", err)
}

clientFactory, err := armappcontainers.NewClientFactory(cfg.Identity.SubscriptionID, creds, nil)
clientFactory, err := armappcontainers.NewClientFactory(cfg.SubscriptionID, creds, nil)
if err != nil {
return nil, fmt.Errorf("failed to create client: %w", err)
}

gen, err := idgen.New(cfg.Rand, idgen.WithDefaultLength(32), idgen.WithCharSet([]rune("abcdefghijklmnopqrstuvwxyz")))
if err != nil {
return nil, fmt.Errorf("failed to init ID generator: %w", err)
}

return &Runner{
client: clientFactory.NewJobsClient(),
cfg: cfg,
gen: gen,
}, nil
}

func (r *Runner) Run(ctx context.Context, cfg *task.Config) (task.RunnerID, error) {

name := r.gen.NewID()
identity := r.cfg.Identity.String()
envID := r.cfg.Identity.EnvironmentID()

envVars := []*armappcontainers.EnvironmentVar{
{
Name: to.Ptr("AZURE_CLIENT_ID"),
Value: to.Ptr(r.cfg.Identity.ClientID),
Value: to.Ptr(r.cfg.ManagedIdentityClientID),
},
{
Name: to.Ptr("MANAGED_IDENTITY_CLIENT_ID"),
Value: to.Ptr(r.cfg.Identity.ClientID),
Value: to.Ptr(r.cfg.ManagedIdentityClientID),
},
}
for _, v := range cfg.Env {
Expand All @@ -132,75 +80,26 @@ func (r *Runner) Run(ctx context.Context, cfg *task.Config) (task.RunnerID, erro
})
}

job := armappcontainers.Job{
Location: &r.cfg.Location,
Identity: &armappcontainers.ManagedServiceIdentity{
Type: to.Ptr(armappcontainers.ManagedServiceIdentityTypeUserAssigned),
UserAssignedIdentities: map[string]*armappcontainers.UserAssignedIdentity{
identity: {},
},
},
Properties: &armappcontainers.JobProperties{
Configuration: &armappcontainers.JobConfiguration{
ReplicaTimeout: to.Ptr(int32(60 * 60 * 2 /* two hours */)),
TriggerType: to.Ptr(armappcontainers.TriggerTypeManual),
ManualTriggerConfig: &armappcontainers.JobConfigurationManualTriggerConfig{
// Run one copy.
Parallelism: to.Ptr(int32(1)),
ReplicaCompletionCount: to.Ptr(int32(1)),
},
// Don't retry, if it failed once, it'll probably fail again. We might relax
// this in the future if we identify "transient" errors.
ReplicaRetryLimit: to.Ptr(int32(0)),
Registries: []*armappcontainers.RegistryCredentials{
{
Server: to.Ptr(cfg.Image.Base.Registry),
Identity: to.Ptr(identity),
},
},
Secrets: []*armappcontainers.Secret{
// TODO: Put any useful configuration here.
},
},
EnvironmentID: to.Ptr(envID),
Template: &armappcontainers.JobTemplate{
Containers: []*armappcontainers.Container{
{
Args: toPtrs(cfg.Flags),
Command: toPtrs(cfg.Command),
Env: envVars,
Image: to.Ptr(cfg.Image.String()),
Name: to.Ptr(name),
Probes: []*armappcontainers.ContainerAppProbe{},
Resources: &armappcontainers.ContainerResources{
CPU: to.Ptr(1.0),
Memory: to.Ptr("2Gi"),
},
VolumeMounts: []*armappcontainers.VolumeMount{},
},
poller, err := r.client.BeginStart(ctx, r.cfg.ResourceGroup, r.cfg.JobName, &armappcontainers.JobsClientBeginStartOptions{
Template: &armappcontainers.JobExecutionTemplate{
Containers: []*armappcontainers.JobExecutionContainer{{
Args: toPtrs(cfg.Flags),
Command: toPtrs(cfg.Command),
Env: envVars,
Image: to.Ptr(cfg.Image.String()),
Name: to.Ptr("pacta-runner"),
Resources: &armappcontainers.ContainerResources{
CPU: to.Ptr(1.0),
Memory: to.Ptr("2Gi"),
},
Volumes: []*armappcontainers.Volume{
// TODO: Mount any sources here.
},
},
}},
},
Tags: map[string]*string{},
}
poller, err := r.client.BeginCreateOrUpdate(ctx, r.cfg.Identity.ResourceGroup, name, job, nil)
})
if err != nil {
return "", fmt.Errorf("failed to create container app job: %w", err)
return "", fmt.Errorf("failed to start container app job: %w", err)
}

res, err := poller.PollUntilDone(ctx, nil)
if err != nil {
return "", fmt.Errorf("failed to poll container group creation: %w", err)
}

poller2, err := r.client.BeginStart(ctx, r.cfg.Identity.ResourceGroup, name, nil)
if err != nil {
return "", fmt.Errorf("failed to start container app job: %w", err)
}
if _, err := poller2.PollUntilDone(ctx, nil); err != nil {
return "", fmt.Errorf("failed to poll for container app start: %w", err)
}

Expand Down
1 change: 0 additions & 1 deletion cmd/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ go_library(
"@com_github_lestrrat_go_jwx_v2//jwk",
"@com_github_namsral_flag//:flag",
"@com_github_rs_cors//:cors",
"@com_github_silicon_ally_cryptorand//:cryptorand",
"@com_github_silicon_ally_zaphttplog//:zaphttplog",
"@org_uber_go_zap//:zap",
],
Expand Down
10 changes: 4 additions & 6 deletions cmd/server/configs/local.conf
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@ secret_auth_public_key_data -----BEGIN PUBLIC KEY-----\nMCowBQYDK2VwAyEAP/Sv7H5T
secret_azure_storage_account rmipactalocal
secret_azure_source_portfolio_container uploadedportfolios

secret_runner_config_location centralus
secret_runner_config_config_path /configs/local.conf
secret_runner_config_identity_name pacta-runner-local
secret_runner_config_identity_subscription_id 69b6db12-37e3-4e1f-b48c-aa41dba612a9
secret_runner_config_identity_resource_group rmi-pacta-local
secret_runner_config_identity_client_id c02b7346-6ba6-438a-8136-1ccb608e5449
secret_runner_config_identity_managed_environment pacta-local
secret_runner_config_subscription_id 69b6db12-37e3-4e1f-b48c-aa41dba612a9
secret_runner_config_resource_group rmi-pacta-local
secret_runner_config_managed_identity_client_id c02b7346-6ba6-438a-8136-1ccb608e5449
secret_runner_config_job_name pacta-runner
secret_runner_config_image_registry rmisa.azurecr.io
secret_runner_config_image_name runner
39 changes: 13 additions & 26 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"errors"
"fmt"
"log"
"math/rand"
"net/http"
"os"
"strings"
Expand All @@ -26,7 +25,6 @@ import (
"github.com/RMI/pacta/secrets"
"github.com/RMI/pacta/session"
"github.com/RMI/pacta/task"
"github.com/Silicon-Ally/cryptorand"
"github.com/Silicon-Ally/zaphttplog"
chi "github.com/go-chi/chi/v5"
"github.com/go-chi/httprate"
Expand Down Expand Up @@ -91,14 +89,12 @@ func run(args []string) error {

azEventWebhookSecrets = fs.String("secret_azure_webhook_secrets", "", "A comma-separated list of shared secrets we'll accept for incoming webhooks")

runnerConfigLocation = fs.String("secret_runner_config_location", "", "Location (like 'centralus') where the runner jobs should be executed")
runnerConfigConfigPath = fs.String("secret_runner_config_config_path", "", "Config path (like '/configs/dev.conf') where the runner jobs should read their base config from")

runnerConfigIdentityName = fs.String("secret_runner_config_identity_name", "", "Name of the Azure identity to run runner jobs with")
runnerConfigIdentitySubscriptionID = fs.String("secret_runner_config_identity_subscription_id", "", "Subscription ID of the identity to run runner jobs with")
runnerConfigIdentityResourceGroup = fs.String("secret_runner_config_identity_resource_group", "", "Resource group of the identity to run runner jobs with")
runnerConfigIdentityClientID = fs.String("secret_runner_config_identity_client_id", "", "Client ID of the identity to run runner jobs with")
runnerConfigIdentityManagedEnvironment = fs.String("secret_runner_config_identity_managed_environment", "", "Name of the Container Apps Environment where runner jobs should run")
runnerConfigSubscriptionID = fs.String("secret_runner_config_subscription_id", "", "Subscription ID of the identity to run runner jobs with")
runnerConfigResourceGroup = fs.String("secret_runner_config_resource_group", "", "Resource group of the identity to run runner jobs with")
runnerConfigManagedIdentityClientID = fs.String("secret_runner_config_managed_identity_client_id", "", "Client ID of the identity to run runner jobs with")
runnerConfigJobName = fs.String("secret_runner_config_job_name", "", "Name of the Container Apps Job to start instances of.")

runnerConfigImageRegistry = fs.String("secret_runner_config_image_registry", "", "Registry where PACTA runner images live, like 'rmisa.azurecr.io'")
runnerConfigImageName = fs.String("secret_runner_config_image_name", "", "Name of the Docker image of the PACTA runner, like 'runner'")
Expand Down Expand Up @@ -141,15 +137,11 @@ func run(args []string) error {
Data: *authKeyData,
},
RunnerConfig: &secrets.RawRunnerConfig{
Location: *runnerConfigLocation,
ConfigPath: *runnerConfigConfigPath,
Identity: &secrets.RawRunnerIdentity{
Name: *runnerConfigIdentityName,
SubscriptionID: *runnerConfigIdentitySubscriptionID,
ResourceGroup: *runnerConfigIdentityResourceGroup,
ClientID: *runnerConfigIdentityClientID,
ManagedEnvironment: *runnerConfigIdentityManagedEnvironment,
},
ConfigPath: *runnerConfigConfigPath,
SubscriptionID: *runnerConfigSubscriptionID,
ResourceGroup: *runnerConfigResourceGroup,
ManagedIdentityClientID: *runnerConfigManagedIdentityClientID,
JobName: *runnerConfigJobName,
Image: &secrets.RawRunnerImage{
Registry: *runnerConfigImageRegistry,
Name: *runnerConfigImageName,
Expand Down Expand Up @@ -219,15 +211,10 @@ func run(args []string) error {
if *useAZRunner {
logger.Info("initializing Azure task runner client")
tmp, err := aztask.NewRunner(creds, &aztask.Config{
Location: runCfg.Location,
Rand: rand.New(cryptorand.New()),
Identity: &aztask.RunnerIdentity{
Name: runCfg.Identity.Name,
SubscriptionID: runCfg.Identity.SubscriptionID,
ResourceGroup: runCfg.Identity.ResourceGroup,
ClientID: runCfg.Identity.ClientID,
ManagedEnvironment: runCfg.Identity.ManagedEnvironment,
},
SubscriptionID: runCfg.SubscriptionID,
ResourceGroup: runCfg.ResourceGroup,
ManagedIdentityClientID: runCfg.ManagedIdentityClientID,
JobName: runCfg.JobName,
})
if err != nil {
return fmt.Errorf("failed to init Azure runner: %w", err)
Expand Down
Loading

0 comments on commit 0071ca5

Please sign in to comment.