Skip to content

Commit

Permalink
vm/gce: retry GCE init
Browse files Browse the repository at this point in the history
We've been seeing an increase in "failed to init gce" errors on syzbot.
These problems seem totally transient, so let's address them by retrying
the initialization instead of aborting syz-manager's execution.
  • Loading branch information
a-nogikh committed Oct 10, 2023
1 parent 20b77f8 commit 83165b5
Showing 1 changed file with 27 additions and 2 deletions.
29 changes: 27 additions & 2 deletions vm/gce/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ func Ctor(env *vmimpl.Env, consoleReadCmd string) (*Pool, error) {
return nil, fmt.Errorf("both image and gce_image are specified")
}

GCE, err := gce.NewContext(cfg.ZoneID)
GCE, err := initGCE(cfg.ZoneID)
if err != nil {
return nil, fmt.Errorf("failed to init gce: %w", err)
return nil, err
}

log.Logf(0, "GCE initialized: running on %v, internal IP %v, project %v, zone %v, net %v/%v",
GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID, GCE.Network, GCE.Subnetwork)

Expand Down Expand Up @@ -138,6 +139,30 @@ func Ctor(env *vmimpl.Env, consoleReadCmd string) (*Pool, error) {
return pool, nil
}

// initGCE creates a GCE context for zoneID via gce.NewContext, retrying on
// failure instead of giving up after the first error.
//
// Up to maxTries calls are made, with a fixed pause between consecutive
// attempts. Every failed attempt is logged. If none of the attempts succeeds,
// the error from the last attempt is returned, wrapped so that callers can
// still inspect it with errors.Is / errors.As.
func initGCE(zoneID string) (*gce.Context, error) {
	const (
		maxTries = 3
		pause    = 5 * time.Second
	)
	var lastErr error
	for attempt := 1; ; attempt++ {
		gceCtx, err := gce.NewContext(zoneID)
		if err == nil {
			return gceCtx, nil
		}
		lastErr = err
		log.Logf(0, "init GCE attempt %d/%d failed: %v", attempt, maxTries, err)
		if attempt == maxTries {
			// Out of attempts: do not sleep before reporting the failure.
			break
		}
		// Init errors have been observed to be transient; wait before retrying.
		time.Sleep(pause)
	}
	return nil, fmt.Errorf("all attempts to init GCE failed: %w", lastErr)
}

// Count returns the Count value stored in the pool's configuration
// (presumably the number of VM instances this pool manages; confirm against
// the config definition).
func (pool *Pool) Count() int {
return pool.cfg.Count
}
Expand Down

0 comments on commit 83165b5

Please sign in to comment.