Skip to content

Commit

Permalink
wip - remove retry logic
Browse files Browse the repository at this point in the history
Signed-off-by: Florent Poinsard <[email protected]>
  • Loading branch information
frouioui committed Jan 21, 2025
1 parent 93d6c52 commit 2b75327
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 48 deletions.
14 changes: 7 additions & 7 deletions go/cmd/vtbackup/cli/vtbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,12 +537,12 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac

waitStartTime = time.Now()

continuousErrorCount int
// continuousErrorCount int
)
for {
if continuousErrorCount == maximumErrorCountWhenWaitingForReplicationStatus {
return fmt.Errorf("timeout waiting for replication status after %d errors", maximumErrorCountWhenWaitingForReplicationStatus)
}
// if continuousErrorCount == maximumErrorCountWhenWaitingForReplicationStatus {
// return fmt.Errorf("timeout waiting for replication status after %d errors", maximumErrorCountWhenWaitingForReplicationStatus)
// }

select {
case <-ctx.Done():
Expand All @@ -554,7 +554,7 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac
status, statusErr = mysqld.ReplicationStatus(ctx)
if statusErr != nil {
log.Warningf("Error getting replication status: %v", statusErr)
continuousErrorCount++
// continuousErrorCount++
continue
}
if status.Position.AtLeast(primaryPos) {
Expand All @@ -577,11 +577,11 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac
if err := startReplication(ctx, mysqld, topoServer); err != nil {
log.Warningf("Failed to restart replication: %v", err)
}
continuousErrorCount++
// continuousErrorCount++
} else {
// Since replication is working if we got here, let's reset the error count to zero.
// This allows us to avoid failing if we only have transient errors from time to time.
continuousErrorCount = 0
// continuousErrorCount = 0
phaseStatus.Set([]string{phaseNameCatchupReplication, phaseStatusCatchupReplicationStopped}, 0)
}
}
Expand Down
82 changes: 41 additions & 41 deletions go/test/endtoend/backup/vtbackup/backup_only_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,47 +47,47 @@ var (
) Engine=InnoDB;`
)

// TestFailingReplication exercises vtbackup while replication from the primary
// is broken, and again once it is repaired part-way through a run.
//
// The test proceeds in three stages:
//  1. A regular backup is taken, then a row is inserted on the primary so the
//     primary is ahead of that backup.
//  2. The REPLICATION SLAVE grant is revoked from 'vt_repl'; startVtBackup is
//     then expected to return an error.
//  3. A helper goroutine re-grants the privilege after 30 seconds while a
//     second vtbackup run is in flight; that run is expected to add exactly
//     one backup to the count recorded beforehand.
func TestFailingReplication(t *testing.T) {
	prepareCluster(t)

	// Run the entire backup test
	firstBackupTest(t, false)

	// Insert one more row, the primary will be ahead of the last backup
	_, err := primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test_failure')", keyspaceName, true)
	require.NoError(t, err)

	// Disable replication from the primary by removing the grants to 'vt_repl'.
	_, err = primary.VttabletProcess.QueryTablet("REVOKE REPLICATION SLAVE ON *.* FROM 'vt_repl'@'%';", keyspaceName, true)
	require.NoError(t, err)
	_, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true)
	require.NoError(t, err)

	// Take a backup with vtbackup: the process should fail entirely as it cannot replicate from the primary.
	_, err = startVtBackup(t, false, false, false)
	require.Error(t, err)

	// keep in mind how many backups we have right now
	backups, err := listBackups(shardKsName)
	require.NoError(t, err)

	// In 30 seconds, grant the replication permission again to 'vt_repl'.
	// This will mean that vtbackup should fail to replicate for ~30 seconds, until we grant the permission again.
	//
	// The goroutine uses its own error variables and reports failures with
	// t.Errorf: require.* ends in t.FailNow, which must only be called from the
	// goroutine running the test, and the outer err belongs to that goroutine.
	go func() {
		<-time.After(30 * time.Second)
		if _, grantErr := primary.VttabletProcess.QueryTablet("GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';", keyspaceName, true); grantErr != nil {
			t.Errorf("re-granting replication privilege to 'vt_repl': %v", grantErr)
			return
		}
		if _, flushErr := primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true); flushErr != nil {
			t.Errorf("flushing privileges after re-grant: %v", flushErr)
		}
	}()

	// this will initially be stuck trying to replicate from the primary, and once we re-grant the permission in
	// the goroutine above, the process will work and complete successfully.
	_ = vtBackup(t, false, false, false)
	verifyBackupCount(t, shardKsName, len(backups)+1)

	tearDown(t, true)
}
// func TestFailingReplication(t *testing.T) {
// prepareCluster(t)
//
// // Run the entire backup test
// firstBackupTest(t, false)
//
// // Insert one more row, the primary will be ahead of the last backup
// _, err := primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test_failure')", keyspaceName, true)
// require.NoError(t, err)
//
// // Disable replication from the primary by removing the grants to 'vt_repl'.
// _, err = primary.VttabletProcess.QueryTablet("REVOKE REPLICATION SLAVE ON *.* FROM 'vt_repl'@'%';", keyspaceName, true)
// require.NoError(t, err)
// _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true)
// require.NoError(t, err)
//
// // Take a backup with vtbackup: the process should fail entirely as it cannot replicate from the primary.
// _, err = startVtBackup(t, false, false, false)
// require.Error(t, err)
//
// // keep in mind how many backups we have right now
// backups, err := listBackups(shardKsName)
// require.NoError(t, err)
//
// // In 30 seconds, grant the replication permission again to 'vt_repl'.
// // This will mean that vtbackup should fail to replicate for ~30 seconds, until we grant the permission again.
// go func() {
// <-time.After(30 * time.Second)
// _, err = primary.VttabletProcess.QueryTablet("GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';", keyspaceName, true)
// require.NoError(t, err)
// _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true)
// require.NoError(t, err)
// }()
//
// // this will initially be stuck trying to replicate from the primary, and once we re-grant the permission in
// // the goroutine above, the process will work and complete successfully.
// _ = vtBackup(t, false, false, false)
// verifyBackupCount(t, shardKsName, len(backups)+1)
//
// tearDown(t, true)
// }

func TestTabletInitialBackup(t *testing.T) {
// Test Initial Backup Flow
Expand Down

0 comments on commit 2b75327

Please sign in to comment.