From ee310c8775d739ca1979b3a56799d1f78aff7477 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Mon, 9 Dec 2024 17:11:19 -0600 Subject: [PATCH 01/10] add a limit before failing replication check and fail when mysql fails Signed-off-by: Florent Poinsard --- go/cmd/vtbackup/cli/vtbackup.go | 24 ++++++++++++++++++++++++ go/vt/mysqlctl/mysqld.go | 30 +++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/go/cmd/vtbackup/cli/vtbackup.go b/go/cmd/vtbackup/cli/vtbackup.go index 1b61c886ae7..84a26af156f 100644 --- a/go/cmd/vtbackup/cli/vtbackup.go +++ b/go/cmd/vtbackup/cli/vtbackup.go @@ -65,6 +65,11 @@ const ( phaseNameTakeNewBackup = "TakeNewBackup" phaseStatusCatchupReplicationStalled = "Stalled" phaseStatusCatchupReplicationStopped = "Stopped" + + // We will allow maximum 60 errors in a row when waiting for replication status before taking the new backup. + // As we try every second, this is equivalent to minimum 1 minute of continuously erroring before failing. + // It allows us to ignore transient errors while avoiding repeated errors in a loop (for over 60 seconds). + maximumErrorCountWhenWaitingForReplicationStatus = 60 ) var ( @@ -335,6 +340,14 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac if err != nil { return fmt.Errorf("failed to initialize mysql config: %v", err) } + ctx, cancelCtx := context.WithCancel(ctx) + backgroundCtx, cancelbackgroundCtx := context.WithCancel(backgroundCtx) + mysqld.OnFailure(func(err error) { + log.Warning("Cancelling the vtbackup context as MySQL has failed") + cancelCtx() + cancelbackgroundCtx() + }) + initCtx, initCancel := context.WithTimeout(ctx, mysqlTimeout) defer initCancel() initMysqldAt := time.Now() @@ -519,8 +532,14 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac statusErr error waitStartTime = time.Now() + + continuousErrorCount int ) for { + if continuousErrorCount == maximumErrorCountWhenWaitingForReplicationStatus { + return fmt.Errorf("timeout waiting for replication status after %d errors", maximumErrorCountWhenWaitingForReplicationStatus) + } + select { case <-ctx.Done(): return fmt.Errorf("error in replication catch up: %v", ctx.Err()) @@ -531,6 +550,7 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac status, statusErr = mysqld.ReplicationStatus(ctx) if statusErr != nil { log.Warningf("Error getting replication status: %v", statusErr) + continuousErrorCount++ continue } if status.Position.AtLeast(primaryPos) { @@ -553,7 +573,11 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac if err := startReplication(ctx, mysqld, topoServer); err != nil { log.Warningf("Failed to restart replication: %v", err) } + continuousErrorCount++ } else { + // Since replication is working if we got here, let's reset the error count to zero. + // This allows us to avoid failing if we only have transient errors from time to time. + continuousErrorCount = 0 phaseStatus.Set([]string{phaseNameCatchupReplication, phaseStatusCatchupReplicationStopped}, 0) } } diff --git a/go/vt/mysqlctl/mysqld.go b/go/vt/mysqlctl/mysqld.go index d7435705a8a..81431105d33 100644 --- a/go/vt/mysqlctl/mysqld.go +++ b/go/vt/mysqlctl/mysqld.go @@ -118,9 +118,10 @@ type Mysqld struct { capabilities capabilitySet // mutex protects the fields below. - mutex sync.Mutex - onTermFuncs []func() - cancelWaitCmd chan struct{} + mutex sync.Mutex + onTermFuncs []func() + onFailureFuncs []func(error) + cancelWaitCmd chan struct{} semiSyncType mysql.SemiSyncType } @@ -445,6 +446,11 @@ func (mysqld *Mysqld) startNoWait(cnf *Mycnf, mysqldArgs ...string) error { for _, callback := range mysqld.onTermFuncs { go callback() } + if err != nil { + for _, failureFunc := range mysqld.onFailureFuncs { + go failureFunc(err) + } + } mysqld.mutex.Unlock() } }(mysqld.cancelWaitCmd) @@ -609,12 +615,12 @@ func (mysqld *Mysqld) Shutdown(ctx context.Context, cnf *Mycnf, waitForMysqld bo // We're shutting down on purpose. We no longer want to be notified when // mysqld terminates. - mysqld.mutex.Lock() - if mysqld.cancelWaitCmd != nil { - close(mysqld.cancelWaitCmd) - mysqld.cancelWaitCmd = nil - } - mysqld.mutex.Unlock() + // mysqld.mutex.Lock() + // if mysqld.cancelWaitCmd != nil { + // close(mysqld.cancelWaitCmd) + // mysqld.cancelWaitCmd = nil + // } + // mysqld.mutex.Unlock() // possibly mysql is already shutdown, check for a few files first _, socketPathErr := os.Stat(cnf.SocketFile) @@ -1247,6 +1253,12 @@ func (mysqld *Mysqld) OnTerm(f func()) { mysqld.onTermFuncs = append(mysqld.onTermFuncs, f) } +func (mysqld *Mysqld) OnFailure(f func(error)) { + mysqld.mutex.Lock() + defer mysqld.mutex.Unlock() + mysqld.onFailureFuncs = append(mysqld.onFailureFuncs, f) +} + func buildLdPaths() ([]string, error) { vtMysqlRoot, err := vtenv.VtMysqlRoot() if err != nil { From 7cdfe8f2551edf942d901811d0011015c56c1233 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Mon, 13 Jan 2025 11:09:13 -0600 Subject: [PATCH 02/10] Remove debug code Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/mysqld.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/vt/mysqlctl/mysqld.go b/go/vt/mysqlctl/mysqld.go index 81431105d33..43ec3d88575 100644 --- a/go/vt/mysqlctl/mysqld.go +++ b/go/vt/mysqlctl/mysqld.go @@ -615,12 +615,12 @@ func (mysqld *Mysqld) Shutdown(ctx context.Context, cnf *Mycnf, waitForMysqld bo // We're shutting down on purpose. We no longer want to be notified when // mysqld terminates. - // mysqld.mutex.Lock() - // if mysqld.cancelWaitCmd != nil { - // close(mysqld.cancelWaitCmd) - // mysqld.cancelWaitCmd = nil - // } - // mysqld.mutex.Unlock() + mysqld.mutex.Lock() + if mysqld.cancelWaitCmd != nil { + close(mysqld.cancelWaitCmd) + mysqld.cancelWaitCmd = nil + } + mysqld.mutex.Unlock() // possibly mysql is already shutdown, check for a few files first _, socketPathErr := os.Stat(cnf.SocketFile) From 9eac6e6aa008afba3923bee2ddd0714f0fdc5591 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Mon, 13 Jan 2025 15:36:49 -0600 Subject: [PATCH 03/10] Add E2E test for the new retry logic Signed-off-by: Florent Poinsard --- .../backup/vtbackup/backup_only_test.go | 88 ++++++++++++++++--- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 4e018986100..4c295b8195f 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -47,6 +47,56 @@ var ( ) Engine=InnoDB;` ) +func TestFailingReplication(t *testing.T) { + defer cluster.PanicHandler(t) + + prepareCluster(t) + + // Run the entire backup test + firstBackupTest(t, false) + + // Insert one more row, the primary will be ahead of the last backup + _, err := primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test_failure')", keyspaceName, true) + require.NoError(t, err) + + // Disable replication from the primary by removing the grants to 'vt_repl'. + // Also make sure to reset the privileges to what they were before. + grant := func() { + _, err = primary.VttabletProcess.QueryTablet("GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';", keyspaceName, true) + require.NoError(t, err) + _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true) + require.NoError(t, err) + } + t.Cleanup(grant) + + _, err = primary.VttabletProcess.QueryTablet("REVOKE REPLICATION SLAVE ON *.* FROM 'vt_repl'@'%';", keyspaceName, true) + require.NoError(t, err) + _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true) + require.NoError(t, err) + + // Take a backup with vtbackup: the process should fail entirely as it cannot replicate from the primary. + _, err = startVtBackup(t, false, false, false) + require.Error(t, err) + + // keep in mind how many backups we have right now + backups, err := listBackups(shardKsName) + require.NoError(t, err) + + // In 30 seconds, grant the replication permission again to 'vt_repl'. + // This will mean that vtbackup should fail to replicate for ~30 seconds, until we grant the permission again. + go func() { + <-time.After(30 * time.Second) + grant() + }() + + // this will initially be stuck trying to replicate from the primary, and once we re-grant the permission in + // the goroutine above, the process will work and complete successfully. + _ = vtBackup(t, false, false, false) + verifyBackupCount(t, shardKsName, len(backups)+1) + + tearDown(t, true) +} + func TestTabletInitialBackup(t *testing.T) { // Test Initial Backup Flow // TestTabletInitialBackup will: @@ -60,6 +110,15 @@ func TestTabletInitialBackup(t *testing.T) { // - list the backups, remove them defer cluster.PanicHandler(t) + prepareCluster(t) + + // Run the entire backup test + firstBackupTest(t, true) + + tearDown(t, true) +} + +func prepareCluster(t *testing.T) { waitForReplicationToCatchup([]cluster.Vttablet{*replica1, *replica2}) dataPointReader := vtBackup(t, true, false, false) @@ -85,11 +144,6 @@ func TestTabletInitialBackup(t *testing.T) { "TabletExternallyReparented", primary.Alias) require.NoError(t, err) restore(t, replica1, "replica", "SERVING") - - // Run the entire backup test - firstBackupTest(t, "replica") - - tearDown(t, true) } func TestTabletBackupOnly(t *testing.T) { @@ -109,12 +163,12 @@ func TestTabletBackupOnly(t *testing.T) { replica1.VttabletProcess.ServingStatus = "NOT_SERVING" initTablets(t, true, true) - firstBackupTest(t, "replica") + firstBackupTest(t, true) tearDown(t, false) } -func firstBackupTest(t *testing.T, tabletType string) { +func firstBackupTest(t *testing.T, removeBackup bool) { // Test First Backup flow. // // firstBackupTest will: @@ -170,11 +224,13 @@ func firstBackupTest(t *testing.T, tabletType string) { // check the new replica has the data cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 2) - removeBackups(t) - verifyBackupCount(t, shardKsName, 0) + if removeBackup { + removeBackups(t) + verifyBackupCount(t, shardKsName, 0) + } } -func vtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedoLog bool) *opentsdb.DataPointReader { +func startVtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedoLog bool) (*os.File, error) { mysqlSocket, err := os.CreateTemp("", "vtbackup_test_mysql.sock") require.NoError(t, err) defer os.Remove(mysqlSocket.Name()) @@ -209,9 +265,19 @@ func vtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedo log.Infof("starting backup tablet %s", time.Now()) err = localCluster.StartVtbackup(newInitDBFile, initialBackup, keyspaceName, shardName, cell, extraArgs...) - require.NoError(t, err) + if err != nil { + return nil, err + } f, err := os.OpenFile(statsPath, os.O_RDONLY, 0) + if err != nil { + return nil, err + } + return f, nil +} + +func vtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedoLog bool) *opentsdb.DataPointReader { + f, err := startVtBackup(t, initialBackup, restartBeforeBackup, disableRedoLog) require.NoError(t, err) return opentsdb.NewDataPointReader(f) } From eb16b98ae87b1dd0bceef00c80cb1db44530fe1d Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Mon, 13 Jan 2025 15:52:21 -0600 Subject: [PATCH 04/10] Remove cluster panic handler after merging main Signed-off-by: Florent Poinsard --- go/test/endtoend/backup/vtbackup/backup_only_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 26384709080..186e3a4d41f 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -48,8 +48,6 @@ var ( ) func TestFailingReplication(t *testing.T) { - defer cluster.PanicHandler(t) - prepareCluster(t) // Run the entire backup test From 264c0482118187f6a6a0b4efa8d63e4ed6809cd9 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Mon, 13 Jan 2025 16:14:27 -0600 Subject: [PATCH 05/10] Fix test Signed-off-by: Florent Poinsard --- .../endtoend/backup/vtbackup/backup_only_test.go | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 186e3a4d41f..e0439f08c49 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -58,15 +58,6 @@ func TestFailingReplication(t *testing.T) { require.NoError(t, err) // Disable replication from the primary by removing the grants to 'vt_repl'. - // Also make sure to reset the privileges to what they were before. - grant := func() { - _, err = primary.VttabletProcess.QueryTablet("GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';", keyspaceName, true) - require.NoError(t, err) - _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true) - require.NoError(t, err) - } - t.Cleanup(grant) - _, err = primary.VttabletProcess.QueryTablet("REVOKE REPLICATION SLAVE ON *.* FROM 'vt_repl'@'%';", keyspaceName, true) require.NoError(t, err) _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true) @@ -84,7 +75,10 @@ func TestFailingReplication(t *testing.T) { // This will mean that vtbackup should fail to replicate for ~30 seconds, until we grant the permission again. go func() { <-time.After(30 * time.Second) - grant() + _, err = primary.VttabletProcess.QueryTablet("GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%';", keyspaceName, true) + require.NoError(t, err) + _, err = primary.VttabletProcess.QueryTablet("FLUSH PRIVILEGES;", keyspaceName, true) + require.NoError(t, err) }() // this will initially be stuck trying to replicate from the primary, and once we re-grant the permission in From 93d6c5273e0972f3ff56a942a30cadddca29fb6c Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 21 Jan 2025 14:15:58 -0600 Subject: [PATCH 06/10] Use OnTerm instead of OnFailure Signed-off-by: Florent Poinsard --- go/cmd/vtbackup/cli/vtbackup.go | 12 ++++++++---- go/vt/mysqlctl/mysqld.go | 18 +++--------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/go/cmd/vtbackup/cli/vtbackup.go b/go/cmd/vtbackup/cli/vtbackup.go index 84a26af156f..94cf0d7ec50 100644 --- a/go/cmd/vtbackup/cli/vtbackup.go +++ b/go/cmd/vtbackup/cli/vtbackup.go @@ -341,11 +341,15 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac return fmt.Errorf("failed to initialize mysql config: %v", err) } ctx, cancelCtx := context.WithCancel(ctx) - backgroundCtx, cancelbackgroundCtx := context.WithCancel(backgroundCtx) - mysqld.OnFailure(func(err error) { - log.Warning("Cancelling the vtbackup context as MySQL has failed") + backgroundCtx, cancelBackgroundCtx := context.WithCancel(backgroundCtx) + defer func() { + cancelCtx() + cancelBackgroundCtx() + }() + mysqld.OnTerm(func() { + log.Warning("Cancelling the vtbackup context as MySQL has terminated") cancelCtx() - cancelbackgroundCtx() + cancelBackgroundCtx() }) initCtx, initCancel := context.WithTimeout(ctx, mysqlTimeout) diff --git a/go/vt/mysqlctl/mysqld.go b/go/vt/mysqlctl/mysqld.go index 43ec3d88575..d7435705a8a 100644 --- a/go/vt/mysqlctl/mysqld.go +++ b/go/vt/mysqlctl/mysqld.go @@ -118,10 +118,9 @@ type Mysqld struct { capabilities capabilitySet // mutex protects the fields below. - mutex sync.Mutex - onTermFuncs []func() - onFailureFuncs []func(error) - cancelWaitCmd chan struct{} + mutex sync.Mutex + onTermFuncs []func() + cancelWaitCmd chan struct{} semiSyncType mysql.SemiSyncType } @@ -446,11 +445,6 @@ func (mysqld *Mysqld) startNoWait(cnf *Mycnf, mysqldArgs ...string) error { for _, callback := range mysqld.onTermFuncs { go callback() } - if err != nil { - for _, failureFunc := range mysqld.onFailureFuncs { - go failureFunc(err) - } - } mysqld.mutex.Unlock() } }(mysqld.cancelWaitCmd) @@ -1253,12 +1247,6 @@ func (mysqld *Mysqld) OnTerm(f func()) { mysqld.onTermFuncs = append(mysqld.onTermFuncs, f) } -func (mysqld *Mysqld) OnFailure(f func(error)) { - mysqld.mutex.Lock() - defer mysqld.mutex.Unlock() - mysqld.onFailureFuncs = append(mysqld.onFailureFuncs, f) -} - func buildLdPaths() ([]string, error) { vtMysqlRoot, err := vtenv.VtMysqlRoot() if err != nil { From 80a3c874ed94499fbcddc1c2dfab99a7aaeb1d3a Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 21 Jan 2025 14:54:47 -0600 Subject: [PATCH 07/10] Use LastErrors Signed-off-by: Florent Poinsard --- go/cmd/vtbackup/cli/vtbackup.go | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/go/cmd/vtbackup/cli/vtbackup.go b/go/cmd/vtbackup/cli/vtbackup.go index 94cf0d7ec50..c3cd9b1c119 100644 --- a/go/cmd/vtbackup/cli/vtbackup.go +++ b/go/cmd/vtbackup/cli/vtbackup.go @@ -19,6 +19,7 @@ package cli import ( "context" "crypto/rand" + "errors" "fmt" "math" "math/big" @@ -66,10 +67,7 @@ const ( phaseStatusCatchupReplicationStalled = "Stalled" phaseStatusCatchupReplicationStopped = "Stopped" - // We will allow maximum 60 errors in a row when waiting for replication status before taking the new backup. - // As we try every second, this is equivalent to minimum 1 minute of continuously erroring before failing. - // It allows us to ignore transient errors while avoiding repeated errors in a loop (for over 60 seconds). - maximumErrorCountWhenWaitingForReplicationStatus = 60 + timeoutWaitingForReplicationStatus = 60 * time.Second ) var ( @@ -536,12 +534,12 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac statusErr error waitStartTime = time.Now() - - continuousErrorCount int ) + + lastErr := vterrors.NewLastError("replication catch up", timeoutWaitingForReplicationStatus) for { - if continuousErrorCount == maximumErrorCountWhenWaitingForReplicationStatus { - return fmt.Errorf("timeout waiting for replication status after %d errors", maximumErrorCountWhenWaitingForReplicationStatus) + if !lastErr.ShouldRetry() { + return fmt.Errorf("timeout waiting for replication status after %.0f seconds", timeoutWaitingForReplicationStatus.Seconds()) } select { @@ -553,8 +551,8 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac lastStatus = status status, statusErr = mysqld.ReplicationStatus(ctx) if statusErr != nil { + lastErr.Record(statusErr) log.Warningf("Error getting replication status: %v", statusErr) - continuousErrorCount++ continue } if status.Position.AtLeast(primaryPos) { @@ -572,16 +570,15 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac } } if !status.Healthy() { - log.Warning("Replication has stopped before backup could be taken. Trying to restart replication.") + errStr := "Replication has stopped before backup could be taken. Trying to restart replication." + log.Warning(errStr) + lastErr.Record(errors.New(strings.ToLower(errStr))) + phaseStatus.Set([]string{phaseNameCatchupReplication, phaseStatusCatchupReplicationStopped}, 1) if err := startReplication(ctx, mysqld, topoServer); err != nil { log.Warningf("Failed to restart replication: %v", err) } - continuousErrorCount++ } else { - // Since replication is working if we got here, let's reset the error count to zero. - // This allows us to avoid failing if we only have transient errors from time to time. - continuousErrorCount = 0 phaseStatus.Set([]string{phaseNameCatchupReplication, phaseStatusCatchupReplicationStopped}, 0) } } From 17e69486aeb01491b0a090cdc3c6230b8e090347 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 21 Jan 2025 15:12:36 -0600 Subject: [PATCH 08/10] Remove leftover backups after TestFailingReplication Signed-off-by: Florent Poinsard --- go/test/endtoend/backup/vtbackup/backup_only_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index e0439f08c49..0934835ea3e 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -86,6 +86,9 @@ func TestFailingReplication(t *testing.T) { _ = vtBackup(t, false, false, false) verifyBackupCount(t, shardKsName, len(backups)+1) + removeBackups(t) + verifyBackupCount(t, shardKsName, 0) + tearDown(t, true) } From 4bf6735d71066c9315dbb18ca13bf442cc9a9109 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 21 Jan 2025 15:32:42 -0600 Subject: [PATCH 09/10] Apply review suggestions Signed-off-by: Florent Poinsard --- go/cmd/vtbackup/cli/vtbackup.go | 2 +- go/test/endtoend/backup/vtbackup/backup_only_test.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go/cmd/vtbackup/cli/vtbackup.go b/go/cmd/vtbackup/cli/vtbackup.go index c3cd9b1c119..5306cc21f11 100644 --- a/go/cmd/vtbackup/cli/vtbackup.go +++ b/go/cmd/vtbackup/cli/vtbackup.go @@ -345,7 +345,7 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac cancelBackgroundCtx() }() mysqld.OnTerm(func() { - log.Warning("Cancelling the vtbackup context as MySQL has terminated") + log.Warning("Cancelling vtbackup as MySQL has terminated") cancelCtx() cancelBackgroundCtx() }) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 0934835ea3e..97b9dec211e 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -81,9 +81,13 @@ func TestFailingReplication(t *testing.T) { require.NoError(t, err) }() + startTime := time.Now() // this will initially be stuck trying to replicate from the primary, and once we re-grant the permission in // the goroutine above, the process will work and complete successfully. _ = vtBackup(t, false, false, false) + + require.GreaterOrEqual(t, time.Since(startTime).Seconds(), 30) + verifyBackupCount(t, shardKsName, len(backups)+1) removeBackups(t) From 5f21d187c7faf955ce0b97cceda985ca684aeed6 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Tue, 21 Jan 2025 15:44:52 -0600 Subject: [PATCH 10/10] Fix float type Signed-off-by: Florent Poinsard --- go/test/endtoend/backup/vtbackup/backup_only_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 97b9dec211e..85a9ae7be16 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -86,7 +86,7 @@ func TestFailingReplication(t *testing.T) { // the goroutine above, the process will work and complete successfully. _ = vtBackup(t, false, false, false) - require.GreaterOrEqual(t, time.Since(startTime).Seconds(), 30) + require.GreaterOrEqual(t, time.Since(startTime).Seconds(), float64(30)) verifyBackupCount(t, shardKsName, len(backups)+1)