Skip to content

Commit

Permalink
PBM-1347: save restore meta with error if restore fails before start
Browse files Browse the repository at this point in the history
  • Loading branch information
defbin committed Nov 4, 2024
1 parent 10ba619 commit 7bd06f8
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 4 deletions.
59 changes: 56 additions & 3 deletions cmd/pbm-agent/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/percona/percona-backup-mongodb/pbm/backup"
"github.com/percona/percona-backup-mongodb/pbm/config"
"github.com/percona/percona-backup-mongodb/pbm/connect"
"github.com/percona/percona-backup-mongodb/pbm/ctrl"
"github.com/percona/percona-backup-mongodb/pbm/defs"
"github.com/percona/percona-backup-mongodb/pbm/errors"
Expand Down Expand Up @@ -93,13 +94,22 @@ func (a *Agent) Restore(ctx context.Context, r *ctrl.RestoreCmd, opid ctrl.OPID,
// XXX: why is backup searched on storage?
bcp, err = restore.LookupBackupMeta(ctx, a.leadConn, r.BackupName, a.brief.Me)
if err != nil {
l.Error("define base backup: %v", err)
err1 := addRestoreMetaWithError(ctx, a.leadConn, l, opid, r, nodeInfo.SetName,
"define base backup: %v", err)
if err1 != nil {
l.Error("failed to save meta: %v", err)
}
return
}

if !r.OplogTS.IsZero() && bcp.LastWriteTS.Compare(r.OplogTS) >= 0 {
l.Error("snapshot's last write is later than the target time. " +
"Try to set an earlier snapshot. Or leave the snapshot empty so PBM will choose one.")
err1 := addRestoreMetaWithError(ctx, a.leadConn, l, opid, r, nodeInfo.SetName,
"snapshot's last write is later than the target time. "+
"Try to set an earlier snapshot. Or leave the snapshot empty "+
"so PBM will choose one.")
if err1 != nil {
l.Error("failed to save meta: %v", err)
}
return
}
bcpType = bcp.Type
Expand Down Expand Up @@ -172,3 +182,46 @@ func (a *Agent) Restore(ctx context.Context, r *ctrl.RestoreCmd, opid ctrl.OPID,

l.Info("recovery successfully finished")
}

// addRestoreMetaWithError logs a restore failure and saves it as restore
// metadata, so the failure is recorded in the restores collection and not
// only in the agent log.
//
// errStr is a format string and args are its operands, exactly as they are
// passed to l.Error. The rendered message (not the raw template) is stored in
// both the restore-level and the replset-level Error fields.
//
// The record is written with Type set to defs.LogicalBackup and Status set to
// defs.StatusError. setName is used as the name of the replset entry.
//
// It returns a wrapped error if either of the two metadata writes fails.
func addRestoreMetaWithError(
	ctx context.Context,
	conn connect.Client,
	l log.LogEvent,
	opid ctrl.OPID,
	cmd *ctrl.RestoreCmd,
	setName string,
	errStr string,
	args ...any,
) error {
	l.Error(errStr, args...)

	// Render the message once. Without this the stored error would contain
	// the unexpanded verbs (e.g. "define base backup: %v"). When there are no
	// operands the text is kept verbatim so a literal '%' is not mangled.
	errMsg := errStr
	if len(args) > 0 {
		errMsg = errors.Errorf(errStr, args...).Error()
	}

	meta := &restore.RestoreMeta{
		Type:     defs.LogicalBackup,
		OPID:     opid.String(),
		Name:     cmd.Name,
		Backup:   cmd.BackupName,
		PITR:     int64(cmd.OplogTS.T),
		StartTS:  time.Now().UTC().Unix(),
		Status:   defs.StatusError,
		Error:    errMsg,
		Replsets: []restore.RestoreReplset{},
	}
	err := restore.SetRestoreMetaIfNotExists(ctx, conn, meta)
	if err != nil {
		return errors.Wrap(err, "write restore meta to db")
	}

	rs := restore.RestoreReplset{
		Name:       setName,
		StartTS:    time.Now().UTC().Unix(),
		Status:     defs.StatusError,
		Error:      errMsg,
		Conditions: restore.Conditions{},
	}
	err = restore.AddRestoreRSMeta(ctx, conn, cmd.Name, rs)
	if err != nil {
		return errors.Wrap(err, "write restore replset meta to db")
	}

	return nil
}
6 changes: 5 additions & 1 deletion cmd/pbm/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,11 @@ func doRestore(
return nil, errors.Wrap(err, "get storage")
}

fn = func(_ context.Context, _ connect.Client, name string) (*restore.RestoreMeta, error) {
fn = func(ctx context.Context, conn connect.Client, name string) (*restore.RestoreMeta, error) {
meta, err := restore.GetRestoreMeta(ctx, conn, name)
if err == nil {
return meta, nil
}
return restore.GetPhysRestoreMeta(name, stg, l)
}
startCtx, cancel = context.WithTimeout(ctx, waitPhysRestoreStart)
Expand Down
15 changes: 15 additions & 0 deletions pbm/restore/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,21 @@ func SetRestoreMeta(ctx context.Context, m connect.Client, meta *RestoreMeta) er
return err
}

// SetRestoreMetaIfNotExists stores meta in the restores collection unless a
// document with the same name is already present.
//
// Before writing, it sets meta.LastTransitionTS to meta.StartTS and appends a
// Condition built from meta.StartTS and meta.Status to meta.Conditions; the
// passed-in meta is modified in place.
//
// It returns the error reported by the update call, if any.
func SetRestoreMetaIfNotExists(ctx context.Context, m connect.Client, meta *RestoreMeta) error {
	meta.LastTransitionTS = meta.StartTS
	meta.Conditions = append(meta.Conditions, &Condition{
		Timestamp: meta.StartTS,
		Status:    meta.Status,
	})

	// $setOnInsert applies the fields only when the upsert creates a new
	// document. A plain $set would overwrite an already existing restore
	// record with the same name, contradicting "if not exists".
	_, err := m.RestoresCollection().UpdateOne(ctx,
		bson.D{{"name", meta.Name}},
		bson.D{{"$setOnInsert", meta}},
		options.Update().SetUpsert(true))

	return err
}

// GetLastRestore returns last successfully finished restore
// and nil if there is no such restore yet.
func GetLastRestore(ctx context.Context, m connect.Client) (*RestoreMeta, error) {
Expand Down

0 comments on commit 7bd06f8

Please sign in to comment.