Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VReplication: Auto renew lease on Keyspace Lock #16240

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go/vt/topo/etcd2topo/lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"path"

"github.com/spf13/pflag"

"go.etcd.io/etcd/api/v3/mvccpb"
clientv3 "go.etcd.io/etcd/client/v3"

Expand Down
59 changes: 59 additions & 0 deletions go/vt/topo/keyspace_lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ package topo
import (
"context"
"path"
"time"

"vitess.io/vitess/go/vt/vterrors"

vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)

const (
// MaxKeyspaceLockLeaseTTL is the upper bound on how long
// LockKeyspaceWithLeaseRenewal keeps renewing a keyspace lock's lease. Once
// it is reached, a DEADLINE_EXCEEDED error is sent on the returned error
// channel instead of another renewal.
MaxKeyspaceLockLeaseTTL = 10 * time.Minute
// leaseRenewalInterval is how long the renewal goroutine started by
// LockKeyspaceWithLeaseRenewal waits between lease renewal attempts.
leaseRenewalInterval = 10 * time.Second
)

type keyspaceLock struct {
Expand Down Expand Up @@ -56,3 +66,52 @@ func CheckKeyspaceLocked(ctx context.Context, keyspace string) error {
keyspace: keyspace,
})
}

// LockKeyspaceWithLeaseRenewal locks the keyspace and starts a goroutine that
// attempts to renew the lock's lease every leaseRenewalInterval until
// MaxKeyspaceLockLeaseTTL is reached. The goroutine exits early, without
// reporting an error, as soon as the lock context is cancelled or the returned
// unlock function is called.
//
// It returns:
//   - the lock context produced by acquiring the lock,
//   - an unlock function that stops the renewal goroutine and then releases the
//     lock; it must be called exactly once, as a second call would close an
//     already-closed channel and panic,
//   - a read-only error channel that you should regularly check to ensure that
//     you have not lost the lock or encountered any other non-recoverable error
//     related to your lease. At most one error is ever sent, and the channel is
//     closed when the renewal goroutine exits,
//   - an error if the lock could not be acquired, in which case the other
//     return values are nil.
func (ts *Server) LockKeyspaceWithLeaseRenewal(ctx context.Context, keyspace, action string) (context.Context, func(*error), <-chan error, error) {
	ksLock := &keyspaceLock{keyspace: keyspace}
	lockCtx, unlockF, err := ts.internalLock(ctx, ksLock, action, true)
	if err != nil {
		return nil, nil, nil, err
	}
	done := make(chan struct{}) // Closed by the unlock function: our work is done and we should exit
	// Buffered so the single error the goroutine may send never blocks it,
	// even if the caller is not currently reading.
	errCh := make(chan error, 1)
	go func() {
		defer close(errCh)

		// A ticker combined with select (rather than time.Sleep) lets the
		// goroutine exit immediately on unlock or cancellation instead of
		// lingering for up to a full renewal interval.
		ticker := time.NewTicker(leaseRenewalInterval)
		defer ticker.Stop()

		// waitForTick blocks until the next renewal interval has elapsed. It
		// returns false if the lock context was cancelled or the unlock
		// function was called, in which case the goroutine must stop.
		waitForTick := func() bool {
			select {
			case <-lockCtx.Done():
				return false
			case <-done:
				return false
			case <-ticker.C:
			}
			// select picks randomly among ready cases, so a stop signal may
			// have been ready at the same time as the tick. Give the stop
			// signals priority so we never act on a lock that was released.
			select {
			case <-lockCtx.Done():
				return false
			case <-done:
				return false
			default:
				return true
			}
		}

		// The last interval before the TTL is reserved for reporting that the
		// time limit was hit, hence one renewal fewer than intervals in the TTL.
		maxRenewals := int(MaxKeyspaceLockLeaseTTL/leaseRenewalInterval) - 1
		for i := 0; i < maxRenewals; i++ {
			if !waitForTick() {
				return
			}
			// Attempt to renew the lease.
			if err := checkLocked(lockCtx, ksLock); err != nil {
				errCh <- vterrors.Wrapf(err, "failed to renew keyspace %s lock lease", keyspace)
				return
			}
		}

		// We have used up all renewals; wait out the final interval and, if the
		// lock is still held by then, tell the caller the time limit was hit.
		if !waitForTick() {
			return
		}
		errCh <- vterrors.Errorf(vtrpcpb.Code_DEADLINE_EXCEEDED, "cannot renew keyspace %s lock lease as we've hit the time limit of %v",
			keyspace, MaxKeyspaceLockLeaseTTL)
	}()
	// Add to the unlock function to end the lease renewal work.
	newUnlockF := func(err *error) {
		close(done)
		unlockF(err)
	}
	return lockCtx, newUnlockF, errCh, nil
}
3 changes: 3 additions & 0 deletions go/vt/vtctl/workflow/resharder.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ func (s *Server) buildResharder(ctx context.Context, keyspace, workflow string,
if err != nil {
return nil, vterrors.Wrapf(err, "GetShard(%s) failed", shard)
}
if si.PrimaryAlias == nil {
return nil, fmt.Errorf("target shard %v has no primary tablet", shard)
}
Comment on lines +102 to +104
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unrelated, but addresses a panic raised in the Vitess Slack where the target shard had no primary tablet.

if si.IsPrimaryServing {
return nil, fmt.Errorf("target shard %v is in serving state", shard)
}
Expand Down
Loading
Loading