Skip to content

Commit

Permalink
issue-708: Set lock_wait_timeout to SetReadOnly
Browse files Browse the repository at this point in the history
  • Loading branch information
shunki-fujita committed Jul 9, 2024
1 parent 5f8ebc1 commit baadcc7
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 5 deletions.
4 changes: 4 additions & 0 deletions clustering/mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,10 @@ func (o *mockOperator) WaitForGTID(_ context.Context, gtidSet string, _ int) err
return errors.New("waitForGTID: timed out")
}

// SetSessionLockWaitTimeout is the mock implementation: it ignores both
// arguments and always reports success.
func (o *mockOperator) SetSessionLockWaitTimeout(context.Context, int) error {
	return nil
}

// SetReadOnly makes the instance super_read_only if `true` is passed.
// Otherwise, this stops the replication and makes the instance writable.
func (o *mockOperator) SetReadOnly(ctx context.Context, readonly bool) error {
Expand Down
32 changes: 27 additions & 5 deletions clustering/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,34 @@ func (p *managerProcess) switchover(ctx context.Context, ss *StatusSet) error {
log.Info("begin switchover the primary", "current", ss.Primary, "next", ss.Candidate)

pdb := ss.DBOps[ss.Primary]
if err := pdb.SetReadOnly(ctx, true); err != nil {
return fmt.Errorf("failed to make instance %d read-only: %w", ss.Primary, err)

// SetReadOnly waits for a running DML.
// Therefore, if it waits for a long time, deleteGracePeriodSeconds may be reached.
// To avoid this, set lock_wait_timeout to a short time temporarily.
// If SetReadOnly fails, kill all processes and retry.
succeeded := false
if err := pdb.SetSessionLockWaitTimeout(ctx, 15); err != nil {
return fmt.Errorf("failed to set lock_wait_timeout: %w", err)
}
for i := 0; i < 2; i++ {
if err := pdb.SetReadOnly(ctx, true); err != nil {
log.Error(err, "failed to set read-only mode", "instance", ss.Primary)
} else {
succeeded = true
}
time.Sleep(100 * time.Millisecond)
if err := pdb.KillConnections(ctx); err != nil {
return fmt.Errorf("failed to kill connections in instance %d: %w", ss.Primary, err)
}
if succeeded {
break
}
}
time.Sleep(100 * time.Millisecond)
if err := pdb.KillConnections(ctx); err != nil {
return fmt.Errorf("failed to kill connections in instance %d: %w", ss.Primary, err)
if !succeeded {
return fmt.Errorf("failed to set read-only mode in instance %d", ss.Primary)
}
if err := pdb.SetSessionLockWaitTimeout(ctx, 0); err != nil {
return fmt.Errorf("failed to set lock_wait_timeout: %w", err)
}
pst, err := pdb.GetStatus(ctx)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions pkg/dbop/nop.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ func (o NopOperator) WaitForGTID(ctx context.Context, gtidSet string, timeoutSec
return ErrNop
}

// SetSessionLockWaitTimeout performs no operation; it ignores its
// arguments and always returns ErrNop.
func (o NopOperator) SetSessionLockWaitTimeout(context.Context, int) error {
	return ErrNop
}

// SetReadOnly performs no operation; it ignores its arguments and
// always returns ErrNop.
func (o NopOperator) SetReadOnly(_ context.Context, _ bool) error {
	return ErrNop
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/dbop/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ type Operator interface {
// If `timeoutSeconds` is zero, this will not timeout.
WaitForGTID(ctx context.Context, gtidSet string, timeoutSeconds int) error

// SetSessionLockWaitTimeout sets @@SESSION.lock_wait_timeout to `timeoutSeconds`.
// If `timeoutSeconds` is zero, this resets @@SESSION.lock_wait_timeout to the value of @@GLOBAL.lock_wait_timeout.
SetSessionLockWaitTimeout(ctx context.Context, timeoutSeconds int) error

// SetReadOnly makes the instance super_read_only if `true` is passed.
// Otherwise, this stops the replication and makes the instance writable.
SetReadOnly(context.Context, bool) error
Expand Down
13 changes: 13 additions & 0 deletions pkg/dbop/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ func (o *operator) WaitForGTID(ctx context.Context, gtid string, timeoutSeconds
return nil
}

// SetSessionLockWaitTimeout issues a `SET SESSION lock_wait_timeout` statement
// through o.db.
//
// When timeoutSeconds is zero, the session value is reset to
// @@GLOBAL.lock_wait_timeout; for any other value, the session value is set to
// timeoutSeconds. Any error from executing the statement is wrapped and returned.
//
// NOTE(review): a SESSION variable presumably applies only to the connection
// that runs the statement; how o.db manages connections is not visible here —
// confirm that later statements reuse the same connection.
func (o *operator) SetSessionLockWaitTimeout(ctx context.Context, timeoutSeconds int) error {
	var result error
	switch timeoutSeconds {
	case 0:
		// Zero means "restore": copy the global value back into the session.
		_, execErr := o.db.ExecContext(ctx, "SET SESSION lock_wait_timeout=@@GLOBAL.lock_wait_timeout")
		if execErr != nil {
			result = fmt.Errorf("failed to reset lock_wait_timeout: %w", execErr)
		}
	default:
		_, execErr := o.db.ExecContext(ctx, "SET SESSION lock_wait_timeout=?", timeoutSeconds)
		if execErr != nil {
			result = fmt.Errorf("failed to set lock_wait_timeout: %w", execErr)
		}
	}
	return result
}

func (o *operator) SetReadOnly(ctx context.Context, readOnly bool) error {
if readOnly {
if _, err := o.db.ExecContext(ctx, "SET GLOBAL super_read_only=1"); err != nil {
Expand Down
18 changes: 18 additions & 0 deletions pkg/dbop/replication_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,24 @@ var _ = Describe("replication", func() {
Expect(st1.GlobalVariables.SemiSyncMasterEnabled).To(BeFalse())
Expect(st1.GlobalVariables.SemiSyncSlaveEnabled).To(BeFalse())

By("checking SetSessionLockWaitTimeout works")
var (
sessionTimeout int
globalTimeout int
)
err = ops[0].SetSessionLockWaitTimeout(ctx, 15)
Expect(err).NotTo(HaveOccurred())
err = ops[0].db.Get(&sessionTimeout, `SELECT @@SESSION.lock_wait_timeout`)
Expect(err).NotTo(HaveOccurred())
Expect(sessionTimeout).To(Equal(15))
err = ops[0].SetSessionLockWaitTimeout(ctx, 0)
Expect(err).NotTo(HaveOccurred())
err = ops[0].db.Get(&sessionTimeout, `SELECT @@SESSION.lock_wait_timeout`)
Expect(err).NotTo(HaveOccurred())
err = ops[0].db.Get(&globalTimeout, `SELECT @@GLOBAL.lock_wait_timeout`)
Expect(err).NotTo(HaveOccurred())
Expect(sessionTimeout).To(Equal(globalTimeout))

By("checking WaitForGTID works")
err = ops[1].StopReplicaIOThread(ctx)
Expect(err).NotTo(HaveOccurred())
Expand Down

0 comments on commit baadcc7

Please sign in to comment.