Skip to content

Commit 40b7251

Browse files
auricom and claude committed
fix(raft): abdicate leadership when store is significantly behind raft state
When a node wins election but its local store is more than 1 block behind the raft FSM state, RecoverFromRaft cannot replay the gap (it only handles the single latest block in the raft snapshot). Previously the node would log "recovery successful" and start leader operations anyway, then stall block production while the executor repeatedly failed to sync the missing blocks from a store that did not have them. Fix: in DynamicLeaderElection.Run, detect diff < -1 at the follower→leader transition and immediately transfer leadership to a better-synced peer. diff == -1 is preserved: RecoverFromRaft can apply exactly one block from the raft snapshot, so that path is unchanged. Closes #3255 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 2d3dc8e commit 40b7251

2 files changed

Lines changed: 63 additions & 0 deletions

File tree

pkg/raft/election.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,23 @@ func (d *DynamicLeaderElection) Run(ctx context.Context) error {
132132
if err != nil {
133133
return err
134134
}
135+
if diff < -1 {
136+
// Store is more than 1 block behind raft state.
137+
// RecoverFromRaft can only apply the single latest block
138+
// from the raft snapshot; it cannot replay a larger gap.
139+
// Starting leader operations in this state would stall block
140+
// production until catch-up completes (potentially minutes or
141+
// hours). Abdicate immediately so a better-synced peer can
142+
// take leadership.
143+
d.logger.Warn().
144+
Int("store_lag_blocks", -diff).
145+
Uint64("raft_height", raftState.Height).
146+
Msg("became leader but store is significantly behind raft state; abdicating to prevent stalled block production")
147+
if tErr := d.node.leadershipTransfer(); tErr != nil {
148+
d.logger.Error().Err(tErr).Msg("leadership transfer failed after store-lag abdication")
149+
}
150+
continue
151+
}
135152
if diff != 0 {
136153
d.logger.Info().Msg("became leader but not synced, attempting recovery")
137154
if err := runnable.Recover(ctx, raftState); err != nil {

pkg/raft/election_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,52 @@ func TestDynamicLeaderElectionRun(t *testing.T) {
221221
assert.ErrorIs(t, err, context.Canceled)
222222
},
223223
},
224+
"abdicate when store significantly behind raft": {
225+
setup: func(t *testing.T) (*DynamicLeaderElection, context.Context, context.CancelFunc) {
226+
m := newMocksourceNode(t)
227+
leaderCh := make(chan bool, 2)
228+
m.EXPECT().leaderCh().Return((<-chan bool)(leaderCh))
229+
// GetState called in verifyState (follower start) and in leader sync check
230+
m.EXPECT().GetState().Return(&RaftBlockState{Height: 10})
231+
m.EXPECT().GetState().Return(&RaftBlockState{Height: 10})
232+
m.EXPECT().Config().Return(testCfg()).Times(2)
233+
m.EXPECT().waitForMsgsLanded(2 * time.Millisecond).Return(nil)
234+
m.EXPECT().NodeID().Return("self")
235+
m.EXPECT().leaderID().Return("self")
236+
// Abdication must transfer leadership
237+
m.EXPECT().leadershipTransfer().Return(nil)
238+
239+
fStarted := make(chan struct{})
240+
follower := &testRunnable{
241+
startedCh: fStarted,
242+
isSyncedFn: func(*RaftBlockState) (int, error) { return -5, nil },
243+
}
244+
// Leader must never start
245+
leader := &testRunnable{runFn: func(ctx context.Context) error {
246+
t.Fatal("leader should not start when store is significantly behind raft")
247+
return nil
248+
}}
249+
250+
logger := zerolog.Nop()
251+
d := &DynamicLeaderElection{logger: logger, node: m,
252+
leaderFactory: func() (Runnable, error) { return leader, nil },
253+
followerFactory: func() (Runnable, error) { return follower, nil },
254+
}
255+
ctx, cancel := context.WithCancel(t.Context())
256+
go func() {
257+
leaderCh <- false
258+
<-fStarted
259+
leaderCh <- true
260+
time.Sleep(10 * time.Millisecond)
261+
cancel()
262+
}()
263+
return d, ctx, cancel
264+
},
265+
assertF: func(t *testing.T, err error) {
266+
require.Error(t, err)
267+
assert.ErrorIs(t, err, context.Canceled)
268+
},
269+
},
224270
"lost leadership during sync wait": {
225271
setup: func(t *testing.T) (*DynamicLeaderElection, context.Context, context.CancelFunc) {
226272
m := newMocksourceNode(t)

0 commit comments

Comments (0)