github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/scheduler.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"container/list"
	"context"
	"fmt"
	"sync"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
)

const rangeIDChunkSize = 1000

type rangeIDChunk struct {
	// Valid contents are buf[rd:wr], read at buf[rd], write at buf[wr].
	buf    [rangeIDChunkSize]roachpb.RangeID
	rd, wr int
}

func (c *rangeIDChunk) PushBack(id roachpb.RangeID) bool {
	if c.WriteCap() == 0 {
		return false
	}
	c.buf[c.wr] = id
	c.wr++
	return true
}

func (c *rangeIDChunk) PopFront() (roachpb.RangeID, bool) {
	if c.Len() == 0 {
		return 0, false
	}
	id := c.buf[c.rd]
	c.rd++
	return id, true
}

func (c *rangeIDChunk) WriteCap() int {
	return len(c.buf) - c.wr
}

func (c *rangeIDChunk) Len() int {
	return c.wr - c.rd
}

// rangeIDQueue is a chunked queue of range IDs. Instead of a separate list
// element for every range ID, it uses a rangeIDChunk to hold many range IDs,
// amortizing the allocation/GC cost. Using a chunk queue avoids any copying
// that would occur if a slice were used (the copying would occur on slice
// reallocation).
type rangeIDQueue struct {
	chunks list.List
	len    int
}

func (q *rangeIDQueue) PushBack(id roachpb.RangeID) {
	if q.chunks.Len() == 0 || q.back().WriteCap() == 0 {
		q.chunks.PushBack(&rangeIDChunk{})
	}
	q.len++
	if !q.back().PushBack(id) {
		panic(fmt.Sprintf(
			"unable to push rangeID to chunk: len=%d, cap=%d",
			q.back().Len(), q.back().WriteCap()))
	}
}

func (q *rangeIDQueue) PopFront() (roachpb.RangeID, bool) {
	if q.len == 0 {
		return 0, false
	}
	frontElem := q.chunks.Front()
	front := frontElem.Value.(*rangeIDChunk)
	id, ok := front.PopFront()
	if !ok {
		panic("encountered empty chunk")
	}
	q.len--
	if front.Len() == 0 && front.WriteCap() == 0 {
		q.chunks.Remove(frontElem)
	}
	return id, true
}

func (q *rangeIDQueue) Len() int {
	return q.len
}

func (q *rangeIDQueue) back() *rangeIDChunk {
	return q.chunks.Back().Value.(*rangeIDChunk)
}
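// exampleRangeIDQueueUsage is an illustrative sketch, not part of the original
// file: it shows how the chunked queue amortizes allocations. PushBack only
// allocates a new rangeIDChunk when the tail chunk is full, and PopFront only
// removes a chunk from the list once a fully written chunk has been drained,
// so 2,500 IDs cost three chunk allocations rather than 2,500 list elements.
func exampleRangeIDQueueUsage() {
	var q rangeIDQueue
	for id := roachpb.RangeID(1); id <= 2500; id++ {
		q.PushBack(id) // allocates a chunk at IDs 1, 1001, and 2001
	}
	for {
		id, ok := q.PopFront()
		if !ok {
			break // queue drained; the partially filled tail chunk remains
		}
		_ = id // a real caller would hand the ID to a worker here
	}
}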
type raftProcessor interface {
	// Process a raft.Ready struct containing entries and messages that are
	// ready to read, be saved to stable storage, committed, or sent to other
	// peers.
	processReady(context.Context, roachpb.RangeID)
	// Process all queued messages for the specified range.
	// Return true if the range should be queued for ready processing.
	processRequestQueue(context.Context, roachpb.RangeID) bool
	// Process a raft tick for the specified range.
	// Return true if the range should be queued for ready processing.
	processTick(context.Context, roachpb.RangeID) bool
}

type raftScheduleState int

const (
	stateQueued raftScheduleState = 1 << iota
	stateRaftReady
	stateRaftRequest
	stateRaftTick
)

type raftScheduler struct {
	processor  raftProcessor
	numWorkers int

	mu struct {
		syncutil.Mutex
		cond    *sync.Cond
		queue   rangeIDQueue
		state   map[roachpb.RangeID]raftScheduleState
		stopped bool
	}

	done sync.WaitGroup
}

func newRaftScheduler(
	metrics *StoreMetrics, processor raftProcessor, numWorkers int,
) *raftScheduler {
	s := &raftScheduler{
		processor:  processor,
		numWorkers: numWorkers,
	}
	s.mu.cond = sync.NewCond(&s.mu.Mutex)
	s.mu.state = make(map[roachpb.RangeID]raftScheduleState)
	return s
}

func (s *raftScheduler) Start(ctx context.Context, stopper *stop.Stopper) {
	stopper.RunWorker(ctx, func(ctx context.Context) {
		<-stopper.ShouldStop()
		s.mu.Lock()
		s.mu.stopped = true
		s.mu.Unlock()
		s.mu.cond.Broadcast()
	})

	s.done.Add(s.numWorkers)
	for i := 0; i < s.numWorkers; i++ {
		stopper.RunWorker(ctx, func(ctx context.Context) {
			s.worker(ctx)
		})
	}
}

func (s *raftScheduler) Wait(context.Context) {
	s.done.Wait()
}

func (s *raftScheduler) worker(ctx context.Context) {
	defer s.done.Done()

	// We use a sync.Cond for worker notification instead of a buffered
	// channel. Buffered channels have internal overhead for maintaining the
	// buffer even when the elements are empty. The buffer also isn't necessary,
	// as the raftScheduler work is already buffered on the internal queue.
	// Lastly, signaling a sync.Cond is significantly faster than selecting and
	// sending on a buffered channel.

	s.mu.Lock()
	for {
		var id roachpb.RangeID
		for {
			if s.mu.stopped {
				s.mu.Unlock()
				return
			}
			var ok bool
			if id, ok = s.mu.queue.PopFront(); ok {
				break
			}
			s.mu.cond.Wait()
		}

		// Grab and clear the existing state for the range ID. Note that we leave
		// the range ID marked as "queued" so that a concurrent Enqueue* will not
		// queue the range ID again.
		state := s.mu.state[id]
		s.mu.state[id] = stateQueued
		s.mu.Unlock()

		// Process requests first. This avoids a scenario where a tick and a
		// "quiesce" message are processed in the same iteration and intervening
		// raft ready processing unquiesces the replica because the tick triggers
		// an election.
		if state&stateRaftRequest != 0 {
			// processRequestQueue returns true if the range should perform ready
			// processing. Do not reorder this below the call to processReady.
			if s.processor.processRequestQueue(ctx, id) {
				state |= stateRaftReady
			}
		}
		if state&stateRaftTick != 0 {
			// processTick returns true if the range should perform ready
			// processing. Do not reorder this below the call to processReady.
			if s.processor.processTick(ctx, id) {
				state |= stateRaftReady
			}
		}
		if state&stateRaftReady != 0 {
			s.processor.processReady(ctx, id)
		}

		s.mu.Lock()
		state = s.mu.state[id]
		if state == stateQueued {
			// No further processing is required for this range ID; clear it from
			// the state map.
			delete(s.mu.state, id)
		} else {
			// There was a concurrent call to one of the Enqueue* methods. Queue the
			// range ID for further processing.
			s.mu.queue.PushBack(id)
			s.mu.cond.Signal()
		}
	}
}
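// noopProcessor is an illustrative sketch, not part of the original file: a
// minimal raftProcessor implementation showing how the worker's callbacks
// compose. Returning true from processRequestQueue (or processTick) makes the
// worker follow up with processReady in the same pass.
type noopProcessor struct{}

func (noopProcessor) processReady(context.Context, roachpb.RangeID) {}

func (noopProcessor) processRequestQueue(context.Context, roachpb.RangeID) bool {
	// Pretend the queued messages produced work that needs ready handling.
	return true
}

func (noopProcessor) processTick(context.Context, roachpb.RangeID) bool {
	// Ticks alone do not require ready handling in this sketch.
	return false
}

// A caller would wire the sketch up roughly as follows (ctx and stopper are
// assumed to come from the surrounding store):
//
//	s := newRaftScheduler(nil /* metrics */, noopProcessor{}, 8 /* numWorkers */)
//	s.Start(ctx, stopper)
//	s.EnqueueRaftRequest(1)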
func (s *raftScheduler) enqueue1Locked(addState raftScheduleState, id roachpb.RangeID) int {
	prevState := s.mu.state[id]
	if prevState&addState == addState {
		return 0
	}
	var queued int
	newState := prevState | addState
	if newState&stateQueued == 0 {
		newState |= stateQueued
		queued++
		s.mu.queue.PushBack(id)
	}
	s.mu.state[id] = newState
	return queued
}

func (s *raftScheduler) enqueue1(addState raftScheduleState, id roachpb.RangeID) int {
	s.mu.Lock()
	count := s.enqueue1Locked(addState, id)
	s.mu.Unlock()
	return count
}

func (s *raftScheduler) enqueueN(addState raftScheduleState, ids ...roachpb.RangeID) int {
	// Enqueue the ids in chunks to avoid holding raftScheduler.mu for too long.
	const enqueueChunkSize = 128

	var count int
	s.mu.Lock()
	for i, id := range ids {
		count += s.enqueue1Locked(addState, id)
		if (i+1)%enqueueChunkSize == 0 {
			s.mu.Unlock()
			s.mu.Lock()
		}
	}
	s.mu.Unlock()
	return count
}

func (s *raftScheduler) signal(count int) {
	if count >= s.numWorkers {
		s.mu.cond.Broadcast()
	} else {
		for i := 0; i < count; i++ {
			s.mu.cond.Signal()
		}
	}
}

func (s *raftScheduler) EnqueueRaftReady(id roachpb.RangeID) {
	s.signal(s.enqueue1(stateRaftReady, id))
}

func (s *raftScheduler) EnqueueRaftRequest(id roachpb.RangeID) {
	s.signal(s.enqueue1(stateRaftRequest, id))
}

func (s *raftScheduler) EnqueueRaftTick(ids ...roachpb.RangeID) {
	s.signal(s.enqueueN(stateRaftTick, ids...))
}
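// exampleCoalescing is an illustrative sketch, not part of the original file:
// it shows how the per-range state bitmask keeps duplicate work off the queue.
// Only the enqueue that first sets stateQueued pushes the range ID and counts
// toward the number of workers to signal; later enqueues for the same range
// merely OR in additional state bits for the worker to pick up.
func exampleCoalescing(s *raftScheduler) {
	const id = roachpb.RangeID(7)
	count := s.enqueue1(stateRaftTick, id)  // 1: range newly queued
	count += s.enqueue1(stateRaftTick, id)  // +0: tick already pending
	count += s.enqueue1(stateRaftReady, id) // +0: already queued; ready bit added
	s.signal(count)                         // signals at most one worker for all three enqueues
}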