vitess.io/vitess@v0.16.2/go/vt/vtgate/buffer/shard_buffer.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package buffer 18 19 import ( 20 "context" 21 "fmt" 22 "runtime/debug" 23 "sync" 24 "time" 25 26 "vitess.io/vitess/go/vt/vtgate/errorsanitizer" 27 28 "vitess.io/vitess/go/vt/log" 29 "vitess.io/vitess/go/vt/logutil" 30 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 31 "vitess.io/vitess/go/vt/topo/topoproto" 32 "vitess.io/vitess/go/vt/vterrors" 33 ) 34 35 // bufferState represents the different states a shardBuffer object can be in. 36 type bufferState string 37 38 const ( 39 // stateIdle means no failover is currently in progress. 40 stateIdle bufferState = "IDLE" 41 // stateBuffering is the phase when a failover is in progress. 42 stateBuffering bufferState = "BUFFERING" 43 // stateDraining is the phase when a failover ended and the queue is drained. 44 stateDraining bufferState = "DRAINING" 45 ) 46 47 // entry is created per buffered request. 48 type entry struct { 49 // done will be closed by shardBuffer when the failover is over and the 50 // request can be retried. 51 // Any Go routine closing this channel must also remove the entry from the 52 // ShardBuffer queue such that nobody else tries to close it. 53 done chan struct{} 54 55 // deadline is the time when the entry is out of the buffering window and it 56 // must be canceled. 57 deadline time.Time 58 59 // err is set if the buffering failed e.g. when the entry was evicted. 60 err error 61 62 // bufferCtx wraps the request ctx and is used to track the retry of a 63 // request during the drain phase. Once the retry is done, the caller 64 // must cancel this context (by calling bufferCancel). 65 bufferCtx context.Context 66 bufferCancel func() 67 } 68 69 // shardBuffer buffers requests during a failover for a particular shard. 70 // The object will be reused across failovers. If no failover is currently in 71 // progress, the state is "IDLE". 72 // 73 // Note that this object is accessed concurrently by multiple threads: 74 // - vtgate request threads 75 // - discovery.HealthCheck listener execution thread 76 // - timeout thread (timeout_thread.go) to evict too old buffered requests 77 // - drain() thread 78 type shardBuffer struct { 79 // Immutable fields set at construction. 80 buf *Buffer 81 mode bufferMode 82 keyspace string 83 shard string 84 85 // statsKey is used to update the stats variables. 86 statsKey []string 87 // statsKeyJoined is all elements of "statsKey" in one string, joined by ".". 88 statsKeyJoined string 89 logTooRecent *logutil.ThrottledLogger 90 91 // mu guards the fields below. 92 mu sync.RWMutex 93 state bufferState 94 // queue is the list of buffered requests (ordered by arrival). 95 queue []*entry 96 // externallyReparented is the maximum value of all seen 97 // "StreamHealthResponse.TabletexternallyReparentedTimestamp" values across 98 // all PRIMARY tablets of this shard. 99 // In practice, it is a) the last time the shard was reparented or b) the last 100 // time the TabletExternallyReparented RPC was called on the tablet to confirm 101 // that the tablet is the current PRIMARY. 102 // We assume the value is a Unix timestamp in seconds. 103 externallyReparented int64 104 // lastStart is the last time we saw the start of a failover. 105 lastStart time.Time 106 // lastEnd is the last time we saw the end of a failover. 107 lastEnd time.Time 108 // lastReparent is the last time we saw that the tablet alias of the PRIMARY 109 // changed i.e. we definitely reparented to a different tablet. 110 lastReparent time.Time 111 // currentPrimary is tracked to determine when to update "lastReparent". 112 currentPrimary *topodatapb.TabletAlias 113 // timeoutThread will be set while a failover is in progress and the object is 114 // in the BUFFERING state. 115 timeoutThread *timeoutThread 116 // wg tracks all pending Go routines. waitForShutdown() will use this field to 117 // block on them. 118 wg sync.WaitGroup 119 } 120 121 func newShardBufferHealthCheck(buf *Buffer, mode bufferMode, keyspace, shard string) *shardBuffer { 122 statsKey := []string{keyspace, shard} 123 initVariablesForShard(statsKey) 124 125 return &shardBuffer{ 126 buf: buf, 127 mode: mode, 128 keyspace: keyspace, 129 shard: shard, 130 statsKey: statsKey, 131 statsKeyJoined: fmt.Sprintf("%s.%s", keyspace, shard), 132 logTooRecent: logutil.NewThrottledLogger(fmt.Sprintf("FailoverTooRecent-%v", topoproto.KeyspaceShardString(keyspace, shard)), 5*time.Second), 133 state: stateIdle, 134 } 135 } 136 137 func (sb *shardBuffer) timeNow() time.Time { 138 return sb.buf.config.now() 139 } 140 141 // disabled returns true if neither buffering nor the dry-run mode is enabled. 142 func (sb *shardBuffer) disabled() bool { 143 return sb.mode == bufferModeDisabled 144 } 145 146 func (sb *shardBuffer) waitForFailoverEnd(ctx context.Context, keyspace, shard string, err error) (RetryDoneFunc, error) { 147 // We assume if err != nil then it's always caused by a failover. 148 // Other errors must be filtered at higher layers. 149 failoverDetected := err != nil 150 151 // Fast path (read lock): Check if we should NOT buffer a request. 152 sb.mu.RLock() 153 if !sb.shouldBufferLocked(failoverDetected) { 154 // No buffering required. Return early. 155 sb.mu.RUnlock() 156 return nil, nil 157 } 158 sb.mu.RUnlock() 159 160 // Buffering required. Acquire write lock. 161 sb.mu.Lock() 162 // Re-check state because it could have changed in the meantime. 163 if !sb.shouldBufferLocked(failoverDetected) { 164 // Buffering no longer required. Return early. 165 sb.mu.Unlock() 166 return nil, nil 167 } 168 169 // Start buffering if failover is not detected yet. 170 if sb.state == stateIdle { 171 // Do not buffer if last failover is too recent. This is the case if: 172 // a) buffering was stopped recently 173 // OR 174 // b) we did not buffer, but observed a reparent very recently 175 now := sb.timeNow() 176 minTimeBetweenFailovers := sb.buf.config.MinTimeBetweenFailovers 177 178 // a) Buffering was stopped recently. 179 // This can happen when we stop buffering while MySQL is not ready yet 180 // (read-only mode is not cleared yet on the new primary). 181 lastBufferingStopped := now.Sub(sb.lastEnd) 182 if !sb.lastEnd.IsZero() && lastBufferingStopped < minTimeBetweenFailovers { 183 sb.mu.Unlock() 184 msg := "NOT starting buffering" 185 if sb.mode == bufferModeDryRun { 186 msg = "Dry-run: Would NOT have started buffering" 187 } 188 189 sb.logTooRecent.Infof("%v for shard: %s because the last failover which triggered buffering is too recent (%v < %v)."+ 190 " (A failover was detected by this seen error: %v.)", 191 msg, topoproto.KeyspaceShardString(keyspace, shard), lastBufferingStopped, minTimeBetweenFailovers, err) 192 193 statsKeyWithReason := append(sb.statsKey, string(skippedLastFailoverTooRecent)) 194 requestsSkipped.Add(statsKeyWithReason, 1) 195 return nil, nil 196 } 197 198 // b) The PRIMARY was reparented recently (but we did not buffer it.) 199 // This can happen when we see the end of the reparent *before* the first 200 // request failure caused by the reparent. This is possible if the QPS is 201 // very low. If we do not skip buffering here, we would start buffering but 202 // not stop because we already observed the promotion of the new primary. 203 lastReparentAgo := now.Sub(sb.lastReparent) 204 if !sb.lastReparent.IsZero() && lastReparentAgo < minTimeBetweenFailovers { 205 sb.mu.Unlock() 206 msg := "NOT starting buffering" 207 if sb.mode == bufferModeDryRun { 208 msg = "Dry-run: Would NOT have started buffering" 209 } 210 211 sb.logTooRecent.Infof("%v for shard: %s because the last reparent is too recent (%v < %v)."+ 212 " (A failover was detected by this seen error: %v.)", 213 msg, topoproto.KeyspaceShardString(keyspace, shard), lastReparentAgo, minTimeBetweenFailovers, err) 214 215 statsKeyWithReason := append(sb.statsKey, string(skippedLastReparentTooRecent)) 216 requestsSkipped.Add(statsKeyWithReason, 1) 217 return nil, nil 218 } 219 220 sb.startBufferingLocked(err) 221 } 222 223 if sb.mode == bufferModeDryRun { 224 sb.mu.Unlock() 225 // Dry-run. Do not actually buffer the request and return early. 226 lastRequestsDryRunMax.Add(sb.statsKey, 1) 227 requestsBufferedDryRun.Add(sb.statsKey, 1) 228 return nil, nil 229 } 230 231 // Buffer request. 232 entry, err := sb.bufferRequestLocked(ctx) 233 sb.mu.Unlock() 234 if err != nil { 235 return nil, err 236 } 237 return sb.wait(ctx, entry) 238 } 239 240 // shouldBufferLocked returns true if the current request should be buffered 241 // (based on the current state and whether the request detected a failover). 242 func (sb *shardBuffer) shouldBufferLocked(failoverDetected bool) bool { 243 switch s := sb.state; { 244 case s == stateIdle && !failoverDetected: 245 // No failover in progress. 246 return false 247 case s == stateIdle && failoverDetected: 248 // Not buffering yet, but new failover detected. 249 return true 250 case s == stateBuffering: 251 // Failover in progress. 252 return true 253 case s == stateDraining && !failoverDetected: 254 // Draining. Non-failover related requests can pass through. 255 return false 256 case s == stateDraining && failoverDetected: 257 // Possible race between request which saw failover-related error and the 258 // end of the failover. Do not buffer and let vtgate retry immediately. 259 return false 260 } 261 panic("BUG: All possible states must be covered by the switch expression above.") 262 } 263 264 func (sb *shardBuffer) startBufferingLocked(err error) { 265 // Reset monitoring data from previous failover. 266 lastRequestsInFlightMax.Set(sb.statsKey, 0) 267 lastRequestsDryRunMax.Set(sb.statsKey, 0) 268 failoverDurationSumMs.Reset(sb.statsKey) 269 270 sb.lastStart = sb.timeNow() 271 sb.logErrorIfStateNotLocked(stateIdle) 272 sb.state = stateBuffering 273 sb.queue = make([]*entry, 0) 274 275 sb.timeoutThread = newTimeoutThread(sb, sb.buf.config.MaxFailoverDuration) 276 sb.timeoutThread.start() 277 msg := "Starting buffering" 278 if sb.mode == bufferModeDryRun { 279 msg = "Dry-run: Would have started buffering" 280 } 281 starts.Add(sb.statsKey, 1) 282 log.Infof("%v for shard: %s (window: %v, size: %v, max failover duration: %v) (A failover was detected by this seen error: %v.)", 283 msg, 284 topoproto.KeyspaceShardString(sb.keyspace, sb.shard), 285 sb.buf.config.Window, 286 sb.buf.config.Size, 287 sb.buf.config.MaxFailoverDuration, 288 errorsanitizer.NormalizeError(err.Error()), 289 ) 290 } 291 292 // logErrorIfStateNotLocked logs an error if the current state is not "state". 293 // We do not panic/crash the process here because it is expected that a wrong 294 // state is less severe than (potentially) crash-looping all vtgates. 295 // Note: The prefix "Locked" is not related to the state. Instead, it stresses 296 // that "sb.mu" must be locked before calling the method. 297 func (sb *shardBuffer) logErrorIfStateNotLocked(state bufferState) { 298 if sb.state != state { 299 log.Errorf("BUG: Buffer state should be '%v' and not '%v'. Full state of buffer object: %#v Stacktrace:\n%s", state, sb.state, sb, debug.Stack()) 300 } 301 } 302 303 // bufferRequest creates a new entry in the queue for a request which 304 // should be buffered. 305 // It returns *entry which can be used as input for shardBuffer.cancel(). This 306 // is useful for canceled RPCs (e.g. due to deadline exceeded) which want to 307 // give up their spot in the buffer. It also holds the "bufferCancel" function. 308 // If buffering fails e.g. due to a full buffer, an error is returned. 309 func (sb *shardBuffer) bufferRequestLocked(ctx context.Context) (*entry, error) { 310 if !sb.buf.bufferSizeSema.TryAcquire() { 311 // Buffer is full. Evict the oldest entry and buffer this request instead. 312 if len(sb.queue) == 0 { 313 // Overall buffer is full, but this shard's queue is empty. That means 314 // there is at least one other shard failing over as well which consumes 315 // the whole buffer. 316 statsKeyWithReason := append(sb.statsKey, string(skippedBufferFull)) 317 requestsSkipped.Add(statsKeyWithReason, 1) 318 return nil, bufferFullError 319 } 320 321 e := sb.queue[0] 322 // Evict the entry. Do not release its slot in the buffer and reuse it for 323 // this new request. 324 // NOTE: We keep the lock to avoid racing with drain(). 325 // NOTE: We're not waiting until the request finishes and instead reuse its 326 // slot immediately, i.e. the number of evicted requests + drained requests 327 // can be bigger than the buffer size. 328 sb.unblockAndWait(e, entryEvictedError, false /* releaseSlot */, false /* blockingWait */) 329 sb.queue = sb.queue[1:] 330 statsKeyWithReason := append(sb.statsKey, evictedBufferFull) 331 requestsEvicted.Add(statsKeyWithReason, 1) 332 } 333 334 e := &entry{ 335 done: make(chan struct{}), 336 deadline: sb.timeNow().Add(sb.buf.config.Window), 337 } 338 e.bufferCtx, e.bufferCancel = context.WithCancel(ctx) 339 sb.queue = append(sb.queue, e) 340 341 if max := lastRequestsInFlightMax.Counts()[sb.statsKeyJoined]; max < int64(len(sb.queue)) { 342 lastRequestsInFlightMax.Set(sb.statsKey, int64(len(sb.queue))) 343 } 344 requestsBuffered.Add(sb.statsKey, 1) 345 346 if len(sb.queue) == 1 { 347 sb.timeoutThread.notifyQueueNotEmpty() 348 } 349 return e, nil 350 } 351 352 // unblockAndWait unblocks a blocked request. 353 // If releaseSlot is true, the buffer semaphore will be decreased by 1 when 354 // the request retried and finished. 355 // If blockingWait is true, this call will block until the request retried and 356 // finished. This mode is used during the drain (to avoid flooding the primary) 357 // while the non-blocking mode is used when a) evicting a request (e.g. because 358 // the buffer is full or it exceeded the buffering window) or b) when the 359 // request was canceled from outside and we removed it. 360 func (sb *shardBuffer) unblockAndWait(e *entry, err error, releaseSlot, blockingWait bool) { 361 // Set error such that the request will see it. 362 e.err = err 363 // Tell blocked request to stop waiting. 364 close(e.done) 365 366 if blockingWait { 367 sb.waitForRequestFinish(e, releaseSlot, false /* async */) 368 } else { 369 sb.wg.Add(1) 370 go sb.waitForRequestFinish(e, releaseSlot, true /* async */) 371 } 372 } 373 374 func (sb *shardBuffer) waitForRequestFinish(e *entry, releaseSlot, async bool) { 375 if async { 376 defer sb.wg.Done() 377 } 378 379 // Wait for unblocked request to finish. 380 <-e.bufferCtx.Done() 381 382 // Release the slot to the buffer. 383 // NOTE: We always wait for the request first, even if the calling code like 384 // the buffer full eviction or the timeout thread does not block on us. 385 // This way, the request's slot can only be reused after the request finished. 386 if releaseSlot { 387 sb.buf.bufferSizeSema.Release() 388 } 389 } 390 391 // wait blocks while the request is buffered during the failover. 392 // See Buffer.WaitForFailoverEnd() for the API contract of the return values. 393 func (sb *shardBuffer) wait(ctx context.Context, e *entry) (RetryDoneFunc, error) { 394 select { 395 case <-ctx.Done(): 396 sb.remove(e) 397 return nil, vterrors.Errorf(vterrors.Code(contextCanceledError), "%v: %v", contextCanceledError, ctx.Err()) 398 case <-e.done: 399 return e.bufferCancel, e.err 400 } 401 } 402 403 // oldestEntry returns the head of the queue or nil if the queue is empty. 404 func (sb *shardBuffer) oldestEntry() *entry { 405 sb.mu.Lock() 406 defer sb.mu.Unlock() 407 408 if len(sb.queue) > 0 { 409 return sb.queue[0] 410 } 411 return nil 412 } 413 414 // evictOldestEntry is used by timeoutThread to evict the head entry of the 415 // queue if it exceeded its buffering window. 416 func (sb *shardBuffer) evictOldestEntry(e *entry) { 417 sb.mu.Lock() 418 defer sb.mu.Unlock() 419 420 if len(sb.queue) == 0 || e != sb.queue[0] { 421 // Entry is already removed e.g. by remove(). Ignore it. 422 return 423 } 424 425 // Evict the entry. 426 // 427 // NOTE: We're not waiting for the request to finish in order to unblock the 428 // timeout thread as fast as possible. However, the slot of the evicted 429 // request is only returned after it has finished i.e. the buffer may stay 430 // full in the meantime. This is a design tradeoff to keep things simple and 431 // avoid additional pressure on the primary tablet. 432 sb.unblockAndWait(e, nil /* err */, true /* releaseSlot */, false /* blockingWait */) 433 sb.queue = sb.queue[1:] 434 statsKeyWithReason := append(sb.statsKey, evictedWindowExceeded) 435 requestsEvicted.Add(statsKeyWithReason, 1) 436 } 437 438 // remove must be called when the request was canceled from outside and not 439 // internally. 440 func (sb *shardBuffer) remove(toRemove *entry) { 441 sb.mu.Lock() 442 defer sb.mu.Unlock() 443 444 if sb.queue == nil { 445 // Queue is cleared because we're already in the DRAIN phase. 446 return 447 } 448 449 // If entry is still in the queue, delete it and cancel it internally. 450 for i, e := range sb.queue { 451 if e == toRemove { 452 // Delete entry at index "i" from slice. 453 sb.queue = append(sb.queue[:i], sb.queue[i+1:]...) 454 455 // Cancel the entry's "bufferCtx". 456 // The usual drain or eviction code would unblock the request and then 457 // wait for the "bufferCtx" to be done. 458 // But this code path is different because it's going to return an error 459 // to the request and not the "e.bufferCancel" function i.e. the request 460 // cannot cancel the "bufferCtx" itself. 461 // Therefore, we call "e.bufferCancel". This also avoids that the 462 // context's Go routine could leak. 463 e.bufferCancel() 464 // Release the buffer slot and close the "e.done" channel. 465 // By closing "e.done", we finish it explicitly and timeoutThread will 466 // find out about it as well. 467 sb.unblockAndWait(e, nil /* err */, true /* releaseSlot */, false /* blockingWait */) 468 469 // Track it as "ContextDone" eviction. 470 statsKeyWithReason := append(sb.statsKey, string(evictedContextDone)) 471 requestsEvicted.Add(statsKeyWithReason, 1) 472 return 473 } 474 } 475 476 // Entry was already removed. Keep the queue as it is. 477 } 478 479 func (sb *shardBuffer) recordKeyspaceEvent(alias *topodatapb.TabletAlias, stillServing bool) { 480 sb.mu.Lock() 481 defer sb.mu.Unlock() 482 483 log.Infof("disruption in shard %s/%s resolved (serving: %v)", sb.keyspace, sb.shard, stillServing) 484 485 if !topoproto.TabletAliasEqual(alias, sb.currentPrimary) { 486 if sb.currentPrimary != nil { 487 sb.lastReparent = sb.timeNow() 488 } 489 sb.currentPrimary = alias 490 } 491 if stillServing { 492 sb.stopBufferingLocked(stopFailoverEndDetected, "a primary promotion has been detected") 493 } else { 494 sb.stopBufferingLocked(stopShardMissing, "the keyspace has been resharded") 495 } 496 } 497 498 func (sb *shardBuffer) recordExternallyReparentedTimestamp(timestamp int64, alias *topodatapb.TabletAlias) { 499 // Fast path (read lock): Check if new timestamp is higher. 500 sb.mu.RLock() 501 if timestamp <= sb.externallyReparented { 502 // Do nothing. Equal values are reported if the primary has not changed. 503 // Smaller values can be reported during the failover by the old primary 504 // after the new primary already took over. 505 sb.mu.RUnlock() 506 return 507 } 508 sb.mu.RUnlock() 509 510 // New timestamp is higher. Stop buffering if running. 511 sb.mu.Lock() 512 defer sb.mu.Unlock() 513 514 // Re-check value after acquiring write lock. 515 if timestamp <= sb.externallyReparented { 516 return 517 } 518 519 sb.externallyReparented = timestamp 520 if !topoproto.TabletAliasEqual(alias, sb.currentPrimary) { 521 if sb.currentPrimary != nil { 522 sb.lastReparent = sb.timeNow() 523 } 524 sb.currentPrimary = alias 525 } 526 sb.stopBufferingLocked(stopFailoverEndDetected, "failover end detected") 527 } 528 529 func (sb *shardBuffer) stopBufferingDueToMaxDuration() { 530 sb.mu.Lock() 531 defer sb.mu.Unlock() 532 533 sb.stopBufferingLocked(stopMaxFailoverDurationExceeded, 534 fmt.Sprintf("stopping buffering because failover did not finish in time (%v)", sb.buf.config.MaxFailoverDuration)) 535 } 536 537 func (sb *shardBuffer) stopBufferingLocked(reason stopReason, details string) { 538 if sb.state != stateBuffering { 539 return 540 } 541 542 // Stop buffering. 543 sb.lastEnd = sb.timeNow() 544 d := sb.lastEnd.Sub(sb.lastStart) 545 546 statsKeyWithReason := append(sb.statsKey, string(reason)) 547 stops.Add(statsKeyWithReason, 1) 548 549 lastFailoverDurationMs.Set(sb.statsKey, int64(d/time.Millisecond)) 550 failoverDurationSumMs.Add(sb.statsKey, int64(d/time.Millisecond)) 551 if sb.mode == bufferModeDryRun { 552 utilDryRunMax := int64( 553 float64(lastRequestsDryRunMax.Counts()[sb.statsKeyJoined]) / float64(sb.buf.config.Size) * 100.0) 554 utilizationDryRunSum.Add(sb.statsKey, utilDryRunMax) 555 } else { 556 utilMax := int64( 557 float64(lastRequestsInFlightMax.Counts()[sb.statsKeyJoined]) / float64(sb.buf.config.Size) * 100.0) 558 utilizationSum.Add(sb.statsKey, utilMax) 559 } 560 561 sb.logErrorIfStateNotLocked(stateBuffering) 562 sb.state = stateDraining 563 q := sb.queue 564 // Clear the queue such that remove(), oldestEntry() and evictOldestEntry() 565 // will not work on obsolete data. 566 sb.queue = nil 567 568 msg := "Stopping buffering" 569 if sb.mode == bufferModeDryRun { 570 msg = "Dry-run: Would have stopped buffering" 571 } 572 log.Infof("%v for shard: %s after: %.1f seconds due to: %v. Draining %d buffered requests now.", msg, topoproto.KeyspaceShardString(sb.keyspace, sb.shard), d.Seconds(), details, len(q)) 573 574 var clientEntryError error 575 if reason == stopShardMissing { 576 clientEntryError = ShardMissingError 577 } 578 579 // Start the drain. (Use a new Go routine to release the lock.) 580 sb.wg.Add(1) 581 go sb.drain(q, clientEntryError) 582 } 583 584 func (sb *shardBuffer) drain(q []*entry, err error) { 585 defer sb.wg.Done() 586 587 // stop must be called outside of the lock because the thread may access 588 // shardBuffer as well e.g. to get the current oldest entry. 589 sb.timeoutThread.stop() 590 591 start := sb.timeNow() 592 // TODO(mberlin): Parallelize the drain by pumping the data through a channel. 593 for _, e := range q { 594 sb.unblockAndWait(e, err, true /* releaseSlot */, true /* blockingWait */) 595 } 596 d := sb.timeNow().Sub(start) 597 log.Infof("Draining finished for shard: %s Took: %v for: %d requests.", topoproto.KeyspaceShardString(sb.keyspace, sb.shard), d, len(q)) 598 requestsDrained.Add(sb.statsKey, int64(len(q))) 599 600 // Draining is done. Change state from "draining" to "idle". 601 sb.mu.Lock() 602 defer sb.mu.Unlock() 603 sb.logErrorIfStateNotLocked(stateDraining) 604 sb.state = stateIdle 605 sb.timeoutThread = nil 606 } 607 608 func (sb *shardBuffer) shutdown() { 609 sb.mu.Lock() 610 sb.stopBufferingLocked(stopShutdown, "shutdown") 611 sb.mu.Unlock() 612 } 613 614 func (sb *shardBuffer) waitForShutdown() { 615 sb.wg.Wait() 616 } 617 618 // sizeForTesting is used by the unit test only to find out the current number 619 // of buffered requests. 620 // TODO(mberlin): Remove this if we add a more general statistics reporting. 621 func (sb *shardBuffer) testGetSize() int { 622 sb.mu.RLock() 623 defer sb.mu.RUnlock() 624 return len(sb.queue) 625 } 626 627 // stateForTesting is used by unit tests only to probe the current state. 628 func (sb *shardBuffer) testGetState() bufferState { 629 sb.mu.RLock() 630 defer sb.mu.RUnlock() 631 return sb.state 632 }