// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"fmt"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/intentresolver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// RangefeedEnabled is a cluster setting that enables rangefeed requests.
var RangefeedEnabled = settings.RegisterPublicBoolSetting(
	"kv.rangefeed.enabled",
	"if set, rangefeed registration is enabled",
	false,
)

// lockedRangefeedStream is an implementation of rangefeed.Stream which provides
// support for concurrent calls to Send. Note that the default implementation of
// grpc.Stream is not safe for concurrent calls to Send.
type lockedRangefeedStream struct {
	wrapped roachpb.Internal_RangeFeedServer
	sendMu  syncutil.Mutex
}

func (s *lockedRangefeedStream) Context() context.Context {
	return s.wrapped.Context()
}

func (s *lockedRangefeedStream) Send(e *roachpb.RangeFeedEvent) error {
	s.sendMu.Lock()
	defer s.sendMu.Unlock()
	return s.wrapped.Send(e)
}
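
// Illustrative sketch, not part of the original file: because Send above is
// serialized by sendMu, multiple producer goroutines may safely share a single
// lockedRangefeedStream. The helper below is hypothetical and exists only to
// demonstrate the pattern; error handling is elided.
func exampleConcurrentSend(s *lockedRangefeedStream, events []*roachpb.RangeFeedEvent) {
	done := make(chan struct{}, len(events))
	for i := range events {
		ev := events[i]
		go func() {
			// Safe for concurrent use: Send holds sendMu around the wrapped
			// gRPC stream's Send call.
			_ = s.Send(ev)
			done <- struct{}{}
		}()
	}
	for range events {
		<-done
	}
}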

// rangefeedTxnPusher is a shim around intentResolver that implements the
// rangefeed.TxnPusher interface.
type rangefeedTxnPusher struct {
	ir *intentresolver.IntentResolver
	r  *Replica
}

// PushTxns is part of the rangefeed.TxnPusher interface. It performs a
// high-priority push at the specified timestamp to each of the specified
// transactions.
func (tp *rangefeedTxnPusher) PushTxns(
	ctx context.Context, txns []enginepb.TxnMeta, ts hlc.Timestamp,
) ([]*roachpb.Transaction, error) {
	pushTxnMap := make(map[uuid.UUID]*enginepb.TxnMeta, len(txns))
	for i := range txns {
		txn := &txns[i]
		pushTxnMap[txn.ID] = txn
	}

	h := roachpb.Header{
		Timestamp: ts,
		Txn: &roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				Priority: enginepb.MaxTxnPriority,
			},
		},
	}

	pushedTxnMap, pErr := tp.ir.MaybePushTransactions(
		ctx, pushTxnMap, h, roachpb.PUSH_TIMESTAMP, false, /* skipIfInFlight */
	)
	if pErr != nil {
		return nil, pErr.GoError()
	}

	pushedTxns := make([]*roachpb.Transaction, 0, len(pushedTxnMap))
	for _, txn := range pushedTxnMap {
		pushedTxns = append(pushedTxns, txn)
	}
	return pushedTxns, nil
}

// CleanupTxnIntentsAsync is part of the rangefeed.TxnPusher interface.
func (tp *rangefeedTxnPusher) CleanupTxnIntentsAsync(
	ctx context.Context, txns []*roachpb.Transaction,
) error {
	endTxns := make([]result.EndTxnIntents, len(txns))
	for i, txn := range txns {
		endTxns[i].Txn = txn
		endTxns[i].Poison = true
	}
	return tp.ir.CleanupTxnIntentsAsync(ctx, tp.r.RangeID, endTxns, true /* allowSyncProcessing */)
}

type iteratorWithCloser struct {
	storage.SimpleIterator
	close func()
}

func (i iteratorWithCloser) Close() {
	i.SimpleIterator.Close()
	i.close()
}
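
// Illustrative sketch, not part of the original file: iteratorWithCloser is a
// small composition helper that piggybacks arbitrary cleanup onto an
// iterator's Close. RangeFeed (below) uses it to tie the catch-up iterator's
// lifetime to the release of its ConcurrentRangefeedIters semaphore slot. The
// hypothetical wrapper below shows the general pattern.
func exampleWrapIteratorWithCleanup(
	inner storage.SimpleIterator, cleanup func(),
) storage.SimpleIterator {
	return iteratorWithCloser{
		SimpleIterator: inner,
		close:          cleanup,
	}
}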

// RangeFeed registers a rangefeed over the specified span. It sends updates to
// the provided stream and returns with an optional error when the rangefeed is
// complete. The store's ConcurrentRequestLimiter is used to limit the number
// of rangefeeds using catch-up iterators at the same time.
func (r *Replica) RangeFeed(
	args *roachpb.RangeFeedRequest, stream roachpb.Internal_RangeFeedServer,
) *roachpb.Error {
	if !r.isSystemRange() && !RangefeedEnabled.Get(&r.store.cfg.Settings.SV) {
		return roachpb.NewErrorf("rangefeeds require the kv.rangefeed.enabled setting. See %s",
			base.DocsURL(`change-data-capture.html#enable-rangefeeds-to-reduce-latency`))
	}
	ctx := r.AnnotateCtx(stream.Context())

	var rSpan roachpb.RSpan
	var err error
	rSpan.Key, err = keys.Addr(args.Span.Key)
	if err != nil {
		return roachpb.NewError(err)
	}
	rSpan.EndKey, err = keys.Addr(args.Span.EndKey)
	if err != nil {
		return roachpb.NewError(err)
	}

	if err := r.ensureClosedTimestampStarted(ctx); err != nil {
		return err
	}

	// If the RangeFeed is performing a catch-up scan then it will observe all
	// values above args.Timestamp. If the RangeFeed is requesting previous
	// values for every update then it will also need to look for the version
	// preceding each value observed during the catch-up scan. This means that
	// the earliest value observed by the catch-up scan will be
	// args.Timestamp.Next and the earliest timestamp used to retrieve the
	// previous version of a value will be args.Timestamp, so this is the
	// timestamp we must check against the GCThreshold.
	checkTS := args.Timestamp
	if checkTS.IsEmpty() {
		// If no timestamp was provided then we're not going to run a catch-up
		// scan, so make sure the GCThreshold check in
		// checkExecutionCanProceedForRangeFeed succeeds.
		checkTS = r.Clock().Now()
	}

	lockedStream := &lockedRangefeedStream{wrapped: stream}
	errC := make(chan *roachpb.Error, 1)

	// If we will be using a catch-up iterator, wait for the limiter here before
	// locking raftMu.
	usingCatchupIter := false
	var iterSemRelease func()
	if !args.Timestamp.IsEmpty() {
		usingCatchupIter = true
		lim := &r.store.limiters.ConcurrentRangefeedIters
		if err := lim.Begin(ctx); err != nil {
			return roachpb.NewError(err)
		}
		// Finish the iterator limit, but only if we exit before
		// creating the iterator itself.
		iterSemRelease = lim.Finish
		defer func() {
			if iterSemRelease != nil {
				iterSemRelease()
			}
		}()
	}

	// Lock the raftMu, then register the stream as a new rangefeed registration.
	// raftMu is held so that the catch-up iterator is captured in the same
	// critical-section as the registration is established. This ensures that
	// the registration doesn't miss any events.
	r.raftMu.Lock()
	if err := r.checkExecutionCanProceedForRangeFeed(rSpan, checkTS); err != nil {
		r.raftMu.Unlock()
		return roachpb.NewError(err)
	}

	// Register the stream with a catch-up iterator.
	var catchUpIter storage.SimpleIterator
	if usingCatchupIter {
		innerIter := r.Engine().NewIterator(storage.IterOptions{
			UpperBound: args.Span.EndKey,
			// RangeFeed originally intended to use the time-bound iterator
			// performance optimization. However, they've had correctness issues in
			// the past (#28358, #34819) and no one has the time for the due diligence
			// necessary to be confident in their correctness going forward. Not using
			// them causes the total time spent in RangeFeed catchup on changefeed
			// over tpcc-1000 to go from 40s -> 4853s, which is quite large but still
			// workable. See #35122 for details.
			// MinTimestampHint: args.Timestamp,
		})
		catchUpIter = iteratorWithCloser{
			SimpleIterator: innerIter,
			close:          iterSemRelease,
		}
		// Responsibility for releasing the semaphore now passes to the iterator.
		iterSemRelease = nil
	}
	p := r.registerWithRangefeedRaftMuLocked(
		ctx, rSpan, args.Timestamp, catchUpIter, args.WithDiff, lockedStream, errC,
	)
	r.raftMu.Unlock()

	// When this function returns, attempt to clean up the rangefeed.
	defer r.maybeDisconnectEmptyRangefeed(p)

	// Block on the registration's error channel. Note that the registration
	// observes stream.Context().Done.
	return <-errC
}
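
// Illustrative sketch, not part of the original file: the shape of the request
// served by RangeFeed above. A non-empty Timestamp (in the embedded Header)
// requests a catch-up scan from that time, and WithDiff additionally asks for
// previous values. The helper name is hypothetical.
func exampleRangeFeedRequest(span roachpb.Span, from hlc.Timestamp) *roachpb.RangeFeedRequest {
	var req roachpb.RangeFeedRequest
	req.Span = span
	req.Timestamp = from // promoted from the embedded Header, read as args.Timestamp above
	req.WithDiff = true
	return &req
}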

func (r *Replica) getRangefeedProcessorAndFilter() (*rangefeed.Processor, *rangefeed.Filter) {
	r.rangefeedMu.RLock()
	defer r.rangefeedMu.RUnlock()
	return r.rangefeedMu.proc, r.rangefeedMu.opFilter
}

func (r *Replica) getRangefeedProcessor() *rangefeed.Processor {
	p, _ := r.getRangefeedProcessorAndFilter()
	return p
}

func (r *Replica) setRangefeedProcessor(p *rangefeed.Processor) {
	r.rangefeedMu.Lock()
	defer r.rangefeedMu.Unlock()
	r.rangefeedMu.proc = p
	r.store.addReplicaWithRangefeed(r.RangeID)
}

func (r *Replica) unsetRangefeedProcessorLocked(p *rangefeed.Processor) {
	if r.rangefeedMu.proc != p {
		// The processor was already unset.
		return
	}
	r.rangefeedMu.proc = nil
	r.rangefeedMu.opFilter = nil
	r.store.removeReplicaWithRangefeed(r.RangeID)
}

func (r *Replica) unsetRangefeedProcessor(p *rangefeed.Processor) {
	r.rangefeedMu.Lock()
	defer r.rangefeedMu.Unlock()
	r.unsetRangefeedProcessorLocked(p)
}

func (r *Replica) setRangefeedFilterLocked(f *rangefeed.Filter) {
	if f == nil {
		panic("filter nil")
	}
	r.rangefeedMu.opFilter = f
}

func (r *Replica) updateRangefeedFilterLocked() bool {
	f := r.rangefeedMu.proc.Filter()
	// Return whether the update to the filter was successful or not. If
	// the processor was already stopped then we can't update the filter.
	if f != nil {
		r.setRangefeedFilterLocked(f)
		return true
	}
	return false
}

// The size of an event is 112 bytes, so this will result in an allocation on
// the order of ~512KB per RangeFeed. That's probably ok given the number of
// ranges on a node that we'd like to support with active rangefeeds, but it's
// certainly on the upper end of the range.
//
// TODO(dan): Everyone seems to agree that this memory limit would be better set
// at a store-wide level, but there doesn't seem to be an easy way to accomplish
// that.
const defaultEventChanCap = 4096
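
// Illustrative sketch, not part of the original file: the "~512KB" figure
// above is the buffered event channel's backing array, roughly
// 112 bytes/event * 4096 events = 458,752 bytes, i.e. a bit under 512KB per
// rangefeed processor.
const exampleEventChanBytes = 112 * defaultEventChanCap // ≈ 450 KiB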

// registerWithRangefeedRaftMuLocked sets up a Rangefeed registration over the
// provided span. It initializes a rangefeed for the Replica if one is not
// already running. Requires raftMu be locked.
func (r *Replica) registerWithRangefeedRaftMuLocked(
	ctx context.Context,
	span roachpb.RSpan,
	startTS hlc.Timestamp,
	catchupIter storage.SimpleIterator,
	withDiff bool,
	stream rangefeed.Stream,
	errC chan<- *roachpb.Error,
) *rangefeed.Processor {
	// Attempt to register with an existing Rangefeed processor, if one exists.
	// The locking here is a little tricky because we need to handle the case
	// of concurrent processor shutdowns (see maybeDisconnectEmptyRangefeed).
	r.rangefeedMu.Lock()
	p := r.rangefeedMu.proc
	if p != nil {
		reg, filter := p.Register(span, startTS, catchupIter, withDiff, stream, errC)
		if reg {
			// Registered successfully with an existing processor.
			// Update the rangefeed filter to avoid filtering ops
			// that this new registration might be interested in.
			r.setRangefeedFilterLocked(filter)
			r.rangefeedMu.Unlock()
			return p
		}
		// If the registration failed, the processor was already being shut
		// down. Help unset it and then continue on with initializing a new
		// processor.
		r.unsetRangefeedProcessorLocked(p)
		p = nil
	}
	r.rangefeedMu.Unlock()

	// Create a new rangefeed.
	desc := r.Desc()
	tp := rangefeedTxnPusher{ir: r.store.intentResolver, r: r}
	cfg := rangefeed.Config{
		AmbientContext:   r.AmbientContext,
		Clock:            r.Clock(),
		Span:             desc.RSpan(),
		TxnPusher:        &tp,
		PushTxnsInterval: r.store.TestingKnobs().RangeFeedPushTxnsInterval,
		PushTxnsAge:      r.store.TestingKnobs().RangeFeedPushTxnsAge,
		EventChanCap:     defaultEventChanCap,
		EventChanTimeout: 50 * time.Millisecond,
		Metrics:          r.store.metrics.RangeFeedMetrics,
	}
	p = rangefeed.NewProcessor(cfg)

	// Start it with an iterator to initialize the resolved timestamp.
	rtsIter := r.Engine().NewIterator(storage.IterOptions{
		UpperBound: desc.EndKey.AsRawKey(),
		// TODO(nvanbenschoten): To facilitate fast restarts of rangefeed
		// we should periodically persist the resolved timestamp so that we
		// can initialize the rangefeed using an iterator that only needs to
		// observe timestamps back to the last recorded resolved timestamp.
		// This is safe because we know that there are no unresolved intents
		// at times before a resolved timestamp.
		// MinTimestampHint: r.ResolvedTimestamp,
	})
	p.Start(r.store.Stopper(), rtsIter)

	// Register with the processor *before* we attach its reference to the
	// Replica struct. This ensures that the registration is in place before
	// any other goroutines are able to stop the processor. In other words,
	// this ensures that the only time the registration fails is during
	// server shutdown.
	reg, filter := p.Register(span, startTS, catchupIter, withDiff, stream, errC)
	if !reg {
		catchupIter.Close() // clean up
		select {
		case <-r.store.Stopper().ShouldQuiesce():
			errC <- roachpb.NewError(&roachpb.NodeUnavailableError{})
			return nil
		default:
			panic("unexpected Stopped processor")
		}
	}

	// Set the rangefeed processor and filter reference. We know that no other
	// registration process could have raced with ours because calling this
	// method requires raftMu to be exclusively locked.
	r.setRangefeedProcessor(p)
	r.setRangefeedFilterLocked(filter)

	// Check for an initial closed timestamp update immediately to help
	// initialize the rangefeed's resolved timestamp as soon as possible.
	r.handleClosedTimestampUpdateRaftMuLocked(ctx)

	return p
}

// maybeDisconnectEmptyRangefeed tears down the provided Processor if it is
// still active and if it no longer has any registrations.
func (r *Replica) maybeDisconnectEmptyRangefeed(p *rangefeed.Processor) {
	r.rangefeedMu.Lock()
	defer r.rangefeedMu.Unlock()
	if p == nil || p != r.rangefeedMu.proc {
		// The processor has already been removed or replaced.
		return
	}
	if p.Len() == 0 || !r.updateRangefeedFilterLocked() {
		// Stop the rangefeed processor if it has no registrations or if we are
		// unable to update the operation filter.
		p.Stop()
		r.unsetRangefeedProcessorLocked(p)
	}
}

// disconnectRangefeedWithErr broadcasts the provided error to all rangefeed
// registrations and tears down the provided rangefeed Processor.
func (r *Replica) disconnectRangefeedWithErr(p *rangefeed.Processor, pErr *roachpb.Error) {
	p.StopWithErr(pErr)
	r.unsetRangefeedProcessor(p)
}

// disconnectRangefeedWithReason broadcasts the provided rangefeed retry reason
// to all rangefeed registrations and tears down the active rangefeed Processor.
// No-op if a rangefeed is not active.
func (r *Replica) disconnectRangefeedWithReason(reason roachpb.RangeFeedRetryError_Reason) {
	p := r.getRangefeedProcessor()
	if p == nil {
		return
	}
	pErr := roachpb.NewError(roachpb.NewRangeFeedRetryError(reason))
	r.disconnectRangefeedWithErr(p, pErr)
}
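
// Illustrative sketch, not part of the original file: other replica lifecycle
// events (splits, merges, removal, snapshot application) tear down an active
// rangefeed by broadcasting a retry reason so that clients re-establish their
// feeds elsewhere. A hypothetical caller handling replica removal might do:
func exampleDisconnectOnRemoval(r *Replica) {
	// REASON_REPLICA_REMOVED is one of the roachpb.RangeFeedRetryError reasons;
	// the call is a no-op if no rangefeed processor is active.
	r.disconnectRangefeedWithReason(roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED)
}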

// numRangefeedRegistrations returns the number of registrations attached to the
// Replica's rangefeed processor.
func (r *Replica) numRangefeedRegistrations() int {
	p := r.getRangefeedProcessor()
	if p == nil {
		return 0
	}
	return p.Len()
}

// populatePrevValsInLogicalOpLogRaftMuLocked updates the provided logical op
// log with previous values read from the reader, which is expected to reflect
// the state of the Replica before the operations in the logical op log are
// applied. No-op if a rangefeed is not active. Requires raftMu to be locked.
func (r *Replica) populatePrevValsInLogicalOpLogRaftMuLocked(
	ctx context.Context, ops *kvserverpb.LogicalOpLog, prevReader storage.Reader,
) {
	p, filter := r.getRangefeedProcessorAndFilter()
	if p == nil {
		return
	}

	// Read from the Reader to populate the PrevValue fields.
	for _, op := range ops.Ops {
		var key []byte
		var ts hlc.Timestamp
		var prevValPtr *[]byte
		switch t := op.GetValue().(type) {
		case *enginepb.MVCCWriteValueOp:
			key, ts, prevValPtr = t.Key, t.Timestamp, &t.PrevValue
		case *enginepb.MVCCCommitIntentOp:
			key, ts, prevValPtr = t.Key, t.Timestamp, &t.PrevValue
		case *enginepb.MVCCWriteIntentOp,
			*enginepb.MVCCUpdateIntentOp,
			*enginepb.MVCCAbortIntentOp,
			*enginepb.MVCCAbortTxnOp:
			// Nothing to do.
			continue
		default:
			panic(fmt.Sprintf("unknown logical op %T", t))
		}

		// Don't read previous values from the reader for operations that are
		// not needed by any rangefeed registration.
		if !filter.NeedPrevVal(roachpb.Span{Key: key}) {
			continue
		}

		// Read the previous value from the prev Reader. Unlike the new value
		// (see handleLogicalOpLogRaftMuLocked), this one may be missing.
		prevVal, _, err := storage.MVCCGet(
			ctx, prevReader, key, ts, storage.MVCCGetOptions{Tombstones: true, Inconsistent: true},
		)
		if err != nil {
			r.disconnectRangefeedWithErr(p, roachpb.NewErrorf(
				"error consuming %T for key %v @ ts %v: %v", op, key, ts, err,
			))
			return
		}
		if prevVal != nil {
			*prevValPtr = prevVal.RawBytes
		} else {
			*prevValPtr = nil
		}
	}
}
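
// Illustrative sketch, not part of the original file: the op logs handled by
// the functions above and below carry per-key MVCC operations. A write of a
// new value at a timestamp would arrive roughly as below; the WriteValue union
// field name on enginepb.MVCCLogicalOp is an assumption here, while the
// Key/Timestamp/Value fields match what the switch statements in this file
// read. For a registration created WithDiff,
// populatePrevValsInLogicalOpLogRaftMuLocked then fills in PrevValue from the
// pre-apply state of the replica.
func exampleWriteValueOpLog(key roachpb.Key, ts hlc.Timestamp, valueRaw []byte) *kvserverpb.LogicalOpLog {
	return &kvserverpb.LogicalOpLog{
		Ops: []enginepb.MVCCLogicalOp{{
			WriteValue: &enginepb.MVCCWriteValueOp{
				Key:       key,
				Timestamp: ts,
				Value:     valueRaw,
			},
		}},
	}
}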

// handleLogicalOpLogRaftMuLocked passes the logical op log to the active
// rangefeed, if one is running. The method accepts a reader, which is used to
// look up the values associated with key-value writes in the log before handing
// them to the rangefeed processor. No-op if a rangefeed is not active. Requires
// raftMu to be locked.
func (r *Replica) handleLogicalOpLogRaftMuLocked(
	ctx context.Context, ops *kvserverpb.LogicalOpLog, reader storage.Reader,
) {
	p, filter := r.getRangefeedProcessorAndFilter()
	if p == nil {
		return
	}
	if ops == nil {
		// Rangefeeds can't be turned on unless RangefeedEnabled is set to true,
		// after which point new Raft proposals will include logical op logs.
		// However, there's a race present where old Raft commands without a
		// logical op log might be passed to a rangefeed. Since the effect of
		// these commands was not included in the catch-up scan of current
		// registrations, we're forced to throw an error. The rangefeed clients
		// can reconnect at a later time, at which point all new Raft commands
		// should have logical op logs.
		r.disconnectRangefeedWithReason(roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING)
		return
	}
	if len(ops.Ops) == 0 {
		return
	}

	// When reading straight from the Raft log, some logical ops will not be
	// fully populated. Read from the Reader to populate all fields.
	for _, op := range ops.Ops {
		var key []byte
		var ts hlc.Timestamp
		var valPtr *[]byte
		switch t := op.GetValue().(type) {
		case *enginepb.MVCCWriteValueOp:
			key, ts, valPtr = t.Key, t.Timestamp, &t.Value
		case *enginepb.MVCCCommitIntentOp:
			key, ts, valPtr = t.Key, t.Timestamp, &t.Value
		case *enginepb.MVCCWriteIntentOp,
			*enginepb.MVCCUpdateIntentOp,
			*enginepb.MVCCAbortIntentOp,
			*enginepb.MVCCAbortTxnOp:
			// Nothing to do.
			continue
		default:
			panic(fmt.Sprintf("unknown logical op %T", t))
		}

		// Don't read values from the reader for operations that are not needed
		// by any rangefeed registration. We still need to inform the rangefeed
		// processor of the changes to intents so that it can track unresolved
		// intents, but we don't need to provide values.
		//
		// We could filter out MVCCWriteValueOp operations entirely at this
		// point if they are not needed by any registration, but as long as we
		// avoid the value lookup here, doing any more doesn't seem worth it.
		if !filter.NeedVal(roachpb.Span{Key: key}) {
			continue
		}

		// Read the value directly from the Reader. This is performed in the
		// same raftMu critical section that the logical op's corresponding
		// WriteBatch is applied, so the value should exist.
		val, _, err := storage.MVCCGet(ctx, reader, key, ts, storage.MVCCGetOptions{Tombstones: true})
		if val == nil && err == nil {
			err = errors.New("value missing in reader")
		}
		if err != nil {
			r.disconnectRangefeedWithErr(p, roachpb.NewErrorf(
				"error consuming %T for key %v @ ts %v: %v", op, key, ts, err,
			))
			return
		}
		*valPtr = val.RawBytes
	}

	// Pass the ops to the rangefeed processor.
	if !p.ConsumeLogicalOps(ops.Ops...) {
		// Consumption failed and the rangefeed was stopped.
		r.unsetRangefeedProcessor(p)
	}
}
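
// Illustrative sketch, not part of the original file: the processor's resolved
// timestamp is driven by exactly two inputs in this file, logical ops (which
// let it track unresolved intents) and closed timestamp updates (forwarded
// just below). The hypothetical helper shows the two calls side by side; both
// return false once the processor has been stopped.
func exampleDriveProcessor(
	p *rangefeed.Processor, ops []enginepb.MVCCLogicalOp, closedTS hlc.Timestamp,
) bool {
	if !p.ConsumeLogicalOps(ops...) {
		return false // processor stopped
	}
	return p.ForwardClosedTS(closedTS)
}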

// handleClosedTimestampUpdate determines the current maximum closed timestamp
// for the replica and informs the rangefeed, if one is running. No-op if a
// rangefeed is not active.
func (r *Replica) handleClosedTimestampUpdate(ctx context.Context) {
	ctx = r.AnnotateCtx(ctx)
	r.raftMu.Lock()
	defer r.raftMu.Unlock()
	r.handleClosedTimestampUpdateRaftMuLocked(ctx)
}

// handleClosedTimestampUpdateRaftMuLocked is like handleClosedTimestampUpdate,
// but it requires raftMu to be locked.
func (r *Replica) handleClosedTimestampUpdateRaftMuLocked(ctx context.Context) {
	p := r.getRangefeedProcessor()
	if p == nil {
		return
	}

	// Determine what the maximum closed timestamp is for this replica.
	closedTS, _ := r.maxClosed(ctx)

	// If the closed timestamp is sufficiently stale, signal that we want an
	// update to the leaseholder so that it will eventually begin to progress
	// again.
	slowClosedTSThresh := 5 * closedts.TargetDuration.Get(&r.store.cfg.Settings.SV)
	if d := timeutil.Since(closedTS.GoTime()); d > slowClosedTSThresh {
		m := r.store.metrics.RangeFeedMetrics
		if m.RangeFeedSlowClosedTimestampLogN.ShouldLog() {
			if closedTS.IsEmpty() {
				log.Infof(ctx, "RangeFeed closed timestamp is empty")
			} else {
				log.Infof(ctx, "RangeFeed closed timestamp %s is behind by %s", closedTS, d)
			}
		}

		// Asynchronously attempt to nudge the closed timestamp in case it's stuck.
		key := fmt.Sprintf(`rangefeed-slow-closed-timestamp-nudge-r%d`, r.RangeID)
		// Ignore the result of DoChan since, to keep this all async, it always
		// returns nil and any errors are logged by the closure passed to the
		// `DoChan` call.
		_, _ = m.RangeFeedSlowClosedTimestampNudge.DoChan(key, func() (interface{}, error) {
			// Also ignore the result of RunTask, since it only returns errors when
			// the task didn't start because we're shutting down.
			_ = r.store.stopper.RunTask(ctx, key, func(context.Context) {
				// Limit the amount of work this can suddenly spin up. In particular,
				// this is to protect against the case of a system-wide slowdown on
				// closed timestamps, which would otherwise potentially launch a huge
				// number of lease acquisitions all at once.
				select {
				case <-ctx.Done():
					// Don't need to do this anymore.
					return
				case m.RangeFeedSlowClosedTimestampNudgeSem <- struct{}{}:
				}
				defer func() { <-m.RangeFeedSlowClosedTimestampNudgeSem }()
				if err := r.ensureClosedTimestampStarted(ctx); err != nil {
					log.Infof(ctx, `RangeFeed failed to nudge: %s`, err)
				}
			})
			return nil, nil
		})
	}

	// If the closed timestamp is not empty, inform the Processor.
	if closedTS.IsEmpty() {
		return
	}
	if !p.ForwardClosedTS(closedTS) {
		// Consumption failed and the rangefeed was stopped.
		r.unsetRangefeedProcessor(p)
	}
}
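
// Illustrative sketch, not part of the original file: the nudge above triggers
// once the closed timestamp lags by more than five times
// kv.closed_timestamp.target_duration. For example, with a hypothetical 3s
// target duration the threshold works out to 15s.
func exampleSlowClosedTSThreshold(targetDuration time.Duration) time.Duration {
	return 5 * targetDuration
}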

// ensureClosedTimestampStarted does its best to make sure that this node is
// receiving closed timestamp updates for this replica's range. Note that this
// forces a valid lease to exist on the range and so can be reasonably expensive
// if there is not already a valid lease.
func (r *Replica) ensureClosedTimestampStarted(ctx context.Context) *roachpb.Error {
	// Make sure there's a leaseholder. If there's no leaseholder, there are no
	// closed timestamp updates.
	var leaseholderNodeID roachpb.NodeID
	_, err := r.redirectOnOrAcquireLease(ctx)
	if err == nil {
		// We have the lease. Request is essentially a wrapper for calling EmitMLAI
		// on a remote node, so cut out the middleman.
		r.EmitMLAI()
		return nil
	} else if lErr, ok := err.GetDetail().(*roachpb.NotLeaseHolderError); ok {
		if lErr.LeaseHolder == nil {
			// It's possible for redirectOnOrAcquireLease to return
			// NotLeaseHolderErrors with LeaseHolder unset, but these should be
			// transient conditions. If this method is being called by RangeFeed to
			// nudge a stuck closedts, then essentially all we can do here is nothing
			// and assume that redirectOnOrAcquireLease will do something different
			// the next time it's called.
			return nil
		}
		leaseholderNodeID = lErr.LeaseHolder.NodeID
	} else {
		return err
	}
	// Request fixes any issues where we've missed a closed timestamp update or
	// where we're not connected to receive them from this node in the first
	// place.
	r.store.cfg.ClosedTimestamp.Clients.Request(leaseholderNodeID, r.RangeID)
	return nil
}
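
// Illustrative sketch, not part of the original file: rangefeeds on non-system
// ranges are gated on the kv.rangefeed.enabled cluster setting registered at
// the top of this file (operators flip it via SET CLUSTER SETTING). Server
// code can consult it the same way RangeFeed does; the helper name is
// hypothetical.
func exampleRangefeedsEnabled(sv *settings.Values) bool {
	return RangefeedEnabled.Get(sv)
}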