github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_rangefeed.go

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/base"
    19  	"github.com/cockroachdb/cockroach/pkg/keys"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
    21  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/intentresolver"
    23  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/settings"
    27  	"github.com/cockroachdb/cockroach/pkg/storage"
    28  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    29  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    30  	"github.com/cockroachdb/cockroach/pkg/util/log"
    31  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    32  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    33  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    34  	"github.com/cockroachdb/errors"
    35  )
    36  
    37  // RangefeedEnabled is a cluster setting that enables rangefeed requests.
    38  var RangefeedEnabled = settings.RegisterPublicBoolSetting(
    39  	"kv.rangefeed.enabled",
    40  	"if set, rangefeed registration is enabled",
    41  	false,
    42  )
    43  
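// A usage sketch: with the setting above registered, an operator would
// typically enable rangefeeds cluster-wide via a statement like
//
//	SET CLUSTER SETTING kv.rangefeed.enabled = true;
//
// System ranges bypass this check entirely (see Replica.RangeFeed below).
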
    44  // lockedRangefeedStream is an implementation of rangefeed.Stream which provides
    45  // support for concurrent calls to Send. Note that the default implementation of
    46  // grpc.Stream is not safe for concurrent calls to Send.
    47  type lockedRangefeedStream struct {
    48  	wrapped roachpb.Internal_RangeFeedServer
    49  	sendMu  syncutil.Mutex
    50  }
    51  
    52  func (s *lockedRangefeedStream) Context() context.Context {
    53  	return s.wrapped.Context()
    54  }
    55  
    56  func (s *lockedRangefeedStream) Send(e *roachpb.RangeFeedEvent) error {
    57  	s.sendMu.Lock()
    58  	defer s.sendMu.Unlock()
    59  	return s.wrapped.Send(e)
    60  }
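
// A minimal sketch of the wrapper in use (stream here stands for the gRPC
// server stream handed to the RangeFeed RPC handler below). Because Send is
// mutex-guarded, concurrent writers are safe:
//
//	ls := &lockedRangefeedStream{wrapped: stream}
//	go func() { _ = ls.Send(&roachpb.RangeFeedEvent{}) }()
//	go func() { _ = ls.Send(&roachpb.RangeFeedEvent{}) }()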
    61  
    62  // rangefeedTxnPusher is a shim around intentResolver that implements the
    63  // rangefeed.TxnPusher interface.
    64  type rangefeedTxnPusher struct {
    65  	ir *intentresolver.IntentResolver
    66  	r  *Replica
    67  }
    68  
    69  // PushTxns is part of the rangefeed.TxnPusher interface. It performs a
    70  // high-priority push at the specified timestamp to each of the specified
    71  // transactions.
    72  func (tp *rangefeedTxnPusher) PushTxns(
    73  	ctx context.Context, txns []enginepb.TxnMeta, ts hlc.Timestamp,
    74  ) ([]*roachpb.Transaction, error) {
    75  	pushTxnMap := make(map[uuid.UUID]*enginepb.TxnMeta, len(txns))
    76  	for i := range txns {
    77  		txn := &txns[i]
    78  		pushTxnMap[txn.ID] = txn
    79  	}
    80  
    81  	h := roachpb.Header{
    82  		Timestamp: ts,
    83  		Txn: &roachpb.Transaction{
    84  			TxnMeta: enginepb.TxnMeta{
    85  				Priority: enginepb.MaxTxnPriority,
    86  			},
    87  		},
    88  	}
    89  
    90  	pushedTxnMap, pErr := tp.ir.MaybePushTransactions(
    91  		ctx, pushTxnMap, h, roachpb.PUSH_TIMESTAMP, false, /* skipIfInFlight */
    92  	)
    93  	if pErr != nil {
    94  		return nil, pErr.GoError()
    95  	}
    96  
    97  	pushedTxns := make([]*roachpb.Transaction, 0, len(pushedTxnMap))
    98  	for _, txn := range pushedTxnMap {
    99  		pushedTxns = append(pushedTxns, txn)
   100  	}
   101  	return pushedTxns, nil
   102  }
   103  
   104  // CleanupTxnIntentsAsync is part of the rangefeed.TxnPusher interface.
   105  func (tp *rangefeedTxnPusher) CleanupTxnIntentsAsync(
   106  	ctx context.Context, txns []*roachpb.Transaction,
   107  ) error {
   108  	endTxns := make([]result.EndTxnIntents, len(txns))
   109  	for i, txn := range txns {
   110  		endTxns[i].Txn = txn
   111  		endTxns[i].Poison = true
   112  	}
   113  	return tp.ir.CleanupTxnIntentsAsync(ctx, tp.r.RangeID, endTxns, true /* allowSyncProcessing */)
   114  }
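
// For orientation, the rangefeed.TxnPusher interface satisfied by the two
// methods above looks roughly like the following (reconstructed from their
// signatures; see pkg/kv/kvserver/rangefeed for the authoritative definition):
//
//	type TxnPusher interface {
//		PushTxns(context.Context, []enginepb.TxnMeta, hlc.Timestamp) ([]*roachpb.Transaction, error)
//		CleanupTxnIntentsAsync(context.Context, []*roachpb.Transaction) error
//	}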
   115  
   116  type iteratorWithCloser struct {
   117  	storage.SimpleIterator
   118  	close func()
   119  }
   120  
   121  func (i iteratorWithCloser) Close() {
   122  	i.SimpleIterator.Close()
   123  	i.close()
   124  }
   125  
   126  // RangeFeed registers a rangefeed over the specified span. It sends updates to
   127  // the provided stream and returns with an optional error when the rangefeed is
    128  // complete. The store's ConcurrentRangefeedIters limiter is used to limit the
    129  // number of rangefeeds using catch-up iterators at the same time.
   130  func (r *Replica) RangeFeed(
   131  	args *roachpb.RangeFeedRequest, stream roachpb.Internal_RangeFeedServer,
   132  ) *roachpb.Error {
   133  	if !r.isSystemRange() && !RangefeedEnabled.Get(&r.store.cfg.Settings.SV) {
   134  		return roachpb.NewErrorf("rangefeeds require the kv.rangefeed.enabled setting. See %s",
   135  			base.DocsURL(`change-data-capture.html#enable-rangefeeds-to-reduce-latency`))
   136  	}
   137  	ctx := r.AnnotateCtx(stream.Context())
   138  
   139  	var rSpan roachpb.RSpan
   140  	var err error
   141  	rSpan.Key, err = keys.Addr(args.Span.Key)
   142  	if err != nil {
   143  		return roachpb.NewError(err)
   144  	}
   145  	rSpan.EndKey, err = keys.Addr(args.Span.EndKey)
   146  	if err != nil {
   147  		return roachpb.NewError(err)
   148  	}
   149  
   150  	if err := r.ensureClosedTimestampStarted(ctx); err != nil {
   151  		return err
   152  	}
   153  
   154  	// If the RangeFeed is performing a catch-up scan then it will observe all
   155  	// values above args.Timestamp. If the RangeFeed is requesting previous
   156  	// values for every update then it will also need to look for the version
    157  // preceding each value observed during the catch-up scan. This
   158  	// means that the earliest value observed by the catch-up scan will be
   159  	// args.Timestamp.Next and the earliest timestamp used to retrieve the
   160  	// previous version of a value will be args.Timestamp, so this is the
   161  	// timestamp we must check against the GCThreshold.
   162  	checkTS := args.Timestamp
   163  	if checkTS.IsEmpty() {
   164  		// If no timestamp was provided then we're not going to run a catch-up
    165  		// scan, so make sure the GCThreshold check in checkExecutionCanProceedForRangeFeed succeeds.
   166  		checkTS = r.Clock().Now()
   167  	}
   168  
   169  	lockedStream := &lockedRangefeedStream{wrapped: stream}
   170  	errC := make(chan *roachpb.Error, 1)
   171  
   172  	// If we will be using a catch-up iterator, wait for the limiter here before
   173  	// locking raftMu.
   174  	usingCatchupIter := false
   175  	var iterSemRelease func()
   176  	if !args.Timestamp.IsEmpty() {
   177  		usingCatchupIter = true
   178  		lim := &r.store.limiters.ConcurrentRangefeedIters
   179  		if err := lim.Begin(ctx); err != nil {
   180  			return roachpb.NewError(err)
   181  		}
    182  		// Release the iterator limiter on exit, but only if we return
    183  		// before the catch-up iterator takes ownership of the release.
   184  		iterSemRelease = lim.Finish
   185  		defer func() {
   186  			if iterSemRelease != nil {
   187  				iterSemRelease()
   188  			}
   189  		}()
   190  	}
   191  
   192  	// Lock the raftMu, then register the stream as a new rangefeed registration.
   193  	// raftMu is held so that the catch-up iterator is captured in the same
   194  	// critical-section as the registration is established. This ensures that
   195  	// the registration doesn't miss any events.
   196  	r.raftMu.Lock()
   197  	if err := r.checkExecutionCanProceedForRangeFeed(rSpan, checkTS); err != nil {
   198  		r.raftMu.Unlock()
   199  		return roachpb.NewError(err)
   200  	}
   201  
   202  	// Register the stream with a catch-up iterator.
   203  	var catchUpIter storage.SimpleIterator
   204  	if usingCatchupIter {
   205  		innerIter := r.Engine().NewIterator(storage.IterOptions{
   206  			UpperBound: args.Span.EndKey,
   207  			// RangeFeed originally intended to use the time-bound iterator
   208  			// performance optimization. However, they've had correctness issues in
    209  			// the past (#28358, #34819) and no one has the time for the due diligence
    210  			// necessary to be confident in their correctness going forward. Not using
   211  			// them causes the total time spent in RangeFeed catchup on changefeed
   212  			// over tpcc-1000 to go from 40s -> 4853s, which is quite large but still
   213  			// workable. See #35122 for details.
   214  			// MinTimestampHint: args.Timestamp,
   215  		})
   216  		catchUpIter = iteratorWithCloser{
   217  			SimpleIterator: innerIter,
   218  			close:          iterSemRelease,
   219  		}
   220  		// Responsibility for releasing the semaphore now passes to the iterator.
   221  		iterSemRelease = nil
   222  	}
   223  	p := r.registerWithRangefeedRaftMuLocked(
   224  		ctx, rSpan, args.Timestamp, catchUpIter, args.WithDiff, lockedStream, errC,
   225  	)
   226  	r.raftMu.Unlock()
   227  
   228  	// When this function returns, attempt to clean up the rangefeed.
   229  	defer r.maybeDisconnectEmptyRangefeed(p)
   230  
   231  	// Block on the registration's error channel. Note that the registration
   232  	// observes stream.Context().Done.
   233  	return <-errC
   234  }
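
// As a rough client-side sketch of how this method is driven (Span, Timestamp,
// and WithDiff mirror the args used above; the routing header and the variable
// names are assumptions for illustration):
//
//	req := roachpb.RangeFeedRequest{
//		Header:    roachpb.Header{RangeID: rangeID}, // assumed routing header
//		Span:      roachpb.Span{Key: startKey, EndKey: endKey},
//		Timestamp: catchUpTS, // non-empty => run a catch-up scan from this time
//		WithDiff:  true,      // also populate previous values for each event
//	}
//
// The server then emits RangeFeedEvents on the stream until the registration
// is torn down, at which point the error read from errC above is returned.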
   235  
   236  func (r *Replica) getRangefeedProcessorAndFilter() (*rangefeed.Processor, *rangefeed.Filter) {
   237  	r.rangefeedMu.RLock()
   238  	defer r.rangefeedMu.RUnlock()
   239  	return r.rangefeedMu.proc, r.rangefeedMu.opFilter
   240  }
   241  
   242  func (r *Replica) getRangefeedProcessor() *rangefeed.Processor {
   243  	p, _ := r.getRangefeedProcessorAndFilter()
   244  	return p
   245  }
   246  
   247  func (r *Replica) setRangefeedProcessor(p *rangefeed.Processor) {
   248  	r.rangefeedMu.Lock()
   249  	defer r.rangefeedMu.Unlock()
   250  	r.rangefeedMu.proc = p
   251  	r.store.addReplicaWithRangefeed(r.RangeID)
   252  }
   253  
   254  func (r *Replica) unsetRangefeedProcessorLocked(p *rangefeed.Processor) {
   255  	if r.rangefeedMu.proc != p {
   256  		// The processor was already unset.
   257  		return
   258  	}
   259  	r.rangefeedMu.proc = nil
   260  	r.rangefeedMu.opFilter = nil
   261  	r.store.removeReplicaWithRangefeed(r.RangeID)
   262  }
   263  
   264  func (r *Replica) unsetRangefeedProcessor(p *rangefeed.Processor) {
   265  	r.rangefeedMu.Lock()
   266  	defer r.rangefeedMu.Unlock()
   267  	r.unsetRangefeedProcessorLocked(p)
   268  }
   269  
   270  func (r *Replica) setRangefeedFilterLocked(f *rangefeed.Filter) {
   271  	if f == nil {
   272  		panic("filter nil")
   273  	}
   274  	r.rangefeedMu.opFilter = f
   275  }
   276  
   277  func (r *Replica) updateRangefeedFilterLocked() bool {
   278  	f := r.rangefeedMu.proc.Filter()
   279  	// Return whether the update to the filter was successful or not. If
   280  	// the processor was already stopped then we can't update the filter.
   281  	if f != nil {
   282  		r.setRangefeedFilterLocked(f)
   283  		return true
   284  	}
   285  	return false
   286  }
   287  
   288  // The size of an event is 112 bytes, so this will result in an allocation on
   289  // the order of ~512KB per RangeFeed. That's probably ok given the number of
   290  // ranges on a node that we'd like to support with active rangefeeds, but it's
   291  // certainly on the upper end of the range.
   292  //
   293  // TODO(dan): Everyone seems to agree that this memory limit would be better set
   294  // at a store-wide level, but there doesn't seem to be an easy way to accomplish
   295  // that.
   296  const defaultEventChanCap = 4096
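// (For reference: 4096 events * 112 bytes/event = 458,752 bytes ≈ 448KiB,
// i.e. on the order of the ~512KB figure quoted above.)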
   297  
   298  // registerWithRangefeedRaftMuLocked sets up a Rangefeed registration over the
   299  // provided span. It initializes a rangefeed for the Replica if one is not
    300  // already running. Requires raftMu to be locked.
   301  func (r *Replica) registerWithRangefeedRaftMuLocked(
   302  	ctx context.Context,
   303  	span roachpb.RSpan,
   304  	startTS hlc.Timestamp,
   305  	catchupIter storage.SimpleIterator,
   306  	withDiff bool,
   307  	stream rangefeed.Stream,
   308  	errC chan<- *roachpb.Error,
   309  ) *rangefeed.Processor {
   310  	// Attempt to register with an existing Rangefeed processor, if one exists.
   311  	// The locking here is a little tricky because we need to handle the case
   312  	// of concurrent processor shutdowns (see maybeDisconnectEmptyRangefeed).
   313  	r.rangefeedMu.Lock()
   314  	p := r.rangefeedMu.proc
   315  	if p != nil {
   316  		reg, filter := p.Register(span, startTS, catchupIter, withDiff, stream, errC)
   317  		if reg {
   318  			// Registered successfully with an existing processor.
   319  			// Update the rangefeed filter to avoid filtering ops
   320  			// that this new registration might be interested in.
   321  			r.setRangefeedFilterLocked(filter)
   322  			r.rangefeedMu.Unlock()
   323  			return p
   324  		}
   325  		// If the registration failed, the processor was already being shut
   326  		// down. Help unset it and then continue on with initializing a new
   327  		// processor.
   328  		r.unsetRangefeedProcessorLocked(p)
   329  		p = nil
   330  	}
   331  	r.rangefeedMu.Unlock()
   332  
   333  	// Create a new rangefeed.
   334  	desc := r.Desc()
   335  	tp := rangefeedTxnPusher{ir: r.store.intentResolver, r: r}
   336  	cfg := rangefeed.Config{
   337  		AmbientContext:   r.AmbientContext,
   338  		Clock:            r.Clock(),
   339  		Span:             desc.RSpan(),
   340  		TxnPusher:        &tp,
   341  		PushTxnsInterval: r.store.TestingKnobs().RangeFeedPushTxnsInterval,
   342  		PushTxnsAge:      r.store.TestingKnobs().RangeFeedPushTxnsAge,
   343  		EventChanCap:     defaultEventChanCap,
   344  		EventChanTimeout: 50 * time.Millisecond,
   345  		Metrics:          r.store.metrics.RangeFeedMetrics,
   346  	}
   347  	p = rangefeed.NewProcessor(cfg)
   348  
   349  	// Start it with an iterator to initialize the resolved timestamp.
   350  	rtsIter := r.Engine().NewIterator(storage.IterOptions{
   351  		UpperBound: desc.EndKey.AsRawKey(),
   352  		// TODO(nvanbenschoten): To facilitate fast restarts of rangefeed
   353  		// we should periodically persist the resolved timestamp so that we
   354  		// can initialize the rangefeed using an iterator that only needs to
   355  		// observe timestamps back to the last recorded resolved timestamp.
   356  		// This is safe because we know that there are no unresolved intents
   357  		// at times before a resolved timestamp.
   358  		// MinTimestampHint: r.ResolvedTimestamp,
   359  	})
   360  	p.Start(r.store.Stopper(), rtsIter)
   361  
   362  	// Register with the processor *before* we attach its reference to the
   363  	// Replica struct. This ensures that the registration is in place before
   364  	// any other goroutines are able to stop the processor. In other words,
   365  	// this ensures that the only time the registration fails is during
   366  	// server shutdown.
   367  	reg, filter := p.Register(span, startTS, catchupIter, withDiff, stream, errC)
   368  	if !reg {
   369  		catchupIter.Close() // clean up
   370  		select {
   371  		case <-r.store.Stopper().ShouldQuiesce():
   372  			errC <- roachpb.NewError(&roachpb.NodeUnavailableError{})
   373  			return nil
   374  		default:
   375  			panic("unexpected Stopped processor")
   376  		}
   377  	}
   378  
   379  	// Set the rangefeed processor and filter reference. We know that no other
   380  	// registration process could have raced with ours because calling this
   381  	// method requires raftMu to be exclusively locked.
   382  	r.setRangefeedProcessor(p)
   383  	r.setRangefeedFilterLocked(filter)
   384  
   385  	// Check for an initial closed timestamp update immediately to help
   386  	// initialize the rangefeed's resolved timestamp as soon as possible.
   387  	r.handleClosedTimestampUpdateRaftMuLocked(ctx)
   388  
   389  	return p
   390  }
   391  
   392  // maybeDisconnectEmptyRangefeed tears down the provided Processor if it is
   393  // still active and if it no longer has any registrations.
   394  func (r *Replica) maybeDisconnectEmptyRangefeed(p *rangefeed.Processor) {
   395  	r.rangefeedMu.Lock()
   396  	defer r.rangefeedMu.Unlock()
   397  	if p == nil || p != r.rangefeedMu.proc {
   398  		// The processor has already been removed or replaced.
   399  		return
   400  	}
   401  	if p.Len() == 0 || !r.updateRangefeedFilterLocked() {
   402  		// Stop the rangefeed processor if it has no registrations or if we are
   403  		// unable to update the operation filter.
   404  		p.Stop()
   405  		r.unsetRangefeedProcessorLocked(p)
   406  	}
   407  }
   408  
   409  // disconnectRangefeedWithErr broadcasts the provided error to all rangefeed
   410  // registrations and tears down the provided rangefeed Processor.
   411  func (r *Replica) disconnectRangefeedWithErr(p *rangefeed.Processor, pErr *roachpb.Error) {
   412  	p.StopWithErr(pErr)
   413  	r.unsetRangefeedProcessor(p)
   414  }
   415  
   416  // disconnectRangefeedWithReason broadcasts the provided rangefeed retry reason
   417  // to all rangefeed registrations and tears down the active rangefeed Processor.
   418  // No-op if a rangefeed is not active.
   419  func (r *Replica) disconnectRangefeedWithReason(reason roachpb.RangeFeedRetryError_Reason) {
   420  	p := r.getRangefeedProcessor()
   421  	if p == nil {
   422  		return
   423  	}
   424  	pErr := roachpb.NewError(roachpb.NewRangeFeedRetryError(reason))
   425  	r.disconnectRangefeedWithErr(p, pErr)
   426  }
   427  
   428  // numRangefeedRegistrations returns the number of registrations attached to the
   429  // Replica's rangefeed processor.
   430  func (r *Replica) numRangefeedRegistrations() int {
   431  	p := r.getRangefeedProcessor()
   432  	if p == nil {
   433  		return 0
   434  	}
   435  	return p.Len()
   436  }
   437  
   438  // populatePrevValsInLogicalOpLogRaftMuLocked updates the provided logical op
   439  // log with previous values read from the reader, which is expected to reflect
   440  // the state of the Replica before the operations in the logical op log are
   441  // applied. No-op if a rangefeed is not active. Requires raftMu to be locked.
   442  func (r *Replica) populatePrevValsInLogicalOpLogRaftMuLocked(
   443  	ctx context.Context, ops *kvserverpb.LogicalOpLog, prevReader storage.Reader,
   444  ) {
   445  	p, filter := r.getRangefeedProcessorAndFilter()
   446  	if p == nil {
   447  		return
   448  	}
   449  
   450  	// Read from the Reader to populate the PrevValue fields.
   451  	for _, op := range ops.Ops {
   452  		var key []byte
   453  		var ts hlc.Timestamp
   454  		var prevValPtr *[]byte
   455  		switch t := op.GetValue().(type) {
   456  		case *enginepb.MVCCWriteValueOp:
   457  			key, ts, prevValPtr = t.Key, t.Timestamp, &t.PrevValue
   458  		case *enginepb.MVCCCommitIntentOp:
   459  			key, ts, prevValPtr = t.Key, t.Timestamp, &t.PrevValue
   460  		case *enginepb.MVCCWriteIntentOp,
   461  			*enginepb.MVCCUpdateIntentOp,
   462  			*enginepb.MVCCAbortIntentOp,
   463  			*enginepb.MVCCAbortTxnOp:
   464  			// Nothing to do.
   465  			continue
   466  		default:
   467  			panic(fmt.Sprintf("unknown logical op %T", t))
   468  		}
   469  
   470  		// Don't read previous values from the reader for operations that are
   471  		// not needed by any rangefeed registration.
   472  		if !filter.NeedPrevVal(roachpb.Span{Key: key}) {
   473  			continue
   474  		}
   475  
   476  		// Read the previous value from the prev Reader. Unlike the new value
   477  		// (see handleLogicalOpLogRaftMuLocked), this one may be missing.
   478  		prevVal, _, err := storage.MVCCGet(
   479  			ctx, prevReader, key, ts, storage.MVCCGetOptions{Tombstones: true, Inconsistent: true},
   480  		)
   481  		if err != nil {
   482  			r.disconnectRangefeedWithErr(p, roachpb.NewErrorf(
   483  				"error consuming %T for key %v @ ts %v: %v", op, key, ts, err,
   484  			))
   485  			return
   486  		}
   487  		if prevVal != nil {
   488  			*prevValPtr = prevVal.RawBytes
   489  		} else {
   490  			*prevValPtr = nil
   491  		}
   492  	}
   493  }
   494  
   495  // handleLogicalOpLogRaftMuLocked passes the logical op log to the active
   496  // rangefeed, if one is running. The method accepts a reader, which is used to
   497  // look up the values associated with key-value writes in the log before handing
   498  // them to the rangefeed processor. No-op if a rangefeed is not active. Requires
   499  // raftMu to be locked.
   500  func (r *Replica) handleLogicalOpLogRaftMuLocked(
   501  	ctx context.Context, ops *kvserverpb.LogicalOpLog, reader storage.Reader,
   502  ) {
   503  	p, filter := r.getRangefeedProcessorAndFilter()
   504  	if p == nil {
   505  		return
   506  	}
   507  	if ops == nil {
   508  		// Rangefeeds can't be turned on unless RangefeedEnabled is set to true,
   509  		// after which point new Raft proposals will include logical op logs.
   510  		// However, there's a race present where old Raft commands without a
   511  		// logical op log might be passed to a rangefeed. Since the effect of
   512  		// these commands was not included in the catch-up scan of current
   513  		// registrations, we're forced to throw an error. The rangefeed clients
   514  		// can reconnect at a later time, at which point all new Raft commands
   515  		// should have logical op logs.
   516  		r.disconnectRangefeedWithReason(roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING)
   517  		return
   518  	}
   519  	if len(ops.Ops) == 0 {
   520  		return
   521  	}
   522  
   523  	// When reading straight from the Raft log, some logical ops will not be
   524  	// fully populated. Read from the Reader to populate all fields.
   525  	for _, op := range ops.Ops {
   526  		var key []byte
   527  		var ts hlc.Timestamp
   528  		var valPtr *[]byte
   529  		switch t := op.GetValue().(type) {
   530  		case *enginepb.MVCCWriteValueOp:
   531  			key, ts, valPtr = t.Key, t.Timestamp, &t.Value
   532  		case *enginepb.MVCCCommitIntentOp:
   533  			key, ts, valPtr = t.Key, t.Timestamp, &t.Value
   534  		case *enginepb.MVCCWriteIntentOp,
   535  			*enginepb.MVCCUpdateIntentOp,
   536  			*enginepb.MVCCAbortIntentOp,
   537  			*enginepb.MVCCAbortTxnOp:
   538  			// Nothing to do.
   539  			continue
   540  		default:
   541  			panic(fmt.Sprintf("unknown logical op %T", t))
   542  		}
   543  
   544  		// Don't read values from the reader for operations that are not needed
   545  		// by any rangefeed registration. We still need to inform the rangefeed
   546  		// processor of the changes to intents so that it can track unresolved
   547  		// intents, but we don't need to provide values.
   548  		//
   549  		// We could filter out MVCCWriteValueOp operations entirely at this
   550  		// point if they are not needed by any registration, but as long as we
   551  		// avoid the value lookup here, doing any more doesn't seem worth it.
   552  		if !filter.NeedVal(roachpb.Span{Key: key}) {
   553  			continue
   554  		}
   555  
   556  		// Read the value directly from the Reader. This is performed in the
   557  		// same raftMu critical section that the logical op's corresponding
   558  		// WriteBatch is applied, so the value should exist.
   559  		val, _, err := storage.MVCCGet(ctx, reader, key, ts, storage.MVCCGetOptions{Tombstones: true})
   560  		if val == nil && err == nil {
   561  			err = errors.New("value missing in reader")
   562  		}
   563  		if err != nil {
   564  			r.disconnectRangefeedWithErr(p, roachpb.NewErrorf(
   565  				"error consuming %T for key %v @ ts %v: %v", op, key, ts, err,
   566  			))
   567  			return
   568  		}
   569  		*valPtr = val.RawBytes
   570  	}
   571  
   572  	// Pass the ops to the rangefeed processor.
   573  	if !p.ConsumeLogicalOps(ops.Ops...) {
   574  		// Consumption failed and the rangefeed was stopped.
   575  		r.unsetRangefeedProcessor(p)
   576  	}
   577  }
   578  
   579  // handleClosedTimestampUpdate determines the current maximum closed timestamp
   580  // for the replica and informs the rangefeed, if one is running. No-op if a
   581  // rangefeed is not active.
   582  func (r *Replica) handleClosedTimestampUpdate(ctx context.Context) {
   583  	ctx = r.AnnotateCtx(ctx)
   584  	r.raftMu.Lock()
   585  	defer r.raftMu.Unlock()
   586  	r.handleClosedTimestampUpdateRaftMuLocked(ctx)
   587  }
   588  
   589  // handleClosedTimestampUpdateRaftMuLocked is like handleClosedTimestampUpdate,
   590  // but it requires raftMu to be locked.
   591  func (r *Replica) handleClosedTimestampUpdateRaftMuLocked(ctx context.Context) {
   592  	p := r.getRangefeedProcessor()
   593  	if p == nil {
   594  		return
   595  	}
   596  
   597  	// Determine what the maximum closed timestamp is for this replica.
   598  	closedTS, _ := r.maxClosed(ctx)
   599  
   600  	// If the closed timestamp is sufficiently stale, signal that we want an
   601  	// update to the leaseholder so that it will eventually begin to progress
   602  	// again.
   603  	slowClosedTSThresh := 5 * closedts.TargetDuration.Get(&r.store.cfg.Settings.SV)
   604  	if d := timeutil.Since(closedTS.GoTime()); d > slowClosedTSThresh {
   605  		m := r.store.metrics.RangeFeedMetrics
   606  		if m.RangeFeedSlowClosedTimestampLogN.ShouldLog() {
   607  			if closedTS.IsEmpty() {
   608  				log.Infof(ctx, "RangeFeed closed timestamp is empty")
   609  			} else {
   610  				log.Infof(ctx, "RangeFeed closed timestamp %s is behind by %s", closedTS, d)
   611  			}
   612  		}
   613  
   614  		// Asynchronously attempt to nudge the closed timestamp in case it's stuck.
   615  		key := fmt.Sprintf(`rangefeed-slow-closed-timestamp-nudge-r%d`, r.RangeID)
   616  		// Ignore the result of DoChan since, to keep this all async, it always
   617  		// returns nil and any errors are logged by the closure passed to the
   618  		// `DoChan` call.
   619  		_, _ = m.RangeFeedSlowClosedTimestampNudge.DoChan(key, func() (interface{}, error) {
   620  			// Also ignore the result of RunTask, since it only returns errors when
   621  			// the task didn't start because we're shutting down.
   622  			_ = r.store.stopper.RunTask(ctx, key, func(context.Context) {
   623  				// Limit the amount of work this can suddenly spin up. In particular,
   624  				// this is to protect against the case of a system-wide slowdown on
   625  				// closed timestamps, which would otherwise potentially launch a huge
   626  				// number of lease acquisitions all at once.
   627  				select {
   628  				case <-ctx.Done():
   629  					// Don't need to do this anymore.
   630  					return
   631  				case m.RangeFeedSlowClosedTimestampNudgeSem <- struct{}{}:
   632  				}
   633  				defer func() { <-m.RangeFeedSlowClosedTimestampNudgeSem }()
   634  				if err := r.ensureClosedTimestampStarted(ctx); err != nil {
   635  					log.Infof(ctx, `RangeFeed failed to nudge: %s`, err)
   636  				}
   637  			})
   638  			return nil, nil
   639  		})
   640  	}
   641  
   642  	// If the closed timestamp is not empty, inform the Processor.
   643  	if closedTS.IsEmpty() {
   644  		return
   645  	}
   646  	if !p.ForwardClosedTS(closedTS) {
   647  		// Consumption failed and the rangefeed was stopped.
   648  		r.unsetRangefeedProcessor(p)
   649  	}
   650  }
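
// A worked example of the threshold above, assuming a 30s closed timestamp
// target duration (the value is illustrative, not taken from this file):
//
//	slowClosedTSThresh = 5 * 30s = 150s
//
// so only rangefeeds whose closed timestamp lags by more than 150s take the
// logging and nudge path.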
   651  
   652  // ensureClosedTimestampStarted does its best to make sure that this node is
    653  // receiving closed timestamp updates for this replica's range. Note that this
   654  // forces a valid lease to exist on the range and so can be reasonably expensive
   655  // if there is not already a valid lease.
   656  func (r *Replica) ensureClosedTimestampStarted(ctx context.Context) *roachpb.Error {
    657  	// Make sure there's a leaseholder. If there's no leaseholder, there are no
   658  	// closed timestamp updates.
   659  	var leaseholderNodeID roachpb.NodeID
   660  	_, err := r.redirectOnOrAcquireLease(ctx)
   661  	if err == nil {
   662  		// We have the lease. Request is essentially a wrapper for calling EmitMLAI
   663  		// on a remote node, so cut out the middleman.
   664  		r.EmitMLAI()
   665  		return nil
   666  	} else if lErr, ok := err.GetDetail().(*roachpb.NotLeaseHolderError); ok {
   667  		if lErr.LeaseHolder == nil {
   668  			// It's possible for redirectOnOrAcquireLease to return
   669  			// NotLeaseHolderErrors with LeaseHolder unset, but these should be
   670  			// transient conditions. If this method is being called by RangeFeed to
   671  			// nudge a stuck closedts, then essentially all we can do here is nothing
   672  			// and assume that redirectOnOrAcquireLease will do something different
   673  			// the next time it's called.
   674  			return nil
   675  		}
   676  		leaseholderNodeID = lErr.LeaseHolder.NodeID
   677  	} else {
   678  		return err
   679  	}
   680  	// Request fixes any issues where we've missed a closed timestamp update or
   681  	// where we're not connected to receive them from this node in the first
   682  	// place.
   683  	r.store.cfg.ClosedTimestamp.Clients.Request(leaseholderNodeID, r.RangeID)
   684  	return nil
   685  }