github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvcoord

import (
	"context"
	"fmt"
	"io"

	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
)

type singleRangeInfo struct {
	desc  *roachpb.RangeDescriptor
	rs    roachpb.RSpan
	ts    hlc.Timestamp
	token *EvictionToken
}

// RangeFeed divides a RangeFeed request on range boundaries and establishes a
// RangeFeed to each of the individual ranges. It streams back results on the
// provided channel.
//
// Note that the timestamps in RangeFeedCheckpoint events that are streamed back
// may be lower than the timestamp given here.
func (ds *DistSender) RangeFeed(
	ctx context.Context,
	span roachpb.Span,
	ts hlc.Timestamp,
	withDiff bool,
	eventCh chan<- *roachpb.RangeFeedEvent,
) error {
	ctx = ds.AnnotateCtx(ctx)
	ctx, sp := tracing.EnsureChildSpan(ctx, ds.AmbientContext.Tracer, "dist sender")
	defer sp.Finish()

	startRKey, err := keys.Addr(span.Key)
	if err != nil {
		return err
	}
	endRKey, err := keys.Addr(span.EndKey)
	if err != nil {
		return err
	}
	rs := roachpb.RSpan{Key: startRKey, EndKey: endRKey}

	g := ctxgroup.WithContext(ctx)
	// Goroutine that processes subdivided ranges and creates a rangefeed for
	// each.
	rangeCh := make(chan singleRangeInfo, 16)
	g.GoCtx(func(ctx context.Context) error {
		for {
			select {
			case sri := <-rangeCh:
				// Spawn a child goroutine to process this feed.
				g.GoCtx(func(ctx context.Context) error {
					return ds.partialRangeFeed(ctx, &sri, withDiff, rangeCh, eventCh)
				})
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	})

	// Kick off the initial set of ranges.
	g.GoCtx(func(ctx context.Context) error {
		return ds.divideAndSendRangeFeedToRanges(ctx, rs, ts, rangeCh)
	})

	return g.Wait()
}
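
// The sketch below is an editorial illustration and not part of the original
// file: it shows how a caller might drive DistSender.RangeFeed, assuming the
// caller already has a DistSender, a span, and a start timestamp. RangeFeed
// blocks until the feed terminates, so the event channel is drained in a
// sibling goroutine. The function and variable names here are hypothetical.
func exampleRangeFeedConsumer(
	ctx context.Context, ds *DistSender, span roachpb.Span, startTS hlc.Timestamp,
) error {
	eventCh := make(chan *roachpb.RangeFeedEvent, 128)
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		// Blocks until the rangefeed is torn down or ctx is canceled.
		return ds.RangeFeed(ctx, span, startTS, false /* withDiff */, eventCh)
	})
	g.GoCtx(func(ctx context.Context) error {
		for {
			select {
			case ev := <-eventCh:
				switch t := ev.GetValue().(type) {
				case *roachpb.RangeFeedValue:
					// A single key/value update.
					_ = t
				case *roachpb.RangeFeedCheckpoint:
					// Every key in t.Span is resolved up to t.ResolvedTS.
					_ = t
				}
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	})
	return g.Wait()
}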

func (ds *DistSender) divideAndSendRangeFeedToRanges(
	ctx context.Context, rs roachpb.RSpan, ts hlc.Timestamp, rangeCh chan<- singleRangeInfo,
) error {
	// As RangeIterator iterates, it can return overlapping descriptors (and
	// during splits, this happens frequently), but divideAndSendRangeFeedToRanges
	// intends to split up the input into non-overlapping spans aligned to range
	// boundaries. So, as we go, keep track of the remaining uncovered part of
	// `rs` in `nextRS`.
	nextRS := rs
	ri := NewRangeIterator(ds)
	for ri.Seek(ctx, nextRS.Key, Ascending); ri.Valid(); ri.Next(ctx) {
		desc := ri.Desc()
		partialRS, err := nextRS.Intersect(desc)
		if err != nil {
			return err
		}
		nextRS.Key = partialRS.EndKey
		select {
		case rangeCh <- singleRangeInfo{
			desc:  desc,
			rs:    partialRS,
			ts:    ts,
			token: ri.Token(),
		}:
		case <-ctx.Done():
			return ctx.Err()
		}
		if !ri.NeedAnother(nextRS) {
			break
		}
	}
	return ri.Error()
}
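
// Illustrative sketch (editorial addition, not part of the original file):
// the span-trimming pattern used in divideAndSendRangeFeedToRanges, shown in
// isolation. Descriptors coming out of a range iteration may overlap (e.g.
// around splits); intersecting each one with the still-uncovered remainder
// and then advancing the remainder's start key yields non-overlapping,
// range-aligned pieces. The helper name is hypothetical.
func exampleTrimToRangeBoundaries(
	rs roachpb.RSpan, descs []roachpb.RangeDescriptor,
) ([]roachpb.RSpan, error) {
	var pieces []roachpb.RSpan
	remaining := rs
	for i := range descs {
		piece, err := remaining.Intersect(&descs[i])
		if err != nil {
			return nil, err
		}
		pieces = append(pieces, piece)
		// Everything up to piece.EndKey is now covered.
		remaining.Key = piece.EndKey
		if !remaining.Key.Less(remaining.EndKey) {
			break
		}
	}
	return pieces, nil
}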

// partialRangeFeed establishes a RangeFeed to the range specified by desc. It
// manages lifecycle events of the range in order to maintain the RangeFeed
// connection; this may involve instructing higher-level functions to retry
// this rangefeed, or subdividing the range further in the event of a split.
func (ds *DistSender) partialRangeFeed(
	ctx context.Context,
	rangeInfo *singleRangeInfo,
	withDiff bool,
	rangeCh chan<- singleRangeInfo,
	eventCh chan<- *roachpb.RangeFeedEvent,
) error {
	// Bound the partial rangefeed to the partial span.
	span := rangeInfo.rs.AsRawSpanWithNoLocals()
	ts := rangeInfo.ts

	// Start a retry loop for sending the batch to the range.
	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// If we've cleared the descriptor on a send failure, re-lookup.
		if rangeInfo.desc == nil {
			var err error
			rangeInfo.desc, rangeInfo.token, err = ds.getDescriptor(ctx, rangeInfo.rs.Key, nil, false)
			if err != nil {
				log.VErrEventf(ctx, 1, "range descriptor re-lookup failed: %s", err)
				continue
			}
		}

		// Establish a RangeFeed for a single Range.
		maxTS, pErr := ds.singleRangeFeed(ctx, span, ts, withDiff, rangeInfo.desc, eventCh)

		// Forward the timestamp in case we end up sending it again.
		ts.Forward(maxTS)

		if pErr != nil {
			if log.V(1) {
				log.Infof(ctx, "RangeFeed %s disconnected with last checkpoint %s ago: %v",
					span, timeutil.Since(ts.GoTime()), pErr)
			}
			switch t := pErr.GetDetail().(type) {
			case *roachpb.StoreNotFoundError, *roachpb.NodeUnavailableError:
				// These errors are likely to be unique to the replica that
				// reported them, so no action is required before the next
				// retry.
			case *roachpb.SendError, *roachpb.RangeNotFoundError:
				// Evict the descriptor from the cache and reload on the next attempt.
				rangeInfo.token.Evict(ctx)
				rangeInfo.desc = nil
				continue
			case *roachpb.RangeKeyMismatchError:
				// Evict the descriptor from the cache.
				rangeInfo.token.Evict(ctx)
				return ds.divideAndSendRangeFeedToRanges(ctx, rangeInfo.rs, ts, rangeCh)
			case *roachpb.RangeFeedRetryError:
				switch t.Reason {
				case roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED,
					roachpb.RangeFeedRetryError_REASON_RAFT_SNAPSHOT,
					roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING,
					roachpb.RangeFeedRetryError_REASON_SLOW_CONSUMER:
					// Try again with the same descriptor. These are transient
					// errors that should not show up again.
					continue
				case roachpb.RangeFeedRetryError_REASON_RANGE_SPLIT,
					roachpb.RangeFeedRetryError_REASON_RANGE_MERGED:
					// Evict the descriptor from the cache.
					rangeInfo.token.Evict(ctx)
					return ds.divideAndSendRangeFeedToRanges(ctx, rangeInfo.rs, ts, rangeCh)
				default:
					log.Fatalf(ctx, "unexpected RangeFeedRetryError reason %v", t.Reason)
				}
			default:
				return t
			}
		}
	}
	return nil
}
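
// Editorial sketch (not part of the original file): a compact restatement of
// how the retry loop in partialRangeFeed reacts to errors. The action names
// and the helper itself are hypothetical; they only summarize the switch
// above.
type rangefeedErrorAction int

const (
	// Retry against the same cached descriptor; the error was specific to one
	// replica or is otherwise transient.
	actionRetrySameDescriptor rangefeedErrorAction = iota
	// Evict the cached descriptor and re-resolve the range before retrying.
	actionEvictAndRetry
	// Evict the cached descriptor and re-divide the span across the new range
	// boundaries (splits and merges).
	actionEvictAndRedivide
	// Propagate the error to the caller (unknown RangeFeedRetryError reasons
	// are fatal in partialRangeFeed itself).
	actionTerminate
)

func classifyRangefeedError(pErr *roachpb.Error) rangefeedErrorAction {
	switch t := pErr.GetDetail().(type) {
	case *roachpb.StoreNotFoundError, *roachpb.NodeUnavailableError:
		return actionRetrySameDescriptor
	case *roachpb.SendError, *roachpb.RangeNotFoundError:
		return actionEvictAndRetry
	case *roachpb.RangeKeyMismatchError:
		return actionEvictAndRedivide
	case *roachpb.RangeFeedRetryError:
		switch t.Reason {
		case roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED,
			roachpb.RangeFeedRetryError_REASON_RAFT_SNAPSHOT,
			roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING,
			roachpb.RangeFeedRetryError_REASON_SLOW_CONSUMER:
			return actionRetrySameDescriptor
		case roachpb.RangeFeedRetryError_REASON_RANGE_SPLIT,
			roachpb.RangeFeedRetryError_REASON_RANGE_MERGED:
			return actionEvictAndRedivide
		}
	}
	return actionTerminate
}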

// singleRangeFeed gathers and rearranges the replicas, and makes a RangeFeed
// RPC call. Results will be sent on the provided channel. Returns the timestamp
// of the maximum rangefeed checkpoint seen, which can be used to re-establish
// the rangefeed with a larger starting timestamp, reflecting the fact that all
// values up to the last checkpoint have already been observed. Returns the
// request's timestamp if no checkpoints are seen.
func (ds *DistSender) singleRangeFeed(
	ctx context.Context,
	span roachpb.Span,
	ts hlc.Timestamp,
	withDiff bool,
	desc *roachpb.RangeDescriptor,
	eventCh chan<- *roachpb.RangeFeedEvent,
) (hlc.Timestamp, *roachpb.Error) {
	args := roachpb.RangeFeedRequest{
		Span: span,
		Header: roachpb.Header{
			Timestamp: ts,
			RangeID:   desc.RangeID,
		},
		WithDiff: withDiff,
	}

	var latencyFn LatencyFunc
	if ds.rpcContext != nil {
		latencyFn = ds.rpcContext.RemoteClocks.Latency
	}
	// Learner replicas won't serve reads/writes, so send only to the `Voters`
	// replicas. This is just an optimization to save a network hop; everything
	// would still work if we had `All` here.
	replicas := NewReplicaSlice(ds.gossip, desc.Replicas().Voters())
	replicas.OptimizeReplicaOrder(ds.getNodeDescriptor(), latencyFn)
	// The RangeFeed is not used for system-critical traffic, so use a
	// DefaultClass connection regardless of the range.
	opts := SendOptions{class: rpc.DefaultClass}
	transport, err := ds.transportFactory(opts, ds.nodeDialer, replicas)
	if err != nil {
		return args.Timestamp, roachpb.NewError(err)
	}

	for {
		if transport.IsExhausted() {
			return args.Timestamp, roachpb.NewError(roachpb.NewSendError(
				fmt.Sprintf("sending to all %d replicas failed", len(replicas)),
			))
		}

		args.Replica = transport.NextReplica()
		clientCtx, client, err := transport.NextInternalClient(ctx)
		if err != nil {
			log.VErrEventf(ctx, 2, "RPC error: %s", err)
			continue
		}

		stream, err := client.RangeFeed(clientCtx, &args)
		if err != nil {
			log.VErrEventf(ctx, 2, "RPC error: %s", err)
			continue
		}
		for {
			event, err := stream.Recv()
			if err == io.EOF {
				return args.Timestamp, nil
			}
			if err != nil {
				return args.Timestamp, roachpb.NewError(err)
			}
			switch t := event.GetValue().(type) {
			case *roachpb.RangeFeedCheckpoint:
				if t.Span.Contains(args.Span) {
					args.Timestamp.Forward(t.ResolvedTS)
				}
			case *roachpb.RangeFeedError:
				log.VErrEventf(ctx, 2, "RangeFeedError: %s", t.Error.GoError())
				return args.Timestamp, &t.Error
			}
			select {
			case eventCh <- event:
			case <-ctx.Done():
				return args.Timestamp, roachpb.NewError(ctx.Err())
			}
		}
	}
}
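
// Editorial sketch (not part of the original file): hlc.Timestamp.Forward is
// a monotonic max, which is why forwarding the request timestamp by each
// covering checkpoint's resolved timestamp (as singleRangeFeed does above)
// yields the highest safe restart time for the span, and why re-sending with
// that timestamp does not re-observe already-checkpointed history.
func exampleForwardIsMax() hlc.Timestamp {
	resume := hlc.Timestamp{WallTime: 100}
	resume.Forward(hlc.Timestamp{WallTime: 250}) // advances: 250 > 100
	resume.Forward(hlc.Timestamp{WallTime: 200}) // no-op: 200 < 250
	return resume // WallTime: 250
}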