github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/split_trigger_helper.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/util/log"
    19  	"go.etcd.io/etcd/raft/raftpb"
    20  )
    21  
// maxDelaySplitTriggerTicks is the tick count above which maybeDropMsgApp stops
// dropping MsgApps on behalf of a pending split trigger: once the tick value
// reported by msgAppDropper.Args exceeds this limit, the message is let
// through (and a warning is logged) instead of being dropped.
const maxDelaySplitTriggerTicks = 100
    23  
// replicaMsgAppDropper is a Replica viewed through the msgAppDropper method
// set. Its methods convert the receiver back to *Replica to read the replica's
// state and to reach its store.
type replicaMsgAppDropper Replica
    25  
    26  func (rd *replicaMsgAppDropper) Args() (initialized bool, ticks int) {
    27  	r := (*Replica)(rd)
    28  	r.mu.RLock()
    29  	initialized = r.isInitializedRLocked()
    30  	ticks = r.mu.ticks
    31  	r.mu.RUnlock()
    32  	return initialized, ticks
    33  }
    34  
    35  func (rd *replicaMsgAppDropper) ShouldDrop(startKey roachpb.RKey) (fmt.Stringer, bool) {
    36  	lhsRepl := (*Replica)(rd).store.LookupReplica(startKey)
    37  	if lhsRepl == nil {
    38  		return nil, false
    39  	}
    40  	lhsRepl.store.gcQueue.AddAsync(context.Background(), lhsRepl, replicaGCPriorityDefault)
    41  	return lhsRepl, true
    42  }
    43  
// msgAppDropper abstracts the replica state and store lookup that
// maybeDropMsgApp needs in order to decide whether to drop a MsgApp.
type msgAppDropper interface {
	// Args returns whether the recipient replica is initialized and its
	// current tick count.
	Args() (initialized bool, ticks int)
	// ShouldDrop looks up the given start key. The boolean reports whether a
	// replica was found for it (making the message a candidate for being
	// dropped); the Stringer describes that replica and is used for logging.
	ShouldDrop(key roachpb.RKey) (fmt.Stringer, bool)
}
    48  
    49  // maybeDropMsgApp returns true if the incoming Raft message should be dropped.
    50  // It does so if the recipient replica is uninitialized (i.e. has no state) and
    51  // is waiting for a split trigger to apply,in which case  delivering the message
    52  // in this situation would result in an unnecessary Raft snapshot: the MsgApp
    53  // would be rejected and the rejection would prompt the leader to send a
    54  // snapshot, while the split trigger would likely populate the replica "for
    55  // free". However, there are some situations in which this is not the case (all
    56  // taken into account by this method by allowing the MsgApp through).
    57  func maybeDropMsgApp(
    58  	ctx context.Context, r msgAppDropper, msg *raftpb.Message, startKey roachpb.RKey,
    59  ) (drop bool) {
    60  	// Run the cheapest check first. If the leader doesn't think this replica is
    61  	// probing, it won't set msg.Context (the common case).
    62  	// Note that startKey could be of length zero (not nil) if the sender is a
    63  	// replica of the first range.
    64  	if msg.Type != raftpb.MsgApp || startKey == nil {
    65  		return false
    66  	}
    67  
    68  	// The leader doesn't know our state, so it injected its start key into the
    69  	// message via msg.Context. Check if this replica might be waiting for a
    70  	// split trigger. The first condition for that is not knowing the key
    71  	// bounds, i.e. not being initialized.
    72  	initialized, ticks := r.Args()
    73  
    74  	if initialized {
    75  		return false
    76  	}
    77  
    78  	// The goal is to find out if this replica is waiting for a split trigger.
    79  	// We do so by looking up the start key in the local store. If we find a
    80  	// replica for the start key, we know that that replica is in theory going
    81  	// to apply the split trigger and populate the right hand side (i.e. this
    82  	// replica):
    83  	//
    84  	// sender  (leader)    [a--lhs--b)[b---rhs----c)
    85  	//                                             \
    86  	//                                              \
    87  	//                                            (1)\ MsgApp (startKey='b')
    88  	//                                                \
    89  	//                                                 v
    90  	// recipient           [a----------lhs--------c) (this uninitialized replica)
    91  	//                                 ĘŚ                /
    92  	//                                  \______________/ (2)
    93  	//                                         'b'
    94  	//
    95  	// However, it's also possible that the left hand side has been rebalanced
    96  	// away and is going to be GC'ed soon; queue a check to make sure this would
    97  	// happen ASAP. (The leader will probe this replica only once per heartbeat
    98  	// interval, so we're not going to queue these checks at some high rate).
    99  	//
   100  	// New replicas only get created through splits or rebalances, so if we
   101  	// don't find a left hand side, it was either garbage collected after having
   102  	// been removed from the store (see the above comment), or there wasn't a
   103  	// split in the first case and this replica was instead created through an
   104  	// up-replication for which the preemptive snapshot had been lost (i.e.
   105  	// accidentally GC'ed before the replication change succeeded).
   106  	//
   107  	// Note that there's a subtle case in which the left hand side is caught up
   108  	// across the split trigger via a snapshot. In that case, since we're looking
   109  	// up the start key of the right-hand side, we have the following picture:
   110  	//
   111  	// sender  (leader)    [a--lhs--b)[b---rhs----c)
   112  	//                                             \
   113  	//                                              \
   114  	//                                            (1)\ MsgApp (startKey='b')
   115  	//                                                \
   116  	//                                                 v
   117  	// recipient           [a--lhs--b)               (this uninitialized replica)
   118  	//
   119  	// Trying to look up the replica for 'b', we'd come up empty and deliver the
   120  	// message, resulting in a snapshot, as intended.
   121  	//
   122  	// Note that the invariant that the start key points at a replica that will
   123  	// definitely apply the split trigger holds even if the left-hand range
   124  	// carries out splits (as that doesn't change its start key) or gets merged
   125  	// away (as this entails either a removal of the follower's replica during
   126  	// colocation, or waiting for the follower to have caught up which implies
   127  	// executing all pending split triggers).
   128  
   129  	verbose := verboseRaftLoggingEnabled()
   130  
   131  	// NB: the caller is likely holding r.raftMu, but that's OK according to
   132  	// the lock order. We're not allowed to hold r.mu, but we don't.
   133  	lhsRepl, drop := r.ShouldDrop(startKey)
   134  	if !drop {
   135  		return false
   136  	}
   137  
   138  	if verbose {
   139  		log.Infof(ctx, "start key is contained in replica %v", lhsRepl)
   140  	}
   141  	if ticks > maxDelaySplitTriggerTicks {
   142  		// This is an escape hatch in case there are other scenarios (missed in
   143  		// the above analysis) in which a split trigger just isn't coming. If
   144  		// there are, the idea is that we notice this log message and improve
   145  		// the heuristics.
   146  		log.Warningf(
   147  			ctx,
   148  			"would have dropped incoming MsgApp to wait for split trigger, "+
   149  				"but allowing due to %d (>%d) ticks",
   150  			ticks, maxDelaySplitTriggerTicks)
   151  		return false
   152  	}
   153  	if verbose {
   154  		log.Infof(ctx, "dropping MsgApp at index %d to wait for split trigger", msg.Index)
   155  	}
   156  	return true
   157  }