github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_raft_helpers_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/util/log"
    19  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    20  	"github.com/cockroachdb/errors"
    21  	"go.etcd.io/etcd/raft"
    22  )
    23  
    24  type unreliableRaftHandlerFuncs struct {
    25  	// If non-nil, can return false to avoid dropping a msg to rangeID.
    26  	dropReq  func(*kvserver.RaftMessageRequest) bool
    27  	dropHB   func(*kvserver.RaftHeartbeat) bool
    28  	dropResp func(*kvserver.RaftMessageResponse) bool
    29  	// snapErr defaults to returning nil.
    30  	snapErr func(*kvserver.SnapshotRequest_Header) error
    31  }
    32  
    33  func noopRaftHandlerFuncs() unreliableRaftHandlerFuncs {
    34  	return unreliableRaftHandlerFuncs{
    35  		dropResp: func(*kvserver.RaftMessageResponse) bool {
    36  			return false
    37  		},
    38  		dropReq: func(*kvserver.RaftMessageRequest) bool {
    39  			return false
    40  		},
    41  		dropHB: func(*kvserver.RaftHeartbeat) bool {
    42  			return false
    43  		},
    44  	}
    45  }
    46  
    47  // unreliableRaftHandler drops all Raft messages that are addressed to the
    48  // specified rangeID, but lets all other messages through.
    49  type unreliableRaftHandler struct {
    50  	rangeID roachpb.RangeID
    51  	kvserver.RaftMessageHandler
    52  	unreliableRaftHandlerFuncs
    53  }
    54  
    55  func (h *unreliableRaftHandler) HandleRaftRequest(
    56  	ctx context.Context,
    57  	req *kvserver.RaftMessageRequest,
    58  	respStream kvserver.RaftMessageResponseStream,
    59  ) *roachpb.Error {
    60  	if len(req.Heartbeats)+len(req.HeartbeatResps) > 0 {
    61  		reqCpy := *req
    62  		req = &reqCpy
    63  		req.Heartbeats = h.filterHeartbeats(req.Heartbeats)
    64  		req.HeartbeatResps = h.filterHeartbeats(req.HeartbeatResps)
    65  		if len(req.Heartbeats)+len(req.HeartbeatResps) == 0 {
    66  			// Entirely filtered.
    67  			return nil
    68  		}
    69  	} else if req.RangeID == h.rangeID {
    70  		if h.dropReq == nil || h.dropReq(req) {
    71  			log.Infof(
    72  				ctx,
    73  				"dropping r%d Raft message %s",
    74  				req.RangeID,
    75  				raft.DescribeMessage(req.Message, func([]byte) string {
    76  					return "<omitted>"
    77  				}),
    78  			)
    79  
    80  			return nil
    81  		}
    82  	}
    83  	return h.RaftMessageHandler.HandleRaftRequest(ctx, req, respStream)
    84  }
    85  
    86  func (h *unreliableRaftHandler) filterHeartbeats(
    87  	hbs []kvserver.RaftHeartbeat,
    88  ) []kvserver.RaftHeartbeat {
    89  	if len(hbs) == 0 {
    90  		return hbs
    91  	}
    92  	var cpy []kvserver.RaftHeartbeat
    93  	for i := range hbs {
    94  		hb := &hbs[i]
    95  		if hb.RangeID != h.rangeID || (h.dropHB != nil && !h.dropHB(hb)) {
    96  			cpy = append(cpy, *hb)
    97  		}
    98  	}
    99  	return cpy
   100  }
   101  
   102  func (h *unreliableRaftHandler) HandleRaftResponse(
   103  	ctx context.Context, resp *kvserver.RaftMessageResponse,
   104  ) error {
   105  	if resp.RangeID == h.rangeID {
   106  		if h.dropResp == nil || h.dropResp(resp) {
   107  			return nil
   108  		}
   109  	}
   110  	return h.RaftMessageHandler.HandleRaftResponse(ctx, resp)
   111  }
   112  
   113  func (h *unreliableRaftHandler) HandleSnapshot(
   114  	header *kvserver.SnapshotRequest_Header, respStream kvserver.SnapshotResponseStream,
   115  ) error {
   116  	if header.RaftMessageRequest.RangeID == h.rangeID && h.snapErr != nil {
   117  		if err := h.snapErr(header); err != nil {
   118  			return err
   119  		}
   120  	}
   121  	return h.RaftMessageHandler.HandleSnapshot(header, respStream)
   122  }
   123  
   124  // mtcStoreRaftMessageHandler exists to allows a store to be stopped and
   125  // restarted while maintaining a partition using an unreliableRaftHandler.
   126  type mtcStoreRaftMessageHandler struct {
   127  	mtc      *multiTestContext
   128  	storeIdx int
   129  }
   130  
   131  func (h *mtcStoreRaftMessageHandler) HandleRaftRequest(
   132  	ctx context.Context,
   133  	req *kvserver.RaftMessageRequest,
   134  	respStream kvserver.RaftMessageResponseStream,
   135  ) *roachpb.Error {
   136  	store := h.mtc.Store(h.storeIdx)
   137  	if store == nil {
   138  		return roachpb.NewErrorf("store not found")
   139  	}
   140  	return store.HandleRaftRequest(ctx, req, respStream)
   141  }
   142  
   143  func (h *mtcStoreRaftMessageHandler) HandleRaftResponse(
   144  	ctx context.Context, resp *kvserver.RaftMessageResponse,
   145  ) error {
   146  	store := h.mtc.Store(h.storeIdx)
   147  	if store == nil {
   148  		return errors.New("store not found")
   149  	}
   150  	return store.HandleRaftResponse(ctx, resp)
   151  }
   152  
   153  func (h *mtcStoreRaftMessageHandler) HandleSnapshot(
   154  	header *kvserver.SnapshotRequest_Header, respStream kvserver.SnapshotResponseStream,
   155  ) error {
   156  	store := h.mtc.Store(h.storeIdx)
   157  	if store == nil {
   158  		return errors.New("store not found")
   159  	}
   160  	return store.HandleSnapshot(header, respStream)
   161  }
   162  
   163  // mtcPartitionedRange is a convenient abstraction to create a range on a node
   164  // in a multiTestContext which can be partitioned and unpartitioned.
   165  type mtcPartitionedRange struct {
   166  	rangeID roachpb.RangeID
   167  	mu      struct {
   168  		syncutil.RWMutex
   169  		partitionedNode     int
   170  		partitioned         bool
   171  		partitionedReplicas map[roachpb.ReplicaID]bool
   172  	}
   173  	handlers []kvserver.RaftMessageHandler
   174  }
   175  
   176  // setupPartitionedRange sets up an mtcPartitionedRange for the provided mtc,
   177  // rangeID, and node index in the mtc. The range is initially not partitioned.
   178  //
   179  // We're going to set up the cluster with partitioning so that we can
   180  // partition node p from the others. We do this by installing
   181  // unreliableRaftHandler listeners on all three Stores which we can enable
   182  // and disable with an atomic. The handler on the partitioned store filters
   183  // out all messages while the handler on the other two stores only filters
   184  // out messages from the partitioned store. When activated the configuration
   185  // looks like:
   186  //
   187  //           [p]
   188  //          x  x
   189  //         /    \
   190  //        x      x
   191  //      [*]<---->[*]
   192  //
   193  // The activated argument controls whether the partition is activated when this
   194  // function returns.
   195  //
   196  // If replicaID is zero then it is resolved by looking up the replica for the
   197  // partitionedNode of from the current range descriptor of rangeID.
   198  func setupPartitionedRange(
   199  	mtc *multiTestContext,
   200  	rangeID roachpb.RangeID,
   201  	replicaID roachpb.ReplicaID,
   202  	partitionedNode int,
   203  	activated bool,
   204  	funcs unreliableRaftHandlerFuncs,
   205  ) (*mtcPartitionedRange, error) {
   206  	handlers := make([]kvserver.RaftMessageHandler, 0, len(mtc.stores))
   207  	for i := range mtc.stores {
   208  		handlers = append(handlers, &mtcStoreRaftMessageHandler{
   209  			mtc:      mtc,
   210  			storeIdx: i,
   211  		})
   212  	}
   213  	return setupPartitionedRangeWithHandlers(mtc, rangeID, replicaID, partitionedNode, activated, handlers, funcs)
   214  }
   215  
   216  func setupPartitionedRangeWithHandlers(
   217  	mtc *multiTestContext,
   218  	rangeID roachpb.RangeID,
   219  	replicaID roachpb.ReplicaID,
   220  	partitionedNode int,
   221  	activated bool,
   222  	handlers []kvserver.RaftMessageHandler,
   223  	funcs unreliableRaftHandlerFuncs,
   224  ) (*mtcPartitionedRange, error) {
   225  	pr := &mtcPartitionedRange{
   226  		rangeID:  rangeID,
   227  		handlers: make([]kvserver.RaftMessageHandler, 0, len(handlers)),
   228  	}
   229  	pr.mu.partitioned = activated
   230  	pr.mu.partitionedNode = partitionedNode
   231  	if replicaID == 0 {
   232  		partRepl, err := mtc.Store(partitionedNode).GetReplica(rangeID)
   233  		if err != nil {
   234  			return nil, err
   235  		}
   236  		partReplDesc, err := partRepl.GetReplicaDescriptor()
   237  		if err != nil {
   238  			return nil, err
   239  		}
   240  		replicaID = partReplDesc.ReplicaID
   241  	}
   242  	pr.mu.partitionedReplicas = map[roachpb.ReplicaID]bool{
   243  		replicaID: true,
   244  	}
   245  	for i := range mtc.stores {
   246  		s := i
   247  		h := &unreliableRaftHandler{
   248  			rangeID:                    rangeID,
   249  			RaftMessageHandler:         handlers[s],
   250  			unreliableRaftHandlerFuncs: funcs,
   251  		}
   252  		// Only filter messages from the partitioned store on the other
   253  		// two stores.
   254  		if h.dropReq == nil {
   255  			h.dropReq = func(req *kvserver.RaftMessageRequest) bool {
   256  				pr.mu.RLock()
   257  				defer pr.mu.RUnlock()
   258  				return pr.mu.partitioned &&
   259  					(s == pr.mu.partitionedNode ||
   260  						req.FromReplica.StoreID == roachpb.StoreID(pr.mu.partitionedNode)+1)
   261  			}
   262  		}
   263  		if h.dropHB == nil {
   264  			h.dropHB = func(hb *kvserver.RaftHeartbeat) bool {
   265  				pr.mu.RLock()
   266  				defer pr.mu.RUnlock()
   267  				if !pr.mu.partitioned {
   268  					return false
   269  				}
   270  				if s == partitionedNode {
   271  					return true
   272  				}
   273  				return pr.mu.partitionedReplicas[hb.FromReplicaID]
   274  			}
   275  		}
   276  		if h.snapErr == nil {
   277  			h.snapErr = func(header *kvserver.SnapshotRequest_Header) error {
   278  				pr.mu.RLock()
   279  				defer pr.mu.RUnlock()
   280  				if !pr.mu.partitioned {
   281  					return nil
   282  				}
   283  				if pr.mu.partitionedReplicas[header.RaftMessageRequest.ToReplica.ReplicaID] {
   284  					return errors.New("partitioned")
   285  				}
   286  				return nil
   287  			}
   288  		}
   289  		pr.handlers = append(pr.handlers, h)
   290  		mtc.transport.Listen(mtc.stores[s].Ident.StoreID, h)
   291  	}
   292  	return pr, nil
   293  }
   294  
   295  func (pr *mtcPartitionedRange) deactivate() { pr.set(false) }
   296  func (pr *mtcPartitionedRange) activate()   { pr.set(true) }
   297  func (pr *mtcPartitionedRange) set(active bool) {
   298  	pr.mu.Lock()
   299  	defer pr.mu.Unlock()
   300  	pr.mu.partitioned = active
   301  }
   302  
   303  func (pr *mtcPartitionedRange) addReplica(replicaID roachpb.ReplicaID) {
   304  	pr.mu.Lock()
   305  	defer pr.mu.Unlock()
   306  	pr.mu.partitionedReplicas[replicaID] = true
   307  }
   308  
   309  func (pr *mtcPartitionedRange) extend(
   310  	mtc *multiTestContext,
   311  	rangeID roachpb.RangeID,
   312  	replicaID roachpb.ReplicaID,
   313  	partitionedNode int,
   314  	activated bool,
   315  	funcs unreliableRaftHandlerFuncs,
   316  ) (*mtcPartitionedRange, error) {
   317  	return setupPartitionedRangeWithHandlers(mtc, rangeID, replicaID, partitionedNode, activated, pr.handlers, funcs)
   318  }