github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_raft_helpers_test.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
	"go.etcd.io/etcd/raft"
)

type unreliableRaftHandlerFuncs struct {
	// If non-nil, can return false to avoid dropping a msg to rangeID.
	dropReq  func(*kvserver.RaftMessageRequest) bool
	dropHB   func(*kvserver.RaftHeartbeat) bool
	dropResp func(*kvserver.RaftMessageResponse) bool
	// snapErr defaults to returning nil.
	snapErr func(*kvserver.SnapshotRequest_Header) error
}

func noopRaftHandlerFuncs() unreliableRaftHandlerFuncs {
	return unreliableRaftHandlerFuncs{
		dropResp: func(*kvserver.RaftMessageResponse) bool {
			return false
		},
		dropReq: func(*kvserver.RaftMessageRequest) bool {
			return false
		},
		dropHB: func(*kvserver.RaftHeartbeat) bool {
			return false
		},
	}
}

// unreliableRaftHandler drops all Raft messages that are addressed to the
// specified rangeID, but lets all other messages through.
type unreliableRaftHandler struct {
	rangeID roachpb.RangeID
	kvserver.RaftMessageHandler
	unreliableRaftHandlerFuncs
}
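// installBlackholeHandler is an illustrative sketch, not part of the original
// file, showing how a test might wire an unreliableRaftHandler in front of a
// store so that every Raft message for rangeID is dropped. Leaving
// unreliableRaftHandlerFuncs zero-valued keeps dropReq/dropHB/dropResp nil,
// so the handler drops requests, heartbeats, and responses for the range
// unconditionally (snapshots still pass, since snapErr defaults to nil). The
// mtc value is assumed to come from the surrounding test.
func installBlackholeHandler(mtc *multiTestContext, storeIdx int, rangeID roachpb.RangeID) {
	// Stores implement kvserver.RaftMessageHandler, so the wrapper can
	// delegate all traffic for other ranges straight to the store.
	mtc.transport.Listen(mtc.stores[storeIdx].Ident.StoreID, &unreliableRaftHandler{
		rangeID:            rangeID,
		RaftMessageHandler: mtc.stores[storeIdx],
	})
}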
func (h *unreliableRaftHandler) HandleRaftRequest(
	ctx context.Context,
	req *kvserver.RaftMessageRequest,
	respStream kvserver.RaftMessageResponseStream,
) *roachpb.Error {
	if len(req.Heartbeats)+len(req.HeartbeatResps) > 0 {
		reqCpy := *req
		req = &reqCpy
		req.Heartbeats = h.filterHeartbeats(req.Heartbeats)
		req.HeartbeatResps = h.filterHeartbeats(req.HeartbeatResps)
		if len(req.Heartbeats)+len(req.HeartbeatResps) == 0 {
			// Entirely filtered.
			return nil
		}
	} else if req.RangeID == h.rangeID {
		if h.dropReq == nil || h.dropReq(req) {
			log.Infof(
				ctx,
				"dropping r%d Raft message %s",
				req.RangeID,
				raft.DescribeMessage(req.Message, func([]byte) string {
					return "<omitted>"
				}),
			)

			return nil
		}
	}
	return h.RaftMessageHandler.HandleRaftRequest(ctx, req, respStream)
}

func (h *unreliableRaftHandler) filterHeartbeats(
	hbs []kvserver.RaftHeartbeat,
) []kvserver.RaftHeartbeat {
	if len(hbs) == 0 {
		return hbs
	}
	var cpy []kvserver.RaftHeartbeat
	for i := range hbs {
		hb := &hbs[i]
		if hb.RangeID != h.rangeID || (h.dropHB != nil && !h.dropHB(hb)) {
			cpy = append(cpy, *hb)
		}
	}
	return cpy
}

func (h *unreliableRaftHandler) HandleRaftResponse(
	ctx context.Context, resp *kvserver.RaftMessageResponse,
) error {
	if resp.RangeID == h.rangeID {
		if h.dropResp == nil || h.dropResp(resp) {
			return nil
		}
	}
	return h.RaftMessageHandler.HandleRaftResponse(ctx, resp)
}

func (h *unreliableRaftHandler) HandleSnapshot(
	header *kvserver.SnapshotRequest_Header, respStream kvserver.SnapshotResponseStream,
) error {
	if header.RaftMessageRequest.RangeID == h.rangeID && h.snapErr != nil {
		if err := h.snapErr(header); err != nil {
			return err
		}
	}
	return h.RaftMessageHandler.HandleSnapshot(header, respStream)
}

// mtcStoreRaftMessageHandler exists to allow a store to be stopped and
// restarted while maintaining a partition using an unreliableRaftHandler.
type mtcStoreRaftMessageHandler struct {
	mtc      *multiTestContext
	storeIdx int
}

func (h *mtcStoreRaftMessageHandler) HandleRaftRequest(
	ctx context.Context,
	req *kvserver.RaftMessageRequest,
	respStream kvserver.RaftMessageResponseStream,
) *roachpb.Error {
	store := h.mtc.Store(h.storeIdx)
	if store == nil {
		return roachpb.NewErrorf("store not found")
	}
	return store.HandleRaftRequest(ctx, req, respStream)
}

func (h *mtcStoreRaftMessageHandler) HandleRaftResponse(
	ctx context.Context, resp *kvserver.RaftMessageResponse,
) error {
	store := h.mtc.Store(h.storeIdx)
	if store == nil {
		return errors.New("store not found")
	}
	return store.HandleRaftResponse(ctx, resp)
}

func (h *mtcStoreRaftMessageHandler) HandleSnapshot(
	header *kvserver.SnapshotRequest_Header, respStream kvserver.SnapshotResponseStream,
) error {
	store := h.mtc.Store(h.storeIdx)
	if store == nil {
		return errors.New("store not found")
	}
	return store.HandleSnapshot(header, respStream)
}
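// restartWhilePartitioned is an illustrative sketch, not part of the original
// file, of the pattern mtcStoreRaftMessageHandler enables: because the
// handler re-resolves the store from the mtc by index on every message, a
// partition installed on top of it survives a store restart. The stopStore
// and restartStore helpers are assumed to exist on the surrounding
// multiTestContext test harness.
func restartWhilePartitioned(mtc *multiTestContext, storeIdx int) {
	mtc.stopStore(storeIdx)    // while down, the handler returns "store not found"
	mtc.restartStore(storeIdx) // the handler transparently picks up the new store
}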
// mtcPartitionedRange is a convenient abstraction to create a range on a node
// in a multiTestContext which can be partitioned and unpartitioned.
type mtcPartitionedRange struct {
	rangeID roachpb.RangeID
	mu      struct {
		syncutil.RWMutex
		partitionedNode     int
		partitioned         bool
		partitionedReplicas map[roachpb.ReplicaID]bool
	}
	handlers []kvserver.RaftMessageHandler
}

// setupPartitionedRange sets up an mtcPartitionedRange for the provided mtc,
// rangeID, and node index in the mtc. The range starts out partitioned only
// if activated is true.
//
// We're going to set up the cluster with partitioning so that we can
// partition node p from the others. We do this by installing
// unreliableRaftHandler listeners on all of the Stores, which we can enable
// and disable via a mutex-protected flag. The handler on the partitioned
// store filters out all messages while the handler on the other two stores
// only filters out messages from the partitioned store. When activated the
// configuration looks like:
//
//            [p]
//           x  x
//          /    \
//         x      x
//       [*]<---->[*]
//
// The activated argument controls whether the partition is activated when this
// function returns.
//
// If replicaID is zero then it is resolved by looking up the replica for the
// partitionedNode from the current range descriptor of rangeID.
func setupPartitionedRange(
	mtc *multiTestContext,
	rangeID roachpb.RangeID,
	replicaID roachpb.ReplicaID,
	partitionedNode int,
	activated bool,
	funcs unreliableRaftHandlerFuncs,
) (*mtcPartitionedRange, error) {
	handlers := make([]kvserver.RaftMessageHandler, 0, len(mtc.stores))
	for i := range mtc.stores {
		handlers = append(handlers, &mtcStoreRaftMessageHandler{
			mtc:      mtc,
			storeIdx: i,
		})
	}
	return setupPartitionedRangeWithHandlers(mtc, rangeID, replicaID, partitionedNode, activated, handlers, funcs)
}
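// examplePartitionLifecycle is an illustrative sketch, not part of the
// original file, of driving a partition from a test. Passing a zero
// unreliableRaftHandlerFuncs lets setupPartitionedRangeWithHandlers install
// its own partition-aware dropReq/dropHB/snapErr (a non-nil func from
// noopRaftHandlerFuncs would override them and defeat the partition), and
// passing replicaID == 0 resolves the replica from the range descriptor as
// documented above.
func examplePartitionLifecycle(mtc *multiTestContext, rangeID roachpb.RangeID) error {
	pr, err := setupPartitionedRange(
		mtc, rangeID,
		0 /* replicaID: resolve from the range descriptor */,
		0 /* partitionedNode */,
		false /* activated: start healed */,
		unreliableRaftHandlerFuncs{},
	)
	if err != nil {
		return err
	}
	pr.activate()   // node 0 stops exchanging Raft traffic for rangeID
	pr.deactivate() // heal the partition again
	return nil
}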
func setupPartitionedRangeWithHandlers(
	mtc *multiTestContext,
	rangeID roachpb.RangeID,
	replicaID roachpb.ReplicaID,
	partitionedNode int,
	activated bool,
	handlers []kvserver.RaftMessageHandler,
	funcs unreliableRaftHandlerFuncs,
) (*mtcPartitionedRange, error) {
	pr := &mtcPartitionedRange{
		rangeID:  rangeID,
		handlers: make([]kvserver.RaftMessageHandler, 0, len(handlers)),
	}
	pr.mu.partitioned = activated
	pr.mu.partitionedNode = partitionedNode
	if replicaID == 0 {
		partRepl, err := mtc.Store(partitionedNode).GetReplica(rangeID)
		if err != nil {
			return nil, err
		}
		partReplDesc, err := partRepl.GetReplicaDescriptor()
		if err != nil {
			return nil, err
		}
		replicaID = partReplDesc.ReplicaID
	}
	pr.mu.partitionedReplicas = map[roachpb.ReplicaID]bool{
		replicaID: true,
	}
	for i := range mtc.stores {
		s := i
		h := &unreliableRaftHandler{
			rangeID:                    rangeID,
			RaftMessageHandler:         handlers[s],
			unreliableRaftHandlerFuncs: funcs,
		}
		// Only filter messages from the partitioned store on the other
		// two stores.
		if h.dropReq == nil {
			h.dropReq = func(req *kvserver.RaftMessageRequest) bool {
				pr.mu.RLock()
				defer pr.mu.RUnlock()
				return pr.mu.partitioned &&
					(s == pr.mu.partitionedNode ||
						req.FromReplica.StoreID == roachpb.StoreID(pr.mu.partitionedNode)+1)
			}
		}
		if h.dropHB == nil {
			h.dropHB = func(hb *kvserver.RaftHeartbeat) bool {
				pr.mu.RLock()
				defer pr.mu.RUnlock()
				if !pr.mu.partitioned {
					return false
				}
				if s == partitionedNode {
					return true
				}
				return pr.mu.partitionedReplicas[hb.FromReplicaID]
			}
		}
		if h.snapErr == nil {
			h.snapErr = func(header *kvserver.SnapshotRequest_Header) error {
				pr.mu.RLock()
				defer pr.mu.RUnlock()
				if !pr.mu.partitioned {
					return nil
				}
				if pr.mu.partitionedReplicas[header.RaftMessageRequest.ToReplica.ReplicaID] {
					return errors.New("partitioned")
				}
				return nil
			}
		}
		pr.handlers = append(pr.handlers, h)
		mtc.transport.Listen(mtc.stores[s].Ident.StoreID, h)
	}
	return pr, nil
}

func (pr *mtcPartitionedRange) deactivate() { pr.set(false) }
func (pr *mtcPartitionedRange) activate()   { pr.set(true) }
func (pr *mtcPartitionedRange) set(active bool) {
	pr.mu.Lock()
	defer pr.mu.Unlock()
	pr.mu.partitioned = active
}

func (pr *mtcPartitionedRange) addReplica(replicaID roachpb.ReplicaID) {
	pr.mu.Lock()
	defer pr.mu.Unlock()
	pr.mu.partitionedReplicas[replicaID] = true
}

func (pr *mtcPartitionedRange) extend(
	mtc *multiTestContext,
	rangeID roachpb.RangeID,
	replicaID roachpb.ReplicaID,
	partitionedNode int,
	activated bool,
	funcs unreliableRaftHandlerFuncs,
) (*mtcPartitionedRange, error) {
	return setupPartitionedRangeWithHandlers(mtc, rangeID, replicaID, partitionedNode, activated, pr.handlers, funcs)
}
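// extendAfterSplit is an illustrative sketch, not part of the original file:
// after a split, extend an existing partition so the new right-hand range is
// partitioned the same way. extend reuses pr.handlers, so the new
// unreliableRaftHandler wraps the handlers already listening on each store.
// rhsRangeID is an assumed name for the post-split range's ID, and the
// partitioned node index is assumed to match the original partition.
func extendAfterSplit(
	mtc *multiTestContext, pr *mtcPartitionedRange, rhsRangeID roachpb.RangeID,
) (*mtcPartitionedRange, error) {
	return pr.extend(
		mtc, rhsRangeID,
		0 /* replicaID: resolve from the descriptor */,
		0 /* partitionedNode */,
		true /* activated */,
		unreliableRaftHandlerFuncs{},
	)
}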