github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_placeholder.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/google/btree"
)

// ReplicaPlaceholder represents a "lock" of a part of the keyspace on a given
// *Store for the application of a (preemptive or Raft) snapshot. Placeholders
// are kept synchronously in two places in (*Store).mu, namely the
// replicaPlaceholders and replicasByKey maps, and exist only while the Raft
// scheduler tries to apply raft.Ready containing a snapshot to some Replica.
//
// To see why placeholders are necessary, consider the case in which two
// snapshots arrive at a Store, one for r1 and bounds [a,c) and the other for r2
// and [b,c), and assume that the keyspace [a,c) is not associated with any
// Replica on the receiving Store. This situation can occur because even though
// "logically" the keyspace always shards cleanly into replicas, incoming
// snapshots don't always originate from a mutually consistent version of this
// sharding. For example, a range Q might split, creating a range R, but some
// Store might be receiving a snapshot of Q taken before the split as well as a
// replica of R (which postdates the split). Similar examples are possible with
// merges as well as with arbitrarily complicated combinations of multiple
// merges and splits.
//
// Without placeholders, the following interleaving of two concurrent Raft
// scheduler goroutines g1 and g2 is possible for the above example:
//
// - g1: new raft.Ready for r1 wants to apply snapshot
// - g1: check for conflicts with existing replicas: none found; [a,c) is empty
// - g2: new raft.Ready for r2 wants to apply snapshot
// - g2: check for conflicts with existing replicas: none found; [b,c) is empty
// - g2: apply snapshot: writes replica for r2 to [b,c)
// - g2: done
// - g1: apply snapshot: writes replica for r1 to [a,c)
// - boom: we now have two replicas on this store that overlap
//
// Placeholders avoid this problem because they provide a serialization point
// between g1 and g2: when g1 checks for conflicts, it also checks for an
// existing placeholder (atomically inserting its own if none is found), so
// that g2 later fails the overlap check on g1's placeholder. A sketch of this
// check-and-insert pattern appears at the end of this file.
//
// Placeholders are removed by the goroutine that inserted them at the end of
// the respective Raft cycle, so they usually live only for as long as it takes
// to write the snapshot to disk. See (*Store).processRaftSnapshotRequest for
// details.
type ReplicaPlaceholder struct {
	rangeDesc roachpb.RangeDescriptor
}

var _ KeyRange = &ReplicaPlaceholder{}

// Desc returns the placeholder's range descriptor.
func (r *ReplicaPlaceholder) Desc() *roachpb.RangeDescriptor {
	return &r.rangeDesc
}

func (r *ReplicaPlaceholder) startKey() roachpb.RKey {
	return r.Desc().StartKey
}

// Less implements the btree.Item interface.
func (r *ReplicaPlaceholder) Less(i btree.Item) bool {
	return r.Desc().StartKey.Less(i.(rangeKeyItem).startKey())
}

func (r *ReplicaPlaceholder) String() string {
	return fmt.Sprintf("range=%d [%s-%s) (placeholder)",
		r.Desc().RangeID, r.rangeDesc.StartKey, r.rangeDesc.EndKey)
}
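
// What follows is an editorial sketch, not part of the original file: a
// minimal, hypothetical helper illustrating the serialization point that the
// comment on ReplicaPlaceholder describes. The real check-and-insert lives in
// *Store methods operating under (*Store).mu; tryInsertPlaceholderExample is
// an invented name, and the code assumes that the caller holds a store-wide
// mutex, that entries already in the btree never overlap one another, and
// that KeyRange exposes Desc() as suggested by the assertion above.
func tryInsertPlaceholderExample(
	replicasByKey *btree.BTree, placeholder *ReplicaPlaceholder,
) bool {
	desc := placeholder.Desc()
	overlaps := false

	// An existing entry [s,e) overlaps the placeholder's bounds [a,c) iff
	// s < c and a < e. First probe: the leftmost entry with s >= a overlaps
	// iff its start key lies inside [a,c); later entries start even further
	// right, so only this one needs checking.
	replicasByKey.AscendGreaterOrEqual(placeholder, func(i btree.Item) bool {
		overlaps = i.(KeyRange).Desc().StartKey.Less(desc.EndKey)
		return false // stop after the first entry
	})

	// Second probe: at this point no entry starts in [a,c), so the first
	// entry seen when descending from a starts strictly left of a. Existing
	// entries don't overlap one another, so only this rightmost one can
	// reach past a; it overlaps the placeholder iff a < e.
	if !overlaps {
		replicasByKey.DescendLessOrEqual(placeholder, func(i btree.Item) bool {
			overlaps = desc.StartKey.Less(i.(KeyRange).Desc().EndKey)
			return false // stop after the rightmost entry left of a
		})
	}

	if overlaps {
		// Another replica or placeholder holds part of [a,c); the caller
		// must back off instead of applying its snapshot.
		return false
	}
	replicasByKey.ReplaceOrInsert(placeholder)
	return true
}

// In this sketch, the goroutine that wins the race later undoes its
// reservation with replicasByKey.Delete(placeholder) once its Raft cycle
// completes, mirroring the removal described in the comment above.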