github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_placeholder.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"fmt"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    17  	"github.com/google/btree"
    18  )
    19  
    20  // ReplicaPlaceholder represents a "lock" of a part of the keyspace on a given
    21  // *Store for the application of a (preemptive or Raft) snapshot. Placeholders
    22  // are kept synchronously in two places in (*Store).mu, namely the
    23  // replicaPlaceholders and replicaByKey maps, and exist only while the Raft
    24  // scheduler tries to apply raft.Ready containing a snapshot to some Replica.
    25  //
    26  // To see why placeholders are necessary, consider the case in which two
    27  // snapshots arrive at a Store, one for r1 and bounds [a,c) and the other for r2
    28  // and [b,c), and assume that the keyspace [a,c) is not associated to any
    29  // Replica on the receiving Store. This situation can occur because even though
    30  // "logically" the keyspace always shards cleanly into replicas, incoming
    31  // snapshots don't always originate from a mutually consistent version of this
    32  // sharding. For example, a range Q might split, creating a range R, but some
    33  // Store might be receiving a snapshot of Q before the split as well as a
    34  // replica of R (which postdates the split). Similar examples are possible with
    35  // merges as well as with arbitrarily complicated combinations of multiple
    36  // merges and splits.
    37  //
    38  // Without placeholders, the following interleaving of two concurrent Raft
    39  // scheduler goroutines g1 and g2 is possible for the above example:
    40  //
    41  // - g1: new raft.Ready for r1 wants to apply snapshot
    42  // - g1: check for conflicts with existing replicas: none found; [a,c) is empty
    43  // - g2: new raft.Ready for r2 wants to apply snapshot
    44  // - g2: check for conflicts with existing replicas: none found; [b,c) is empty
    45  // - g2: apply snapshot: writes replica for r2 to [b,c)
    46  // - g2: done
    47  // - g1: apply snapshot: writes replica for r1 to [a,c)
    48  // - boom: we now have two replicas on this store that overlap
    49  //
    50  // Placeholders avoid this problem because they provide a serialization point
    51  // between g1 and g2: When g1 checks for conflicts, it also checks for an
    52  // existing placeholder (inserting its own atomically when none found), so that
    53  // g2 would later fail the overlap check on g1's placeholder.
    54  //
    55  // Placeholders are removed by the goroutine that inserted them at the end of
    56  // the respective Raft cycle, so they usually live only for as long as it takes
    57  // to write the snapshot to disk. See (*Store).processRaftSnapshotRequest for
    58  // details.
    59  type ReplicaPlaceholder struct {
    60  	rangeDesc roachpb.RangeDescriptor
    61  }
    62  
    63  var _ KeyRange = &ReplicaPlaceholder{}
    64  
    65  // Desc returns the range Placeholder's descriptor.
    66  func (r *ReplicaPlaceholder) Desc() *roachpb.RangeDescriptor {
    67  	return &r.rangeDesc
    68  }
    69  
    70  func (r *ReplicaPlaceholder) startKey() roachpb.RKey {
    71  	return r.Desc().StartKey
    72  }
    73  
    74  // Less implements the btree.Item interface.
    75  func (r *ReplicaPlaceholder) Less(i btree.Item) bool {
    76  	return r.Desc().StartKey.Less(i.(rangeKeyItem).startKey())
    77  }
    78  
    79  func (r *ReplicaPlaceholder) String() string {
    80  	return fmt.Sprintf("range=%d [%s-%s) (placeholder)",
    81  		r.Desc().RangeID, r.rangeDesc.StartKey, r.rangeDesc.EndKey)
    82  }