github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/cmd_subsume.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package batcheval 12 13 import ( 14 "bytes" 15 "context" 16 17 "github.com/cockroachdb/cockroach/pkg/keys" 18 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" 19 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" 20 "github.com/cockroachdb/cockroach/pkg/roachpb" 21 "github.com/cockroachdb/cockroach/pkg/storage" 22 "github.com/cockroachdb/errors" 23 ) 24 25 func init() { 26 RegisterReadWriteCommand(roachpb.Subsume, declareKeysSubsume, Subsume) 27 } 28 29 func declareKeysSubsume( 30 _ *roachpb.RangeDescriptor, 31 header roachpb.Header, 32 req roachpb.Request, 33 latchSpans, _ *spanset.SpanSet, 34 ) { 35 // Subsume must not run concurrently with any other command. It declares a 36 // non-MVCC write over every addressable key in the range; this guarantees 37 // that it conflicts with any other command because every command must declare 38 // at least one addressable key. It does not, in fact, write any keys. 39 // 40 // We use the key bounds from the range descriptor in the request instead 41 // of the current range descriptor. Either would be fine because we verify 42 // that these match during the evaluation of the Subsume request. 43 args := req.(*roachpb.SubsumeRequest) 44 desc := args.RightDesc 45 latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ 46 Key: desc.StartKey.AsRawKey(), 47 EndKey: desc.EndKey.AsRawKey(), 48 }) 49 latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ 50 Key: keys.MakeRangeKeyPrefix(desc.StartKey), 51 EndKey: keys.MakeRangeKeyPrefix(desc.EndKey).PrefixEnd(), 52 }) 53 rangeIDPrefix := keys.MakeRangeIDReplicatedPrefix(desc.RangeID) 54 latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ 55 Key: rangeIDPrefix, 56 EndKey: rangeIDPrefix.PrefixEnd(), 57 }) 58 } 59 60 // Subsume freezes a range for merging with its left-hand neighbor. When called 61 // correctly, it provides important guarantees that ensure there is no moment in 62 // time where the ranges involved in the merge could both process commands for 63 // the same keys. 64 // 65 // Specifically, the receiving replica guarantees that: 66 // 67 // 1. it is the leaseholder at the time the request executes, 68 // 2. when it responds, there are no commands in flight with a timestamp 69 // greater than the FreezeStart timestamp provided in the response, 70 // 3. the MVCC statistics in the response reflect the latest writes, 71 // 4. it, and all future leaseholders for the range, will not process another 72 // command until they refresh their range descriptor with a consistent read 73 // from meta2, and 74 // 5. if it or any future leaseholder for the range finds that its range 75 // descriptor has been deleted, it self destructs. 76 // 77 // To achieve guarantees four and five, when issuing a Subsume request, the 78 // caller must have a merge transaction open that has already placed deletion 79 // intents on both the local and meta2 copy of the right-hand range descriptor. 80 // The intent on the meta2 allows the leaseholder to block until the merge 81 // transaction completes by performing a consistent read for its meta2 82 // descriptor. The intent on the local descriptor allows future leaseholders to 83 // efficiently check whether a merge is in progress by performing a read of its 84 // local descriptor after acquiring the lease. 85 // 86 // The period of time after intents have been placed but before the merge 87 // transaction is complete is called the merge's "critical phase". 88 func Subsume( 89 ctx context.Context, readWriter storage.ReadWriter, cArgs CommandArgs, resp roachpb.Response, 90 ) (result.Result, error) { 91 args := cArgs.Args.(*roachpb.SubsumeRequest) 92 reply := resp.(*roachpb.SubsumeResponse) 93 94 // Verify that the Subsume request was sent to the correct range and that 95 // the range's bounds have not changed during the merge transaction. 96 desc := cArgs.EvalCtx.Desc() 97 if !bytes.Equal(desc.StartKey, args.RightDesc.StartKey) || 98 !bytes.Equal(desc.EndKey, args.RightDesc.EndKey) { 99 return result.Result{}, errors.Errorf("RHS range bounds do not match: %s != %s", 100 args.RightDesc, desc) 101 } 102 103 // Sanity check that the requesting range is our left neighbor. The ordering 104 // of operations in the AdminMerge transaction should make it impossible for 105 // these ranges to be nonadjacent, but double check. 106 if !bytes.Equal(args.LeftDesc.EndKey, desc.StartKey) { 107 return result.Result{}, errors.Errorf("ranges are not adjacent: %s != %s", 108 args.LeftDesc.EndKey, desc.StartKey) 109 } 110 111 // Sanity check the caller has initiated a merge transaction by checking for 112 // a deletion intent on the local range descriptor. 113 descKey := keys.RangeDescriptorKey(desc.StartKey) 114 _, intent, err := storage.MVCCGet(ctx, readWriter, descKey, cArgs.Header.Timestamp, 115 storage.MVCCGetOptions{Inconsistent: true}) 116 if err != nil { 117 return result.Result{}, errors.Errorf("fetching local range descriptor: %s", err) 118 } else if intent == nil { 119 return result.Result{}, errors.New("range missing intent on its local descriptor") 120 } 121 val, _, err := storage.MVCCGetAsTxn(ctx, readWriter, descKey, cArgs.Header.Timestamp, intent.Txn) 122 if err != nil { 123 return result.Result{}, errors.Errorf("fetching local range descriptor as txn: %s", err) 124 } else if val != nil { 125 return result.Result{}, errors.New("non-deletion intent on local range descriptor") 126 } 127 128 // NOTE: the deletion intent on the range's meta2 descriptor is just as 129 // important to correctness as the deletion intent on the local descriptor, 130 // but the check is too expensive as it would involve a network roundtrip on 131 // most nodes. 132 133 reply.MVCCStats = cArgs.EvalCtx.GetMVCCStats() 134 reply.LeaseAppliedIndex = cArgs.EvalCtx.GetLeaseAppliedIndex() 135 reply.FreezeStart = cArgs.EvalCtx.Clock().Now() 136 137 return result.Result{ 138 Local: result.LocalResult{MaybeWatchForMerge: true}, 139 }, nil 140 }