github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/cmd_subsume.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package batcheval
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/keys"
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/storage"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
// init hands the Subsume method to RegisterReadWriteCommand together with its
// latch-declaration function (declareKeysSubsume) and its evaluation function
// (Subsume).
func init() {
	RegisterReadWriteCommand(roachpb.Subsume, declareKeysSubsume, Subsume)
}
    28  
    29  func declareKeysSubsume(
    30  	_ *roachpb.RangeDescriptor,
    31  	header roachpb.Header,
    32  	req roachpb.Request,
    33  	latchSpans, _ *spanset.SpanSet,
    34  ) {
    35  	// Subsume must not run concurrently with any other command. It declares a
    36  	// non-MVCC write over every addressable key in the range; this guarantees
    37  	// that it conflicts with any other command because every command must declare
    38  	// at least one addressable key. It does not, in fact, write any keys.
    39  	//
    40  	// We use the key bounds from the range descriptor in the request instead
    41  	// of the current range descriptor. Either would be fine because we verify
    42  	// that these match during the evaluation of the Subsume request.
    43  	args := req.(*roachpb.SubsumeRequest)
    44  	desc := args.RightDesc
    45  	latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
    46  		Key:    desc.StartKey.AsRawKey(),
    47  		EndKey: desc.EndKey.AsRawKey(),
    48  	})
    49  	latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
    50  		Key:    keys.MakeRangeKeyPrefix(desc.StartKey),
    51  		EndKey: keys.MakeRangeKeyPrefix(desc.EndKey).PrefixEnd(),
    52  	})
    53  	rangeIDPrefix := keys.MakeRangeIDReplicatedPrefix(desc.RangeID)
    54  	latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
    55  		Key:    rangeIDPrefix,
    56  		EndKey: rangeIDPrefix.PrefixEnd(),
    57  	})
    58  }
    59  
    60  // Subsume freezes a range for merging with its left-hand neighbor. When called
    61  // correctly, it provides important guarantees that ensure there is no moment in
    62  // time where the ranges involved in the merge could both process commands for
    63  // the same keys.
    64  //
    65  // Specifically, the receiving replica guarantees that:
    66  //
    67  //   1. it is the leaseholder at the time the request executes,
    68  //   2. when it responds, there are no commands in flight with a timestamp
    69  //      greater than the FreezeStart timestamp provided in the response,
    70  //   3. the MVCC statistics in the response reflect the latest writes,
    71  //   4. it, and all future leaseholders for the range, will not process another
    72  //      command until they refresh their range descriptor with a consistent read
    73  //      from meta2, and
    74  //   5. if it or any future leaseholder for the range finds that its range
    75  //      descriptor has been deleted, it self destructs.
    76  //
    77  // To achieve guarantees four and five, when issuing a Subsume request, the
    78  // caller must have a merge transaction open that has already placed deletion
    79  // intents on both the local and meta2 copy of the right-hand range descriptor.
    80  // The intent on the meta2 allows the leaseholder to block until the merge
    81  // transaction completes by performing a consistent read for its meta2
    82  // descriptor. The intent on the local descriptor allows future leaseholders to
    83  // efficiently check whether a merge is in progress by performing a read of its
    84  // local descriptor after acquiring the lease.
    85  //
    86  // The period of time after intents have been placed but before the merge
    87  // transaction is complete is called the merge's "critical phase".
    88  func Subsume(
    89  	ctx context.Context, readWriter storage.ReadWriter, cArgs CommandArgs, resp roachpb.Response,
    90  ) (result.Result, error) {
    91  	args := cArgs.Args.(*roachpb.SubsumeRequest)
    92  	reply := resp.(*roachpb.SubsumeResponse)
    93  
    94  	// Verify that the Subsume request was sent to the correct range and that
    95  	// the range's bounds have not changed during the merge transaction.
    96  	desc := cArgs.EvalCtx.Desc()
    97  	if !bytes.Equal(desc.StartKey, args.RightDesc.StartKey) ||
    98  		!bytes.Equal(desc.EndKey, args.RightDesc.EndKey) {
    99  		return result.Result{}, errors.Errorf("RHS range bounds do not match: %s != %s",
   100  			args.RightDesc, desc)
   101  	}
   102  
   103  	// Sanity check that the requesting range is our left neighbor. The ordering
   104  	// of operations in the AdminMerge transaction should make it impossible for
   105  	// these ranges to be nonadjacent, but double check.
   106  	if !bytes.Equal(args.LeftDesc.EndKey, desc.StartKey) {
   107  		return result.Result{}, errors.Errorf("ranges are not adjacent: %s != %s",
   108  			args.LeftDesc.EndKey, desc.StartKey)
   109  	}
   110  
   111  	// Sanity check the caller has initiated a merge transaction by checking for
   112  	// a deletion intent on the local range descriptor.
   113  	descKey := keys.RangeDescriptorKey(desc.StartKey)
   114  	_, intent, err := storage.MVCCGet(ctx, readWriter, descKey, cArgs.Header.Timestamp,
   115  		storage.MVCCGetOptions{Inconsistent: true})
   116  	if err != nil {
   117  		return result.Result{}, errors.Errorf("fetching local range descriptor: %s", err)
   118  	} else if intent == nil {
   119  		return result.Result{}, errors.New("range missing intent on its local descriptor")
   120  	}
   121  	val, _, err := storage.MVCCGetAsTxn(ctx, readWriter, descKey, cArgs.Header.Timestamp, intent.Txn)
   122  	if err != nil {
   123  		return result.Result{}, errors.Errorf("fetching local range descriptor as txn: %s", err)
   124  	} else if val != nil {
   125  		return result.Result{}, errors.New("non-deletion intent on local range descriptor")
   126  	}
   127  
   128  	// NOTE: the deletion intent on the range's meta2 descriptor is just as
   129  	// important to correctness as the deletion intent on the local descriptor,
   130  	// but the check is too expensive as it would involve a network roundtrip on
   131  	// most nodes.
   132  
   133  	reply.MVCCStats = cArgs.EvalCtx.GetMVCCStats()
   134  	reply.LeaseAppliedIndex = cArgs.EvalCtx.GetLeaseAppliedIndex()
   135  	reply.FreezeStart = cArgs.EvalCtx.Clock().Now()
   136  
   137  	return result.Result{
   138  		Local: result.LocalResult{MaybeWatchForMerge: true},
   139  	}, nil
   140  }