github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/store_send.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)
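
// exampleRequestLimiter is a minimal sketch of the counting-semaphore pattern
// behind the ConcurrentAddSSTableRequests limiter used in Send below. It is
// illustrative only: the type, its field, and its methods are assumptions for
// exposition, not the real limiter implementation (which also integrates with
// tracing and metrics).
type exampleRequestLimiter struct {
	sem chan struct{} // buffered to the maximum number of concurrent requests
}

// Begin blocks until a slot is free or the context is canceled.
func (l *exampleRequestLimiter) Begin(ctx context.Context) error {
	select {
	case l.sem <- struct{}{}:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Finish releases the slot acquired by Begin.
func (l *exampleRequestLimiter) Finish() {
	<-l.sem
}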

// Send looks up the replica addressed by the batch header and executes the
// batch against it.
//
// An incoming request may be transactional or not. If it is not transactional,
// the timestamp at which it executes may be higher than that optionally
// specified through the incoming BatchRequest, and it is not guaranteed that
// all operations are written at the same timestamp. If it is transactional, a
// timestamp must not be set - it is deduced automatically from the
// transaction. In particular, the read timestamp will be used for
// all reads and the write (provisional commit) timestamp will be used for
// all writes. See the comments on txn.TxnMeta.Timestamp and txn.ReadTimestamp
// for more details.
//
// Should a transactional operation be forced to a higher timestamp (for
// instance due to the timestamp cache or finding a committed value in the path
// of one of its writes), the response will have a transaction set which should
// be used to update the client transaction object.
func (s *Store) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (br *roachpb.BatchResponse, pErr *roachpb.Error) {
	// Attach any log tags from the store to the context (which normally
	// comes from gRPC).
	ctx = s.AnnotateCtx(ctx)
	for _, union := range ba.Requests {
		arg := union.GetInner()
		header := arg.Header()
		if err := verifyKeys(header.Key, header.EndKey, roachpb.IsRange(arg)); err != nil {
			return nil, roachpb.NewError(err)
		}
	}

	// Limit the number of concurrent AddSSTable requests, since they're expensive
	// and block all other writes to the same span.
	if ba.IsSingleAddSSTableRequest() {
		before := timeutil.Now()
		if err := s.limiters.ConcurrentAddSSTableRequests.Begin(ctx); err != nil {
			return nil, roachpb.NewError(err)
		}
		defer s.limiters.ConcurrentAddSSTableRequests.Finish()

		beforeEngineDelay := timeutil.Now()
		s.engine.PreIngestDelay(ctx)
		after := timeutil.Now()

		waited, waitedEngine := after.Sub(before), after.Sub(beforeEngineDelay)
		s.metrics.AddSSTableProposalTotalDelay.Inc(waited.Nanoseconds())
		s.metrics.AddSSTableProposalEngineDelay.Inc(waitedEngine.Nanoseconds())
		if waited > time.Second {
			log.Infof(ctx, "SST ingestion was delayed by %v (%v for storage engine back-pressure)",
				waited, waitedEngine)
		}
	}

	if ba.Txn != nil && ba.Txn.ReadTimestamp.Less(ba.Txn.DeprecatedOrigTimestamp) {
		// For compatibility with 19.2 nodes which might not have set ReadTimestamp,
		// fall back to DeprecatedOrigTimestamp. Note that even if ReadTimestamp is
		// set, it might still be less than DeprecatedOrigTimestamp if the txn was
		// restarted.
		ba.Txn = ba.Txn.Clone()
		ba.Txn.ReadTimestamp = ba.Txn.DeprecatedOrigTimestamp
	}
	if err := ba.SetActiveTimestamp(s.Clock().Now); err != nil {
		return nil, roachpb.NewError(err)
	}

	if s.cfg.TestingKnobs.ClockBeforeSend != nil {
		s.cfg.TestingKnobs.ClockBeforeSend(s.cfg.Clock, ba)
	}

	// Update our clock with the incoming request timestamp. This advances the
	// local node's clock to a high water mark from all nodes with which it has
	// interacted.
	if s.cfg.TestingKnobs.DisableMaxOffsetCheck {
		s.cfg.Clock.Update(ba.Timestamp)
	} else {
		// If the command appears to come from a node with a bad clock,
		// reject it now rather than letting it update (and potentially
		// poison) our clock.
		var err error
		if err = s.cfg.Clock.UpdateAndCheckMaxOffset(ctx, ba.Timestamp); err != nil {
			return nil, roachpb.NewError(err)
		}
	}

	defer func() {
		if r := recover(); r != nil {
			// On panic, don't run the rest of this deferred function; the
			// state is undefined and it would likely just panic again.
			panic(r)
		}
		if ba.Txn != nil {
			// We're in a Txn, so we can reduce uncertainty restarts by attaching
			// the above timestamp to the returned response or error. The caller
			// can use it to shorten its uncertainty interval when it comes back to
			// this node.
			if pErr != nil {
				pErr.OriginNode = ba.Replica.NodeID
				if txn := pErr.GetTxn(); txn == nil {
					pErr.SetTxn(ba.Txn)
				}
			} else {
				if br.Txn == nil {
					br.Txn = ba.Txn
				}
				// Update our clock with the outgoing response txn timestamp
				// (if timestamp has been forwarded).
				if ba.Timestamp.Less(br.Txn.WriteTimestamp) {
					s.cfg.Clock.Update(br.Txn.WriteTimestamp)
				}
			}
		} else {
			if pErr == nil {
				// Update our clock with the outgoing response timestamp
				// (if timestamp has been forwarded).
				if ba.Timestamp.Less(br.Timestamp) {
					s.cfg.Clock.Update(br.Timestamp)
				}
			}
		}

		// Read the clock now: any write with a higher timestamp that we run
		// into later must have started after this point in (absolute) time.
		now := s.cfg.Clock.Now()
		if pErr != nil {
			pErr.Now = now
		} else {
			br.Now = now
		}
	}()

	if ba.Txn != nil {
		// We make our transaction aware that no other operation that causally
		// precedes it could have started after `now`. This is important: If we
		// wind up pushing a value, it will be in our immediate future, and not
		// updating the top end of our uncertainty timestamp would lead to a
		// restart (at least in the absence of a prior observed timestamp from
		// this node, in which case the following is a no-op).
		if _, ok := ba.Txn.GetObservedTimestamp(ba.Replica.NodeID); !ok {
			txnClone := ba.Txn.Clone()
			txnClone.UpdateObservedTimestamp(ba.Replica.NodeID, s.cfg.Clock.Now())
			ba.Txn = txnClone
		}
	}

	if log.ExpensiveLogEnabled(ctx, 1) {
		log.Eventf(ctx, "executing %s", ba)
	}

	// Get range and add command to the range for execution.
	repl, err := s.GetReplica(ba.RangeID)
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	if !repl.IsInitialized() {
		repl.mu.RLock()
		replicaID := repl.mu.replicaID
		repl.mu.RUnlock()

		// If we have an uninitialized copy of the range, then we are
		// probably a valid member of the range, we're just in the
		// process of getting our snapshot. If we returned
		// RangeNotFoundError, the client would invalidate its cache,
		// but we can be smarter: the replica that caused our
		// uninitialized replica to be created is most likely the
		// leader.
		return nil, roachpb.NewError(&roachpb.NotLeaseHolderError{
			RangeID:     ba.RangeID,
			LeaseHolder: repl.creatingReplica,
			// The replica doesn't have a range descriptor yet, so we have to build
			// a ReplicaDescriptor manually.
			Replica: roachpb.ReplicaDescriptor{
				NodeID:    repl.store.nodeDesc.NodeID,
				StoreID:   repl.store.StoreID(),
				ReplicaID: replicaID,
			},
		})
	}

	br, pErr = repl.Send(ctx, ba)
	if pErr == nil {
		return br, nil
	}

	// Augment error if necessary and return.
	switch t := pErr.GetDetail().(type) {
	case *roachpb.RangeKeyMismatchError:
		// On a RangeKeyMismatchError where the batch didn't even overlap
		// the start of the mismatched Range, try to suggest a more suitable
		// Range from this Store.
		rSpan, err := keys.Range(ba.Requests)
		if err != nil {
			return nil, roachpb.NewError(err)
		}
		if !t.MismatchedRange.ContainsKey(rSpan.Key) {
			if r2 := s.LookupReplica(rSpan.Key); r2 != nil {
				// Only return the correct range descriptor as a hint
				// if we know the current lease holder for that range, which
				// indicates that our knowledge is not stale.
				if l, _ := r2.GetLease(); r2.IsLeaseValid(l, s.Clock().Now()) {
					t.SuggestedRange = r2.Desc()
				}
			}
		}
	case *roachpb.RaftGroupDeletedError:
		// This error needs to be converted appropriately so that clients
		// will retry.
		err := roachpb.NewRangeNotFoundError(repl.RangeID, repl.store.StoreID())
		pErr = roachpb.NewError(err)
	}
	return nil, pErr
}
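
// exampleClockForward is a minimal sketch of the clock interplay in Send
// above: the store's clock acts as a high-water mark that is forwarded by
// every timestamp observed on incoming requests and outgoing responses, while
// timestamps too far ahead of the local wall clock are rejected instead of
// being absorbed. Illustrative only and an assumption for exposition: the real
// logic uses hybrid logical clocks (Clock.Update and
// Clock.UpdateAndCheckMaxOffset), not plain nanosecond integers.
func exampleClockForward(clock *int64, remote, localWallNanos int64, maxOffset time.Duration) (ok bool) {
	if remote > localWallNanos+maxOffset.Nanoseconds() {
		// The remote clock appears to be too far ahead of ours; reject the
		// request rather than absorbing a suspect timestamp.
		return false
	}
	if remote > *clock {
		// Ratchet the local high-water mark forward; it never moves backwards.
		*clock = remote
	}
	return true
}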