github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/store_send.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "time" 16 17 "github.com/cockroachdb/cockroach/pkg/keys" 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/util/log" 20 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 21 ) 22 23 // Send fetches a range based on the header's replica, assembles method, args & 24 // reply into a Raft Cmd struct and executes the command using the fetched 25 // range. 26 // 27 // An incoming request may be transactional or not. If it is not transactional, 28 // the timestamp at which it executes may be higher than that optionally 29 // specified through the incoming BatchRequest, and it is not guaranteed that 30 // all operations are written at the same timestamp. If it is transactional, a 31 // timestamp must not be set - it is deduced automatically from the 32 // transaction. In particular, the read timestamp will be used for 33 // all reads and the write (provisional commit) timestamp will be used for 34 // all writes. See the comments on txn.TxnMeta.Timestamp and txn.ReadTimestamp 35 // for more details. 36 // 37 // Should a transactional operation be forced to a higher timestamp (for 38 // instance due to the timestamp cache or finding a committed value in the path 39 // of one of its writes), the response will have a transaction set which should 40 // be used to update the client transaction object. 41 func (s *Store) Send( 42 ctx context.Context, ba roachpb.BatchRequest, 43 ) (br *roachpb.BatchResponse, pErr *roachpb.Error) { 44 // Attach any log tags from the store to the context (which normally 45 // comes from gRPC). 46 ctx = s.AnnotateCtx(ctx) 47 for _, union := range ba.Requests { 48 arg := union.GetInner() 49 header := arg.Header() 50 if err := verifyKeys(header.Key, header.EndKey, roachpb.IsRange(arg)); err != nil { 51 return nil, roachpb.NewError(err) 52 } 53 } 54 55 // Limit the number of concurrent AddSSTable requests, since they're expensive 56 // and block all other writes to the same span. 57 if ba.IsSingleAddSSTableRequest() { 58 before := timeutil.Now() 59 if err := s.limiters.ConcurrentAddSSTableRequests.Begin(ctx); err != nil { 60 return nil, roachpb.NewError(err) 61 } 62 defer s.limiters.ConcurrentAddSSTableRequests.Finish() 63 64 beforeEngineDelay := timeutil.Now() 65 s.engine.PreIngestDelay(ctx) 66 after := timeutil.Now() 67 68 waited, waitedEngine := after.Sub(before), after.Sub(beforeEngineDelay) 69 s.metrics.AddSSTableProposalTotalDelay.Inc(waited.Nanoseconds()) 70 s.metrics.AddSSTableProposalEngineDelay.Inc(waitedEngine.Nanoseconds()) 71 if waited > time.Second { 72 log.Infof(ctx, "SST ingestion was delayed by %v (%v for storage engine back-pressure)", 73 waited, waitedEngine) 74 } 75 } 76 77 if ba.Txn != nil && ba.Txn.ReadTimestamp.Less(ba.Txn.DeprecatedOrigTimestamp) { 78 // For compatibility with 19.2 nodes which might not have set ReadTimestamp, 79 // fallback to DeprecatedOrigTimestamp. Note that even if ReadTimestamp is 80 // set, it might still be less than DeprecatedOrigTimestamp if the txn was 81 // restarted. 82 ba.Txn = ba.Txn.Clone() 83 ba.Txn.ReadTimestamp = ba.Txn.DeprecatedOrigTimestamp 84 } 85 if err := ba.SetActiveTimestamp(s.Clock().Now); err != nil { 86 return nil, roachpb.NewError(err) 87 } 88 89 if s.cfg.TestingKnobs.ClockBeforeSend != nil { 90 s.cfg.TestingKnobs.ClockBeforeSend(s.cfg.Clock, ba) 91 } 92 93 // Update our clock with the incoming request timestamp. This advances the 94 // local node's clock to a high water mark from all nodes with which it has 95 // interacted. 96 if s.cfg.TestingKnobs.DisableMaxOffsetCheck { 97 s.cfg.Clock.Update(ba.Timestamp) 98 } else { 99 // If the command appears to come from a node with a bad clock, 100 // reject it now before we reach that point. 101 var err error 102 if err = s.cfg.Clock.UpdateAndCheckMaxOffset(ctx, ba.Timestamp); err != nil { 103 return nil, roachpb.NewError(err) 104 } 105 } 106 107 defer func() { 108 if r := recover(); r != nil { 109 // On panic, don't run the defer. It's probably just going to panic 110 // again due to undefined state. 111 panic(r) 112 } 113 if ba.Txn != nil { 114 // We're in a Txn, so we can reduce uncertainty restarts by attaching 115 // the above timestamp to the returned response or error. The caller 116 // can use it to shorten its uncertainty interval when it comes back to 117 // this node. 118 if pErr != nil { 119 pErr.OriginNode = ba.Replica.NodeID 120 if txn := pErr.GetTxn(); txn == nil { 121 pErr.SetTxn(ba.Txn) 122 } 123 } else { 124 if br.Txn == nil { 125 br.Txn = ba.Txn 126 } 127 // Update our clock with the outgoing response txn timestamp 128 // (if timestamp has been forwarded). 129 if ba.Timestamp.Less(br.Txn.WriteTimestamp) { 130 s.cfg.Clock.Update(br.Txn.WriteTimestamp) 131 } 132 } 133 } else { 134 if pErr == nil { 135 // Update our clock with the outgoing response timestamp. 136 // (if timestamp has been forwarded). 137 if ba.Timestamp.Less(br.Timestamp) { 138 s.cfg.Clock.Update(br.Timestamp) 139 } 140 } 141 } 142 143 // We get the latest timestamp - we know that any 144 // write with a higher timestamp we run into later must 145 // have started after this point in (absolute) time. 146 now := s.cfg.Clock.Now() 147 if pErr != nil { 148 pErr.Now = now 149 } else { 150 br.Now = now 151 } 152 }() 153 154 if ba.Txn != nil { 155 // We make our transaction aware that no other operation that causally 156 // precedes it could have started after `now`. This is important: If we 157 // wind up pushing a value, it will be in our immediate future, and not 158 // updating the top end of our uncertainty timestamp would lead to a 159 // restart (at least in the absence of a prior observed timestamp from 160 // this node, in which case the following is a no-op). 161 if _, ok := ba.Txn.GetObservedTimestamp(ba.Replica.NodeID); !ok { 162 txnClone := ba.Txn.Clone() 163 txnClone.UpdateObservedTimestamp(ba.Replica.NodeID, s.cfg.Clock.Now()) 164 ba.Txn = txnClone 165 } 166 } 167 168 if log.ExpensiveLogEnabled(ctx, 1) { 169 log.Eventf(ctx, "executing %s", ba) 170 } 171 172 // Get range and add command to the range for execution. 173 repl, err := s.GetReplica(ba.RangeID) 174 if err != nil { 175 return nil, roachpb.NewError(err) 176 } 177 if !repl.IsInitialized() { 178 repl.mu.RLock() 179 replicaID := repl.mu.replicaID 180 repl.mu.RUnlock() 181 182 // If we have an uninitialized copy of the range, then we are 183 // probably a valid member of the range, we're just in the 184 // process of getting our snapshot. If we returned 185 // RangeNotFoundError, the client would invalidate its cache, 186 // but we can be smarter: the replica that caused our 187 // uninitialized replica to be created is most likely the 188 // leader. 189 return nil, roachpb.NewError(&roachpb.NotLeaseHolderError{ 190 RangeID: ba.RangeID, 191 LeaseHolder: repl.creatingReplica, 192 // The replica doesn't have a range descriptor yet, so we have to build 193 // a ReplicaDescriptor manually. 194 Replica: roachpb.ReplicaDescriptor{ 195 NodeID: repl.store.nodeDesc.NodeID, 196 StoreID: repl.store.StoreID(), 197 ReplicaID: replicaID, 198 }, 199 }) 200 } 201 202 br, pErr = repl.Send(ctx, ba) 203 if pErr == nil { 204 return br, nil 205 } 206 207 // Augment error if necessary and return. 208 switch t := pErr.GetDetail().(type) { 209 case *roachpb.RangeKeyMismatchError: 210 // On a RangeKeyMismatchError where the batch didn't even overlap 211 // the start of the mismatched Range, try to suggest a more suitable 212 // Range from this Store. 213 rSpan, err := keys.Range(ba.Requests) 214 if err != nil { 215 return nil, roachpb.NewError(err) 216 } 217 if !t.MismatchedRange.ContainsKey(rSpan.Key) { 218 if r2 := s.LookupReplica(rSpan.Key); r2 != nil { 219 // Only return the correct range descriptor as a hint 220 // if we know the current lease holder for that range, which 221 // indicates that our knowledge is not stale. 222 if l, _ := r2.GetLease(); r2.IsLeaseValid(l, s.Clock().Now()) { 223 t.SuggestedRange = r2.Desc() 224 } 225 } 226 } 227 case *roachpb.RaftGroupDeletedError: 228 // This error needs to be converted appropriately so that clients 229 // will retry. 230 err := roachpb.NewRangeNotFoundError(repl.RangeID, repl.store.StoreID()) 231 pErr = roachpb.NewError(err) 232 } 233 return nil, pErr 234 }