// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package concurrency

import (
	"context"
	"sync"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// managerImpl implements the Manager interface. It ties together the four
// subsystems that cooperate to provide concurrency control for a range:
// latching (requests), locking (transactions), lock waiting, and transaction
// push/deadlock-detection waiting.
type managerImpl struct {
	// Synchronizes conflicting in-flight requests.
	lm latchManager
	// Synchronizes conflicting in-progress transactions.
	lt lockTable
	// Waits for locks that conflict with a request to be released.
	ltw lockTableWaiter
	// Waits for transaction completion and detects deadlocks.
	twq txnWaitQueue
}

// Config contains the dependencies to construct a Manager.
type Config struct {
	// Identification.
	NodeDesc  *roachpb.NodeDescriptor
	RangeDesc *roachpb.RangeDescriptor
	// Components.
	Settings       *cluster.Settings
	DB             *kv.DB
	Clock          *hlc.Clock
	Stopper        *stop.Stopper
	IntentResolver IntentResolver
	// Metrics.
	TxnWaitMetrics *txnwait.Metrics
	SlowLatchGauge *metric.Gauge
	// Configs + Knobs.
	// MaxLockTableSize bounds the in-memory lock table; zero means "use the
	// package default" (see initDefaults).
	MaxLockTableSize  int64
	DisableTxnPushing bool
	TxnWaitKnobs      txnwait.TestingKnobs
}

// initDefaults fills in defaults for any Config fields left at their zero
// value by the caller.
func (c *Config) initDefaults() {
	if c.MaxLockTableSize == 0 {
		c.MaxLockTableSize = defaultLockTableSize
	}
}

// NewManager creates a new concurrency Manager structure.
//
// Note that the lockTableWaiter is handed the Manager itself (m) as its
// lockManager, so the struct must be allocated before the composite literal
// below is assigned into it.
func NewManager(cfg Config) Manager {
	cfg.initDefaults()
	m := new(managerImpl)
	*m = managerImpl{
		// TODO(nvanbenschoten): move pkg/storage/spanlatch to a new
		// pkg/storage/concurrency/latch package. Make it implement the
		// latchManager interface directly, if possible.
		lm: &latchManagerImpl{
			m: spanlatch.Make(
				cfg.Stopper,
				cfg.SlowLatchGauge,
			),
		},
		lt: &lockTableImpl{
			maxLocks: cfg.MaxLockTableSize,
		},
		ltw: &lockTableWaiterImpl{
			st:                cfg.Settings,
			stopper:           cfg.Stopper,
			ir:                cfg.IntentResolver,
			lm:                m,
			disableTxnPushing: cfg.DisableTxnPushing,
		},
		// TODO(nvanbenschoten): move pkg/storage/txnwait to a new
		// pkg/storage/concurrency/txnwait package.
		twq: txnwait.NewQueue(txnwait.Config{
			RangeDesc: cfg.RangeDesc,
			DB:        cfg.DB,
			Clock:     cfg.Clock,
			Stopper:   cfg.Stopper,
			Metrics:   cfg.TxnWaitMetrics,
			Knobs:     cfg.TxnWaitKnobs,
		}),
	}
	return m
}

// SequenceReq implements the RequestSequencer interface.
func (m *managerImpl) SequenceReq(
	ctx context.Context, prev *Guard, req Request,
) (*Guard, Response, *Error) {
	var g *Guard
	if prev == nil {
		// First pass through sequencing: allocate a Guard for the request.
		g = newGuard(req)
		log.Event(ctx, "sequencing request")
	} else {
		// Re-sequencing after a conflict: reuse the caller's Guard. The
		// ContentionHandler methods release latches before handing the Guard
		// back, so it must not be holding any here.
		g = prev
		g.AssertNoLatches()
		log.Event(ctx, "re-sequencing request")
	}

	resp, err := m.sequenceReqWithGuard(ctx, g, req)
	if resp != nil || err != nil {
		// Ensure that we release the guard if we return a response or an error.
		// The caller receives a nil Guard in this case and must not use g.
		m.FinishReq(g)
		return nil, resp, err
	}
	return g, nil, nil
}

// sequenceReqWithGuard performs the actual sequencing work for SequenceReq:
// latch acquisition followed by lock-table scanning and, if needed, waiting
// in lock wait-queues. It loops until the request holds latches and conflicts
// with no held locks. A non-nil Response or Error means the request was
// served (or failed) without needing evaluation.
func (m *managerImpl) sequenceReqWithGuard(
	ctx context.Context, g *Guard, req Request,
) (Response, *Error) {
	// Some requests don't need to acquire latches at all.
	if !shouldAcquireLatches(req) {
		log.Event(ctx, "not acquiring latches")
		return nil, nil
	}

	// Provide the manager with an opportunity to intercept the request. It
	// may be able to serve the request directly, and even if not, it may be
	// able to update its internal state based on the request.
	resp, err := m.maybeInterceptReq(ctx, req)
	if resp != nil || err != nil {
		return resp, err
	}

	for {
		// Acquire latches for the request. This synchronizes the request
		// with all conflicting in-flight requests.
		log.Event(ctx, "acquiring latches")
		g.lg, err = m.lm.Acquire(ctx, req)
		if err != nil {
			return nil, err
		}

		// Some requests don't need to wait on locks at all.
		if req.LockSpans.Empty() {
			return nil, nil
		}

		// Scan for conflicting locks.
		log.Event(ctx, "scanning lock table for conflicting locks")
		g.ltg = m.lt.ScanAndEnqueue(g.Req, g.ltg)

		// Wait on conflicting locks, if necessary. Latches are dropped first
		// so that conflicting requests are not blocked while this request
		// waits; the Guard remains enqueued in the lock wait-queues, which
		// preserves its position when the loop re-acquires latches.
		if g.ltg.ShouldWait() {
			m.lm.Release(g.moveLatchGuard())

			log.Event(ctx, "waiting in lock wait-queues")
			if err := m.ltw.WaitOn(ctx, g.Req, g.ltg); err != nil {
				return nil, err
			}
			continue
		}
		return nil, nil
	}
}

// maybeInterceptReq allows the concurrency manager to intercept requests before
// sequencing and evaluation so that it can immediately act on them. This allows
// the concurrency manager to route certain concurrency control-related requests
// into queues and optionally update its internal state based on the requests.
func (m *managerImpl) maybeInterceptReq(ctx context.Context, req Request) (Response, *Error) {
	switch {
	case req.isSingle(roachpb.PushTxn):
		// If necessary, wait in the txnWaitQueue for the pushee transaction to
		// expire or to move to a finalized state.
		t := req.Requests[0].GetPushTxn()
		resp, err := m.twq.MaybeWaitForPush(ctx, t)
		if err != nil {
			return nil, err
		} else if resp != nil {
			// The push was served directly from the queue; no evaluation needed.
			return makeSingleResponse(resp), nil
		}
	case req.isSingle(roachpb.QueryTxn):
		// If necessary, wait in the txnWaitQueue for a transaction state update
		// or for a dependent transaction to change.
		t := req.Requests[0].GetQueryTxn()
		return nil, m.twq.MaybeWaitForQuery(ctx, t)
	default:
		// TODO(nvanbenschoten): in the future, use this hook to update the lock
		// table to allow contending transactions to proceed.
		// for _, arg := range req.Requests {
		// 	switch t := arg.GetInner().(type) {
		// 	case *roachpb.ResolveIntentRequest:
		// 		_ = t
		// 	case *roachpb.ResolveIntentRangeRequest:
		// 		_ = t
		// 	}
		// }
	}
	return nil, nil
}

// shouldAcquireLatches determines whether the request should acquire latches
// before proceeding to evaluate. Latches are used to synchronize with other
// conflicting requests, based on the Spans collected for the request. Most
// request types will want to acquire latches.
func shouldAcquireLatches(req Request) bool {
	switch {
	case req.ReadConsistency != roachpb.CONSISTENT:
		// Only acquire latches for consistent operations.
		return false
	case req.isSingle(roachpb.RequestLease):
		// Do not acquire latches for lease requests. These requests are run on
		// replicas that do not hold the lease, so acquiring latches wouldn't
		// help synchronize with other requests.
		return false
	}
	return true
}

// FinishReq implements the RequestSequencer interface. It exits any lock
// wait-queues, releases any latches, and returns the Guard to the pool; the
// Guard must not be used after this call.
func (m *managerImpl) FinishReq(g *Guard) {
	if ltg := g.moveLockTableGuard(); ltg != nil {
		m.lt.Dequeue(ltg)
	}
	if lg := g.moveLatchGuard(); lg != nil {
		m.lm.Release(lg)
	}
	releaseGuard(g)
}

// HandleWriterIntentError implements the ContentionHandler interface.
func (m *managerImpl) HandleWriterIntentError(
	ctx context.Context, g *Guard, t *roachpb.WriteIntentError,
) (*Guard, *Error) {
	if g.ltg == nil {
		log.Fatalf(ctx, "cannot handle WriteIntentError %v for request without "+
			"lockTableGuard; were lock spans declared for this request?", t)
	}

	// Add a discovered lock to lock-table for each intent and enter each lock's
	// wait-queue. If the lock-table is disabled and one or more of the intents
	// are ignored then we immediately wait on all intents.
	wait := false
	for i := range t.Intents {
		intent := &t.Intents[i]
		added, err := m.lt.AddDiscoveredLock(intent, g.ltg)
		if err != nil {
			// An error here indicates a broken invariant, not a recoverable
			// condition; treat it as an assertion failure.
			log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
		}
		if !added {
			wait = true
		}
	}

	// Release the Guard's latches but continue to remain in lock wait-queues by
	// not releasing lockWaitQueueGuards. We expect the caller of this method to
	// then re-sequence the Request by calling SequenceReq with the un-latched
	// Guard. This is analogous to iterating through the loop in SequenceReq.
	m.lm.Release(g.moveLatchGuard())

	// If the lockTable was disabled then we need to immediately wait on the
	// intents to ensure that they are resolved and moved out of the request's
	// way.
	if wait {
		for i := range t.Intents {
			intent := &t.Intents[i]
			if err := m.ltw.WaitOnLock(ctx, g.Req, intent); err != nil {
				// On error the Guard is finished here; the caller receives nil
				// and must not attempt to re-sequence.
				m.FinishReq(g)
				return nil, err
			}
		}
	}

	return g, nil
}

// HandleTransactionPushError implements the ContentionHandler interface.
func (m *managerImpl) HandleTransactionPushError(
	ctx context.Context, g *Guard, t *roachpb.TransactionPushError,
) *Guard {
	m.twq.EnqueueTxn(&t.PusheeTxn)

	// Release the Guard's latches. The PushTxn request should not be in any
	// lock wait-queues because it does not scan the lockTable. We expect the
	// caller of this method to then re-sequence the Request by calling
	// SequenceReq with the un-latched Guard. This is analogous to iterating
	// through the loop in SequenceReq.
	m.lm.Release(g.moveLatchGuard())
	return g
}

// OnLockAcquired implements the LockManager interface.
func (m *managerImpl) OnLockAcquired(ctx context.Context, acq *roachpb.LockAcquisition) {
	// All acquired locks are currently registered as Exclusive; the lock
	// strength is not carried on the LockAcquisition.
	if err := m.lt.AcquireLock(&acq.Txn, acq.Key, lock.Exclusive, acq.Durability); err != nil {
		log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
	}
}

// OnLockUpdated implements the LockManager interface.
func (m *managerImpl) OnLockUpdated(ctx context.Context, up *roachpb.LockUpdate) {
	if err := m.lt.UpdateLocks(up); err != nil {
		log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
	}
}

// OnTransactionUpdated implements the TransactionManager interface.
323 func (m *managerImpl) OnTransactionUpdated(ctx context.Context, txn *roachpb.Transaction) { 324 m.twq.UpdateTxn(ctx, txn) 325 } 326 327 // GetDependents implements the TransactionManager interface. 328 func (m *managerImpl) GetDependents(txnID uuid.UUID) []uuid.UUID { 329 return m.twq.GetDependents(txnID) 330 } 331 332 // OnRangeDescUpdated implements the RangeStateListener interface. 333 func (m *managerImpl) OnRangeDescUpdated(desc *roachpb.RangeDescriptor) { 334 m.twq.OnRangeDescUpdated(desc) 335 } 336 337 // OnRangeLeaseUpdated implements the RangeStateListener interface. 338 func (m *managerImpl) OnRangeLeaseUpdated(isLeaseholder bool) { 339 if isLeaseholder { 340 m.lt.Enable() 341 m.twq.Enable() 342 } else { 343 // Disable all queues - the concurrency manager will no longer be 344 // informed about all state transitions to locks and transactions. 345 const disable = true 346 m.lt.Clear(disable) 347 m.twq.Clear(disable) 348 // Also clear caches, since they won't be needed any time soon and 349 // consume memory. 350 m.ltw.ClearCaches() 351 } 352 } 353 354 // OnRangeSplit implements the RangeStateListener interface. 355 func (m *managerImpl) OnRangeSplit() { 356 // TODO(nvanbenschoten): it only essential that we clear the half of the 357 // lockTable which contains locks in the key range that is being split off 358 // from the current range. For now though, we clear it all. 359 const disable = false 360 m.lt.Clear(disable) 361 m.twq.Clear(disable) 362 } 363 364 // OnRangeMerge implements the RangeStateListener interface. 365 func (m *managerImpl) OnRangeMerge() { 366 // Disable all queues - the range is being merged into its LHS neighbor. 367 // It will no longer be informed about all state transitions to locks and 368 // transactions. 369 const disable = true 370 m.lt.Clear(disable) 371 m.twq.Clear(disable) 372 } 373 374 // OnReplicaSnapshotApplied implements the RangeStateListener interface. 
375 func (m *managerImpl) OnReplicaSnapshotApplied() { 376 // A snapshot can cause discontinuities in raft entry application. The 377 // lockTable expects to observe all lock state transitions on the range 378 // through LockManager listener methods. If there's a chance it missed a 379 // state transition, it is safer to simply clear the lockTable and rebuild 380 // it from persistent intent state by allowing requests to discover locks 381 // and inform the manager through calls to HandleWriterIntentError. 382 // 383 // A range only maintains locks in the lockTable of its leaseholder replica 384 // even thought it runs a concurrency manager on all replicas. Because of 385 // this, we expect it to be very rare that this actually clears any locks. 386 // Still, it is possible for the leaseholder replica to receive a snapshot 387 // when it is not also the raft leader. 388 const disable = false 389 m.lt.Clear(disable) 390 } 391 392 // LatchMetrics implements the MetricExporter interface. 393 func (m *managerImpl) LatchMetrics() (global, local kvserverpb.LatchManagerInfo) { 394 return m.lm.Info() 395 } 396 397 // LockTableDebug implements the MetricExporter interface. 398 func (m *managerImpl) LockTableDebug() string { 399 return m.lt.String() 400 } 401 402 // TxnWaitQueue implements the MetricExporter interface. 403 func (m *managerImpl) TxnWaitQueue() *txnwait.Queue { 404 return m.twq.(*txnwait.Queue) 405 } 406 407 func (r *Request) txnMeta() *enginepb.TxnMeta { 408 if r.Txn == nil { 409 return nil 410 } 411 return &r.Txn.TxnMeta 412 } 413 414 // readConflictTimestamp returns the maximum timestamp at which the request 415 // conflicts with locks acquired by other transaction. The request must wait 416 // for all locks acquired by other transactions at or below this timestamp 417 // to be released. All locks acquired by other transactions above this 418 // timestamp are ignored. 
419 func (r *Request) readConflictTimestamp() hlc.Timestamp { 420 ts := r.Timestamp 421 if r.Txn != nil { 422 ts = r.Txn.ReadTimestamp 423 ts.Forward(r.Txn.MaxTimestamp) 424 } 425 return ts 426 } 427 428 // writeConflictTimestamp returns the minimum timestamp at which the request 429 // acquires locks when performing mutations. All writes performed by the 430 // requests must take place at or above this timestamp. 431 func (r *Request) writeConflictTimestamp() hlc.Timestamp { 432 ts := r.Timestamp 433 if r.Txn != nil { 434 ts = r.Txn.WriteTimestamp 435 } 436 return ts 437 } 438 439 func (r *Request) isSingle(m roachpb.Method) bool { 440 if len(r.Requests) != 1 { 441 return false 442 } 443 return r.Requests[0].GetInner().Method() == m 444 } 445 446 // Used to avoid allocations. 447 var guardPool = sync.Pool{ 448 New: func() interface{} { return new(Guard) }, 449 } 450 451 func newGuard(req Request) *Guard { 452 g := guardPool.Get().(*Guard) 453 g.Req = req 454 return g 455 } 456 457 func releaseGuard(g *Guard) { 458 *g = Guard{} 459 guardPool.Put(g) 460 } 461 462 // LatchSpans returns the maximal set of spans that the request will access. 463 func (g *Guard) LatchSpans() *spanset.SpanSet { 464 return g.Req.LatchSpans 465 } 466 467 // HoldingLatches returned whether the guard is holding latches or not. 468 func (g *Guard) HoldingLatches() bool { 469 return g != nil && g.lg != nil 470 } 471 472 // AssertLatches asserts that the guard is non-nil and holding latches. 473 func (g *Guard) AssertLatches() { 474 if !g.HoldingLatches() { 475 panic("expected latches held, found none") 476 } 477 } 478 479 // AssertNoLatches asserts that the guard is non-nil and not holding latches. 
480 func (g *Guard) AssertNoLatches() { 481 if g.HoldingLatches() { 482 panic("unexpected latches held") 483 } 484 } 485 486 func (g *Guard) moveLatchGuard() latchGuard { 487 lg := g.lg 488 g.lg = nil 489 return lg 490 } 491 492 func (g *Guard) moveLockTableGuard() lockTableGuard { 493 ltg := g.ltg 494 g.ltg = nil 495 return ltg 496 } 497 498 func makeSingleResponse(r roachpb.Response) Response { 499 ru := make(Response, 1) 500 ru[0].MustSetInner(r) 501 return ru 502 }