github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/concurrency/concurrency_manager.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package concurrency

import (
	"context"
	"sync"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanlatch"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// managerImpl implements the Manager interface.
type managerImpl struct {
	// Synchronizes conflicting in-flight requests.
	lm latchManager
	// Synchronizes conflicting in-progress transactions.
	lt lockTable
	// Waits for locks that conflict with a request to be released.
	ltw lockTableWaiter
	// Waits for transaction completion and detects deadlocks.
	twq txnWaitQueue
}
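
// Note on lifetimes: latches are held only while a request is in flight and
// are dropped when it finishes, whereas locks in the lockTable can outlive
// the request that acquired them, persisting until their transaction is
// committed or aborted.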

// Config contains the dependencies to construct a Manager.
type Config struct {
	// Identification.
	NodeDesc  *roachpb.NodeDescriptor
	RangeDesc *roachpb.RangeDescriptor
	// Components.
	Settings       *cluster.Settings
	DB             *kv.DB
	Clock          *hlc.Clock
	Stopper        *stop.Stopper
	IntentResolver IntentResolver
	// Metrics.
	TxnWaitMetrics *txnwait.Metrics
	SlowLatchGauge *metric.Gauge
	// Configs + Knobs.
	MaxLockTableSize  int64
	DisableTxnPushing bool
	TxnWaitKnobs      txnwait.TestingKnobs
}

func (c *Config) initDefaults() {
	if c.MaxLockTableSize == 0 {
		c.MaxLockTableSize = defaultLockTableSize
	}
}

// NewManager creates a new concurrency Manager structure.
func NewManager(cfg Config) Manager {
	cfg.initDefaults()
	m := new(managerImpl)
	*m = managerImpl{
		// TODO(nvanbenschoten): move pkg/storage/spanlatch to a new
		// pkg/storage/concurrency/latch package. Make it implement the
		// latchManager interface directly, if possible.
		lm: &latchManagerImpl{
			m: spanlatch.Make(
				cfg.Stopper,
				cfg.SlowLatchGauge,
			),
		},
		lt: &lockTableImpl{
			maxLocks: cfg.MaxLockTableSize,
		},
		ltw: &lockTableWaiterImpl{
			st:                cfg.Settings,
			stopper:           cfg.Stopper,
			ir:                cfg.IntentResolver,
			lm:                m,
			disableTxnPushing: cfg.DisableTxnPushing,
		},
		// TODO(nvanbenschoten): move pkg/storage/txnwait to a new
		// pkg/storage/concurrency/txnwait package.
		twq: txnwait.NewQueue(txnwait.Config{
			RangeDesc: cfg.RangeDesc,
			DB:        cfg.DB,
			Clock:     cfg.Clock,
			Stopper:   cfg.Stopper,
			Metrics:   cfg.TxnWaitMetrics,
			Knobs:     cfg.TxnWaitKnobs,
		}),
	}
	return m
}
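
// A minimal, illustrative construction (variable names are placeholders for
// dependencies wired in by the caller, typically the Store owning the
// replica):
//
//   m := NewManager(Config{
//       NodeDesc:       nodeDesc,
//       RangeDesc:      rangeDesc,
//       Settings:       st,
//       DB:             db,
//       Clock:          clock,
//       Stopper:        stopper,
//       IntentResolver: ir,
//   })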

// SequenceReq implements the RequestSequencer interface.
func (m *managerImpl) SequenceReq(
	ctx context.Context, prev *Guard, req Request,
) (*Guard, Response, *Error) {
	var g *Guard
	if prev == nil {
		g = newGuard(req)
		log.Event(ctx, "sequencing request")
	} else {
		g = prev
		g.AssertNoLatches()
		log.Event(ctx, "re-sequencing request")
	}

	resp, err := m.sequenceReqWithGuard(ctx, g, req)
	if resp != nil || err != nil {
		// Ensure that we release the guard if we return a response or an error.
		m.FinishReq(g)
		return nil, resp, err
	}
	return g, nil, nil
}
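
// An illustrative caller flow (a sketch; the real integration lives in the
// replica's request path, and `evaluate` below is a hypothetical stand-in
// for evaluation):
//
//   g, resp, pErr := m.SequenceReq(ctx, nil /* prev */, req)
//   // ... if resp or pErr is non-nil, the guard was already released ...
//   resp, pErr = evaluate(ctx, req)
//   if wiErr, ok := pErr.GetDetail().(*roachpb.WriteIntentError); ok {
//       // Hand the discovered intents to the manager, then re-sequence.
//       g, pErr = m.HandleWriterIntentError(ctx, g, wiErr)
//       // ...
//   }
//   m.FinishReq(g)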

func (m *managerImpl) sequenceReqWithGuard(
	ctx context.Context, g *Guard, req Request,
) (Response, *Error) {
	// Some requests don't need to acquire latches at all.
	if !shouldAcquireLatches(req) {
		log.Event(ctx, "not acquiring latches")
		return nil, nil
	}

	// Provide the manager with an opportunity to intercept the request. It
	// may be able to serve the request directly, and even if not, it may be
	// able to update its internal state based on the request.
	resp, err := m.maybeInterceptReq(ctx, req)
	if resp != nil || err != nil {
		return resp, err
	}

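	// Sequencing loop: acquire latches, scan the lock table, and, if any
	// conflicting locks are found, drop the latches and wait for those
	// locks to be released before retrying from the top.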
	for {
		// Acquire latches for the request. This synchronizes the request
		// with all conflicting in-flight requests.
		log.Event(ctx, "acquiring latches")
		g.lg, err = m.lm.Acquire(ctx, req)
		if err != nil {
			return nil, err
		}

		// Some requests don't need to wait on locks.
		if req.LockSpans.Empty() {
			return nil, nil
		}

		// Scan for conflicting locks.
		log.Event(ctx, "scanning lock table for conflicting locks")
		g.ltg = m.lt.ScanAndEnqueue(g.Req, g.ltg)

		// Wait on conflicting locks, if necessary.
		if g.ltg.ShouldWait() {
			m.lm.Release(g.moveLatchGuard())

			log.Event(ctx, "waiting in lock wait-queues")
			if err := m.ltw.WaitOn(ctx, g.Req, g.ltg); err != nil {
				return nil, err
			}
			continue
		}
		return nil, nil
	}
}

// maybeInterceptReq allows the concurrency manager to intercept requests before
// sequencing and evaluation so that it can immediately act on them. This allows
// the concurrency manager to route certain concurrency control-related requests
// into queues and optionally update its internal state based on the requests.
func (m *managerImpl) maybeInterceptReq(ctx context.Context, req Request) (Response, *Error) {
	switch {
	case req.isSingle(roachpb.PushTxn):
		// If necessary, wait in the txnWaitQueue for the pushee transaction to
		// expire or to move to a finalized state.
		t := req.Requests[0].GetPushTxn()
		resp, err := m.twq.MaybeWaitForPush(ctx, t)
		if err != nil {
			return nil, err
		} else if resp != nil {
			return makeSingleResponse(resp), nil
		}
	case req.isSingle(roachpb.QueryTxn):
		// If necessary, wait in the txnWaitQueue for a transaction state update
		// or for a dependent transaction to change.
		t := req.Requests[0].GetQueryTxn()
		return nil, m.twq.MaybeWaitForQuery(ctx, t)
	default:
		// TODO(nvanbenschoten): in the future, use this hook to update the lock
		// table to allow contending transactions to proceed.
		// for _, arg := range req.Requests {
		// 	switch t := arg.GetInner().(type) {
		// 	case *roachpb.ResolveIntentRequest:
		// 		_ = t
		// 	case *roachpb.ResolveIntentRangeRequest:
		// 		_ = t
		// 	}
		// }
	}
	return nil, nil
}

// shouldAcquireLatches determines whether the request should acquire latches
// before proceeding to evaluate. Latches are used to synchronize with other
// conflicting requests, based on the Spans collected for the request. Most
// request types will want to acquire latches.
func shouldAcquireLatches(req Request) bool {
	switch {
	case req.ReadConsistency != roachpb.CONSISTENT:
		// Only acquire latches for consistent operations.
		return false
	case req.isSingle(roachpb.RequestLease):
		// Do not acquire latches for lease requests. These requests are run on
		// replicas that do not hold the lease, so acquiring latches wouldn't
		// help synchronize with other requests.
		return false
	}
	return true
}

// FinishReq implements the RequestSequencer interface.
func (m *managerImpl) FinishReq(g *Guard) {
	if ltg := g.moveLockTableGuard(); ltg != nil {
		m.lt.Dequeue(ltg)
	}
	if lg := g.moveLatchGuard(); lg != nil {
		m.lm.Release(lg)
	}
	releaseGuard(g)
}

// HandleWriterIntentError implements the ContentionHandler interface.
func (m *managerImpl) HandleWriterIntentError(
	ctx context.Context, g *Guard, t *roachpb.WriteIntentError,
) (*Guard, *Error) {
	if g.ltg == nil {
		log.Fatalf(ctx, "cannot handle WriteIntentError %v for request without "+
			"lockTableGuard; were lock spans declared for this request?", t)
	}

	// Add a discovered lock to the lock-table for each intent and enter each
	// lock's wait-queue. If the lock-table is disabled and one or more of the
	// intents are ignored then we immediately wait on all intents.
	wait := false
	for i := range t.Intents {
		intent := &t.Intents[i]
		added, err := m.lt.AddDiscoveredLock(intent, g.ltg)
		if err != nil {
			log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
		}
		if !added {
			wait = true
		}
	}

	// Release the Guard's latches but continue to remain in lock wait-queues by
	// not releasing lockWaitQueueGuards. We expect the caller of this method to
	// then re-sequence the Request by calling SequenceReq with the un-latched
	// Guard. This is analogous to iterating through the loop in SequenceReq.
	m.lm.Release(g.moveLatchGuard())

	// If the lockTable was disabled then we need to immediately wait on the
	// intents to ensure that they are resolved and moved out of the request's
	// way.
	if wait {
		for i := range t.Intents {
			intent := &t.Intents[i]
			if err := m.ltw.WaitOnLock(ctx, g.Req, intent); err != nil {
				m.FinishReq(g)
				return nil, err
			}
		}
	}

	return g, nil
}

// HandleTransactionPushError implements the ContentionHandler interface.
func (m *managerImpl) HandleTransactionPushError(
	ctx context.Context, g *Guard, t *roachpb.TransactionPushError,
) *Guard {
	m.twq.EnqueueTxn(&t.PusheeTxn)

	// Release the Guard's latches. The PushTxn request should not be in any
	// lock wait-queues because it does not scan the lockTable. We expect the
	// caller of this method to then re-sequence the Request by calling
	// SequenceReq with the un-latched Guard. This is analogous to iterating
	// through the loop in SequenceReq.
	m.lm.Release(g.moveLatchGuard())
	return g
}

// OnLockAcquired implements the LockManager interface.
func (m *managerImpl) OnLockAcquired(ctx context.Context, acq *roachpb.LockAcquisition) {
	if err := m.lt.AcquireLock(&acq.Txn, acq.Key, lock.Exclusive, acq.Durability); err != nil {
		log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
	}
}

// OnLockUpdated implements the LockManager interface.
func (m *managerImpl) OnLockUpdated(ctx context.Context, up *roachpb.LockUpdate) {
	if err := m.lt.UpdateLocks(up); err != nil {
		log.Fatalf(ctx, "%v", errors.HandleAsAssertionFailure(err))
	}
}

// OnTransactionUpdated implements the TransactionManager interface.
func (m *managerImpl) OnTransactionUpdated(ctx context.Context, txn *roachpb.Transaction) {
	m.twq.UpdateTxn(ctx, txn)
}

// GetDependents implements the TransactionManager interface.
func (m *managerImpl) GetDependents(txnID uuid.UUID) []uuid.UUID {
	return m.twq.GetDependents(txnID)
}

// OnRangeDescUpdated implements the RangeStateListener interface.
func (m *managerImpl) OnRangeDescUpdated(desc *roachpb.RangeDescriptor) {
	m.twq.OnRangeDescUpdated(desc)
}

// OnRangeLeaseUpdated implements the RangeStateListener interface.
func (m *managerImpl) OnRangeLeaseUpdated(isLeaseholder bool) {
	if isLeaseholder {
		m.lt.Enable()
		m.twq.Enable()
	} else {
		// Disable all queues - the concurrency manager will no longer be
		// informed about all state transitions to locks and transactions.
		const disable = true
		m.lt.Clear(disable)
		m.twq.Clear(disable)
		// Also clear caches, since they won't be needed any time soon and
		// consume memory.
		m.ltw.ClearCaches()
	}
}

// OnRangeSplit implements the RangeStateListener interface.
func (m *managerImpl) OnRangeSplit() {
	// TODO(nvanbenschoten): it is only essential that we clear the half of
	// the lockTable that contains locks in the key range being split off
	// from the current range. For now though, we clear it all.
	const disable = false
	m.lt.Clear(disable)
	m.twq.Clear(disable)
}

// OnRangeMerge implements the RangeStateListener interface.
func (m *managerImpl) OnRangeMerge() {
	// Disable all queues - the range is being merged into its LHS neighbor.
	// It will no longer be informed about all state transitions to locks and
	// transactions.
	const disable = true
	m.lt.Clear(disable)
	m.twq.Clear(disable)
}

// OnReplicaSnapshotApplied implements the RangeStateListener interface.
func (m *managerImpl) OnReplicaSnapshotApplied() {
	// A snapshot can cause discontinuities in raft entry application. The
	// lockTable expects to observe all lock state transitions on the range
	// through LockManager listener methods. If there's a chance it missed a
	// state transition, it is safer to simply clear the lockTable and rebuild
	// it from persistent intent state by allowing requests to discover locks
	// and inform the manager through calls to HandleWriterIntentError.
	//
	// A range only maintains locks in the lockTable of its leaseholder replica
	// even though it runs a concurrency manager on all replicas. Because of
	// this, we expect it to be very rare that this actually clears any locks.
	// Still, it is possible for the leaseholder replica to receive a snapshot
	// when it is not also the raft leader.
	const disable = false
	m.lt.Clear(disable)
}

// LatchMetrics implements the MetricExporter interface.
func (m *managerImpl) LatchMetrics() (global, local kvserverpb.LatchManagerInfo) {
	return m.lm.Info()
}

// LockTableDebug implements the MetricExporter interface.
func (m *managerImpl) LockTableDebug() string {
	return m.lt.String()
}

// TxnWaitQueue implements the MetricExporter interface.
func (m *managerImpl) TxnWaitQueue() *txnwait.Queue {
	return m.twq.(*txnwait.Queue)
}

func (r *Request) txnMeta() *enginepb.TxnMeta {
	if r.Txn == nil {
		return nil
	}
	return &r.Txn.TxnMeta
}

// readConflictTimestamp returns the maximum timestamp at which the request
// conflicts with locks acquired by other transactions. The request must wait
// for all locks acquired by other transactions at or below this timestamp
// to be released. All locks acquired by other transactions above this
// timestamp are ignored.
func (r *Request) readConflictTimestamp() hlc.Timestamp {
	ts := r.Timestamp
	if r.Txn != nil {
		ts = r.Txn.ReadTimestamp
		ts.Forward(r.Txn.MaxTimestamp)
	}
	return ts
}
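
// Illustrative example: a transaction reading at ReadTimestamp 10 with
// MaxTimestamp 15 must wait on other transactions' locks at timestamps <= 15,
// since a lock in its (10, 15] uncertainty window may protect a value the
// reader is required to observe.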

// writeConflictTimestamp returns the minimum timestamp at which the request
// acquires locks when performing mutations. All writes performed by the
// request must take place at or above this timestamp.
func (r *Request) writeConflictTimestamp() hlc.Timestamp {
	ts := r.Timestamp
	if r.Txn != nil {
		ts = r.Txn.WriteTimestamp
	}
	return ts
}
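
// Illustrative example: a transaction whose provisional commit timestamp was
// pushed from 10 to 20 acquires its locks at 20, the timestamp at which its
// writes will actually be performed.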

func (r *Request) isSingle(m roachpb.Method) bool {
	if len(r.Requests) != 1 {
		return false
	}
	return r.Requests[0].GetInner().Method() == m
}

// Used to avoid allocations.
var guardPool = sync.Pool{
	New: func() interface{} { return new(Guard) },
}

func newGuard(req Request) *Guard {
	g := guardPool.Get().(*Guard)
	g.Req = req
	return g
}

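// releaseGuard zeroes the Guard before returning it to the pool so that the
// pooled object does not retain references to the request or its guards.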
func releaseGuard(g *Guard) {
	*g = Guard{}
	guardPool.Put(g)
}

// LatchSpans returns the maximal set of spans that the request will access.
func (g *Guard) LatchSpans() *spanset.SpanSet {
	return g.Req.LatchSpans
}

// HoldingLatches returns whether the guard is holding latches or not.
func (g *Guard) HoldingLatches() bool {
	return g != nil && g.lg != nil
}

// AssertLatches asserts that the guard is non-nil and holding latches.
func (g *Guard) AssertLatches() {
	if !g.HoldingLatches() {
		panic("expected latches held, found none")
	}
}

// AssertNoLatches asserts that the guard is non-nil and not holding latches.
func (g *Guard) AssertNoLatches() {
	if g.HoldingLatches() {
		panic("unexpected latches held")
	}
}

func (g *Guard) moveLatchGuard() latchGuard {
	lg := g.lg
	g.lg = nil
	return lg
}

func (g *Guard) moveLockTableGuard() lockTableGuard {
	ltg := g.ltg
	g.ltg = nil
	return ltg
}

func makeSingleResponse(r roachpb.Response) Response {
	ru := make(Response, 1)
	ru[0].MustSetInner(r)
	return ru
}