github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/spanlatch/manager.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package spanlatch

import (
	"context"
	"fmt"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

// A Manager maintains an interval tree of key and key range latches. Latch
// acquisitions affecting keys or key ranges must wait on already-acquired
// latches which overlap their key ranges to be released.
//
// Latch acquisition attempts invoke Manager.Acquire and provide details about
// the spans that they plan to touch and the timestamps they plan to touch them
// at. Acquire inserts the latch into the Manager's tree and waits on
// prerequisite latch attempts that are already tracked by the Manager.
// Manager.Acquire blocks until the latch acquisition completes, at which point
// it returns a Guard, which is scoped to the lifetime of the latch ownership.
//
// When the latches are no longer needed, they are released by invoking
// Manager.Release with the Guard returned when the latches were originally
// acquired. Doing so removes the latches from the Manager's tree and signals to
// dependent latch acquisitions that they no longer need to wait on the released
// latches.
//
// Manager is safe for concurrent use by multiple goroutines. Concurrent access
// is made efficient using a copy-on-write technique to capture immutable
// snapshots of the type's inner btree structures. Using this strategy, tasks
// requiring mutual exclusion are limited to updating the type's trees and
// grabbing snapshots. Notably, scanning for and waiting on prerequisite latches
// is performed outside of the mutual exclusion zone. This means that the work
// performed under lock is linear with respect to the number of spans that a
// latch acquisition declares but NOT linear with respect to the number of other
// latch attempts that it will wait on.
//
// Manager's zero value can be used directly.
type Manager struct {
	mu      syncutil.Mutex
	idAlloc uint64
	scopes  [spanset.NumSpanScope]scopedManager

	stopper  *stop.Stopper
	slowReqs *metric.Gauge
}

// scopedManager is a latch manager scoped to either local or global keys.
// See spanset.SpanScope.
type scopedManager struct {
	readSet latchList
	trees   [spanset.NumSpanAccess]btree
}

// Make returns an initialized Manager. Using this constructor is optional as
// the type's zero value is valid to use directly.
func Make(stopper *stop.Stopper, slowReqs *metric.Gauge) Manager {
	return Manager{
		stopper:  stopper,
		slowReqs: slowReqs,
	}
}
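
// exampleAcquireRelease is an illustrative sketch, not part of the original
// file. It shows the Acquire/Release lifecycle described in the Manager
// comment above. The span, timestamp, and function name are hypothetical,
// and it assumes spanset.SpanSet.AddMVCC and stop.NewStopper behave as they
// do elsewhere in this tree.
func exampleAcquireRelease() error {
	ctx := context.Background()
	stopper := stop.NewStopper()
	defer stopper.Stop(ctx)

	m := Make(stopper, nil /* slowReqs */)

	// Declare the spans the request plans to touch and the timestamp it
	// plans to touch them at.
	var spans spanset.SpanSet
	spans.AddMVCC(spanset.SpanReadWrite,
		roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
		hlc.Timestamp{WallTime: 10})

	// Acquire blocks until all overlapping latches sequenced before this
	// call have been released.
	lg, err := m.Acquire(ctx, &spans)
	if err != nil {
		return err
	}
	// ... evaluate the request while holding the latches ...
	m.Release(lg)
	return nil
}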

// latches are stored in the Manager's btrees. They represent the latching
// of a single key span.
type latch struct {
	id         uint64
	span       roachpb.Span
	ts         hlc.Timestamp
	done       *signal
	next, prev *latch // readSet linked-list.
}

func (la *latch) inReadSet() bool {
	return la.next != nil
}

//go:generate ../../../util/interval/generic/gen.sh *latch spanlatch

// Methods required by util/interval/generic type contract.
func (la *latch) ID() uint64         { return la.id }
func (la *latch) Key() []byte        { return la.span.Key }
func (la *latch) EndKey() []byte     { return la.span.EndKey }
func (la *latch) String() string     { return fmt.Sprintf("%s@%s", la.span, la.ts) }
func (la *latch) New() *latch        { return new(latch) }
func (la *latch) SetID(v uint64)     { la.id = v }
func (la *latch) SetKey(v []byte)    { la.span.Key = v }
func (la *latch) SetEndKey(v []byte) { la.span.EndKey = v }

// Guard is a handle to a set of acquired latches. It is returned by
// Manager.Acquire and accepted by Manager.Release.
type Guard struct {
	done signal
	// latches [spanset.NumSpanScope][spanset.NumSpanAccess][]latch, but half the size.
	latchesPtrs [spanset.NumSpanScope][spanset.NumSpanAccess]unsafe.Pointer
	latchesLens [spanset.NumSpanScope][spanset.NumSpanAccess]int32
}

func (lg *Guard) latches(s spanset.SpanScope, a spanset.SpanAccess) []latch {
	len := lg.latchesLens[s][a]
	if len == 0 {
		return nil
	}
	const maxArrayLen = 1 << 31
	return (*[maxArrayLen]latch)(lg.latchesPtrs[s][a])[:len:len]
}

func (lg *Guard) setLatches(s spanset.SpanScope, a spanset.SpanAccess, latches []latch) {
	lg.latchesPtrs[s][a] = unsafe.Pointer(&latches[0])
	lg.latchesLens[s][a] = int32(len(latches))
}
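
// The constants below are an illustrative sketch, not part of the original
// file; their names are hypothetical. They spell out the space saving that
// the "half the size" comment on Guard refers to, assuming a 64-bit
// platform: a slice header costs three words per scope/access pair, while
// the pointer-plus-int32 encoding above costs one word plus four bytes.
const (
	illustrativeSliceHeaderBytes = unsafe.Sizeof([]latch(nil))                                  // 24 bytes: ptr + len + cap.
	illustrativePackedBytes      = unsafe.Sizeof(unsafe.Pointer(nil)) + unsafe.Sizeof(int32(0)) // 12 bytes: ptr + int32.
)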

func allocGuardAndLatches(nLatches int) (*Guard, []latch) {
	// Guard would be an ideal candidate for object pooling, but without
	// reference counting its latches we can't know whether they're still
	// referenced by other tree snapshots. The latches hold a reference to
	// the signal living on the Guard, so the guard can't be recycled while
	// latches still point to it.
	if nLatches <= 1 {
		alloc := new(struct {
			g       Guard
			latches [1]latch
		})
		return &alloc.g, alloc.latches[:nLatches]
	} else if nLatches <= 2 {
		alloc := new(struct {
			g       Guard
			latches [2]latch
		})
		return &alloc.g, alloc.latches[:nLatches]
	} else if nLatches <= 4 {
		alloc := new(struct {
			g       Guard
			latches [4]latch
		})
		return &alloc.g, alloc.latches[:nLatches]
	} else if nLatches <= 8 {
		alloc := new(struct {
			g       Guard
			latches [8]latch
		})
		return &alloc.g, alloc.latches[:nLatches]
	}
	return new(Guard), make([]latch, nLatches)
}
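
// exampleAllocSizeClass is an illustrative sketch, not part of the original
// file; the function name is hypothetical. A request declaring three latches
// falls into the four-latch size class above, so the Guard and its latch
// storage come from one allocation with a single slot of slack.
func exampleAllocSizeClass() *Guard {
	g, latches := allocGuardAndLatches(3)
	_ = latches // len(latches) == 3, cap(latches) == 4.
	return g
}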

func newGuard(spans *spanset.SpanSet) *Guard {
	nLatches := spans.Len()
	guard, latches := allocGuardAndLatches(nLatches)
	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ {
			ss := spans.GetSpans(a, s)
			n := len(ss)
			if n == 0 {
				continue
			}

			ssLatches := latches[:n]
			for i := range ssLatches {
				latch := &latches[i]
				latch.span = ss[i].Span
				latch.done = &guard.done
				latch.ts = ss[i].Timestamp
				// latch.setID() in Manager.insert, under lock.
			}
			guard.setLatches(s, a, ssLatches)
			latches = latches[n:]
		}
	}
	if len(latches) != 0 {
		panic("alloc too large")
	}
	return guard
}

// Acquire acquires latches from the Manager for each of the provided spans, at
// the specified timestamp. In doing so, it waits for latches over all
// overlapping spans to be released before returning. If the provided context
// is canceled before the method is done waiting for overlapping latches to
// be released, it stops waiting and releases all latches that it has already
// acquired.
//
// It returns a Guard which must be provided to Release.
func (m *Manager) Acquire(ctx context.Context, spans *spanset.SpanSet) (*Guard, error) {
	lg, snap := m.sequence(spans)
	defer snap.close()

	err := m.wait(ctx, lg, snap)
	if err != nil {
		m.Release(lg)
		return nil, err
	}
	return lg, nil
}
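
// exampleContextCancellation is an illustrative sketch, not part of the
// original file; the function name is hypothetical. It shows the cancellation
// behavior documented on Acquire: given a Manager built with Make and a
// running Stopper, and a SpanSet that declares at least one write, a second,
// conflicting acquisition gives up once its context is canceled and leaves
// nothing behind in the Manager.
func exampleContextCancellation(m *Manager, spans *spanset.SpanSet) error {
	lg1, err := m.Acquire(context.Background(), spans)
	if err != nil {
		return err
	}
	defer m.Release(lg1)

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel before (or while) waiting on lg1's latches.
	lg2, err := m.Acquire(ctx, spans)
	if err != nil {
		// Expected path: the canceled context aborts the wait, and Acquire
		// has already released the latches that lg2 would have held.
		return nil
	}
	m.Release(lg2)
	return nil
}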

// sequence locks the manager, captures an immutable snapshot, inserts latches
// for each of the specified spans into the manager's interval trees, and
// unlocks the manager. The role of the method is to sequence latch acquisition
// attempts.
func (m *Manager) sequence(spans *spanset.SpanSet) (*Guard, snapshot) {
	lg := newGuard(spans)

	m.mu.Lock()
	snap := m.snapshotLocked(spans)
	m.insertLocked(lg)
	m.mu.Unlock()
	return lg, snap
}

// snapshot is an immutable view into the latch manager's state.
type snapshot struct {
	trees [spanset.NumSpanScope][spanset.NumSpanAccess]btree
}

// close closes the snapshot and releases any associated resources.
func (sn *snapshot) close() {
	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ {
			sn.trees[s][a].Reset()
		}
	}
}

// snapshotLocked captures an immutable snapshot of the latch manager. It takes
// a spanset to limit the amount of state captured.
func (m *Manager) snapshotLocked(spans *spanset.SpanSet) snapshot {
	var snap snapshot
	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		sm := &m.scopes[s]
		reading := len(spans.GetSpans(spanset.SpanReadOnly, s)) > 0
		writing := len(spans.GetSpans(spanset.SpanReadWrite, s)) > 0

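		// Only a request that writes in this scope needs to see existing
		// reads, so the read tree is cloned (and the pending readSet flushed
		// into it) only when writing. Both readers and writers must wait on
		// existing writes, so the write tree is cloned whenever either is
		// declared.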
		if writing {
			sm.flushReadSetLocked()
			snap.trees[s][spanset.SpanReadOnly] = sm.trees[spanset.SpanReadOnly].Clone()
		}
		if writing || reading {
			snap.trees[s][spanset.SpanReadWrite] = sm.trees[spanset.SpanReadWrite].Clone()
		}
	}
	return snap
}

// flushReadSetLocked flushes the read set into the read interval tree.
func (sm *scopedManager) flushReadSetLocked() {
	for sm.readSet.len > 0 {
		latch := sm.readSet.front()
		sm.readSet.remove(latch)
		sm.trees[spanset.SpanReadOnly].Set(latch)
	}
}

// insertLocked inserts the latches owned by the provided Guard into the
// Manager.
func (m *Manager) insertLocked(lg *Guard) {
	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		sm := &m.scopes[s]
		for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ {
			latches := lg.latches(s, a)
			for i := range latches {
				latch := &latches[i]
				latch.id = m.nextIDLocked()
				switch a {
				case spanset.SpanReadOnly:
					// Add reads to the readSet. They only need to enter
					// the read tree if they're flushed by a write capturing
					// a snapshot.
					sm.readSet.pushBack(latch)
				case spanset.SpanReadWrite:
					// Add writes directly to the write tree.
					sm.trees[spanset.SpanReadWrite].Set(latch)
				default:
					panic("unknown access")
				}
			}
		}
	}
}

func (m *Manager) nextIDLocked() uint64 {
	m.idAlloc++
	return m.idAlloc
}

// ignoreFn is used for non-interference of earlier reads with later writes.
//
// However, this is only desired for the global scope. Reads and writes to local
// keys are specified to always interfere, regardless of their timestamp. This
// is done to avoid confusion with local keys declared as part of proposer
// evaluated KV.
//
// This is also disabled in the global scope if either of the timestamps is
// empty. In those cases, we consider the latch without a timestamp to be a
// non-MVCC operation that affects all timestamps in the key range.
type ignoreFn func(ts, other hlc.Timestamp) bool

func ignoreLater(ts, other hlc.Timestamp) bool   { return !ts.IsEmpty() && ts.Less(other) }
func ignoreEarlier(ts, other hlc.Timestamp) bool { return !other.IsEmpty() && other.Less(ts) }
func ignoreNothing(ts, other hlc.Timestamp) bool { return false }
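
// exampleIgnoreRules is an illustrative sketch, not part of the original
// file; the timestamps are hypothetical. It spells out a few concrete cases
// of the rules above as they are applied by wait below.
func exampleIgnoreRules() []bool {
	read, laterWrite := hlc.Timestamp{WallTime: 10}, hlc.Timestamp{WallTime: 15}
	write, earlierRead := hlc.Timestamp{WallTime: 10}, hlc.Timestamp{WallTime: 5}
	return []bool{
		ignoreLater(read, laterWrite),            // true: a read skips writes above its timestamp.
		ignoreEarlier(write, earlierRead),        // true: a write skips reads below its timestamp.
		ignoreNothing(write, laterWrite),         // false: writes never skip other writes.
		ignoreLater(hlc.Timestamp{}, laterWrite), // false: an empty timestamp always interferes.
	}
}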

// wait waits for all interfering latches in the provided snapshot to complete
// before returning.
func (m *Manager) wait(ctx context.Context, lg *Guard, snap snapshot) error {
	timer := timeutil.NewTimer()
	timer.Reset(base.SlowRequestThreshold)
	defer timer.Stop()

	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		tr := &snap.trees[s]
		for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ {
			latches := lg.latches(s, a)
			for i := range latches {
				latch := &latches[i]
				switch a {
				case spanset.SpanReadOnly:
					// Wait for writes at equal or lower timestamps.
					it := tr[spanset.SpanReadWrite].MakeIter()
					if err := m.iterAndWait(ctx, timer, &it, latch, ignoreLater); err != nil {
						return err
					}
				case spanset.SpanReadWrite:
					// Wait for all other writes.
					//
					// It is cheaper to wait on an already released latch than
					// on one that is still held, so we prefer to wait first on
					// the latches that are likely to be held the longest. We
					// expect writes to take longer than reads to release their
					// latches, so we wait on them first.
					it := tr[spanset.SpanReadWrite].MakeIter()
					if err := m.iterAndWait(ctx, timer, &it, latch, ignoreNothing); err != nil {
						return err
					}
					// Wait for reads at equal or higher timestamps.
					it = tr[spanset.SpanReadOnly].MakeIter()
					if err := m.iterAndWait(ctx, timer, &it, latch, ignoreEarlier); err != nil {
						return err
					}
				default:
					panic("unknown access")
				}
			}
		}
	}
	return nil
}

// iterAndWait uses the provided iterator to wait on all latches that overlap
// with the search latch and which should not be ignored given their timestamp
// and the supplied ignoreFn.
func (m *Manager) iterAndWait(
	ctx context.Context, t *timeutil.Timer, it *iterator, wait *latch, ignore ignoreFn,
) error {
	for it.FirstOverlap(wait); it.Valid(); it.NextOverlap(wait) {
		held := it.Cur()
		if held.done.signaled() {
			continue
		}
		if ignore(wait.ts, held.ts) {
			continue
		}
		if err := m.waitForSignal(ctx, t, wait, held); err != nil {
			return err
		}
	}
	return nil
}

// waitForSignal waits for the latch that is currently held to be signaled.
func (m *Manager) waitForSignal(ctx context.Context, t *timeutil.Timer, wait, held *latch) error {
	for {
		select {
		case <-held.done.signalChan():
			return nil
		case <-t.C:
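			// Per the timeutil.Timer contract, mark the timer as read before
			// re-arming it. The Reset is deferred so that the timer restarts
			// only once this wait finishes, giving the next overlapping latch
			// a full SlowRequestThreshold before it too is reported as slow.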
			t.Read = true
			defer t.Reset(base.SlowRequestThreshold)

			log.Warningf(ctx, "have been waiting %s to acquire latch %s, held by %s",
				base.SlowRequestThreshold, wait, held)
			if m.slowReqs != nil {
				m.slowReqs.Inc(1)
				defer m.slowReqs.Dec(1)
			}
		case <-ctx.Done():
			log.VEventf(ctx, 2, "%s while acquiring latch %s, held by %s", ctx.Err(), wait, held)
			return ctx.Err()
		case <-m.stopper.ShouldQuiesce():
			// While shutting down, requests may acquire
			// latches and never release them.
			return &roachpb.NodeUnavailableError{}
		}
	}
}

// Release releases the latches held by the provided Guard. After being called,
// dependent latch acquisition attempts can complete if not blocked on any other
// owned latches.
func (m *Manager) Release(lg *Guard) {
	lg.done.signal()

	m.mu.Lock()
	m.removeLocked(lg)
	m.mu.Unlock()
}

// removeLocked removes the latches owned by the provided Guard from the
// Manager. Must be called with mu held.
func (m *Manager) removeLocked(lg *Guard) {
	for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ {
		sm := &m.scopes[s]
		for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ {
			latches := lg.latches(s, a)
			for i := range latches {
				latch := &latches[i]
				if latch.inReadSet() {
					sm.readSet.remove(latch)
				} else {
					sm.trees[a].Delete(latch)
				}
			}
		}
	}
}

// Info returns information about the state of the Manager.
func (m *Manager) Info() (global, local kvserverpb.LatchManagerInfo) {
	m.mu.Lock()
	defer m.mu.Unlock()
	global = m.scopes[spanset.SpanGlobal].infoLocked()
	local = m.scopes[spanset.SpanLocal].infoLocked()
	return global, local
}

func (sm *scopedManager) infoLocked() kvserverpb.LatchManagerInfo {
	var info kvserverpb.LatchManagerInfo
	info.ReadCount = int64(sm.trees[spanset.SpanReadOnly].Len() + sm.readSet.len)
	info.WriteCount = int64(sm.trees[spanset.SpanReadWrite].Len())
	return info
}
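
// exampleLatchCounts is an illustrative sketch, not part of the original
// file; the function name is hypothetical. It shows how a caller might
// aggregate Info's per-scope counts, e.g. for a metrics or admin endpoint.
func exampleLatchCounts(m *Manager) (reads, writes int64) {
	global, local := m.Info()
	return global.ReadCount + local.ReadCount, global.WriteCount + local.WriteCount
}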