github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/spanlatch/manager.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package spanlatch 12 13 import ( 14 "context" 15 "fmt" 16 "unsafe" 17 18 "github.com/cockroachdb/cockroach/pkg/base" 19 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" 20 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/metric" 25 "github.com/cockroachdb/cockroach/pkg/util/stop" 26 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 27 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 28 ) 29 30 // A Manager maintains an interval tree of key and key range latches. Latch 31 // acquisitions affecting keys or key ranges must wait on already-acquired 32 // latches which overlap their key ranges to be released. 33 // 34 // Latch acquisition attempts invoke Manager.Acquire and provide details about 35 // the spans that they plan to touch and the timestamps they plan to touch them 36 // at. Acquire inserts the latch into the Manager's tree and waits on 37 // prerequisite latch attempts that are already tracked by the Manager. 38 // Manager.Acquire blocks until the latch acquisition completes, at which point 39 // it returns a Guard, which is scoped to the lifetime of the latch ownership. 40 // 41 // When the latches are no longer needed, they are released by invoking 42 // Manager.Release with the Guard returned when the latches were originally 43 // acquired. Doing so removes the latches from the Manager's tree and signals to 44 // dependent latch acquisitions that they no longer need to wait on the released 45 // latches. 46 // 47 // Manager is safe for concurrent use by multiple goroutines. Concurrent access 48 // is made efficient using a copy-on-write technique to capture immutable 49 // snapshots of the type's inner btree structures. Using this strategy, tasks 50 // requiring mutual exclusion are limited to updating the type's trees and 51 // grabbing snapshots. Notably, scanning for and waiting on prerequisite latches 52 // is performed outside of the mutual exclusion zone. This means that the work 53 // performed under lock is linear with respect to the number of spans that a 54 // latch acquisition declares but NOT linear with respect to the number of other 55 // latch attempts that it will wait on. 56 // 57 // Manager's zero value can be used directly. 58 type Manager struct { 59 mu syncutil.Mutex 60 idAlloc uint64 61 scopes [spanset.NumSpanScope]scopedManager 62 63 stopper *stop.Stopper 64 slowReqs *metric.Gauge 65 } 66 67 // scopedManager is a latch manager scoped to either local or global keys. 68 // See spanset.SpanScope. 69 type scopedManager struct { 70 readSet latchList 71 trees [spanset.NumSpanAccess]btree 72 } 73 74 // Make returns an initialized Manager. Using this constructor is optional as 75 // the type's zero value is valid to use directly. 76 func Make(stopper *stop.Stopper, slowReqs *metric.Gauge) Manager { 77 return Manager{ 78 stopper: stopper, 79 slowReqs: slowReqs, 80 } 81 } 82 83 // latches are stored in the Manager's btrees. They represent the latching 84 // of a single key span. 85 type latch struct { 86 id uint64 87 span roachpb.Span 88 ts hlc.Timestamp 89 done *signal 90 next, prev *latch // readSet linked-list. 91 } 92 93 func (la *latch) inReadSet() bool { 94 return la.next != nil 95 } 96 97 //go:generate ../../../util/interval/generic/gen.sh *latch spanlatch 98 99 // Methods required by util/interval/generic type contract. 100 func (la *latch) ID() uint64 { return la.id } 101 func (la *latch) Key() []byte { return la.span.Key } 102 func (la *latch) EndKey() []byte { return la.span.EndKey } 103 func (la *latch) String() string { return fmt.Sprintf("%s@%s", la.span, la.ts) } 104 func (la *latch) New() *latch { return new(latch) } 105 func (la *latch) SetID(v uint64) { la.id = v } 106 func (la *latch) SetKey(v []byte) { la.span.Key = v } 107 func (la *latch) SetEndKey(v []byte) { la.span.EndKey = v } 108 109 // Guard is a handle to a set of acquired latches. It is returned by 110 // Manager.Acquire and accepted by Manager.Release. 111 type Guard struct { 112 done signal 113 // latches [spanset.NumSpanScope][spanset.NumSpanAccess][]latch, but half the size. 114 latchesPtrs [spanset.NumSpanScope][spanset.NumSpanAccess]unsafe.Pointer 115 latchesLens [spanset.NumSpanScope][spanset.NumSpanAccess]int32 116 } 117 118 func (lg *Guard) latches(s spanset.SpanScope, a spanset.SpanAccess) []latch { 119 len := lg.latchesLens[s][a] 120 if len == 0 { 121 return nil 122 } 123 const maxArrayLen = 1 << 31 124 return (*[maxArrayLen]latch)(lg.latchesPtrs[s][a])[:len:len] 125 } 126 127 func (lg *Guard) setLatches(s spanset.SpanScope, a spanset.SpanAccess, latches []latch) { 128 lg.latchesPtrs[s][a] = unsafe.Pointer(&latches[0]) 129 lg.latchesLens[s][a] = int32(len(latches)) 130 } 131 132 func allocGuardAndLatches(nLatches int) (*Guard, []latch) { 133 // Guard would be an ideal candidate for object pooling, but without 134 // reference counting its latches we can't know whether they're still 135 // referenced by other tree snapshots. The latches hold a reference to 136 // the signal living on the Guard, so the guard can't be recycled while 137 // latches still point to it. 138 if nLatches <= 1 { 139 alloc := new(struct { 140 g Guard 141 latches [1]latch 142 }) 143 return &alloc.g, alloc.latches[:nLatches] 144 } else if nLatches <= 2 { 145 alloc := new(struct { 146 g Guard 147 latches [2]latch 148 }) 149 return &alloc.g, alloc.latches[:nLatches] 150 } else if nLatches <= 4 { 151 alloc := new(struct { 152 g Guard 153 latches [4]latch 154 }) 155 return &alloc.g, alloc.latches[:nLatches] 156 } else if nLatches <= 8 { 157 alloc := new(struct { 158 g Guard 159 latches [8]latch 160 }) 161 return &alloc.g, alloc.latches[:nLatches] 162 } 163 return new(Guard), make([]latch, nLatches) 164 } 165 166 func newGuard(spans *spanset.SpanSet) *Guard { 167 nLatches := spans.Len() 168 guard, latches := allocGuardAndLatches(nLatches) 169 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 170 for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ { 171 ss := spans.GetSpans(a, s) 172 n := len(ss) 173 if n == 0 { 174 continue 175 } 176 177 ssLatches := latches[:n] 178 for i := range ssLatches { 179 latch := &latches[i] 180 latch.span = ss[i].Span 181 latch.done = &guard.done 182 latch.ts = ss[i].Timestamp 183 // latch.setID() in Manager.insert, under lock. 184 } 185 guard.setLatches(s, a, ssLatches) 186 latches = latches[n:] 187 } 188 } 189 if len(latches) != 0 { 190 panic("alloc too large") 191 } 192 return guard 193 } 194 195 // Acquire acquires latches from the Manager for each of the provided spans, at 196 // the specified timestamp. In doing so, it waits for latches over all 197 // overlapping spans to be released before returning. If the provided context 198 // is canceled before the method is done waiting for overlapping latches to 199 // be released, it stops waiting and releases all latches that it has already 200 // acquired. 201 // 202 // It returns a Guard which must be provided to Release. 203 func (m *Manager) Acquire(ctx context.Context, spans *spanset.SpanSet) (*Guard, error) { 204 lg, snap := m.sequence(spans) 205 defer snap.close() 206 207 err := m.wait(ctx, lg, snap) 208 if err != nil { 209 m.Release(lg) 210 return nil, err 211 } 212 return lg, nil 213 } 214 215 // sequence locks the manager, captures an immutable snapshot, inserts latches 216 // for each of the specified spans into the manager's interval trees, and 217 // unlocks the manager. The role of the method is to sequence latch acquisition 218 // attempts. 219 func (m *Manager) sequence(spans *spanset.SpanSet) (*Guard, snapshot) { 220 lg := newGuard(spans) 221 222 m.mu.Lock() 223 snap := m.snapshotLocked(spans) 224 m.insertLocked(lg) 225 m.mu.Unlock() 226 return lg, snap 227 } 228 229 // snapshot is an immutable view into the latch manager's state. 230 type snapshot struct { 231 trees [spanset.NumSpanScope][spanset.NumSpanAccess]btree 232 } 233 234 // close closes the snapshot and releases any associated resources. 235 func (sn *snapshot) close() { 236 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 237 for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ { 238 sn.trees[s][a].Reset() 239 } 240 } 241 } 242 243 // snapshotLocked captures an immutable snapshot of the latch manager. It takes 244 // a spanset to limit the amount of state captured. 245 func (m *Manager) snapshotLocked(spans *spanset.SpanSet) snapshot { 246 var snap snapshot 247 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 248 sm := &m.scopes[s] 249 reading := len(spans.GetSpans(spanset.SpanReadOnly, s)) > 0 250 writing := len(spans.GetSpans(spanset.SpanReadWrite, s)) > 0 251 252 if writing { 253 sm.flushReadSetLocked() 254 snap.trees[s][spanset.SpanReadOnly] = sm.trees[spanset.SpanReadOnly].Clone() 255 } 256 if writing || reading { 257 snap.trees[s][spanset.SpanReadWrite] = sm.trees[spanset.SpanReadWrite].Clone() 258 } 259 } 260 return snap 261 } 262 263 // flushReadSetLocked flushes the read set into the read interval tree. 264 func (sm *scopedManager) flushReadSetLocked() { 265 for sm.readSet.len > 0 { 266 latch := sm.readSet.front() 267 sm.readSet.remove(latch) 268 sm.trees[spanset.SpanReadOnly].Set(latch) 269 } 270 } 271 272 // insertLocked inserts the latches owned by the provided Guard into the 273 // Manager. 274 func (m *Manager) insertLocked(lg *Guard) { 275 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 276 sm := &m.scopes[s] 277 for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ { 278 latches := lg.latches(s, a) 279 for i := range latches { 280 latch := &latches[i] 281 latch.id = m.nextIDLocked() 282 switch a { 283 case spanset.SpanReadOnly: 284 // Add reads to the readSet. They only need to enter 285 // the read tree if they're flushed by a write capturing 286 // a snapshot. 287 sm.readSet.pushBack(latch) 288 case spanset.SpanReadWrite: 289 // Add writes directly to the write tree. 290 sm.trees[spanset.SpanReadWrite].Set(latch) 291 default: 292 panic("unknown access") 293 } 294 } 295 } 296 } 297 } 298 299 func (m *Manager) nextIDLocked() uint64 { 300 m.idAlloc++ 301 return m.idAlloc 302 } 303 304 // ignoreFn is used for non-interference of earlier reads with later writes. 305 // 306 // However, this is only desired for the global scope. Reads and writes to local 307 // keys are specified to always interfere, regardless of their timestamp. This 308 // is done to avoid confusion with local keys declared as part of proposer 309 // evaluated KV. 310 // 311 // This is also disabled in the global scope if either of the timestamps are 312 // empty. In those cases, we consider the latch without a timestamp to be a 313 // non-MVCC operation that affects all timestamps in the key range. 314 type ignoreFn func(ts, other hlc.Timestamp) bool 315 316 func ignoreLater(ts, other hlc.Timestamp) bool { return !ts.IsEmpty() && ts.Less(other) } 317 func ignoreEarlier(ts, other hlc.Timestamp) bool { return !other.IsEmpty() && other.Less(ts) } 318 func ignoreNothing(ts, other hlc.Timestamp) bool { return false } 319 320 // wait waits for all interfering latches in the provided snapshot to complete 321 // before returning. 322 func (m *Manager) wait(ctx context.Context, lg *Guard, snap snapshot) error { 323 timer := timeutil.NewTimer() 324 timer.Reset(base.SlowRequestThreshold) 325 defer timer.Stop() 326 327 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 328 tr := &snap.trees[s] 329 for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ { 330 latches := lg.latches(s, a) 331 for i := range latches { 332 latch := &latches[i] 333 switch a { 334 case spanset.SpanReadOnly: 335 // Wait for writes at equal or lower timestamps. 336 it := tr[spanset.SpanReadWrite].MakeIter() 337 if err := m.iterAndWait(ctx, timer, &it, latch, ignoreLater); err != nil { 338 return err 339 } 340 case spanset.SpanReadWrite: 341 // Wait for all other writes. 342 // 343 // It is cheaper to wait on an already released latch than 344 // it is an unreleased latch so we prefer waiting on longer 345 // latches first. We expect writes to take longer than reads 346 // to release their latches, so we wait on them first. 347 it := tr[spanset.SpanReadWrite].MakeIter() 348 if err := m.iterAndWait(ctx, timer, &it, latch, ignoreNothing); err != nil { 349 return err 350 } 351 // Wait for reads at equal or higher timestamps. 352 it = tr[spanset.SpanReadOnly].MakeIter() 353 if err := m.iterAndWait(ctx, timer, &it, latch, ignoreEarlier); err != nil { 354 return err 355 } 356 default: 357 panic("unknown access") 358 } 359 } 360 } 361 } 362 return nil 363 } 364 365 // iterAndWait uses the provided iterator to wait on all latches that overlap 366 // with the search latch and which should not be ignored given their timestamp 367 // and the supplied ignoreFn. 368 func (m *Manager) iterAndWait( 369 ctx context.Context, t *timeutil.Timer, it *iterator, wait *latch, ignore ignoreFn, 370 ) error { 371 for it.FirstOverlap(wait); it.Valid(); it.NextOverlap(wait) { 372 held := it.Cur() 373 if held.done.signaled() { 374 continue 375 } 376 if ignore(wait.ts, held.ts) { 377 continue 378 } 379 if err := m.waitForSignal(ctx, t, wait, held); err != nil { 380 return err 381 } 382 } 383 return nil 384 } 385 386 // waitForSignal waits for the latch that is currently held to be signaled. 387 func (m *Manager) waitForSignal(ctx context.Context, t *timeutil.Timer, wait, held *latch) error { 388 for { 389 select { 390 case <-held.done.signalChan(): 391 return nil 392 case <-t.C: 393 t.Read = true 394 defer t.Reset(base.SlowRequestThreshold) 395 396 log.Warningf(ctx, "have been waiting %s to acquire latch %s, held by %s", 397 base.SlowRequestThreshold, wait, held) 398 if m.slowReqs != nil { 399 m.slowReqs.Inc(1) 400 defer m.slowReqs.Dec(1) 401 } 402 case <-ctx.Done(): 403 log.VEventf(ctx, 2, "%s while acquiring latch %s, held by %s", ctx.Err(), wait, held) 404 return ctx.Err() 405 case <-m.stopper.ShouldQuiesce(): 406 // While shutting down, requests may acquire 407 // latches and never release them. 408 return &roachpb.NodeUnavailableError{} 409 } 410 } 411 } 412 413 // Release releases the latches held by the provided Guard. After being called, 414 // dependent latch acquisition attempts can complete if not blocked on any other 415 // owned latches. 416 func (m *Manager) Release(lg *Guard) { 417 lg.done.signal() 418 419 m.mu.Lock() 420 m.removeLocked(lg) 421 m.mu.Unlock() 422 } 423 424 // removeLocked removes the latches owned by the provided Guard from the 425 // Manager. Must be called with mu held. 426 func (m *Manager) removeLocked(lg *Guard) { 427 for s := spanset.SpanScope(0); s < spanset.NumSpanScope; s++ { 428 sm := &m.scopes[s] 429 for a := spanset.SpanAccess(0); a < spanset.NumSpanAccess; a++ { 430 latches := lg.latches(s, a) 431 for i := range latches { 432 latch := &latches[i] 433 if latch.inReadSet() { 434 sm.readSet.remove(latch) 435 } else { 436 sm.trees[a].Delete(latch) 437 } 438 } 439 } 440 } 441 } 442 443 // Info returns information about the state of the Manager. 444 func (m *Manager) Info() (global, local kvserverpb.LatchManagerInfo) { 445 m.mu.Lock() 446 defer m.mu.Unlock() 447 global = m.scopes[spanset.SpanGlobal].infoLocked() 448 local = m.scopes[spanset.SpanLocal].infoLocked() 449 return global, local 450 } 451 452 func (sm *scopedManager) infoLocked() kvserverpb.LatchManagerInfo { 453 var info kvserverpb.LatchManagerInfo 454 info.ReadCount = int64(sm.trees[spanset.SpanReadOnly].Len() + sm.readSet.len) 455 info.WriteCount = int64(sm.trees[spanset.SpanReadWrite].Len()) 456 return info 457 }