// Source: github.com/m3db/m3@v1.5.0/src/dbnode/storage/block/block.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package block 22 23 import ( 24 "errors" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/m3db/m3/src/dbnode/namespace" 30 "github.com/m3db/m3/src/dbnode/ts" 31 "github.com/m3db/m3/src/dbnode/x/xio" 32 "github.com/m3db/m3/src/x/context" 33 "github.com/m3db/m3/src/x/ident" 34 xtime "github.com/m3db/m3/src/x/time" 35 ) 36 37 var ( 38 errReadFromClosedBlock = errors.New("attempt to read from a closed block") 39 errTriedToMergeBlockFromDisk = errors.New("[invariant violated] tried to merge a block that was retrieved from disk") 40 41 timeZero = xtime.UnixNano(0) 42 ) 43 44 type dbBlock struct { 45 sync.RWMutex 46 47 nsCtx namespace.Context 48 opts Options 49 startUnixNanos xtime.UnixNano 50 segment ts.Segment 51 length int 52 53 blockSize time.Duration 54 55 lastReadUnixNanos int64 56 57 mergeTarget DatabaseBlock 58 59 seriesID ident.ID 60 61 onEvicted OnEvictedFromWiredList 62 63 // listState contains state that the Wired List requires in order to track a block's 64 // position in the wired list. All the state in this struct is "owned" by the wired 65 // list and should only be accessed by the Wired List itself. Does not require any 66 // synchronization because the WiredList is not concurrent. 67 listState listState 68 69 checksum uint32 70 71 wasRetrievedFromDisk bool 72 closed bool 73 } 74 75 type listState struct { 76 next DatabaseBlock 77 prev DatabaseBlock 78 enteredListAtUnixNano int64 79 } 80 81 // NewDatabaseBlock creates a new DatabaseBlock instance. 
82 func NewDatabaseBlock( 83 start xtime.UnixNano, 84 blockSize time.Duration, 85 segment ts.Segment, 86 opts Options, 87 nsCtx namespace.Context, 88 ) DatabaseBlock { 89 b := &dbBlock{ 90 nsCtx: nsCtx, 91 opts: opts, 92 startUnixNanos: start, 93 blockSize: blockSize, 94 closed: false, 95 } 96 if segment.Len() > 0 { 97 b.resetSegmentWithLock(segment) 98 } 99 return b 100 } 101 102 func (b *dbBlock) StartTime() xtime.UnixNano { 103 b.RLock() 104 start := b.startWithRLock() 105 b.RUnlock() 106 return start 107 } 108 109 func (b *dbBlock) BlockSize() time.Duration { 110 b.RLock() 111 size := b.blockSize 112 b.RUnlock() 113 return size 114 } 115 116 func (b *dbBlock) startWithRLock() xtime.UnixNano { 117 return b.startUnixNanos 118 } 119 120 func (b *dbBlock) SetLastReadTime(value xtime.UnixNano) { 121 // Use an int64 to avoid needing a write lock for 122 // this high frequency called method (i.e. each individual 123 // read needing a write lock would be excessive) 124 atomic.StoreInt64(&b.lastReadUnixNanos, int64(value)) 125 } 126 127 func (b *dbBlock) LastReadTime() xtime.UnixNano { 128 return xtime.UnixNano(atomic.LoadInt64(&b.lastReadUnixNanos)) 129 } 130 131 func (b *dbBlock) Empty() bool { 132 b.RLock() 133 empty := b.length == 0 134 b.RUnlock() 135 return empty 136 } 137 138 func (b *dbBlock) Len() int { 139 b.RLock() 140 length := b.length 141 b.RUnlock() 142 return length 143 } 144 145 func (b *dbBlock) Checksum() (uint32, error) { 146 b.RLock() 147 checksum := b.checksum 148 hasMergeTarget := b.mergeTarget != nil 149 b.RUnlock() 150 151 if !hasMergeTarget { 152 return checksum, nil 153 } 154 155 b.Lock() 156 defer b.Unlock() 157 // Since we released the lock temporarily we need to check again. 
158 hasMergeTarget = b.mergeTarget != nil 159 if !hasMergeTarget { 160 return b.checksum, nil 161 } 162 163 tempCtx := b.opts.ContextPool().Get() 164 165 stream, err := b.streamWithRLock(tempCtx) 166 if err != nil { 167 return 0, err 168 } 169 170 // This will merge the existing stream with the merge target's stream, 171 // as well as recalculate and store the new checksum. 172 err = b.forceMergeWithLock(tempCtx, stream) 173 if err != nil { 174 return 0, err 175 } 176 177 return b.checksum, nil 178 } 179 180 func (b *dbBlock) Stream(blocker context.Context) (xio.BlockReader, error) { 181 lockUpgraded := false 182 183 b.RLock() 184 defer func() { 185 if lockUpgraded { 186 b.Unlock() 187 } else { 188 b.RUnlock() 189 } 190 }() 191 192 if b.closed { 193 return xio.EmptyBlockReader, errReadFromClosedBlock 194 } 195 196 if b.mergeTarget == nil { 197 return b.streamWithRLock(blocker) 198 } 199 200 b.RUnlock() 201 lockUpgraded = true 202 b.Lock() 203 204 // Need to re-check everything since we upgraded the lock. 205 if b.closed { 206 return xio.EmptyBlockReader, errReadFromClosedBlock 207 } 208 209 stream, err := b.streamWithRLock(blocker) 210 if err != nil { 211 return xio.EmptyBlockReader, err 212 } 213 214 if b.mergeTarget == nil { 215 return stream, nil 216 } 217 218 // This will merge the existing stream with the merge target's stream, 219 // as well as recalculate and store the new checksum. 220 err = b.forceMergeWithLock(blocker, stream) 221 if err != nil { 222 return xio.EmptyBlockReader, err 223 } 224 225 // This will return a copy of the data so that it is still safe to 226 // close the block after calling this method. 
227 return b.streamWithRLock(blocker) 228 } 229 230 func (b *dbBlock) HasMergeTarget() bool { 231 b.RLock() 232 hasMergeTarget := b.mergeTarget != nil 233 b.RUnlock() 234 return hasMergeTarget 235 } 236 237 func (b *dbBlock) WasRetrievedFromDisk() bool { 238 b.RLock() 239 wasRetrieved := b.wasRetrievedFromDisk 240 b.RUnlock() 241 return wasRetrieved 242 } 243 244 func (b *dbBlock) Merge(other DatabaseBlock) error { 245 b.Lock() 246 if b.wasRetrievedFromDisk || other.WasRetrievedFromDisk() { 247 // We use Merge to lazily merge blocks that eventually need to be flushed to disk 248 // If we try to perform a merge on blocks that were retrieved from disk then we've 249 // violated an invariant and probably have a bug that is causing data loss. 250 b.Unlock() 251 return errTriedToMergeBlockFromDisk 252 } 253 254 if b.mergeTarget == nil { 255 b.mergeTarget = other 256 } else { 257 b.mergeTarget.Merge(other) 258 } 259 260 b.Unlock() 261 return nil 262 } 263 264 func (b *dbBlock) Reset( 265 start xtime.UnixNano, 266 blockSize time.Duration, 267 segment ts.Segment, 268 nsCtx namespace.Context, 269 ) { 270 b.Lock() 271 defer b.Unlock() 272 b.resetNewBlockStartWithLock(start, blockSize) 273 b.resetSegmentWithLock(segment) 274 b.nsCtx = nsCtx 275 } 276 277 func (b *dbBlock) ResetFromDisk( 278 start xtime.UnixNano, 279 blockSize time.Duration, 280 segment ts.Segment, 281 id ident.ID, 282 nsCtx namespace.Context, 283 ) { 284 b.Lock() 285 defer b.Unlock() 286 b.resetNewBlockStartWithLock(start, blockSize) 287 // resetSegmentWithLock sets seriesID to nil 288 b.resetSegmentWithLock(segment) 289 b.seriesID = id 290 b.nsCtx = nsCtx 291 b.wasRetrievedFromDisk = true 292 } 293 294 func (b *dbBlock) streamWithRLock(ctx context.Context) (xio.BlockReader, error) { 295 start := b.startWithRLock() 296 297 // Take a copy to avoid heavy depends on cycle 298 segmentReader := b.opts.SegmentReaderPool().Get() 299 data := b.opts.BytesPool().Get(b.segment.Len()) 300 data.IncRef() 301 if 
b.segment.Head != nil { 302 data.AppendAll(b.segment.Head.Bytes()) 303 } 304 if b.segment.Tail != nil { 305 data.AppendAll(b.segment.Tail.Bytes()) 306 } 307 data.DecRef() 308 checksum := b.segment.CalculateChecksum() 309 segmentReader.Reset(ts.NewSegment(data, nil, checksum, ts.FinalizeHead)) 310 ctx.RegisterFinalizer(segmentReader) 311 312 blockReader := xio.BlockReader{ 313 SegmentReader: segmentReader, 314 Start: start, 315 BlockSize: b.blockSize, 316 } 317 318 return blockReader, nil 319 } 320 321 func (b *dbBlock) forceMergeWithLock(ctx context.Context, stream xio.SegmentReader) error { 322 targetStream, err := b.mergeTarget.Stream(ctx) 323 if err != nil { 324 return err 325 } 326 start := b.startWithRLock() 327 mergedBlockReader := newDatabaseMergedBlockReader(b.nsCtx, start, b.blockSize, 328 mergeableStream{stream: stream, finalize: false}, // Should have been marked for finalization by the caller 329 mergeableStream{stream: targetStream, finalize: false}, // Already marked for finalization by the Stream() call above 330 b.opts) 331 mergedSegment, err := mergedBlockReader.Segment() 332 if err != nil { 333 return err 334 } 335 336 b.resetMergeTargetWithLock() 337 b.resetSegmentWithLock(mergedSegment) 338 return nil 339 } 340 341 func (b *dbBlock) resetNewBlockStartWithLock(start xtime.UnixNano, blockSize time.Duration) { 342 b.startUnixNanos = start 343 b.blockSize = blockSize 344 atomic.StoreInt64(&b.lastReadUnixNanos, 0) 345 b.closed = false 346 b.resetMergeTargetWithLock() 347 } 348 349 func (b *dbBlock) resetSegmentWithLock(seg ts.Segment) { 350 b.segment = seg 351 b.length = seg.Len() 352 b.checksum = seg.CalculateChecksum() 353 b.seriesID = nil 354 b.wasRetrievedFromDisk = false 355 } 356 357 func (b *dbBlock) Discard() ts.Segment { 358 seg, _ := b.closeAndDiscardConditionally(nil) 359 return seg 360 } 361 362 func (b *dbBlock) Close() { 363 segment, _ := b.closeAndDiscardConditionally(nil) 364 segment.Finalize() 365 } 366 367 func (b *dbBlock) 
CloseIfFromDisk() bool { 368 segment, ok := b.closeAndDiscardConditionally(func(b *dbBlock) bool { 369 return b.wasRetrievedFromDisk 370 }) 371 if !ok { 372 return false 373 } 374 segment.Finalize() 375 return true 376 } 377 378 func (b *dbBlock) closeAndDiscardConditionally(condition func(b *dbBlock) bool) (ts.Segment, bool) { 379 b.Lock() 380 381 if condition != nil && !condition(b) { 382 b.Unlock() 383 return ts.Segment{}, false 384 } 385 386 if b.closed { 387 b.Unlock() 388 return ts.Segment{}, true 389 } 390 391 segment := b.segment 392 b.closed = true 393 394 b.resetMergeTargetWithLock() 395 b.Unlock() 396 397 if pool := b.opts.DatabaseBlockPool(); pool != nil { 398 pool.Put(b) 399 } 400 401 return segment, true 402 } 403 404 func (b *dbBlock) resetMergeTargetWithLock() { 405 if b.mergeTarget != nil { 406 b.mergeTarget.Close() 407 } 408 b.mergeTarget = nil 409 } 410 411 // Should only be used by the WiredList. 412 func (b *dbBlock) next() DatabaseBlock { 413 return b.listState.next 414 } 415 416 // Should only be used by the WiredList. 417 func (b *dbBlock) setNext(value DatabaseBlock) { 418 b.listState.next = value 419 } 420 421 // Should only be used by the WiredList. 422 func (b *dbBlock) prev() DatabaseBlock { 423 return b.listState.prev 424 } 425 426 // Should only be used by the WiredList. 427 func (b *dbBlock) setPrev(value DatabaseBlock) { 428 b.listState.prev = value 429 } 430 431 // Should only be used by the WiredList. 432 func (b *dbBlock) enteredListAtUnixNano() int64 { 433 return b.listState.enteredListAtUnixNano 434 } 435 436 // Should only be used by the WiredList. 437 func (b *dbBlock) setEnteredListAtUnixNano(value int64) { 438 b.listState.enteredListAtUnixNano = value 439 } 440 441 // wiredListEntry is a snapshot of a subset of the block's state that the WiredList 442 // uses to determine if a block is eligible for inclusion in the WiredList. 
443 type wiredListEntry struct { 444 seriesID ident.ID 445 startTime xtime.UnixNano 446 closed bool 447 wasRetrievedFromDisk bool 448 } 449 450 // wiredListEntry generates a wiredListEntry for the block, and should only 451 // be used by the WiredList. 452 func (b *dbBlock) wiredListEntry() wiredListEntry { 453 b.RLock() 454 result := wiredListEntry{ 455 closed: b.closed, 456 seriesID: b.seriesID, 457 wasRetrievedFromDisk: b.wasRetrievedFromDisk, 458 startTime: b.startWithRLock(), 459 } 460 b.RUnlock() 461 return result 462 } 463 464 func (b *dbBlock) SetOnEvictedFromWiredList(onEvicted OnEvictedFromWiredList) { 465 b.Lock() 466 b.onEvicted = onEvicted 467 b.Unlock() 468 } 469 470 func (b *dbBlock) OnEvictedFromWiredList() OnEvictedFromWiredList { 471 b.RLock() 472 onEvicted := b.onEvicted 473 b.RUnlock() 474 return onEvicted 475 } 476 477 type databaseSeriesBlocks struct { 478 elems map[xtime.UnixNano]DatabaseBlock 479 min xtime.UnixNano 480 max xtime.UnixNano 481 } 482 483 // NewDatabaseSeriesBlocks creates a databaseSeriesBlocks instance. 484 func NewDatabaseSeriesBlocks(capacity int) DatabaseSeriesBlocks { 485 return &databaseSeriesBlocks{ 486 elems: make(map[xtime.UnixNano]DatabaseBlock, capacity), 487 } 488 } 489 490 func (dbb *databaseSeriesBlocks) Len() int { 491 return len(dbb.elems) 492 } 493 494 func (dbb *databaseSeriesBlocks) AddBlock(block DatabaseBlock) { 495 start := block.StartTime() 496 if dbb.min.Equal(timeZero) || start.Before(dbb.min) { 497 dbb.min = start 498 } 499 if dbb.max.Equal(timeZero) || start.After(dbb.max) { 500 dbb.max = start 501 } 502 dbb.elems[start] = block 503 } 504 505 func (dbb *databaseSeriesBlocks) AddSeries(other DatabaseSeriesBlocks) { 506 if other == nil { 507 return 508 } 509 blocks := other.AllBlocks() 510 for _, b := range blocks { 511 dbb.AddBlock(b) 512 } 513 } 514 515 // MinTime returns the min time of the blocks contained. 
516 func (dbb *databaseSeriesBlocks) MinTime() xtime.UnixNano { 517 return dbb.min 518 } 519 520 // MaxTime returns the max time of the blocks contained. 521 func (dbb *databaseSeriesBlocks) MaxTime() xtime.UnixNano { 522 return dbb.max 523 } 524 525 func (dbb *databaseSeriesBlocks) BlockAt(t xtime.UnixNano) (DatabaseBlock, bool) { 526 b, ok := dbb.elems[t] 527 return b, ok 528 } 529 530 func (dbb *databaseSeriesBlocks) AllBlocks() map[xtime.UnixNano]DatabaseBlock { 531 return dbb.elems 532 } 533 534 func (dbb *databaseSeriesBlocks) RemoveBlockAt(t xtime.UnixNano) { 535 if _, exists := dbb.elems[t]; !exists { 536 return 537 } 538 delete(dbb.elems, t) 539 if dbb.min != t && dbb.max != t { 540 return 541 } 542 dbb.min, dbb.max = timeZero, timeZero 543 if len(dbb.elems) == 0 { 544 return 545 } 546 for key := range dbb.elems { 547 if dbb.min == timeZero || dbb.min > key { 548 dbb.min = key 549 } 550 if dbb.max == timeZero || dbb.max < key { 551 dbb.max = key 552 } 553 } 554 } 555 556 func (dbb *databaseSeriesBlocks) RemoveAll() { 557 for t, block := range dbb.elems { 558 block.Close() 559 delete(dbb.elems, t) 560 } 561 } 562 563 func (dbb *databaseSeriesBlocks) Reset() { 564 // Ensure the old, possibly large map is GC'd 565 dbb.elems = nil 566 dbb.elems = make(map[xtime.UnixNano]DatabaseBlock) 567 dbb.min = 0 568 dbb.max = 0 569 } 570 571 func (dbb *databaseSeriesBlocks) Close() { 572 dbb.RemoveAll() 573 // Mark the map as nil to prevent maps that have grown large from wasting 574 // space in the pool (Deleting elements from a large map will not cause 575 // the underlying resources to shrink) 576 dbb.elems = nil 577 }