github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/namespace_readers.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package storage

import (
	"sync"

	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/pool"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/uber-go/tally"
	"go.uber.org/zap"
)

// namespaceReaderManager maintains a pool of closed readers which can be
// re-used (to prevent additional allocations), as well as a cache of recently
// used open readers based on their position. The cache of recently used open
// readers is useful during peer bootstrapping because a pageToken (which
// contains an offset into the reader for both the data and metadata portions
// of the fileset) is used to communicate the client's current position to the
// server.
// In the general case, the client will miss on its first request for a given
// shard/block start, and then experience a cache hit on every subsequent
// request because the current client implementation does not perform any
// parallel requests for a single shard.
// The closedReaders pool is modeled as a stack (implemented via slice
// operations) and the open readers cache is implemented as a map where the
// key is of type cachedOpenReaderKey.
// The namespaceReaderManager also implements a tick() method which should
// be called regularly in order to shrink the closedReaders stack after bursts
// of usage, as well as to expire cached open readers which have not been used
// for a configurable number of ticks.
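//
// An illustrative sketch of the typical get/put cycle (non-authoritative;
// mgr, shard, blockStart, and pos stand in for caller-provided values):
//
//	reader, err := mgr.get(shard, blockStart, pos)
//	if err != nil {
//		return err
//	}
//	// ... read series data and/or metadata, advancing the reader ...
//
//	// Returning the reader caches it at its new position so the next
//	// request for that position is a cache hit.
//	if err := mgr.put(reader); err != nil {
//		return err
//	}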

const (
	expireCachedReadersAfterNumTicks = 2
)

type databaseNamespaceReaderManager interface {
	filesetExistsAt(
		shard uint32,
		blockStart xtime.UnixNano,
	) (bool, error)

	get(
		shard uint32,
		blockStart xtime.UnixNano,
		position readerPosition,
	) (fs.DataFileSetReader, error)

	put(reader fs.DataFileSetReader) error

	latestVolume(shard uint32, blockStart xtime.UnixNano) (int, error)

	assignShardSet(shardSet sharding.ShardSet)

	tick()

	close()
}

type fsFileSetExistsFn func(
	prefix string,
	namespace ident.ID,
	shard uint32,
	blockStart xtime.UnixNano,
	volume int,
) (bool, error)

type fsNewReaderFn func(
	bytesPool pool.CheckedBytesPool,
	opts fs.Options,
) (fs.DataFileSetReader, error)

type namespaceReaderManager struct {
	sync.Mutex

	filesetExistsFn fsFileSetExistsFn
	newReaderFn     fsNewReaderFn

	namespace         namespace.Metadata
	fsOpts            fs.Options
	blockLeaseManager block.LeaseManager
	bytesPool         pool.CheckedBytesPool

	logger *zap.Logger

	closedReaders []cachedReader
	openReaders   map[cachedOpenReaderKey]cachedReader
	shardSet      sharding.ShardSet

	metrics namespaceReaderManagerMetrics
}

type cachedOpenReaderKey struct {
	shard      uint32
	blockStart xtime.UnixNano
	position   readerPosition
}

type readerPosition struct {
	volume      int
	dataIdx     int
	metadataIdx int
}

type cachedReader struct {
	reader         fs.DataFileSetReader
	ticksSinceUsed int
}

type namespaceReaderManagerMetrics struct {
	cacheHit              tally.Counter
	cacheMissAllocReader  tally.Counter
	cacheMissReusedReader tally.Counter
}

func newNamespaceReaderManagerMetrics(
	scope tally.Scope,
) namespaceReaderManagerMetrics {
	subScope := scope.SubScope("reader-cache")
	return namespaceReaderManagerMetrics{
		cacheHit: subScope.Counter("hit"),
		cacheMissAllocReader: subScope.Tagged(map[string]string{
			"miss_type": "alloc_reader",
		}).Counter("miss"),
		cacheMissReusedReader: subScope.Tagged(map[string]string{
			"miss_type": "reuse_reader",
		}).Counter("miss"),
	}
}

func newNamespaceReaderManager(
	namespace namespace.Metadata,
	namespaceScope tally.Scope,
	opts Options,
) databaseNamespaceReaderManager {
	blm := opts.BlockLeaseManager()
	mgr := &namespaceReaderManager{
		filesetExistsFn:   fs.DataFileSetExists,
		newReaderFn:       fs.NewReader,
		namespace:         namespace,
		fsOpts:            opts.CommitLogOptions().FilesystemOptions(),
		blockLeaseManager: blm,
		bytesPool:         opts.BytesPool(),
		logger:            opts.InstrumentOptions().Logger(),
		openReaders:       make(map[cachedOpenReaderKey]cachedReader),
		shardSet:          sharding.NewEmptyShardSet(sharding.DefaultHashFn(1)),
		metrics:           newNamespaceReaderManagerMetrics(namespaceScope),
	}

	blm.RegisterLeaser(mgr)

	return mgr
}

func (m *namespaceReaderManager) latestVolume(
	shard uint32,
	blockStart xtime.UnixNano,
) (int, error) {
	state, err := m.blockLeaseManager.OpenLatestLease(m, block.LeaseDescriptor{
		Namespace:  m.namespace.ID(),
		Shard:      shard,
		BlockStart: blockStart,
	})
	if err != nil {
		return -1, err
	}

	return state.Volume, nil
}

func (m *namespaceReaderManager) filesetExistsAt(
	shard uint32,
	blockStart xtime.UnixNano,
) (bool, error) {
	latestVolume, err := m.latestVolume(shard, blockStart)
	if err != nil {
		return false, err
	}

	return m.filesetExistsFn(m.fsOpts.FilePathPrefix(),
		m.namespace.ID(), shard, blockStart, latestVolume)
}

func (m *namespaceReaderManager) assignShardSet(shardSet sharding.ShardSet) {
	m.Lock()
	defer m.Unlock()
	m.shardSet = shardSet
}

func (m *namespaceReaderManager) shardExistsWithLock(shard uint32) bool {
	_, err := m.shardSet.LookupStateByID(shard)
	// NB(bodu): LookupStateByID returns ErrInvalidShardID when the shard
	// does not exist in the shard map, which means the shard is not available.
	return err == nil
}

type cachedReaderForKeyResult struct {
	openReader   fs.DataFileSetReader
	closedReader fs.DataFileSetReader
}

func (m *namespaceReaderManager) pushClosedReaderWithLock(
	reader fs.DataFileSetReader,
) {
	m.closedReaders = append(m.closedReaders, cachedReader{
		reader: reader,
	})
}

func (m *namespaceReaderManager) popClosedReaderWithLock() fs.DataFileSetReader {
	idx := len(m.closedReaders) - 1
	reader := m.closedReaders[idx].reader
	// Zero refs from element in slice and shrink slice
	m.closedReaders[idx] = cachedReader{}
	m.closedReaders = m.closedReaders[:idx]
	return reader
}

func (m *namespaceReaderManager) cachedReaderForKey(
	key cachedOpenReaderKey,
) (cachedReaderForKeyResult, error) {
	m.Lock()
	defer m.Unlock()

	openReader, ok := m.openReaders[key]
	if ok {
		// Cache hit, take this open reader
		delete(m.openReaders, key)

		m.metrics.cacheHit.Inc(1)

		return cachedReaderForKeyResult{
			openReader: openReader.reader,
		}, nil
	}

	// Cache miss, need to return a reused reader or open a new reader
	if len(m.closedReaders) > 0 {
		reader := m.popClosedReaderWithLock()

		m.metrics.cacheMissReusedReader.Inc(1)
		return cachedReaderForKeyResult{
			closedReader: reader,
		}, nil
	}

	reader, err := m.newReaderFn(m.bytesPool, m.fsOpts)
	if err != nil {
		return cachedReaderForKeyResult{}, err
	}

	m.metrics.cacheMissAllocReader.Inc(1)
	return cachedReaderForKeyResult{
		closedReader: reader,
	}, nil
}

func (m *namespaceReaderManager) get(
	shard uint32,
	blockStart xtime.UnixNano,
	position readerPosition,
) (fs.DataFileSetReader, error) {
	latestVolume, err := m.latestVolume(shard, blockStart)
	if err != nil {
		return nil, err
	}

	// If requesting an outdated volume, we need to start reading again from
	// the beginning of the latest volume. The caller knows how to handle
	// duplicate metadata, so doing this is okay.
	//
	// The previously cached reader for the outdated volume will eventually be
	// cleaned up either during the ticking process or the next time
	// UpdateOpenLease gets called, so we don't need to worry about closing it
	// here.
	if position.volume < latestVolume {
		position.volume = latestVolume
		position.dataIdx = 0
		position.metadataIdx = 0
	}

	key := cachedOpenReaderKey{
		shard:      shard,
		blockStart: blockStart,
		position:   position,
	}

	lookup, err := m.cachedReaderForKey(key)
	if err != nil {
		return nil, err
	}
	if reader := lookup.openReader; reader != nil {
		return reader, nil // Found an open reader for the position
	}

	// We have a closed reader from the cache (either a cached closed
	// reader or newly allocated, either way need to prepare it)
	reader := lookup.closedReader

	openOpts := fs.DataReaderOpenOptions{
		Identifier: fs.FileSetFileIdentifier{
			Namespace:   m.namespace.ID(),
			Shard:       shard,
			BlockStart:  blockStart,
			VolumeIndex: latestVolume,
		},
	}
	if err := reader.Open(openOpts); err != nil {
		return nil, err
	}

	// We can validate metadata immediately since it's read when opened
	if err := reader.ValidateMetadata(); err != nil {
		return nil, err
	}

	// Fast fwd through if in the middle of a volume
	for i := 0; i < position.dataIdx; i++ {
		id, tags, data, _, err := reader.Read()
		if err != nil {
			return nil, err
		}
		id.Finalize()
		tags.Close()
		data.Finalize()
	}
	for i := 0; i < position.metadataIdx; i++ {
		id, tags, _, _, err := reader.ReadMetadata()
		if err != nil {
			return nil, err
		}
		id.Finalize()
		tags.Close()
	}

	return reader, nil
}

func (m *namespaceReaderManager) closeAndPushReaderWithLock(reader fs.DataFileSetReader) error {
	if err := reader.Close(); err != nil {
		return err
	}

	m.pushClosedReaderWithLock(reader)
	return nil
}

func (m *namespaceReaderManager) put(reader fs.DataFileSetReader) error {
	status := reader.Status()

	m.Lock()
	defer m.Unlock()

	if !status.Open {
		m.pushClosedReaderWithLock(reader)
		return nil
	}

	shard := status.Shard

	latestVolume, err := m.latestVolume(shard, status.BlockStart)
	if err != nil {
		return err
	}

	// If the supplied reader is for a stale volume, then it will never be
	// reused in its current state. Instead, put it in the closed reader pool
	// so that it can be reconfigured to be reopened later.
	if latestVolume > status.Volume {
		if err := m.closeAndPushReaderWithLock(reader); err != nil {
			// Best effort on closing the reader and caching it. If it fails,
			// we can always allocate a new reader.
			m.logger.Error("error closing reader on put from reader cache", zap.Error(err))
		}
		return nil
	}

	key := cachedOpenReaderKey{
		shard:      shard,
		blockStart: status.BlockStart,
		position: readerPosition{
			volume:      status.Volume,
			dataIdx:     reader.EntriesRead(),
			metadataIdx: reader.MetadataRead(),
		},
	}

	if _, ok := m.openReaders[key]; ok {
		// There is already an open reader cached for this key. We don't need
		// a duplicate one, so close the reader and push to slice of closed
		// readers.
		if err := m.closeAndPushReaderWithLock(reader); err != nil {
			// Best effort on closing the reader and caching it. If it fails,
			// we can always allocate a new reader.
			m.logger.Error("error closing reader on put from reader cache", zap.Error(err))
		}
		return nil
	}

	m.openReaders[key] = cachedReader{reader: reader}

	return nil
}

func (m *namespaceReaderManager) tick() {
	m.tickWithThreshold(expireCachedReadersAfterNumTicks)
}

func (m *namespaceReaderManager) close() {
	m.blockLeaseManager.UnregisterLeaser(m)

	// Perform a tick but make the threshold zero so all readers must be expired
	m.tickWithThreshold(0)
}

func (m *namespaceReaderManager) tickWithThreshold(threshold int) {
	m.Lock()
	defer m.Unlock()

	// First increment ticks since used for closed readers
	expiredClosedReaders := 0
	for i := range m.closedReaders {
		m.closedReaders[i].ticksSinceUsed++
		if m.closedReaders[i].ticksSinceUsed >= threshold {
			expiredClosedReaders++
		}
	}
	// Expire any closed readers, alloc a new slice to avoid spikes
	// of use creating slices that are never released
	if expired := expiredClosedReaders; expired > 0 {
		newClosedReaders := make([]cachedReader, 0, len(m.closedReaders)-expired)
		for _, elem := range m.closedReaders {
			if elem.ticksSinceUsed < threshold {
				newClosedReaders = append(newClosedReaders, elem)
			}
		}
		m.closedReaders = newClosedReaders
	}

	// For open readers calculate and expire from map directly
	for key, elem := range m.openReaders {
		// Mutate the for-loop copy in place before checking the threshold
		elem.ticksSinceUsed++
		if elem.ticksSinceUsed >= threshold ||
			// Also check to see if shard is still available and remove cached readers for
			// shards that are no longer available. This ensures cached readers are eventually
			// consistent with shard state.
			!m.shardExistsWithLock(key.shard) {
			// Close before removing ref
			if err := elem.reader.Close(); err != nil {
				m.logger.Error("error closing reader from reader cache", zap.Error(err))
			}
			delete(m.openReaders, key)
			continue
		}

		// Save the mutated copy back to the map
		m.openReaders[key] = elem
	}
}

// UpdateOpenLease() implements block.Leaser.
func (m *namespaceReaderManager) UpdateOpenLease(
	descriptor block.LeaseDescriptor,
	state block.LeaseState,
) (block.UpdateOpenLeaseResult, error) {
	if !m.namespace.ID().Equal(descriptor.Namespace) {
		return block.NoOpenLease, nil
	}

	m.Lock()
	defer m.Unlock()
	// Close and remove open readers with matching key but lower volume.
	for readerKey, cachedReader := range m.openReaders {
		if readerKey.shard == descriptor.Shard &&
			readerKey.blockStart == descriptor.BlockStart &&
			readerKey.position.volume < state.Volume {
			delete(m.openReaders, readerKey)
			if err := m.closeAndPushReaderWithLock(cachedReader.reader); err != nil {
				// Best effort on closing the reader and caching it. If it
				// fails, we can always allocate a new reader.
				m.logger.Error("error closing reader on put from reader cache", zap.Error(err))
			}
		}
	}

	return block.UpdateOpenLease, nil
}