github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/bootstrapper/readers.go

// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package bootstrapper

import (
	"sync"
	"time"

	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/runtime"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
	"github.com/m3db/m3/src/x/clock"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/opentracing/opentracing-go"
	opentracinglog "github.com/opentracing/opentracing-go/log"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

// TimeWindowReaders are grouped by data block.
type TimeWindowReaders struct {
	Ranges  result.ShardTimeRanges
	Readers map[ShardID]ShardReaders
}

// ShardID is the shard #.
type ShardID uint32

// ShardReaders are the fileset readers for a shard.
type ShardReaders struct {
	Readers []fs.DataFileSetReader
}

func newTimeWindowReaders(
	ranges result.ShardTimeRanges,
	readers map[ShardID]ShardReaders,
) TimeWindowReaders {
	return TimeWindowReaders{
		Ranges:  ranges,
		Readers: readers,
	}
}

// EnqueueReadersOptions supplies options to enqueue readers.
type EnqueueReadersOptions struct {
	NsMD             namespace.Metadata
	RunOpts          bootstrap.RunOptions
	RuntimeOpts      runtime.Options
	FsOpts           fs.Options
	ShardTimeRanges  result.ShardTimeRanges
	ReaderPool       *ReaderPool
	ReadersCh        chan<- TimeWindowReaders
	BlockSize        time.Duration
	ReadMetadataOnly bool
	Logger           *zap.Logger
	Span             opentracing.Span
	NowFn            clock.NowFn
	Cache            bootstrap.Cache
}

// EnqueueReaders into a readers channel grouped by data block.
func EnqueueReaders(opts EnqueueReadersOptions) {
	// Close the readers ch if and only if all readers are enqueued.
	defer close(opts.ReadersCh)

	// Normal run, open readers
	enqueueReadersGroupedByBlockSize(
		opts.NsMD,
		opts.ShardTimeRanges,
		opts.ReaderPool,
		opts.ReadersCh,
		opts.BlockSize,
		opts.ReadMetadataOnly,
		opts.Logger,
		opts.Span,
		opts.NowFn,
		opts.Cache,
	)
}

func enqueueReadersGroupedByBlockSize(
	ns namespace.Metadata,
	shardTimeRanges result.ShardTimeRanges,
	readerPool *ReaderPool,
	readersCh chan<- TimeWindowReaders,
	blockSize time.Duration,
	readMetadataOnly bool,
	logger *zap.Logger,
	span opentracing.Span,
	nowFn clock.NowFn,
	cache bootstrap.Cache,
) {
	// Group them by block size.
	groupFn := NewShardTimeRangesTimeWindowGroups
	groupedByBlockSize := groupFn(shardTimeRanges, blockSize)

	// Now enqueue across all shards by block size.
	for _, group := range groupedByBlockSize {
		readers := make(map[ShardID]ShardReaders, group.Ranges.Len())
		for shard, tr := range group.Ranges.Iter() {
			readInfoFilesResults, err := cache.InfoFilesForShard(ns, shard)
			if err != nil {
				logger.Error("fs bootstrapper unable to read info files for the shard",
					zap.Uint32("shard", shard),
					zap.Stringer("namespace", ns.ID()),
					zap.Error(err),
					zap.String("timeRange", tr.String()),
				)
				continue
			}
			shardReaders := newShardReaders(ns, readerPool, shard, tr,
				readMetadataOnly, logger, span, nowFn, readInfoFilesResults)
			readers[ShardID(shard)] = shardReaders
		}
		readersCh <- newTimeWindowReaders(group.Ranges, readers)
	}
}

func newShardReaders(
	ns namespace.Metadata,
	readerPool *ReaderPool,
	shard uint32,
	tr xtime.Ranges,
	readMetadataOnly bool,
	logger *zap.Logger,
	span opentracing.Span,
	nowFn clock.NowFn,
	readInfoFilesResults []fs.ReadInfoFileResult,
) ShardReaders {
	logSpan := func(event string) {
		span.LogFields(
			opentracinglog.String("event", event),
			opentracinglog.Uint32("shard", shard),
			opentracinglog.String("tr", tr.String()),
		)
	}
	logFields := []zapcore.Field{
		zap.Uint32("shard", shard),
		zap.String("tr", tr.String()),
	}
	if len(readInfoFilesResults) == 0 {
		// No readers.
		return ShardReaders{}
	}

	start := nowFn()
	logger.Debug("enqueue readers open data readers start", logFields...)
	logSpan("enqueue_readers_open_data_readers_start")
	readers := make([]fs.DataFileSetReader, 0, len(readInfoFilesResults))
	for i := 0; i < len(readInfoFilesResults); i++ {
		result := readInfoFilesResults[i]
		if err := result.Err.Error(); err != nil {
			logger.Error("fs bootstrapper unable to read info file",
				zap.Uint32("shard", shard),
				zap.Stringer("namespace", ns.ID()),
				zap.Error(err),
				zap.String("timeRange", tr.String()),
				zap.String("path", result.Err.Filepath()),
			)
			// Errors are marked unfulfilled by markRunResultErrorsAndUnfulfilled
			// and will be re-attempted by the next bootstrapper.
			continue
		}

		info := result.Info
		blockStart := xtime.UnixNano(info.BlockStart)
		if !tr.Overlaps(xtime.Range{
			Start: blockStart,
			End:   blockStart.Add(ns.Options().RetentionOptions().BlockSize()),
		}) {
			// Errors are marked unfulfilled by markRunResultErrorsAndUnfulfilled
			// and will be re-attempted by the next bootstrapper.
			continue
		}

		r, err := readerPool.Get()
		if err != nil {
			logger.Error("unable to get reader from pool")
			// Errors are marked unfulfilled by markRunResultErrorsAndUnfulfilled
			// and will be re-attempted by the next bootstrapper.
			continue
		}

		openOpts := fs.DataReaderOpenOptions{
			Identifier:       fs.NewFileSetFileIdentifier(ns.ID(), blockStart, shard, info.VolumeIndex),
			StreamingEnabled: readMetadataOnly,
		}
		if err := r.Open(openOpts); err != nil {
			logger.Error("unable to open fileset files",
				zap.Uint32("shard", shard),
				zap.Time("blockStart", blockStart.ToTime()),
				zap.Error(err),
			)
			readerPool.Put(r)
			// Errors are marked unfulfilled by markRunResultErrorsAndUnfulfilled
			// and will be re-attempted by the next bootstrapper.
			continue
		}

		readers = append(readers, r)
	}
	logger.Debug("enqueue readers open data readers done",
		append(logFields, zap.Duration("took", nowFn().Sub(start)))...)
	logSpan("enqueue_readers_open_data_readers_done")

	return ShardReaders{Readers: readers}
}

// ReaderPool is a lean pool that does not allocate
// instances up front and is used per bootstrap call.
type ReaderPool struct {
	sync.Mutex
	alloc        ReaderPoolAllocFn
	values       []fs.DataFileSetReader
	disableReuse bool
}

// ReaderPoolAllocFn allocates a new fileset reader.
type ReaderPoolAllocFn func() (fs.DataFileSetReader, error)

// NewReaderPoolOptions contains reader pool options.
type NewReaderPoolOptions struct {
	Alloc        ReaderPoolAllocFn
	DisableReuse bool
}

// NewReaderPool creates a new shareable fileset reader pool.
func NewReaderPool(
	opts NewReaderPoolOptions,
) *ReaderPool {
	return &ReaderPool{alloc: opts.Alloc, disableReuse: opts.DisableReuse}
}

// Get gets a fileset reader from the pool in synchronized fashion.
func (p *ReaderPool) Get() (fs.DataFileSetReader, error) {
	p.Lock()
	defer p.Unlock()
	if len(p.values) == 0 {
		return p.alloc()
	}
	length := len(p.values)
	value := p.values[length-1]
	p.values[length-1] = nil
	p.values = p.values[:length-1]
	return value, nil
}

// Put returns a fileset reader back to the pool in synchronized fashion.
func (p *ReaderPool) Put(r fs.DataFileSetReader) {
	if p.disableReuse {
		// Useful for tests.
		return
	}
	p.Lock()
	defer p.Unlock()
	p.values = append(p.values, r)
}
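
// The sketch below shows how a caller might drive EnqueueReaders and drain
// the resulting channel. It is a minimal, illustrative sketch only: md,
// ranges, pool, logger, span, and cache are hypothetical values the caller
// is assumed to have constructed already.
//
//	readersCh := make(chan TimeWindowReaders)
//	go EnqueueReaders(EnqueueReadersOptions{
//		NsMD:            md,
//		ShardTimeRanges: ranges,
//		ReaderPool:      pool,
//		ReadersCh:       readersCh,
//		BlockSize:       md.Options().RetentionOptions().BlockSize(),
//		Logger:          logger,
//		Span:            span,
//		NowFn:           time.Now,
//		Cache:           cache,
//	})
//
//	// EnqueueReaders closes readersCh once every time window has been
//	// enqueued, so a plain range loop drains it.
//	for window := range readersCh {
//		for shard, shardReaders := range window.Readers {
//			for _, r := range shardReaders.Readers {
//				_ = shard // process this shard's data from r, then:
//				pool.Put(r)
//			}
//		}
//	}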
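
// A minimal ReaderPool wiring sketch, assuming the caller supplies a bytes
// pool and fs.Options (bytesPool and fsOpts below are hypothetical) to an
// fs.NewReader-based alloc function; the alloc function only runs when the
// pool has no reader available to reuse.
//
//	readerPool := NewReaderPool(NewReaderPoolOptions{
//		Alloc: func() (fs.DataFileSetReader, error) {
//			return fs.NewReader(bytesPool, fsOpts)
//		},
//	})
//	r, err := readerPool.Get() // allocates on first use, reuses afterwards
//	if err != nil {
//		return err
//	}
//	defer readerPool.Put(r) // hand the reader back for later Get calls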