github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/merger.go

// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	"errors"
	"io"
	"time"

	"github.com/m3db/m3/src/dbnode/encoding"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/dbnode/x/xio"
	"github.com/m3db/m3/src/m3ninx/doc"
	"github.com/m3db/m3/src/x/checked"
	"github.com/m3db/m3/src/x/context"
	"github.com/m3db/m3/src/x/ident"
	xtime "github.com/m3db/m3/src/x/time"
)

var errMergeAndCleanupNotSupported = errors.New("function MergeAndCleanup not supported outside of bootstrapping")

type merger struct {
	reader         DataFileSetReader
	blockAllocSize int
	srPool         xio.SegmentReaderPool
	multiIterPool  encoding.MultiReaderIteratorPool
	identPool      ident.Pool
	encoderPool    encoding.EncoderPool
	contextPool    context.Pool
	nsOpts         namespace.Options
	filePathPrefix string
}

// NewMerger returns a new Merger. This implementation is in charge of merging
// the data from an existing fileset with a merge target. If data for a series
// at a timestamp exists both on disk and in the merge target, the data from
// the merge target is used. The merged data is then persisted.
//
// Note that the merger does not know how or where the merged data is
// persisted, since it just uses the flushPreparer that is passed in. Further,
// it neither informs the database of the existence of the newly persisted
// data nor cleans up the original fileset.
func NewMerger(
	reader DataFileSetReader,
	blockAllocSize int,
	srPool xio.SegmentReaderPool,
	multiIterPool encoding.MultiReaderIteratorPool,
	identPool ident.Pool,
	encoderPool encoding.EncoderPool,
	contextPool context.Pool,
	filePathPrefix string,
	nsOpts namespace.Options,
) Merger {
	return &merger{
		reader:         reader,
		blockAllocSize: blockAllocSize,
		srPool:         srPool,
		multiIterPool:  multiIterPool,
		identPool:      identPool,
		encoderPool:    encoderPool,
		contextPool:    contextPool,
		nsOpts:         nsOpts,
		filePathPrefix: filePathPrefix,
	}
}
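
// A minimal usage sketch (illustrative only; the construction of the reader,
// pools, options, and the Merge arguments below is assumed and will vary by
// caller):
//
//	m := NewMerger(reader, blockAllocSize, srPool, multiIterPool,
//		identPool, encoderPool, contextPool, filePathPrefix, nsOpts)
//	closer, err := m.Merge(fileID, mergeWith, nextVolumeIndex,
//		flushPreparer, nsCtx, onFlush)
//	if err == nil {
//		err = closer()
//	}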

// Merge merges data from a fileset with a merge target and persists it.
// The caller is responsible for finalizing all resources used for the
// MergeWith passed here.
func (m *merger) Merge(
	fileID FileSetFileIdentifier,
	mergeWith MergeWith,
	nextVolumeIndex int,
	flushPreparer persist.FlushPreparer,
	nsCtx namespace.Context,
	onFlush persist.OnFlushSeries,
) (persist.DataCloser, error) {
	var (
		reader         = m.reader
		blockAllocSize = m.blockAllocSize
		srPool         = m.srPool
		multiIterPool  = m.multiIterPool
		encoderPool    = m.encoderPool
		nsOpts         = m.nsOpts

		nsID       = fileID.Namespace
		shard      = fileID.Shard
		startTime  = fileID.BlockStart
		volume     = fileID.VolumeIndex
		blockSize  = nsOpts.RetentionOptions().BlockSize()
		blockStart = startTime
		openOpts   = DataReaderOpenOptions{
			Identifier: FileSetFileIdentifier{
				Namespace:   nsID,
				Shard:       shard,
				BlockStart:  startTime,
				VolumeIndex: volume,
			},
			FileSetType: persist.FileSetFlushType,
		}
		closer persist.DataCloser
		err    error
	)

	if err := reader.Open(openOpts); err != nil {
		return closer, err
	}
	defer reader.Close() // nolint

	nsMd, err := namespace.NewMetadata(nsID, nsOpts)
	if err != nil {
		return closer, err
	}
	prepareOpts := persist.DataPrepareOptions{
		NamespaceMetadata: nsMd,
		Shard:             shard,
		BlockStart:        startTime,
		VolumeIndex:       nextVolumeIndex,
		FileSetType:       persist.FileSetFlushType,
		DeleteIfExists:    false,
	}
	prepared, err := flushPreparer.PrepareData(prepareOpts)
	if err != nil {
		return closer, err
	}

	var (
		// There will likely be at least two SegmentReaders - one for disk data and
		// one for data from the merge target.
		segmentReaders = make([]xio.SegmentReader, 0, 2)

		// It's safe to share these between iterations and just reset them each
		// time, because each series is persisted within its loop iteration, so
		// the previous iteration's reader and iterator are never needed again.
		segReader = srPool.Get()
		multiIter = multiIterPool.Get()
		ctx       = m.contextPool.Get()

		// Shared between iterations.
		iterResources = newIterResources(
			multiIter,
			blockStart,
			blockSize,
			blockAllocSize,
			nsCtx.Schema,
			encoderPool)
	)
	defer func() {
		segReader.Finalize()
		multiIter.Close()
	}()

	// The merge is performed in two stages. The first stage loops through the
	// series on disk and merges them with what's in the merge target; it
	// deliberately reads the fileset sequentially to optimize for spinning
	// disk access. The second stage persists the remaining series in the
	// merge target that were not persisted in the first stage.
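	//
	// Informally, the flow is (an illustrative sketch, not additional
	// behavior):
	//
	//	for each series in the on-disk fileset:
	//		read its data from disk
	//		if the merge target also has data for the series, append it
	//		persist the (possibly merged) series
	//	for each series remaining in the merge target:
	//		persist it as-is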

	// First stage: loop through series on disk.
	for id, tagsIter, data, checksum, err := reader.Read(); err != io.EOF; id, tagsIter, data, checksum, err = reader.Read() {
		if err != nil {
			return closer, err
		}

		segmentReaders = segmentReaders[:0]
		seg := segmentReaderFromData(data, checksum, segReader)
		segmentReaders = append(segmentReaders, seg)

		// Check if this series is in memory (and thus requires merging).
		ctx.Reset()
		mergeWithData, hasInMemoryData, err := mergeWith.Read(ctx, id, blockStart, nsCtx)
		if err != nil {
			return closer, err
		}
		if hasInMemoryData {
			segmentReaders = appendBlockReadersToSegmentReaders(segmentReaders, mergeWithData)
		}

		// Inform the writer to finalize the ID and tag iterator once
		// the volume is written.
		metadata := persist.NewMetadataFromIDAndTagIterator(id, tagsIter,
			persist.MetadataOptions{
				FinalizeID:          true,
				FinalizeTagIterator: true,
			})

		// In the special (but common) case that we're just copying the series data
		// from the old file into the new one, without merging or adding any
		// additional data, we can avoid recalculating the checksum.
		if len(segmentReaders) == 1 && !hasInMemoryData {
			segment, err := segmentReaders[0].Segment()
			if err != nil {
				return closer, err
			}

			if err := persistSegmentWithChecksum(metadata, segment, checksum, prepared.Persist); err != nil {
				return closer, err
			}
		} else {
			if err := persistSegmentReaders(metadata, segmentReaders, iterResources, prepared.Persist); err != nil {
				return closer, err
			}
		}
		// Closing the context will finalize the data returned from
		// mergeWith.Read(), which is safe because it has already been persisted
		// to disk.
		// NB(r): Make sure to use BlockingCloseReset so the context can be reused.
		ctx.BlockingCloseReset()
	}

	// Second stage: loop through any series in the merge target that were not
	// captured in the first stage.
	ctx.Reset()
	err = mergeWith.ForEachRemaining(
		ctx, blockStart,
		func(seriesMetadata doc.Metadata, mergeWithData block.FetchBlockResult) error {
			segmentReaders = segmentReaders[:0]
			segmentReaders = appendBlockReadersToSegmentReaders(segmentReaders, mergeWithData.Blocks)

			metadata := persist.NewMetadata(seriesMetadata)
			err := persistSegmentReaders(metadata, segmentReaders, iterResources, prepared.Persist)

			if err == nil {
				err = onFlush.OnFlushNewSeries(persist.OnFlushNewSeriesEvent{
					Shard:      shard,
					BlockStart: startTime,
					FirstWrite: mergeWithData.FirstWrite,
					SeriesMetadata: persist.SeriesMetadata{
						Type:     persist.SeriesDocumentType,
						Document: seriesMetadata,
						// The shard series metadata is long-lived.
						LifeTime: persist.SeriesLifeTimeLong,
					},
				})
			}

			// The context is safe to close after persisting data to disk. Reset
			// it here, within the passed-in function, so that it gets reset for
			// each remaining series instead of being finalized once at the end
			// of the ForEachRemaining call.
			// NB(r): Make sure to use BlockingCloseReset so the context can be reused.
			ctx.BlockingCloseReset()
			return err
		}, nsCtx)
	if err != nil {
		return closer, err
	}

	// NB(bodu): Return a deferred closer so that we can guarantee that cold
	// index writes are persisted first.
	return prepared.DeferClose()
}
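
// MergeAndCleanup merges the fileset with a merge target, closes the merged
// volume, and then deletes the original fileset. It is only supported while
// the database is bootstrapping; once bootstrapped it returns an error.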
func (m *merger) MergeAndCleanup(
	fileID FileSetFileIdentifier,
	mergeWith MergeWith,
	nextVolumeIndex int,
	flushPreparer persist.FlushPreparer,
	nsCtx namespace.Context,
	onFlush persist.OnFlushSeries,
	isBootstrapped bool,
) error {
	if isBootstrapped {
		return errMergeAndCleanupNotSupported
	}

	closer, err := m.Merge(fileID, mergeWith, nextVolumeIndex, flushPreparer, nsCtx, onFlush)
	if err != nil {
		return err
	}

	if err = closer(); err != nil {
		return err
	}

	return DeleteFileSetAt(m.filePathPrefix, fileID.Namespace, fileID.Shard, fileID.BlockStart, fileID.VolumeIndex)
}

func appendBlockReadersToSegmentReaders(segReaders []xio.SegmentReader, brs []xio.BlockReader) []xio.SegmentReader {
	for _, br := range brs {
		segReaders = append(segReaders, br.SegmentReader)
	}
	return segReaders
}

func segmentReaderFromData(
	data checked.Bytes,
	checksum uint32,
	segReader xio.SegmentReader,
) xio.SegmentReader {
	seg := ts.NewSegment(data, nil, checksum, ts.FinalizeHead)
	segReader.Reset(seg)
	return segReader
}
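
// persistSegmentReaders persists the series data held by the given segment
// readers, taking the cheapest path available:
//
//	0 readers -> nothing to persist
//	1 reader  -> persist the single segment directly
//	n readers -> merge via the multi-reader iterator, re-encode, and persist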
func persistSegmentReaders(
	metadata persist.Metadata,
	segReaders []xio.SegmentReader,
	ir iterResources,
	persistFn persist.DataFn,
) error {
	if len(segReaders) == 0 {
		return nil
	}

	if len(segReaders) == 1 {
		return persistSegmentReader(metadata, segReaders[0], persistFn)
	}

	return persistIter(metadata, segReaders, ir, persistFn)
}

// persistIter merges the segment readers with the multi-reader iterator,
// re-encodes the merged stream, and persists the resulting segment.
func persistIter(
	metadata persist.Metadata,
	segReaders []xio.SegmentReader,
	ir iterResources,
	persistFn persist.DataFn,
) error {
	it := ir.multiIter
	it.Reset(segReaders, ir.blockStart, ir.blockSize, ir.schema)
	encoder := ir.encoderPool.Get()
	encoder.Reset(ir.blockStart, ir.blockAllocSize, ir.schema)
	for it.Next() {
		if err := encoder.Encode(it.Current()); err != nil {
			encoder.Close()
			return err
		}
	}
	if err := it.Err(); err != nil {
		encoder.Close()
		return err
	}

	segment := encoder.Discard()
	return persistSegment(metadata, segment, persistFn)
}

func persistSegmentReader(
	metadata persist.Metadata,
	segmentReader xio.SegmentReader,
	persistFn persist.DataFn,
) error {
	segment, err := segmentReader.Segment()
	if err != nil {
		return err
	}
	return persistSegment(metadata, segment, persistFn)
}

// persistSegment recalculates the segment's checksum before persisting.
func persistSegment(
	metadata persist.Metadata,
	segment ts.Segment,
	persistFn persist.DataFn,
) error {
	checksum := segment.CalculateChecksum()
	return persistFn(metadata, segment, checksum)
}

// persistSegmentWithChecksum persists a segment with an already known
// checksum, avoiding the cost of recalculating it.
func persistSegmentWithChecksum(
	metadata persist.Metadata,
	segment ts.Segment,
	checksum uint32,
	persistFn persist.DataFn,
) error {
	return persistFn(metadata, segment, checksum)
}

// iterResources groups the resources shared across merge iterations when
// merged series data needs to be re-encoded.
type iterResources struct {
	multiIter      encoding.MultiReaderIterator
	blockStart     xtime.UnixNano
	blockSize      time.Duration
	blockAllocSize int
	schema         namespace.SchemaDescr
	encoderPool    encoding.EncoderPool
}

func newIterResources(
	multiIter encoding.MultiReaderIterator,
	blockStart xtime.UnixNano,
	blockSize time.Duration,
	blockAllocSize int,
	schema namespace.SchemaDescr,
	encoderPool encoding.EncoderPool,
) iterResources {
	return iterResources{
		multiIter:      multiIter,
		blockStart:     blockStart,
		blockSize:      blockSize,
		blockAllocSize: blockAllocSize,
		schema:         schema,
		encoderPool:    encoderPool,
	}
}