github.com/m3db/m3@v1.5.0/src/dbnode/storage/series/reader.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package series 22 23 import ( 24 "errors" 25 "fmt" 26 "time" 27 28 "github.com/m3db/m3/src/dbnode/namespace" 29 "github.com/m3db/m3/src/dbnode/retention" 30 "github.com/m3db/m3/src/dbnode/storage/block" 31 "github.com/m3db/m3/src/dbnode/x/xio" 32 "github.com/m3db/m3/src/x/context" 33 xerrors "github.com/m3db/m3/src/x/errors" 34 "github.com/m3db/m3/src/x/ident" 35 xtime "github.com/m3db/m3/src/x/time" 36 ) 37 38 var ( 39 errSeriesReadInvalidRange = errors.New( 40 "series invalid time range read argument specified") 41 ) 42 43 // Reader reads results from a series, or a series block 44 // retriever or both. 45 // It is implemented as a struct so it can be allocated on 46 // the stack. 47 type Reader struct { 48 opts Options 49 id ident.ID 50 retriever QueryableBlockRetriever 51 onRetrieve block.OnRetrieveBlock 52 onRead block.OnReadBlock 53 } 54 55 // NewReaderUsingRetriever returns a reader for a series 56 // block retriever, it will use the block retriever as the 57 // source to read blocks from. 58 func NewReaderUsingRetriever( 59 id ident.ID, 60 retriever QueryableBlockRetriever, 61 onRetrieveBlock block.OnRetrieveBlock, 62 onReadBlock block.OnReadBlock, 63 opts Options, 64 ) Reader { 65 return Reader{ 66 opts: opts, 67 id: id, 68 retriever: retriever, 69 onRetrieve: onRetrieveBlock, 70 onRead: onReadBlock, 71 } 72 } 73 74 // ReadEncoded reads encoded blocks using just a block retriever. 75 func (r *Reader) ReadEncoded( 76 ctx context.Context, 77 start, end xtime.UnixNano, 78 nsCtx namespace.Context, 79 ) (BlockReaderIter, error) { 80 return r.readersWithBlocksMapAndBuffer(ctx, start, end, nil, nil, nsCtx) 81 } 82 83 func (r *Reader) readersWithBlocksMapAndBuffer( 84 ctx context.Context, 85 start, end xtime.UnixNano, 86 seriesBlocks block.DatabaseSeriesBlocks, 87 seriesBuffer databaseBuffer, 88 nsCtx namespace.Context, 89 ) (BlockReaderIter, error) { 90 if end.Before(start) { 91 return nil, xerrors.NewInvalidParamsError(errSeriesReadInvalidRange) 92 } 93 94 var ( 95 nowFn = r.opts.ClockOptions().NowFn() 96 now = xtime.ToUnixNano(nowFn()) 97 ropts = r.opts.RetentionOptions() 98 size = ropts.BlockSize() 99 alignedStart = start.Truncate(size) 100 alignedEnd = end.Truncate(size) 101 ) 102 103 if alignedEnd.Equal(end) { 104 // Move back to make range [start, end) 105 alignedEnd = alignedEnd.Add(-1 * size) 106 } 107 108 // Squeeze the lookup window by what's available to make range queries like [0, infinity) possible 109 earliest := retention.FlushTimeStart(ropts, now) 110 if alignedStart.Before(earliest) { 111 alignedStart = earliest 112 } 113 latest := now.Add(ropts.BufferFuture()).Truncate(size) 114 if alignedEnd.After(latest) { 115 alignedEnd = latest 116 } 117 118 return r.readersWithBlocksMapAndBufferAligned(ctx, alignedStart, alignedEnd, 119 seriesBlocks, seriesBuffer, nsCtx) 120 } 121 122 // BlockReaderIter provides an Iterator interface to a collection of BlockReaders. 123 // 124 // The Iterator allows disk read to be lazily requested when Next() is called. This allows the system to limit an 125 // expensive Query before it issues all its disk reads. 126 // 127 // The Iterator does not hold any locks so it's safe to pause or even abandon iterating through the results. Any 128 // in-memory blocks (cache, series buffer) are eagerly loaded to avoid holding read locks while iterating. 129 // 130 // This iterator is not thread safe and should be only be used by a single go routine. 131 type BlockReaderIter interface { 132 // Next requests the next BlockReaders, returning if any more existed. 133 // The BlockReaders are available with Current. 134 // If an error occurs, false is returned and it is available with Err. 135 Next(ctx context.Context) bool 136 137 // Current returns the current set of BlockReaders for a given blockStart time, guaranteed to be not empty. 138 // Results are returned in block start time asc ordering. Within the same block start time there is no guaranteed 139 // ordering. 140 Current() []xio.BlockReader 141 142 // Err is non-nil if an error occurred when calling Next. 143 Err() error 144 145 // ToSlices eagerly loads all BlockReaders into the legacy slices of slices for backward compatibility. 146 // TODO: remove this and convert everything to the iterator pattern. 147 ToSlices(ctx context.Context) ([][]xio.BlockReader, error) 148 } 149 150 type blockReaderIterOpts struct { 151 start xtime.UnixNano 152 end xtime.UnixNano 153 blockSize time.Duration 154 reader *Reader 155 nsCtx namespace.Context 156 cached []xio.BlockReader 157 buffer [][]xio.BlockReader 158 } 159 160 type blockReaderIter struct { 161 blockReaderIterOpts 162 163 blockAt xtime.UnixNano 164 curr []xio.BlockReader 165 err error 166 } 167 168 func (i *blockReaderIter) Err() error { 169 return i.err 170 } 171 172 func (i *blockReaderIter) Current() []xio.BlockReader { 173 return i.curr 174 } 175 176 // nolint: gocyclo 177 func (i *blockReaderIter) Next(ctx context.Context) bool { 178 if i.blockAt.IsZero() { 179 i.blockAt = i.start 180 } 181 i.curr = make([]xio.BlockReader, 0) 182 for !i.blockAt.After(i.end) && len(i.curr) == 0 { 183 // first checks the blocks from the series buffer at this time. 184 // blocks are sorted by start time so we only need to look at the head. 185 for len(i.buffer) > 0 && len(i.buffer[0]) > 0 && i.buffer[0][0].Start.Equal(i.blockAt) { 186 i.curr = append(i.curr, i.buffer[0][0]) 187 i.buffer[0] = i.buffer[0][1:] 188 if len(i.buffer[0]) == 0 { 189 i.buffer = i.buffer[1:] 190 } 191 } 192 193 // next check for the disk block at this time. 194 // check the cache first. 195 // blocks are sorted by start time so we only need to look at the head. 196 if len(i.cached) > 0 && i.cached[0].Start.Equal(i.blockAt) { 197 i.curr = append(i.curr, i.cached[0]) 198 i.cached = i.cached[1:] 199 } else { 200 // if not in the cache, request a load from disk. 201 blockReader, found, err := i.reader.streamBlock(ctx, i.blockAt, i.reader.onRetrieve, i.nsCtx) 202 if err != nil { 203 i.err = err 204 return false 205 } 206 if found { 207 i.curr = append(i.curr, blockReader) 208 } 209 } 210 i.blockAt = i.blockAt.Add(i.blockSize) 211 } 212 return len(i.curr) != 0 213 } 214 215 func (i *blockReaderIter) ToSlices(ctx context.Context) ([][]xio.BlockReader, error) { 216 var results [][]xio.BlockReader 217 for i.Next(ctx) { 218 results = append(results, i.Current()) 219 } 220 if i.Err() != nil { 221 return nil, i.Err() 222 } 223 return results, nil 224 } 225 226 func (r *Reader) readersWithBlocksMapAndBufferAligned( 227 ctx context.Context, 228 start, end xtime.UnixNano, 229 seriesBlocks block.DatabaseSeriesBlocks, 230 seriesBuffer databaseBuffer, 231 nsCtx namespace.Context, 232 ) (BlockReaderIter, error) { 233 var ( 234 nowFn = r.opts.ClockOptions().NowFn() 235 now = xtime.ToUnixNano(nowFn()) 236 ropts = r.opts.RetentionOptions() 237 blockSize = ropts.BlockSize() 238 readerCount = end.Sub(start) / blockSize 239 buffer [][]xio.BlockReader 240 cached []xio.BlockReader 241 ) 242 243 if readerCount < 0 { 244 readerCount = 0 245 } 246 247 for blockAt := start; !blockAt.After(end); blockAt = blockAt.Add(blockSize) { 248 // Eagerly load the readers from the series buffer and disk cache to avoid holding locks in the iterator. 249 // 250 // It is important to look for data in the series buffer one block at 251 // a time within this loop so that the returned results contain data 252 // from blocks in chronological order. Failure to do this will result 253 // in an out of order error in the MultiReaderIterator on query. 254 255 blockReader, blk, found, err := retrieveCached(ctx, blockAt, seriesBlocks) 256 if err != nil { 257 return nil, err 258 } 259 260 if found { 261 // NB(r): Mark this block as read now 262 blk.SetLastReadTime(now) 263 if r.onRead != nil { 264 r.onRead.OnReadBlock(blk) 265 } 266 cached = append(cached, blockReader) 267 } 268 269 if seriesBuffer != nil { 270 var bufferReaders []xio.BlockReader 271 bufferResults, err := seriesBuffer.ReadEncoded(ctx, blockAt, blockAt.Add(blockSize), nsCtx) 272 if err != nil { 273 return nil, err 274 } 275 276 // Multiple block results may be returned here (for the same block 277 // start) - one for warm writes and another for cold writes. 278 for _, bufferRes := range bufferResults { 279 bufferReaders = append(bufferReaders, bufferRes...) 280 } 281 if len(bufferReaders) > 0 { 282 buffer = append(buffer, bufferReaders) 283 } 284 } 285 } 286 287 return &blockReaderIter{ 288 blockReaderIterOpts: blockReaderIterOpts{ 289 start: start, 290 end: end, 291 blockSize: blockSize, 292 reader: r, 293 nsCtx: nsCtx, 294 buffer: buffer, 295 cached: cached, 296 }, 297 }, nil 298 } 299 300 // FetchBlocks returns data blocks given a list of block start times using 301 // just a block retriever. 302 func (r *Reader) FetchBlocks( 303 ctx context.Context, 304 starts []xtime.UnixNano, 305 nsCtx namespace.Context, 306 ) ([]block.FetchBlockResult, error) { 307 return r.fetchBlocksWithBlocksMapAndBuffer(ctx, starts, nil, nil, nsCtx) 308 } 309 310 func (r *Reader) fetchBlocksWithBlocksMapAndBuffer( 311 ctx context.Context, 312 starts []xtime.UnixNano, 313 seriesBlocks block.DatabaseSeriesBlocks, 314 seriesBuffer databaseBuffer, 315 nsCtx namespace.Context, 316 ) ([]block.FetchBlockResult, error) { 317 res := r.resolveBlockResults(ctx, starts, seriesBlocks, nsCtx) 318 if seriesBuffer != nil && !seriesBuffer.IsEmpty() { 319 bufferResults := seriesBuffer.FetchBlocks(ctx, starts, nsCtx) 320 321 // Ensure both slices are sorted before merging as two sorted lists. 322 block.SortFetchBlockResultByTimeAscending(res) 323 block.SortFetchBlockResultByTimeAscending(bufferResults) 324 bufferIdx := 0 325 for i := range res { 326 blockResult := res[i] 327 if !(bufferIdx < len(bufferResults)) { 328 break 329 } 330 331 currBufferResult := bufferResults[bufferIdx] 332 if blockResult.Start.Equal(currBufferResult.Start) { 333 if currBufferResult.Err != nil { 334 res[i].Err = currBufferResult.Err 335 } else { 336 res[i].Blocks = append(res[i].Blocks, currBufferResult.Blocks...) 337 } 338 bufferIdx++ 339 continue 340 } 341 } 342 343 // Add any buffer results for which there was no existing blockstart 344 // to the end. 345 if bufferIdx < len(bufferResults) { 346 res = append(res, bufferResults[bufferIdx:]...) 347 } 348 } 349 350 // Should still be sorted but do it again for sanity. 351 block.SortFetchBlockResultByTimeAscending(res) 352 return res, nil 353 } 354 355 func (r *Reader) resolveBlockResults( 356 ctx context.Context, 357 starts []xtime.UnixNano, 358 seriesBlocks block.DatabaseSeriesBlocks, 359 nsCtx namespace.Context, 360 ) []block.FetchBlockResult { 361 // Two-dimensional slice (each block.FetchBlockResult has a []xio.BlockReader internally) 362 // such that the first dimension is unique by blockstart and the second dimension is blocks 363 // of data for that blockstart (not necessarily in chronological order). 364 // 365 // ex. (querying 2P.M -> 6P.M with a 2-hour blocksize): 366 // []block.FetchBlockResult{ 367 // block.FetchBlockResult{ 368 // Start: 2P.M, 369 // Blocks: []xio.BlockReader{block0, block1, block2}, 370 // }, 371 // block.FetchBlockResult{ 372 // Start: 4P.M, 373 // Blocks: []xio.BlockReader{block0}, 374 // }, 375 // } 376 res := make([]block.FetchBlockResult, 0, len(starts)) 377 for _, start := range starts { 378 // Slice of xio.BlockReader such that all data belong to the same blockstart. 379 var blockReaders []xio.BlockReader 380 381 blockReader, _, found, err := retrieveCached(ctx, start, seriesBlocks) 382 if err != nil { 383 // Short-circuit this entire blockstart if an error was encountered. 384 r := block.NewFetchBlockResult(start, nil, 385 fmt.Errorf("unable to retrieve block stream for series %s time %v: %w", 386 r.id.String(), start, err)) 387 res = append(res, r) 388 continue 389 } 390 391 if !found { 392 // NB(r): Always use nil for OnRetrieveBlock so we don't cache the 393 // series after fetching it from disk, the fetch blocks API is called 394 // during streaming so to cache it in memory would mean we would 395 // eventually cache all series in memory when we stream results to a 396 // peer. 397 blockReader, found, err = r.streamBlock(ctx, start, nil, nsCtx) 398 if err != nil { 399 // Short-circuit this entire blockstart if an error was encountered. 400 r := block.NewFetchBlockResult(start, nil, 401 fmt.Errorf("unable to retrieve block stream for series %s time %v: %w", 402 r.id.String(), start, err)) 403 res = append(res, r) 404 continue 405 } 406 } 407 408 if found { 409 blockReaders = append(blockReaders, blockReader) 410 } 411 412 if len(blockReaders) > 0 { 413 res = append(res, block.NewFetchBlockResult(start, blockReaders, nil)) 414 } 415 } 416 417 return res 418 } 419 420 func retrieveCached( 421 ctx context.Context, 422 start xtime.UnixNano, 423 seriesBlocks block.DatabaseSeriesBlocks, 424 ) (xio.BlockReader, block.DatabaseBlock, bool, error) { 425 if seriesBlocks != nil { 426 if b, exists := seriesBlocks.BlockAt(start); exists { 427 streamedBlock, err := b.Stream(ctx) 428 if err != nil { 429 return xio.BlockReader{}, b, false, err 430 } 431 432 if streamedBlock.IsNotEmpty() { 433 return streamedBlock, b, true, nil 434 } 435 } 436 } 437 438 return xio.BlockReader{}, nil, false, nil 439 } 440 441 func (r *Reader) streamBlock( 442 ctx context.Context, 443 start xtime.UnixNano, 444 onRetrieve block.OnRetrieveBlock, 445 nsCtx namespace.Context, 446 ) (xio.BlockReader, bool, error) { 447 cachePolicy := r.opts.CachePolicy() 448 switch { 449 case cachePolicy == CacheAll: 450 // No-op, block metadata should have been in-memory 451 case r.retriever != nil: 452 // Try to stream from disk 453 isRetrievable, err := r.retriever.IsBlockRetrievable(start) 454 if err != nil { 455 return xio.BlockReader{}, false, err 456 } 457 458 if isRetrievable { 459 streamedBlock, err := r.retriever.Stream(ctx, r.id, start, onRetrieve, nsCtx) 460 if err != nil { 461 // Short-circuit this entire blockstart if an error was encountered. 462 return xio.BlockReader{}, false, err 463 } 464 465 if streamedBlock.IsNotEmpty() { 466 return streamedBlock, true, nil 467 } 468 } 469 } 470 471 return xio.BlockReader{}, false, nil 472 }