github.com/m3db/m3@v1.5.0/src/dbnode/storage/bootstrap/result/result_index.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package result 22 23 import ( 24 "fmt" 25 26 "github.com/m3db/m3/src/dbnode/namespace" 27 "github.com/m3db/m3/src/m3ninx/doc" 28 "github.com/m3db/m3/src/m3ninx/index" 29 "github.com/m3db/m3/src/m3ninx/index/segment" 30 "github.com/m3db/m3/src/m3ninx/index/segment/builder" 31 "github.com/m3db/m3/src/m3ninx/persist" 32 xtime "github.com/m3db/m3/src/x/time" 33 ) 34 35 // NewDefaultDocumentsBuilderAllocator returns a default mutable segment 36 // allocator. 37 func NewDefaultDocumentsBuilderAllocator() DocumentsBuilderAllocator { 38 return func() (segment.DocumentsBuilder, error) { 39 return builder.NewBuilderFromDocuments(builder.NewOptions()) 40 } 41 } 42 43 type indexBootstrapResult struct { 44 results IndexResults 45 unfulfilled ShardTimeRanges 46 } 47 48 // NewIndexBootstrapResult returns a new index bootstrap result. 49 func NewIndexBootstrapResult() IndexBootstrapResult { 50 return &indexBootstrapResult{ 51 results: make(IndexResults), 52 unfulfilled: NewShardTimeRanges(), 53 } 54 } 55 56 func (r *indexBootstrapResult) IndexResults() IndexResults { 57 return r.results 58 } 59 60 func (r *indexBootstrapResult) Unfulfilled() ShardTimeRanges { 61 return r.unfulfilled 62 } 63 64 func (r *indexBootstrapResult) SetUnfulfilled(unfulfilled ShardTimeRanges) { 65 r.unfulfilled = unfulfilled 66 } 67 68 func (r *indexBootstrapResult) Add(blocks IndexBlockByVolumeType, unfulfilled ShardTimeRanges) { 69 r.results.Add(blocks) 70 r.unfulfilled.AddRanges(unfulfilled) 71 } 72 73 func (r *indexBootstrapResult) NumSeries() int { 74 var size int64 75 for _, blockByVolumeType := range r.results { 76 for _, b := range blockByVolumeType.data { 77 for _, s := range b.segments { 78 size += s.Segment().Size() 79 } 80 } 81 } 82 return int(size) 83 } 84 85 // NewIndexBuilder creates a wrapped locakble index seg builder. 86 func NewIndexBuilder(builder segment.DocumentsBuilder) *IndexBuilder { 87 return &IndexBuilder{ 88 builder: builder, 89 } 90 } 91 92 // FlushBatch flushes a batch of documents to the underlying segment builder. 93 func (b *IndexBuilder) FlushBatch(batch []doc.Metadata) ([]doc.Metadata, error) { 94 if len(batch) == 0 { 95 // Last flush might not have any docs enqueued 96 return batch, nil 97 } 98 99 // NB(bodu): Prevent concurrent writes. 100 // Although it seems like there's no need to lock on writes since 101 // each block should ONLY be getting built in a single thread. 102 err := b.builder.InsertBatch(index.Batch{ 103 Docs: batch, 104 AllowPartialUpdates: true, 105 }) 106 if err != nil && index.IsBatchPartialError(err) { 107 // If after filtering out duplicate ID errors 108 // there are no errors, then this was a successful 109 // insertion. 110 batchErr := err.(*index.BatchPartialError) 111 // NB(r): FilterDuplicateIDErrors returns nil 112 // if no errors remain after filtering duplicate ID 113 // errors, this case is covered in unit tests. 114 err = batchErr.FilterDuplicateIDErrors() 115 } 116 if err != nil { 117 return batch, err 118 } 119 120 // Reset docs batch for reuse 121 var empty doc.Metadata 122 for i := range batch { 123 batch[i] = empty 124 } 125 batch = batch[:0] 126 return batch, nil 127 } 128 129 // Builder returns the underlying index segment docs builder. 130 func (b *IndexBuilder) Builder() segment.DocumentsBuilder { 131 return b.builder 132 } 133 134 // AddBlockIfNotExists adds an index block if it does not already exist to the index results. 135 func (r IndexResults) AddBlockIfNotExists( 136 t xtime.UnixNano, 137 idxopts namespace.IndexOptions, 138 ) { 139 // NB(r): The reason we can align by the retention block size and guarantee 140 // there is only one entry for this time is because index blocks must be a 141 // positive multiple of the data block size, making it easy to map a data 142 // block entry to at most one index block entry. 143 blockStart := t.Truncate(idxopts.BlockSize()) 144 blockStartNanos := blockStart 145 146 _, exists := r[blockStartNanos] 147 if !exists { 148 r[blockStartNanos] = NewIndexBlockByVolumeType(blockStart) 149 } 150 } 151 152 // Add will add an index block to the collection, merging if one already 153 // exists. 154 func (r IndexResults) Add(blocks IndexBlockByVolumeType) { 155 if blocks.BlockStart().IsZero() { 156 return 157 } 158 159 // Merge results 160 blockStart := blocks.BlockStart() 161 existing, ok := r[blockStart] 162 if !ok { 163 r[blockStart] = blocks 164 return 165 } 166 167 r[blockStart] = existing.Merged(blocks) 168 } 169 170 // AddResults will add another set of index results to the collection, merging 171 // if index blocks already exists. 172 func (r IndexResults) AddResults(other IndexResults) { 173 for _, blocks := range other { 174 r.Add(blocks) 175 } 176 } 177 178 // MarkFulfilled will mark an index block as fulfilled, either partially or 179 // wholly as specified by the shard time ranges passed. 180 func (r IndexResults) MarkFulfilled( 181 t xtime.UnixNano, 182 fulfilled ShardTimeRanges, 183 indexVolumeType persist.IndexVolumeType, 184 idxopts namespace.IndexOptions, 185 ) error { 186 // NB(r): The reason we can align by the retention block size and guarantee 187 // there is only one entry for this time is because index blocks must be a 188 // positive multiple of the data block size, making it easy to map a data 189 // block entry to at most one index block entry. 190 blockStart := t.Truncate(idxopts.BlockSize()) 191 blockStartNanos := blockStart 192 193 blockRange := xtime.Range{ 194 Start: blockStart, 195 End: blockStart.Add(idxopts.BlockSize()), 196 } 197 198 // First check fulfilled is correct 199 min, max := fulfilled.MinMax() 200 if min.Before(blockRange.Start) || max.After(blockRange.End) { 201 return fmt.Errorf("fulfilled range %s is outside of index block range: %s", 202 fulfilled.SummaryString(), blockRange.String()) 203 } 204 205 blocks, exists := r[blockStartNanos] 206 if !exists { 207 blocks = NewIndexBlockByVolumeType(blockStart) 208 r[blockStartNanos] = blocks 209 } 210 211 block, exists := blocks.data[indexVolumeType] 212 if !exists { 213 block = NewIndexBlock(nil, nil) 214 blocks.data[indexVolumeType] = block 215 } 216 blocks.data[indexVolumeType] = block.Merged(NewIndexBlock(nil, fulfilled)) 217 return nil 218 } 219 220 // MergedIndexBootstrapResult returns a merged result of two bootstrap results. 221 // It is a mutating function that mutates the larger result by adding the 222 // smaller result to it and then finally returns the mutated result. 223 func MergedIndexBootstrapResult(i, j IndexBootstrapResult) IndexBootstrapResult { 224 if i == nil { 225 return j 226 } 227 if j == nil { 228 return i 229 } 230 sizeI, sizeJ := 0, 0 231 for _, ir := range i.IndexResults() { 232 for _, b := range ir.data { 233 sizeI += len(b.Segments()) 234 } 235 } 236 for _, ir := range j.IndexResults() { 237 for _, b := range ir.data { 238 sizeJ += len(b.Segments()) 239 } 240 } 241 if sizeI >= sizeJ { 242 i.IndexResults().AddResults(j.IndexResults()) 243 i.Unfulfilled().AddRanges(j.Unfulfilled()) 244 return i 245 } 246 j.IndexResults().AddResults(i.IndexResults()) 247 j.Unfulfilled().AddRanges(i.Unfulfilled()) 248 return j 249 } 250 251 // NewIndexBlock returns a new bootstrap index block result. 252 func NewIndexBlock( 253 segments []Segment, 254 fulfilled ShardTimeRanges, 255 ) IndexBlock { 256 if fulfilled == nil { 257 fulfilled = NewShardTimeRanges() 258 } 259 return IndexBlock{ 260 segments: segments, 261 fulfilled: fulfilled, 262 } 263 } 264 265 // Segments returns the segments. 266 func (b IndexBlock) Segments() []Segment { 267 return b.segments 268 } 269 270 // Fulfilled returns the fulfilled time ranges by this index block. 271 func (b IndexBlock) Fulfilled() ShardTimeRanges { 272 return b.fulfilled 273 } 274 275 // Merged returns a new merged index block, currently it just appends the 276 // list of segments from the other index block and the caller merges 277 // as they see necessary. 278 func (b IndexBlock) Merged(other IndexBlock) IndexBlock { 279 r := b 280 if len(other.segments) > 0 { 281 r.segments = append(r.segments, other.segments...) 282 } 283 if !other.fulfilled.IsEmpty() { 284 r.fulfilled = b.fulfilled.Copy() 285 r.fulfilled.AddRanges(other.fulfilled) 286 } 287 return r 288 } 289 290 // NewIndexBlockByVolumeType returns a new bootstrap index blocks by volume type result. 291 func NewIndexBlockByVolumeType(blockStart xtime.UnixNano) IndexBlockByVolumeType { 292 return IndexBlockByVolumeType{ 293 blockStart: blockStart, 294 data: make(map[persist.IndexVolumeType]IndexBlock), 295 } 296 } 297 298 // BlockStart returns the block start. 299 func (b IndexBlockByVolumeType) BlockStart() xtime.UnixNano { 300 return b.blockStart 301 } 302 303 // GetBlock returns an IndexBlock for volumeType. 304 func (b IndexBlockByVolumeType) GetBlock(volumeType persist.IndexVolumeType) (IndexBlock, bool) { 305 block, ok := b.data[volumeType] 306 return block, ok 307 } 308 309 // SetBlock sets an IndexBlock for volumeType. 310 func (b IndexBlockByVolumeType) SetBlock(volumeType persist.IndexVolumeType, block IndexBlock) { 311 b.data[volumeType] = block 312 } 313 314 // Iter returns the underlying iterable map data. 315 func (b IndexBlockByVolumeType) Iter() map[persist.IndexVolumeType]IndexBlock { 316 return b.data 317 } 318 319 // Merged returns a new merged index block by volume type. 320 // It merges the underlying index blocks together by index volume type. 321 func (b IndexBlockByVolumeType) Merged(other IndexBlockByVolumeType) IndexBlockByVolumeType { 322 r := b 323 for volumeType, otherBlock := range other.data { 324 existing, ok := r.data[volumeType] 325 if !ok { 326 r.data[volumeType] = otherBlock 327 continue 328 } 329 r.data[volumeType] = existing.Merged(otherBlock) 330 } 331 return r 332 }