// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/repair/metadata.go
//
// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

20 21 package repair 22 23 import ( 24 "math" 25 26 "github.com/m3db/m3/src/dbnode/client" 27 "github.com/m3db/m3/src/dbnode/storage/block" 28 "github.com/m3db/m3/src/dbnode/topology" 29 "github.com/m3db/m3/src/x/ident" 30 xtime "github.com/m3db/m3/src/x/time" 31 ) 32 33 const ( 34 defaultReplicaBlocksMetadataCapacity = 1 35 defaultReplicaSeriesMetadataCapacity = 4096 36 ) 37 38 type replicaMetadataSlice struct { 39 metadata []block.ReplicaMetadata 40 pool ReplicaMetadataSlicePool 41 } 42 43 func newReplicaMetadataSlice() ReplicaMetadataSlice { 44 return &replicaMetadataSlice{} 45 } 46 47 func newPooledReplicaMetadataSlice(metadata []block.ReplicaMetadata, pool ReplicaMetadataSlicePool) ReplicaMetadataSlice { 48 return &replicaMetadataSlice{metadata: metadata, pool: pool} 49 } 50 51 func (s *replicaMetadataSlice) Add(metadata block.ReplicaMetadata) { 52 s.metadata = append(s.metadata, metadata) 53 } 54 55 func (s *replicaMetadataSlice) Metadata() []block.ReplicaMetadata { 56 return s.metadata 57 } 58 59 func (s *replicaMetadataSlice) Reset() { 60 var zeroed block.ReplicaMetadata 61 for i := range s.metadata { 62 s.metadata[i] = zeroed 63 } 64 s.metadata = s.metadata[:0] 65 } 66 67 func (s *replicaMetadataSlice) Close() { 68 if s.pool != nil { 69 s.pool.Put(s) 70 } 71 } 72 73 type replicaBlockMetadata struct { 74 start xtime.UnixNano 75 metadata ReplicaMetadataSlice 76 } 77 78 // NewReplicaBlockMetadata creates a new replica block metadata 79 func NewReplicaBlockMetadata(start xtime.UnixNano, p ReplicaMetadataSlice) ReplicaBlockMetadata { 80 return replicaBlockMetadata{start: start, metadata: p} 81 } 82 83 func (m replicaBlockMetadata) Start() xtime.UnixNano { return m.start } 84 func (m replicaBlockMetadata) Metadata() []block.ReplicaMetadata { return m.metadata.Metadata() } 85 func (m replicaBlockMetadata) Add(metadata block.ReplicaMetadata) { m.metadata.Add(metadata) } 86 func (m replicaBlockMetadata) Close() { m.metadata.Close() } 87 88 type 
replicaBlocksMetadata map[xtime.UnixNano]ReplicaBlockMetadata 89 90 // NewReplicaBlocksMetadata creates a new replica blocks metadata 91 func NewReplicaBlocksMetadata() ReplicaBlocksMetadata { 92 return make(replicaBlocksMetadata, defaultReplicaBlocksMetadataCapacity) 93 } 94 95 func (m replicaBlocksMetadata) NumBlocks() int64 { return int64(len(m)) } 96 func (m replicaBlocksMetadata) Blocks() map[xtime.UnixNano]ReplicaBlockMetadata { return m } 97 func (m replicaBlocksMetadata) Add(block ReplicaBlockMetadata) { 98 m[block.Start()] = block 99 } 100 101 func (m replicaBlocksMetadata) GetOrAdd(start xtime.UnixNano, p ReplicaMetadataSlicePool) ReplicaBlockMetadata { 102 block, exists := m[start] 103 if exists { 104 return block 105 } 106 block = NewReplicaBlockMetadata(start, p.Get()) 107 m[start] = block 108 return block 109 } 110 111 func (m replicaBlocksMetadata) Close() { 112 for _, b := range m { 113 b.Close() 114 } 115 } 116 117 // NB(xichen): replicaSeriesMetadata is not thread-safe 118 type replicaSeriesMetadata struct { 119 values *Map 120 } 121 122 // NewReplicaSeriesMetadata creates a new replica series metadata 123 func NewReplicaSeriesMetadata() ReplicaSeriesMetadata { 124 return replicaSeriesMetadata{ 125 values: NewMap(MapOptions{InitialSize: defaultReplicaSeriesMetadataCapacity}), 126 } 127 } 128 129 func (m replicaSeriesMetadata) NumSeries() int64 { return int64(m.values.Len()) } 130 func (m replicaSeriesMetadata) Series() *Map { return m.values } 131 132 func (m replicaSeriesMetadata) NumBlocks() int64 { 133 var numBlocks int64 134 for _, entry := range m.values.Iter() { 135 series := entry.Value() 136 numBlocks += series.Metadata.NumBlocks() 137 } 138 return numBlocks 139 } 140 141 func (m replicaSeriesMetadata) GetOrAdd(id ident.ID) ReplicaBlocksMetadata { 142 blocks, exists := m.values.Get(id) 143 if exists { 144 return blocks.Metadata 145 } 146 blocks = ReplicaSeriesBlocksMetadata{ 147 ID: id, 148 Metadata: NewReplicaBlocksMetadata(), 149 } 150 
m.values.Set(id, blocks) 151 return blocks.Metadata 152 } 153 154 func (m replicaSeriesMetadata) Close() { 155 for _, entry := range m.values.Iter() { 156 series := entry.Value() 157 series.Metadata.Close() 158 } 159 } 160 161 type replicaMetadataComparer struct { 162 origin topology.Host 163 metadata ReplicaSeriesMetadata 164 replicaMetadataSlicePool ReplicaMetadataSlicePool 165 peers map[string]topology.Host 166 } 167 168 // NewReplicaMetadataComparer creates a new replica metadata comparer 169 func NewReplicaMetadataComparer(origin topology.Host, opts Options) ReplicaMetadataComparer { 170 return replicaMetadataComparer{ 171 origin: origin, 172 metadata: NewReplicaSeriesMetadata(), 173 replicaMetadataSlicePool: opts.ReplicaMetadataSlicePool(), 174 peers: make(map[string]topology.Host), 175 } 176 } 177 178 func (m replicaMetadataComparer) AddLocalMetadata(localIter block.FilteredBlocksMetadataIter) error { 179 for localIter.Next() { 180 id, localBlock := localIter.Current() 181 blocks := m.metadata.GetOrAdd(id) 182 blocks.GetOrAdd(localBlock.Start, m.replicaMetadataSlicePool).Add(block.ReplicaMetadata{ 183 Host: m.origin, 184 Metadata: localBlock, 185 }) 186 } 187 188 return localIter.Err() 189 } 190 191 func (m replicaMetadataComparer) AddPeerMetadata(peerIter client.PeerBlockMetadataIter) error { 192 for peerIter.Next() { 193 peer, peerBlock := peerIter.Current() 194 blocks := m.metadata.GetOrAdd(peerBlock.ID) 195 blocks.GetOrAdd(peerBlock.Start, m.replicaMetadataSlicePool).Add(block.ReplicaMetadata{ 196 Host: peer, 197 Metadata: peerBlock, 198 }) 199 200 // Add to peers list. 
201 if _, ok := m.peers[peer.ID()]; !ok { 202 m.peers[peer.ID()] = peer 203 } 204 } 205 206 return peerIter.Err() 207 } 208 209 type peerMetadataComparison struct { 210 comparedBlocks int64 211 comparedMissingBlocks int64 212 comparedExtraBlocks int64 213 comparedMismatchBlocks int64 214 } 215 216 func (c peerMetadataComparison) comparedDiffering() int64 { 217 return c.comparedMissingBlocks + 218 c.comparedExtraBlocks + 219 c.comparedMismatchBlocks 220 } 221 222 type peerMetadataComparisonMap map[string]*peerMetadataComparison 223 224 type peerBlockMetadataComparison struct { 225 hasBlock bool 226 sizeDiffersVsOriginValue bool 227 checksumDiffersVsOriginValue bool 228 } 229 230 func (m *peerBlockMetadataComparison) Reset() { 231 *m = peerBlockMetadataComparison{} 232 } 233 234 type peerBlockMetadataComparisonMap map[string]*peerBlockMetadataComparison 235 236 func (m peerBlockMetadataComparisonMap) Reset() { 237 for _, elem := range m { 238 elem.Reset() 239 } 240 } 241 242 func (m replicaMetadataComparer) newPeersMetadataComparisonMap() peerMetadataComparisonMap { 243 result := make(peerMetadataComparisonMap, len(m.peers)) 244 for _, peer := range m.peers { 245 result[peer.ID()] = &peerMetadataComparison{} 246 } 247 return result 248 } 249 250 func (m replicaMetadataComparer) newPeersBlockMetadataComparisonMap() peerBlockMetadataComparisonMap { 251 result := make(peerBlockMetadataComparisonMap, len(m.peers)) 252 for _, peer := range m.peers { 253 result[peer.ID()] = &peerBlockMetadataComparison{} 254 } 255 return result 256 } 257 258 func (m replicaMetadataComparer) Compare() MetadataComparisonResult { 259 var ( 260 sizeDiff = NewReplicaSeriesMetadata() 261 checkSumDiff = NewReplicaSeriesMetadata() 262 peersComparison = m.newPeersMetadataComparisonMap() 263 peersBlockComparison = m.newPeersBlockMetadataComparisonMap() 264 ) 265 266 for _, entry := range m.metadata.Series().Iter() { 267 series := entry.Value() 268 for _, b := range series.Metadata.Blocks() { 269 bm 
:= b.Metadata() 270 271 var ( 272 originContainsBlock = false 273 originSizeVal = int64(math.MaxInt64) 274 originChecksumVal = uint32(math.MaxUint32) 275 sizeVal int64 276 sameSize = true 277 firstSize = true 278 checksumVal uint32 279 sameChecksum = true 280 firstChecksum = true 281 ) 282 283 // Reset block comparisons. 284 peersBlockComparison.Reset() 285 286 // First check if origin contains the block to work out if peers 287 // have missing or extra (assumed missing if not checked after a 288 // block comparison reset). 289 for _, hm := range bm { 290 isOrigin := hm.Host.ID() == m.origin.ID() 291 if !isOrigin { 292 continue 293 } 294 295 originContainsBlock = true 296 originSizeVal = hm.Metadata.Size 297 if hm.Metadata.Checksum != nil { 298 originChecksumVal = *hm.Metadata.Checksum 299 } 300 } 301 302 // Now check peers. 303 for _, hm := range bm { 304 var ( 305 hostID = hm.Host.ID() 306 peerCompare, hasPeerCompare = peersBlockComparison[hostID] 307 ) 308 309 if hm.Metadata.Checksum == nil { 310 // Skip metadata that doesn't have a checksum. This usually means that the 311 // metadata represents unmerged or pending data. Better to skip for now and 312 // repair it once it has been merged as opposed to repairing it now and 313 // ping-ponging the same data back and forth between all the repairing nodes. 314 // 315 // The impact of this is that recently modified data may take longer to be 316 // repaired, but it saves a ton of work by preventing nodes from repairing 317 // from each other unnecessarily even when they have identical data. 318 // 319 // TODO(rartoul): Consider skipping series with duplicate metadata as well? 320 continue 321 } 322 323 if hasPeerCompare { 324 peerCompare.hasBlock = true 325 } 326 327 // Check size. 328 if firstSize { 329 sizeVal = hm.Metadata.Size 330 firstSize = false 331 } else if hm.Metadata.Size != sizeVal { 332 sameSize = false 333 } 334 335 // Track if size differs relative to the origin. 
336 if hasPeerCompare && hm.Metadata.Size != originSizeVal { 337 peerCompare.sizeDiffersVsOriginValue = true 338 } 339 340 // Check checksum. 341 if firstChecksum { 342 checksumVal = *hm.Metadata.Checksum 343 firstChecksum = false 344 } else if *hm.Metadata.Checksum != checksumVal { 345 sameChecksum = false 346 } 347 348 // Track if checksum differs relative to the origin. 349 if hasPeerCompare && *hm.Metadata.Checksum != originChecksumVal { 350 peerCompare.checksumDiffersVsOriginValue = true 351 } 352 } 353 354 // If only a subset of hosts in the replica set have sizes, or the sizes differ, 355 // we record this block 356 if !originContainsBlock || !sameSize { 357 sizeDiff.GetOrAdd(series.ID).Add(b) 358 } 359 360 // If only a subset of hosts in the replica set have checksums, or the checksums 361 // differ, we record this block 362 if !originContainsBlock || !sameChecksum { 363 checkSumDiff.GetOrAdd(series.ID).Add(b) 364 } 365 366 // Record the totals. 367 for peerID, peerComparison := range peersComparison { 368 peerBlockComparison, ok := peersBlockComparison[peerID] 369 if !ok || (!originContainsBlock && !peerBlockComparison.hasBlock) { 370 // CHECK: !exists(origin) && !exists(peer) 371 // If both origin and the peer are missing then we 372 // technically (for this pair of origin vs peer) didn't 373 // compare the origin to the peer block here since neither 374 // of them had it. 375 continue 376 } 377 378 // Track total comparisons made. 379 peerComparison.comparedBlocks++ 380 381 if !originContainsBlock && peerBlockComparison.hasBlock { 382 // CHECK: !exists(origin) && exists(peer) 383 // This block (regardless of mismatch) was extra relative 384 // to the origin. 385 peerComparison.comparedExtraBlocks++ 386 continue 387 } 388 389 if originContainsBlock && !peerBlockComparison.hasBlock { 390 // CHECK: exists(origin) && !exists(peer) 391 // This block (regardless of mismatch) was missing relative 392 // to the origin. 
393 peerComparison.comparedMissingBlocks++ 394 continue 395 } 396 397 // CHECK: exists(origin) && exists(peer) 398 // Both exist, now see if they differ relative to origin. 399 differs := peerBlockComparison.sizeDiffersVsOriginValue || 400 peerBlockComparison.checksumDiffersVsOriginValue 401 if differs { 402 // Only if they mismatch on an attribute to we count as mismatch. 403 peerComparison.comparedMismatchBlocks++ 404 } 405 } 406 } 407 } 408 409 // Construct the peer comparison results. 410 n := len(peersComparison) 411 peerMetadataComparisonResults := make([]PeerMetadataComparisonResult, 0, n) 412 for peerID, peerComparison := range peersComparison { 413 r := PeerMetadataComparisonResult{ 414 ID: peerID, 415 ComparedBlocks: peerComparison.comparedBlocks, 416 ComparedDifferingBlocks: peerComparison.comparedDiffering(), 417 ComparedMismatchBlocks: peerComparison.comparedMismatchBlocks, 418 ComparedMissingBlocks: peerComparison.comparedMissingBlocks, 419 ComparedExtraBlocks: peerComparison.comparedExtraBlocks, 420 } 421 peerMetadataComparisonResults = append(peerMetadataComparisonResults, r) 422 } 423 424 return MetadataComparisonResult{ 425 NumSeries: m.metadata.NumSeries(), 426 NumBlocks: m.metadata.NumBlocks(), 427 SizeDifferences: sizeDiff, 428 ChecksumDifferences: checkSumDiff, 429 PeerMetadataComparisonResults: peerMetadataComparisonResults, 430 } 431 } 432 433 func (m replicaMetadataComparer) Finalize() { 434 m.metadata.Close() 435 }