github.com/m3db/m3@v1.5.0/src/dbnode/storage/repair/metadata.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package repair
    22  
    23  import (
    24  	"math"
    25  
    26  	"github.com/m3db/m3/src/dbnode/client"
    27  	"github.com/m3db/m3/src/dbnode/storage/block"
    28  	"github.com/m3db/m3/src/dbnode/topology"
    29  	"github.com/m3db/m3/src/x/ident"
    30  	xtime "github.com/m3db/m3/src/x/time"
    31  )
    32  
    33  const (
    34  	defaultReplicaBlocksMetadataCapacity = 1
    35  	defaultReplicaSeriesMetadataCapacity = 4096
    36  )
    37  
    38  type replicaMetadataSlice struct {
    39  	metadata []block.ReplicaMetadata
    40  	pool     ReplicaMetadataSlicePool
    41  }
    42  
    43  func newReplicaMetadataSlice() ReplicaMetadataSlice {
    44  	return &replicaMetadataSlice{}
    45  }
    46  
    47  func newPooledReplicaMetadataSlice(metadata []block.ReplicaMetadata, pool ReplicaMetadataSlicePool) ReplicaMetadataSlice {
    48  	return &replicaMetadataSlice{metadata: metadata, pool: pool}
    49  }
    50  
    51  func (s *replicaMetadataSlice) Add(metadata block.ReplicaMetadata) {
    52  	s.metadata = append(s.metadata, metadata)
    53  }
    54  
    55  func (s *replicaMetadataSlice) Metadata() []block.ReplicaMetadata {
    56  	return s.metadata
    57  }
    58  
    59  func (s *replicaMetadataSlice) Reset() {
    60  	var zeroed block.ReplicaMetadata
    61  	for i := range s.metadata {
    62  		s.metadata[i] = zeroed
    63  	}
    64  	s.metadata = s.metadata[:0]
    65  }
    66  
    67  func (s *replicaMetadataSlice) Close() {
    68  	if s.pool != nil {
    69  		s.pool.Put(s)
    70  	}
    71  }
    72  
    73  type replicaBlockMetadata struct {
    74  	start    xtime.UnixNano
    75  	metadata ReplicaMetadataSlice
    76  }
    77  
    78  // NewReplicaBlockMetadata creates a new replica block metadata
    79  func NewReplicaBlockMetadata(start xtime.UnixNano, p ReplicaMetadataSlice) ReplicaBlockMetadata {
    80  	return replicaBlockMetadata{start: start, metadata: p}
    81  }
    82  
    83  func (m replicaBlockMetadata) Start() xtime.UnixNano              { return m.start }
    84  func (m replicaBlockMetadata) Metadata() []block.ReplicaMetadata  { return m.metadata.Metadata() }
    85  func (m replicaBlockMetadata) Add(metadata block.ReplicaMetadata) { m.metadata.Add(metadata) }
    86  func (m replicaBlockMetadata) Close()                             { m.metadata.Close() }
    87  
    88  type replicaBlocksMetadata map[xtime.UnixNano]ReplicaBlockMetadata
    89  
    90  // NewReplicaBlocksMetadata creates a new replica blocks metadata
    91  func NewReplicaBlocksMetadata() ReplicaBlocksMetadata {
    92  	return make(replicaBlocksMetadata, defaultReplicaBlocksMetadataCapacity)
    93  }
    94  
    95  func (m replicaBlocksMetadata) NumBlocks() int64                                { return int64(len(m)) }
    96  func (m replicaBlocksMetadata) Blocks() map[xtime.UnixNano]ReplicaBlockMetadata { return m }
    97  func (m replicaBlocksMetadata) Add(block ReplicaBlockMetadata) {
    98  	m[block.Start()] = block
    99  }
   100  
   101  func (m replicaBlocksMetadata) GetOrAdd(start xtime.UnixNano, p ReplicaMetadataSlicePool) ReplicaBlockMetadata {
   102  	block, exists := m[start]
   103  	if exists {
   104  		return block
   105  	}
   106  	block = NewReplicaBlockMetadata(start, p.Get())
   107  	m[start] = block
   108  	return block
   109  }
   110  
   111  func (m replicaBlocksMetadata) Close() {
   112  	for _, b := range m {
   113  		b.Close()
   114  	}
   115  }
   116  
   117  // NB(xichen): replicaSeriesMetadata is not thread-safe
   118  type replicaSeriesMetadata struct {
   119  	values *Map
   120  }
   121  
   122  // NewReplicaSeriesMetadata creates a new replica series metadata
   123  func NewReplicaSeriesMetadata() ReplicaSeriesMetadata {
   124  	return replicaSeriesMetadata{
   125  		values: NewMap(MapOptions{InitialSize: defaultReplicaSeriesMetadataCapacity}),
   126  	}
   127  }
   128  
   129  func (m replicaSeriesMetadata) NumSeries() int64 { return int64(m.values.Len()) }
   130  func (m replicaSeriesMetadata) Series() *Map     { return m.values }
   131  
   132  func (m replicaSeriesMetadata) NumBlocks() int64 {
   133  	var numBlocks int64
   134  	for _, entry := range m.values.Iter() {
   135  		series := entry.Value()
   136  		numBlocks += series.Metadata.NumBlocks()
   137  	}
   138  	return numBlocks
   139  }
   140  
   141  func (m replicaSeriesMetadata) GetOrAdd(id ident.ID) ReplicaBlocksMetadata {
   142  	blocks, exists := m.values.Get(id)
   143  	if exists {
   144  		return blocks.Metadata
   145  	}
   146  	blocks = ReplicaSeriesBlocksMetadata{
   147  		ID:       id,
   148  		Metadata: NewReplicaBlocksMetadata(),
   149  	}
   150  	m.values.Set(id, blocks)
   151  	return blocks.Metadata
   152  }
   153  
   154  func (m replicaSeriesMetadata) Close() {
   155  	for _, entry := range m.values.Iter() {
   156  		series := entry.Value()
   157  		series.Metadata.Close()
   158  	}
   159  }
   160  
   161  type replicaMetadataComparer struct {
   162  	origin                   topology.Host
   163  	metadata                 ReplicaSeriesMetadata
   164  	replicaMetadataSlicePool ReplicaMetadataSlicePool
   165  	peers                    map[string]topology.Host
   166  }
   167  
   168  // NewReplicaMetadataComparer creates a new replica metadata comparer
   169  func NewReplicaMetadataComparer(origin topology.Host, opts Options) ReplicaMetadataComparer {
   170  	return replicaMetadataComparer{
   171  		origin:                   origin,
   172  		metadata:                 NewReplicaSeriesMetadata(),
   173  		replicaMetadataSlicePool: opts.ReplicaMetadataSlicePool(),
   174  		peers:                    make(map[string]topology.Host),
   175  	}
   176  }
   177  
   178  func (m replicaMetadataComparer) AddLocalMetadata(localIter block.FilteredBlocksMetadataIter) error {
   179  	for localIter.Next() {
   180  		id, localBlock := localIter.Current()
   181  		blocks := m.metadata.GetOrAdd(id)
   182  		blocks.GetOrAdd(localBlock.Start, m.replicaMetadataSlicePool).Add(block.ReplicaMetadata{
   183  			Host:     m.origin,
   184  			Metadata: localBlock,
   185  		})
   186  	}
   187  
   188  	return localIter.Err()
   189  }
   190  
   191  func (m replicaMetadataComparer) AddPeerMetadata(peerIter client.PeerBlockMetadataIter) error {
   192  	for peerIter.Next() {
   193  		peer, peerBlock := peerIter.Current()
   194  		blocks := m.metadata.GetOrAdd(peerBlock.ID)
   195  		blocks.GetOrAdd(peerBlock.Start, m.replicaMetadataSlicePool).Add(block.ReplicaMetadata{
   196  			Host:     peer,
   197  			Metadata: peerBlock,
   198  		})
   199  
   200  		// Add to peers list.
   201  		if _, ok := m.peers[peer.ID()]; !ok {
   202  			m.peers[peer.ID()] = peer
   203  		}
   204  	}
   205  
   206  	return peerIter.Err()
   207  }
   208  
   209  type peerMetadataComparison struct {
   210  	comparedBlocks         int64
   211  	comparedMissingBlocks  int64
   212  	comparedExtraBlocks    int64
   213  	comparedMismatchBlocks int64
   214  }
   215  
   216  func (c peerMetadataComparison) comparedDiffering() int64 {
   217  	return c.comparedMissingBlocks +
   218  		c.comparedExtraBlocks +
   219  		c.comparedMismatchBlocks
   220  }
   221  
   222  type peerMetadataComparisonMap map[string]*peerMetadataComparison
   223  
   224  type peerBlockMetadataComparison struct {
   225  	hasBlock                     bool
   226  	sizeDiffersVsOriginValue     bool
   227  	checksumDiffersVsOriginValue bool
   228  }
   229  
   230  func (m *peerBlockMetadataComparison) Reset() {
   231  	*m = peerBlockMetadataComparison{}
   232  }
   233  
   234  type peerBlockMetadataComparisonMap map[string]*peerBlockMetadataComparison
   235  
   236  func (m peerBlockMetadataComparisonMap) Reset() {
   237  	for _, elem := range m {
   238  		elem.Reset()
   239  	}
   240  }
   241  
   242  func (m replicaMetadataComparer) newPeersMetadataComparisonMap() peerMetadataComparisonMap {
   243  	result := make(peerMetadataComparisonMap, len(m.peers))
   244  	for _, peer := range m.peers {
   245  		result[peer.ID()] = &peerMetadataComparison{}
   246  	}
   247  	return result
   248  }
   249  
   250  func (m replicaMetadataComparer) newPeersBlockMetadataComparisonMap() peerBlockMetadataComparisonMap {
   251  	result := make(peerBlockMetadataComparisonMap, len(m.peers))
   252  	for _, peer := range m.peers {
   253  		result[peer.ID()] = &peerBlockMetadataComparison{}
   254  	}
   255  	return result
   256  }
   257  
// Compare walks every block of every series collected so far and produces:
//   - SizeDifferences: blocks the origin did not report, or whose sizes are
//     not all equal among the hosts that reported a checksum;
//   - ChecksumDifferences: blocks the origin did not report, or whose
//     checksums are not all equal among the hosts that reported one;
//   - PeerMetadataComparisonResults: for each known peer, how many blocks were
//     compared against the origin and how many of those were missing, extra or
//     mismatching relative to the origin.
//
// The order of PeerMetadataComparisonResults follows map iteration and is
// therefore not deterministic.
func (m replicaMetadataComparer) Compare() MetadataComparisonResult {
	var (
		sizeDiff             = NewReplicaSeriesMetadata()
		checkSumDiff         = NewReplicaSeriesMetadata()
		peersComparison      = m.newPeersMetadataComparisonMap()
		peersBlockComparison = m.newPeersBlockMetadataComparisonMap()
	)

	for _, entry := range m.metadata.Series().Iter() {
		series := entry.Value()
		for _, b := range series.Metadata.Blocks() {
			bm := b.Metadata()

			// The origin size/checksum start out as max-value sentinels and are
			// only overwritten if the origin reported this block. sizeVal and
			// checksumVal hold the first value seen among hosts with a
			// checksum; sameSize/sameChecksum flip to false as soon as a later
			// host disagrees with that first value.
			var (
				originContainsBlock = false
				originSizeVal       = int64(math.MaxInt64)
				originChecksumVal   = uint32(math.MaxUint32)
				sizeVal             int64
				sameSize            = true
				firstSize           = true
				checksumVal         uint32
				sameChecksum        = true
				firstChecksum       = true
			)

			// Reset block comparisons.
			peersBlockComparison.Reset()

			// First check if origin contains the block to work out if peers
			// have missing or extra (assumed missing if not checked after a
			// block comparison reset).
			for _, hm := range bm {
				isOrigin := hm.Host.ID() == m.origin.ID()
				if !isOrigin {
					continue
				}

				// The origin counts as containing the block even when its
				// checksum is nil; in that case originChecksumVal keeps its
				// sentinel value.
				originContainsBlock = true
				originSizeVal = hm.Metadata.Size
				if hm.Metadata.Checksum != nil {
					originChecksumVal = *hm.Metadata.Checksum
				}
			}

			// Now check peers. This loop visits every host entry, including the
			// origin's; hasPeerCompare is only true for hosts registered as
			// peers, so the per-peer flags are only touched for those.
			for _, hm := range bm {
				var (
					hostID                      = hm.Host.ID()
					peerCompare, hasPeerCompare = peersBlockComparison[hostID]
				)

				if hm.Metadata.Checksum == nil {
					// Skip metadata that doesn't have a checksum. This usually means that the
					// metadata represents unmerged or pending data. Better to skip for now and
					// repair it once it has been merged as opposed to repairing it now and
					// ping-ponging the same data back and forth between all the repairing nodes.
					//
					// The impact of this is that recently modified data may take longer to be
					// repaired, but it saves a ton of work by preventing nodes from repairing
					// from each other unnecessarily even when they have identical data.
					//
					// TODO(rartoul): Consider skipping series with duplicate metadata as well?
					continue
				}

				if hasPeerCompare {
					peerCompare.hasBlock = true
				}

				// Check size.
				if firstSize {
					sizeVal = hm.Metadata.Size
					firstSize = false
				} else if hm.Metadata.Size != sizeVal {
					sameSize = false
				}

				// Track if size differs relative to the origin.
				if hasPeerCompare && hm.Metadata.Size != originSizeVal {
					peerCompare.sizeDiffersVsOriginValue = true
				}

				// Check checksum.
				if firstChecksum {
					checksumVal = *hm.Metadata.Checksum
					firstChecksum = false
				} else if *hm.Metadata.Checksum != checksumVal {
					sameChecksum = false
				}

				// Track if checksum differs relative to the origin.
				if hasPeerCompare && *hm.Metadata.Checksum != originChecksumVal {
					peerCompare.checksumDiffersVsOriginValue = true
				}
			}

			// If the origin did not report this block, or the sizes seen above
			// are not all equal, we record this block.
			if !originContainsBlock || !sameSize {
				sizeDiff.GetOrAdd(series.ID).Add(b)
			}

			// If the origin did not report this block, or the checksums seen
			// above are not all equal, we record this block.
			if !originContainsBlock || !sameChecksum {
				checkSumDiff.GetOrAdd(series.ID).Add(b)
			}

			// Record the totals.
			for peerID, peerComparison := range peersComparison {
				peerBlockComparison, ok := peersBlockComparison[peerID]
				if !ok || (!originContainsBlock && !peerBlockComparison.hasBlock) {
					// CHECK: !exists(origin) && !exists(peer)
					// If both origin and the peer are missing then we
					// technically (for this pair of origin vs peer) didn't
					// compare the origin to the peer block here since neither
					// of them had it.
					continue
				}

				// Track total comparisons made.
				peerComparison.comparedBlocks++

				if !originContainsBlock && peerBlockComparison.hasBlock {
					// CHECK: !exists(origin) && exists(peer)
					// This block (regardless of mismatch) was extra relative
					// to the origin.
					peerComparison.comparedExtraBlocks++
					continue
				}

				if originContainsBlock && !peerBlockComparison.hasBlock {
					// CHECK: exists(origin) && !exists(peer)
					// This block (regardless of mismatch) was missing relative
					// to the origin.
					peerComparison.comparedMissingBlocks++
					continue
				}

				// CHECK: exists(origin) && exists(peer)
				// Both exist, now see if they differ relative to origin.
				differs := peerBlockComparison.sizeDiffersVsOriginValue ||
					peerBlockComparison.checksumDiffersVsOriginValue
				if differs {
					// Only if they mismatch on an attribute do we count as mismatch.
					peerComparison.comparedMismatchBlocks++
				}
			}
		}
	}

	// Construct the peer comparison results.
	n := len(peersComparison)
	peerMetadataComparisonResults := make([]PeerMetadataComparisonResult, 0, n)
	for peerID, peerComparison := range peersComparison {
		r := PeerMetadataComparisonResult{
			ID:                      peerID,
			ComparedBlocks:          peerComparison.comparedBlocks,
			ComparedDifferingBlocks: peerComparison.comparedDiffering(),
			ComparedMismatchBlocks:  peerComparison.comparedMismatchBlocks,
			ComparedMissingBlocks:   peerComparison.comparedMissingBlocks,
			ComparedExtraBlocks:     peerComparison.comparedExtraBlocks,
		}
		peerMetadataComparisonResults = append(peerMetadataComparisonResults, r)
	}

	return MetadataComparisonResult{
		NumSeries:                     m.metadata.NumSeries(),
		NumBlocks:                     m.metadata.NumBlocks(),
		SizeDifferences:               sizeDiff,
		ChecksumDifferences:           checkSumDiff,
		PeerMetadataComparisonResults: peerMetadataComparisonResults,
	}
}
   432  
   433  func (m replicaMetadataComparer) Finalize() {
   434  	m.metadata.Close()
   435  }