github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/conjoiner.go (about)

     1  // Copyright 2017 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package nbs
     6  
     7  import (
     8  	"sort"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/attic-labs/noms/go/constants"
    13  	"github.com/attic-labs/noms/go/d"
    14  )
    15  
// conjoiner bundles the policy for deciding when a set of tables should be
// conjoined with the mechanism for actually performing that conjoin.
type conjoiner interface {
	// ConjoinRequired tells the caller whether or not it's time to request a
	// Conjoin, based upon the contents of |ts| and the conjoiner
	// implementation's policy.
	ConjoinRequired(ts tableSet) bool

	// Conjoin attempts to use |p| to conjoin some number of the tables
	// referenced by |upstream|, and then to use |mm| to replace them in the
	// manifest with a new, smaller set of tables that references precisely
	// the same set of chunks. Conjoin() may not actually conjoin any upstream
	// tables, usually because some out-of-process actor has already landed a
	// conjoin of its own. Callers must handle this, likely by rebasing
	// against upstream and re-evaluating the situation.
	Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents
}
    31  
// inlineConjoiner is a conjoiner whose policy is a simple cap on the number
// of tables, and whose Conjoin calls conjoin() directly and returns its
// result.
type inlineConjoiner struct {
	// maxTables is the threshold used by ConjoinRequired: a conjoin is
	// requested once a tableSet's Size() is strictly greater than this value.
	maxTables int
}
    35  
    36  func (c inlineConjoiner) ConjoinRequired(ts tableSet) bool {
    37  	return ts.Size() > c.maxTables
    38  }
    39  
// Conjoin delegates directly to conjoin(), passing every argument through
// untouched and returning whatever manifestContents conjoin() returns.
func (c inlineConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
	return conjoin(upstream, mm, p, stats)
}
    43  
    44  func conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
    45  	var conjoined tableSpec
    46  	var conjoinees, keepers []tableSpec
    47  
    48  	for {
    49  		if conjoinees == nil {
    50  			conjoined, conjoinees, keepers = conjoinTables(p, upstream.specs, stats)
    51  		}
    52  
    53  		specs := append(make([]tableSpec, 0, len(keepers)+1), conjoined)
    54  		specs = append(specs, keepers...)
    55  
    56  		newContents := manifestContents{
    57  			vers:  constants.NomsVersion,
    58  			root:  upstream.root,
    59  			lock:  generateLockHash(upstream.root, specs),
    60  			specs: specs,
    61  		}
    62  		upstream = mm.Update(upstream.lock, newContents, stats, nil)
    63  
    64  		if newContents.lock == upstream.lock {
    65  			return upstream // Success!
    66  		}
    67  		// Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us.
    68  		// If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed.
    69  		conjoineeSet := map[addr]struct{}{}
    70  		upstreamNames := map[addr]struct{}{}
    71  		for _, spec := range upstream.specs {
    72  			upstreamNames[spec.name] = struct{}{}
    73  		}
    74  		for _, c := range conjoinees {
    75  			if _, present := upstreamNames[c.name]; !present {
    76  				return upstream // Bail!
    77  			}
    78  			conjoineeSet[c.name] = struct{}{}
    79  		}
    80  
    81  		// Filter conjoinees out of upstream.specs to generate new set of keepers
    82  		keepers = make([]tableSpec, 0, len(upstream.specs)-len(conjoinees))
    83  		for _, spec := range upstream.specs {
    84  			if _, present := conjoineeSet[spec.name]; !present {
    85  				keepers = append(keepers, spec)
    86  			}
    87  		}
    88  	}
    89  }
    90  
    91  func conjoinTables(p tablePersister, upstream []tableSpec, stats *Stats) (conjoined tableSpec, conjoinees, keepers []tableSpec) {
    92  	// Open all the upstream tables concurrently
    93  	sources := make(chunkSources, len(upstream))
    94  	wg := sync.WaitGroup{}
    95  	for i, spec := range upstream {
    96  		wg.Add(1)
    97  		go func(idx int, spec tableSpec) {
    98  			sources[idx] = p.Open(spec.name, spec.chunkCount, stats)
    99  			wg.Done()
   100  		}(i, spec)
   101  		i++
   102  	}
   103  	wg.Wait()
   104  
   105  	t1 := time.Now()
   106  
   107  	toConjoin, toKeep := chooseConjoinees(sources)
   108  	conjoinedSrc := p.ConjoinAll(toConjoin, stats)
   109  
   110  	stats.ConjoinLatency.SampleTimeSince(t1)
   111  	stats.TablesPerConjoin.SampleLen(len(toConjoin))
   112  	stats.ChunksPerConjoin.Sample(uint64(conjoinedSrc.count()))
   113  
   114  	return tableSpec{conjoinedSrc.hash(), conjoinedSrc.count()}, toSpecs(toConjoin), toSpecs(toKeep)
   115  }
   116  
   117  // Current approach is to choose the smallest N tables which, when removed and replaced with the conjoinment, will leave the conjoinment as the smallest table.
   118  func chooseConjoinees(upstream chunkSources) (toConjoin, toKeep chunkSources) {
   119  	sortedUpstream := make(chunkSources, len(upstream))
   120  	copy(sortedUpstream, upstream)
   121  	sort.Sort(chunkSourcesByAscendingCount(sortedUpstream))
   122  
   123  	partition := 2
   124  	sum := sortedUpstream[0].count() + sortedUpstream[1].count()
   125  	for partition < len(sortedUpstream) && sum > sortedUpstream[partition].count() {
   126  		sum += sortedUpstream[partition].count()
   127  		partition++
   128  	}
   129  
   130  	return sortedUpstream[:partition], sortedUpstream[partition:]
   131  }
   132  
   133  func toSpecs(srcs chunkSources) []tableSpec {
   134  	specs := make([]tableSpec, len(srcs))
   135  	for i, src := range srcs {
   136  		d.PanicIfFalse(src.count() > 0)
   137  		specs[i] = tableSpec{src.hash(), src.count()}
   138  	}
   139  	return specs
   140  }