github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/conjoiner.go (about) 1 // Copyright 2017 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package nbs 6 7 import ( 8 "sort" 9 "sync" 10 "time" 11 12 "github.com/attic-labs/noms/go/constants" 13 "github.com/attic-labs/noms/go/d" 14 ) 15 16 type conjoiner interface { 17 // ConjoinRequired tells the caller whether or not it's time to request a 18 // Conjoin, based upon the contents of |ts| and the conjoiner 19 // implementation's policy. 20 ConjoinRequired(ts tableSet) bool 21 22 // Conjoin attempts to use |p| to conjoin some number of tables referenced 23 // by |upstream|, allowing it to update |mm| with a new, smaller, set of tables 24 // that references precisely the same set of chunks. Conjoin() may not 25 // actually conjoin any upstream tables, usually because some out-of- 26 // process actor has already landed a conjoin of its own. Callers must 27 // handle this, likely by rebasing against upstream and re-evaluating the 28 // situation. 29 Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents 30 } 31 32 type inlineConjoiner struct { 33 maxTables int 34 } 35 36 func (c inlineConjoiner) ConjoinRequired(ts tableSet) bool { 37 return ts.Size() > c.maxTables 38 } 39 40 func (c inlineConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents { 41 return conjoin(upstream, mm, p, stats) 42 } 43 44 func conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents { 45 var conjoined tableSpec 46 var conjoinees, keepers []tableSpec 47 48 for { 49 if conjoinees == nil { 50 conjoined, conjoinees, keepers = conjoinTables(p, upstream.specs, stats) 51 } 52 53 specs := append(make([]tableSpec, 0, len(keepers)+1), conjoined) 54 specs = append(specs, keepers...) 55 56 newContents := manifestContents{ 57 vers: constants.NomsVersion, 58 root: upstream.root, 59 lock: generateLockHash(upstream.root, specs), 60 specs: specs, 61 } 62 upstream = mm.Update(upstream.lock, newContents, stats, nil) 63 64 if newContents.lock == upstream.lock { 65 return upstream // Success! 66 } 67 // Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us. 68 // If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed. 69 conjoineeSet := map[addr]struct{}{} 70 upstreamNames := map[addr]struct{}{} 71 for _, spec := range upstream.specs { 72 upstreamNames[spec.name] = struct{}{} 73 } 74 for _, c := range conjoinees { 75 if _, present := upstreamNames[c.name]; !present { 76 return upstream // Bail! 77 } 78 conjoineeSet[c.name] = struct{}{} 79 } 80 81 // Filter conjoinees out of upstream.specs to generate new set of keepers 82 keepers = make([]tableSpec, 0, len(upstream.specs)-len(conjoinees)) 83 for _, spec := range upstream.specs { 84 if _, present := conjoineeSet[spec.name]; !present { 85 keepers = append(keepers, spec) 86 } 87 } 88 } 89 } 90 91 func conjoinTables(p tablePersister, upstream []tableSpec, stats *Stats) (conjoined tableSpec, conjoinees, keepers []tableSpec) { 92 // Open all the upstream tables concurrently 93 sources := make(chunkSources, len(upstream)) 94 wg := sync.WaitGroup{} 95 for i, spec := range upstream { 96 wg.Add(1) 97 go func(idx int, spec tableSpec) { 98 sources[idx] = p.Open(spec.name, spec.chunkCount, stats) 99 wg.Done() 100 }(i, spec) 101 i++ 102 } 103 wg.Wait() 104 105 t1 := time.Now() 106 107 toConjoin, toKeep := chooseConjoinees(sources) 108 conjoinedSrc := p.ConjoinAll(toConjoin, stats) 109 110 stats.ConjoinLatency.SampleTimeSince(t1) 111 stats.TablesPerConjoin.SampleLen(len(toConjoin)) 112 stats.ChunksPerConjoin.Sample(uint64(conjoinedSrc.count())) 113 114 return tableSpec{conjoinedSrc.hash(), conjoinedSrc.count()}, toSpecs(toConjoin), toSpecs(toKeep) 115 } 116 117 // Current approach is to choose the smallest N tables which, when removed and replaced with the conjoinment, will leave the conjoinment as the smallest table. 118 func chooseConjoinees(upstream chunkSources) (toConjoin, toKeep chunkSources) { 119 sortedUpstream := make(chunkSources, len(upstream)) 120 copy(sortedUpstream, upstream) 121 sort.Sort(chunkSourcesByAscendingCount(sortedUpstream)) 122 123 partition := 2 124 sum := sortedUpstream[0].count() + sortedUpstream[1].count() 125 for partition < len(sortedUpstream) && sum > sortedUpstream[partition].count() { 126 sum += sortedUpstream[partition].count() 127 partition++ 128 } 129 130 return sortedUpstream[:partition], sortedUpstream[partition:] 131 } 132 133 func toSpecs(srcs chunkSources) []tableSpec { 134 specs := make([]tableSpec, len(srcs)) 135 for i, src := range srcs { 136 d.PanicIfFalse(src.count() > 0) 137 specs[i] = tableSpec{src.hash(), src.count()} 138 } 139 return specs 140 }