github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/frag/main.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package main 23 24 import ( 25 "context" 26 "fmt" 27 "log" 28 "os" 29 "sync" 30 31 "github.com/aws/aws-sdk-go/aws" 32 "github.com/aws/aws-sdk-go/aws/session" 33 "github.com/aws/aws-sdk-go/service/dynamodb" 34 "github.com/aws/aws-sdk-go/service/s3" 35 "github.com/dustin/go-humanize" 36 flag "github.com/juju/gnuflag" 37 38 "github.com/dolthub/dolt/go/store/d" 39 "github.com/dolthub/dolt/go/store/datas" 40 "github.com/dolthub/dolt/go/store/hash" 41 "github.com/dolthub/dolt/go/store/nbs" 42 "github.com/dolthub/dolt/go/store/types" 43 "github.com/dolthub/dolt/go/store/util/profile" 44 ) 45 46 var ( 47 dir = flag.String("dir", "", "Write to an NBS store in the given directory") 48 table = flag.String("table", "", "Write to an NBS store in AWS, using this table") 49 bucket = flag.String("bucket", "", "Write to an NBS store in AWS, using this bucket") 50 dbName = flag.String("db", "", "Write to an NBS store in AWS, using this db name") 51 ) 52 53 const memTableSize = 128 * humanize.MiByte 54 55 func main() { 56 flag.Usage = func() { 57 fmt.Fprintf(os.Stderr, "Usage: %s [options]\n", os.Args[0]) 58 flag.PrintDefaults() 59 } 60 61 profile.RegisterProfileFlags(flag.CommandLine) 62 flag.Parse(true) 63 64 if flag.NArg() != 0 { 65 flag.Usage() 66 return 67 } 68 69 var store *nbs.NomsBlockStore 70 if *dir != "" { 71 var err error 72 store, err = nbs.NewLocalStore(context.Background(), types.Format_Default.VersionString(), *dir, memTableSize) 73 d.PanicIfError(err) 74 75 *dbName = *dir 76 } else if *table != "" && *bucket != "" && *dbName != "" { 77 sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2"))) 78 79 var err error 80 store, err = nbs.NewAWSStore(context.Background(), types.Format_Default.VersionString(), *table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize) 81 d.PanicIfError(err) 82 } else { 83 log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n") 84 } 85 86 db := datas.NewDatabase(store) 87 defer db.Close() 88 89 defer profile.MaybeStartProfile().Stop() 90 91 dss, err := db.Datasets(context.Background()) 92 93 if err != nil { 94 fmt.Fprintln(os.Stderr, "error: failed to get datasets") 95 os.Exit(1) 96 } 97 98 ref, err := types.NewRef(dss, types.Format_7_18) 99 d.PanicIfError(err) 100 height := ref.Height() 101 fmt.Println("Store is of height", height) 102 fmt.Println("| Height | Nodes | Children | Branching | Groups | Reads | Pruned |") 103 fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+") 104 chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n" 105 106 var optimal, sum int 107 visited := map[hash.Hash]bool{} 108 109 root, err := store.Root(context.Background()) 110 111 if err != nil { 112 fmt.Fprintln(os.Stderr, "error: failed to get root") 113 os.Exit(1) 114 } 115 116 current := hash.HashSlice{root} 117 for numNodes := 1; numNodes > 0; numNodes = len(current) { 118 // Start by reading the values of the current level of the graph 119 currentValues := make(map[hash.Hash]types.Value, len(current)) 120 readValues, err := db.ReadManyValues(context.Background(), current) 121 d.PanicIfError(err) 122 for i, v := range readValues { 123 h := current[i] 124 currentValues[h] = v 125 visited[h] = true 126 } 127 128 // Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process: 129 // 1) An ordered list of ALL the children of the current level. 130 // 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children. 131 // We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph. 132 orderedChildren := hash.HashSlice{} 133 nextLevel := hash.HashSlice{} 134 for _, h := range current { 135 _ = currentValues[h].WalkRefs(types.Format_7_18, func(r types.Ref) error { 136 target := r.TargetHash() 137 orderedChildren = append(orderedChildren, target) 138 if !visited[target] && r.Height() > 1 { 139 nextLevel = append(nextLevel, target) 140 } 141 142 return nil 143 }) 144 } 145 146 // Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read. 147 numChildren := len(orderedChildren) 148 branchFactor := numChildren / numNodes 149 numGroups := numNodes 150 if numChildren%numNodes != 0 { 151 numGroups++ 152 } 153 wg := &sync.WaitGroup{} 154 reads := make([]int, numGroups) 155 for i := 0; i < numGroups; i++ { 156 wg.Add(1) 157 if i+1 == numGroups { // last group 158 go func(i int) { 159 defer wg.Done() 160 reads[i], _, err = store.CalcReads(orderedChildren[i*branchFactor:].HashSet(), 0) 161 d.PanicIfError(err) 162 }(i) 163 continue 164 } 165 go func(i int) { 166 defer wg.Done() 167 reads[i], _, err = store.CalcReads(orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0) 168 d.PanicIfError(err) 169 }(i) 170 } 171 172 wg.Wait() 173 174 sumOfReads := sumInts(reads) 175 fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel)) 176 177 sum += sumOfReads 178 optimal += numGroups 179 height-- 180 current = nextLevel 181 } 182 183 fmt.Printf("\nVisited %d chunk groups\n", optimal) 184 fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal)) 185 } 186 187 func sumInts(nums []int) (sum int) { 188 for _, n := range nums { 189 sum += n 190 } 191 return 192 }