github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/frag/main.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package main 23 24 import ( 25 "context" 26 "fmt" 27 "log" 28 "os" 29 "sync" 30 31 "github.com/aws/aws-sdk-go/aws" 32 "github.com/aws/aws-sdk-go/aws/session" 33 "github.com/aws/aws-sdk-go/service/dynamodb" 34 "github.com/aws/aws-sdk-go/service/s3" 35 "github.com/dustin/go-humanize" 36 flag "github.com/juju/gnuflag" 37 38 "github.com/dolthub/dolt/go/store/d" 39 "github.com/dolthub/dolt/go/store/hash" 40 "github.com/dolthub/dolt/go/store/nbs" 41 "github.com/dolthub/dolt/go/store/types" 42 "github.com/dolthub/dolt/go/store/util/profile" 43 ) 44 45 var ( 46 dir = flag.String("dir", "", "Write to an NBS store in the given directory") 47 table = flag.String("table", "", "Write to an NBS store in AWS, using this table") 48 bucket = flag.String("bucket", "", "Write to an NBS store in AWS, using this bucket") 49 dbName = flag.String("db", "", "Write to an NBS store in AWS, using this db name") 50 ) 51 52 const memTableSize = 128 * humanize.MiByte 53 54 func main() { 55 flag.Usage = func() { 56 fmt.Fprintf(os.Stderr, "Usage: %s [options]\n", os.Args[0]) 57 flag.PrintDefaults() 58 } 59 60 ctx := context.Background() 61 62 profile.RegisterProfileFlags(flag.CommandLine) 63 flag.Parse(true) 64 65 if flag.NArg() != 0 { 66 flag.Usage() 67 return 68 } 69 70 var store *nbs.NomsBlockStore 71 if *dir != "" { 72 var err error 73 store, err = nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), *dir, memTableSize, nbs.NewUnlimitedMemQuotaProvider()) 74 d.PanicIfError(err) 75 76 *dbName = *dir 77 } else if *table != "" && *bucket != "" && *dbName != "" { 78 sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2"))) 79 80 var err error 81 store, err = nbs.NewAWSStore(context.Background(), types.Format_Default.VersionString(), *table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize, nbs.NewUnlimitedMemQuotaProvider()) 82 d.PanicIfError(err) 83 } else { 84 log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n") 85 } 86 87 vrw := types.NewValueStore(store) 88 89 root, err := store.Root(ctx) 90 if err != nil { 91 fmt.Fprintf(os.Stderr, "error: failed to get root: %v\n", err) 92 os.Exit(1) 93 } 94 95 defer profile.MaybeStartProfile().Stop() 96 97 rootValue, err := vrw.ReadValue(ctx, root) 98 if err != nil { 99 fmt.Fprintf(os.Stderr, "error: failed to get root value: %v\n", err) 100 os.Exit(1) 101 } 102 103 ref, err := types.NewRef(rootValue, types.Format_Default) 104 d.PanicIfError(err) 105 height := ref.Height() 106 fmt.Println("Store is of height", height) 107 fmt.Println("| Height | Nodes | Children | Branching | Groups | Reads | Pruned |") 108 fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+") 109 chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n" 110 111 var optimal, sum int 112 visited := make(map[hash.Hash]struct{}) 113 114 current := hash.HashSlice{root} 115 for numNodes := 1; numNodes > 0; numNodes = len(current) { 116 // Start by reading the values of the current level of the graph 117 currentValues := make(map[hash.Hash]types.Value, len(current)) 118 readValues, err := vrw.ReadManyValues(ctx, current) 119 d.PanicIfError(err) 120 for i, v := range readValues { 121 h := current[i] 122 currentValues[h] = v 123 visited[h] = struct{}{} 124 } 125 126 // Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process: 127 // 1) An ordered list of ALL the children of the current level. 128 // 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children. 129 // We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph. 130 orderedChildren := hash.HashSlice{} 131 nextLevel := hash.HashSlice{} 132 for _, h := range current { 133 _ = types.WalkAddrs(currentValues[h], types.Format_Default, func(h hash.Hash, isleaf bool) error { 134 orderedChildren = append(orderedChildren, h) 135 if _, ok := visited[h]; !ok && !isleaf { 136 nextLevel = append(nextLevel, h) 137 } 138 return nil 139 }) 140 } 141 142 // Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read. 143 numChildren := len(orderedChildren) 144 branchFactor := numChildren / numNodes 145 numGroups := numNodes 146 if numChildren%numNodes != 0 { 147 numGroups++ 148 } 149 wg := &sync.WaitGroup{} 150 reads := make([]int, numGroups) 151 for i := 0; i < numGroups; i++ { 152 wg.Add(1) 153 if i+1 == numGroups { // last group 154 go func(i int) { 155 defer wg.Done() 156 reads[i], _, err = nbs.CalcReads(store, orderedChildren[i*branchFactor:].HashSet(), 0) 157 d.PanicIfError(err) 158 }(i) 159 continue 160 } 161 go func(i int) { 162 defer wg.Done() 163 reads[i], _, err = nbs.CalcReads(store, orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0) 164 d.PanicIfError(err) 165 }(i) 166 } 167 168 wg.Wait() 169 170 sumOfReads := sumInts(reads) 171 fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel)) 172 173 sum += sumOfReads 174 optimal += numGroups 175 height-- 176 current = nextLevel 177 } 178 179 fmt.Printf("\nVisited %d chunk groups\n", optimal) 180 fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal)) 181 } 182 183 func sumInts(nums []int) (sum int) { 184 for _, n := range nums { 185 sum += n 186 } 187 return 188 }