github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/frag/main.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "fmt" 9 "log" 10 "sync" 11 12 "github.com/attic-labs/kingpin" 13 "github.com/aws/aws-sdk-go/aws" 14 "github.com/aws/aws-sdk-go/aws/session" 15 "github.com/aws/aws-sdk-go/service/dynamodb" 16 "github.com/aws/aws-sdk-go/service/s3" 17 "github.com/dustin/go-humanize" 18 19 "github.com/attic-labs/noms/go/datas" 20 "github.com/attic-labs/noms/go/hash" 21 "github.com/attic-labs/noms/go/nbs" 22 "github.com/attic-labs/noms/go/types" 23 "github.com/attic-labs/noms/go/util/profile" 24 ) 25 26 var ( 27 dir = kingpin.Flag("dir", "Write to an NBS store in the given directory").String() 28 table = kingpin.Flag("table", "Write to an NBS store in AWS, using this table").String() 29 bucket = kingpin.Flag("bucket", "Write to an NBS store in AWS, using this bucket").String() 30 dbName = kingpin.Flag("db", "Write to an NBS store in AWS, using this db name").String() 31 ) 32 33 const memTableSize = 128 * humanize.MiByte 34 35 func main() { 36 profile.RegisterProfileFlags(kingpin.CommandLine) 37 kingpin.Parse() 38 39 var store *nbs.NomsBlockStore 40 if *dir != "" { 41 store = nbs.NewLocalStore(*dir, memTableSize) 42 *dbName = *dir 43 } else if *table != "" && *bucket != "" && *dbName != "" { 44 sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2"))) 45 store = nbs.NewAWSStore(*table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize) 46 } else { 47 log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n") 48 } 49 50 db := datas.NewDatabase(store) 51 defer db.Close() 52 53 defer profile.MaybeStartProfile().Stop() 54 55 height := types.NewRef(db.Datasets()).Height() 56 fmt.Println("Store is of height", height) 57 fmt.Println("| Height | Nodes | Children | Branching | Groups | Reads | Pruned |") 58 fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+") 59 chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n" 60 61 var optimal, sum int 62 visited := map[hash.Hash]bool{} 63 64 current := hash.HashSlice{store.Root()} 65 for numNodes := 1; numNodes > 0; numNodes = len(current) { 66 // Start by reading the values of the current level of the graph 67 currentValues := make(map[hash.Hash]types.Value, len(current)) 68 readValues := db.ReadManyValues(current) 69 for i, v := range readValues { 70 h := current[i] 71 currentValues[h] = v 72 visited[h] = true 73 } 74 75 // Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process: 76 // 1) An ordered list of ALL the children of the current level. 77 // 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children. 78 // We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph. 79 orderedChildren := hash.HashSlice{} 80 nextLevel := hash.HashSlice{} 81 for _, h := range current { 82 currentValues[h].WalkRefs(func(r types.Ref) { 83 target := r.TargetHash() 84 orderedChildren = append(orderedChildren, target) 85 if !visited[target] && r.Height() > 1 { 86 nextLevel = append(nextLevel, target) 87 } 88 }) 89 } 90 91 // Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read. 92 numChildren := len(orderedChildren) 93 branchFactor := numChildren / numNodes 94 numGroups := numNodes 95 if numChildren%numNodes != 0 { 96 numGroups++ 97 } 98 wg := &sync.WaitGroup{} 99 reads := make([]int, numGroups) 100 for i := 0; i < numGroups; i++ { 101 wg.Add(1) 102 if i+1 == numGroups { // last group 103 go func(i int) { 104 defer wg.Done() 105 reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:].HashSet(), 0) 106 }(i) 107 continue 108 } 109 go func(i int) { 110 defer wg.Done() 111 reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0) 112 }(i) 113 } 114 115 wg.Wait() 116 117 sumOfReads := sumInts(reads) 118 fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel)) 119 120 sum += sumOfReads 121 optimal += numGroups 122 height-- 123 current = nextLevel 124 } 125 126 fmt.Printf("\nVisited %d chunk groups\n", optimal) 127 fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal)) 128 } 129 130 func sumInts(nums []int) (sum int) { 131 for _, n := range nums { 132 sum += n 133 } 134 return 135 }