github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/frag/main.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"sync"
    11  
    12  	"github.com/attic-labs/kingpin"
    13  	"github.com/aws/aws-sdk-go/aws"
    14  	"github.com/aws/aws-sdk-go/aws/session"
    15  	"github.com/aws/aws-sdk-go/service/dynamodb"
    16  	"github.com/aws/aws-sdk-go/service/s3"
    17  	"github.com/dustin/go-humanize"
    18  
    19  	"github.com/attic-labs/noms/go/datas"
    20  	"github.com/attic-labs/noms/go/hash"
    21  	"github.com/attic-labs/noms/go/nbs"
    22  	"github.com/attic-labs/noms/go/types"
    23  	"github.com/attic-labs/noms/go/util/profile"
    24  )
    25  
    26  var (
    27  	dir    = kingpin.Flag("dir", "Write to an NBS store in the given directory").String()
    28  	table  = kingpin.Flag("table", "Write to an NBS store in AWS, using this table").String()
    29  	bucket = kingpin.Flag("bucket", "Write to an NBS store in AWS, using this bucket").String()
    30  	dbName = kingpin.Flag("db", "Write to an NBS store in AWS, using this db name").String()
    31  )
    32  
    33  const memTableSize = 128 * humanize.MiByte
    34  
    35  func main() {
    36  	profile.RegisterProfileFlags(kingpin.CommandLine)
    37  	kingpin.Parse()
    38  
    39  	var store *nbs.NomsBlockStore
    40  	if *dir != "" {
    41  		store = nbs.NewLocalStore(*dir, memTableSize)
    42  		*dbName = *dir
    43  	} else if *table != "" && *bucket != "" && *dbName != "" {
    44  		sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
    45  		store = nbs.NewAWSStore(*table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize)
    46  	} else {
    47  		log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n")
    48  	}
    49  
    50  	db := datas.NewDatabase(store)
    51  	defer db.Close()
    52  
    53  	defer profile.MaybeStartProfile().Stop()
    54  
    55  	height := types.NewRef(db.Datasets()).Height()
    56  	fmt.Println("Store is of height", height)
    57  	fmt.Println("| Height |   Nodes | Children | Branching | Groups | Reads | Pruned |")
    58  	fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+")
    59  	chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n"
    60  
    61  	var optimal, sum int
    62  	visited := map[hash.Hash]bool{}
    63  
    64  	current := hash.HashSlice{store.Root()}
    65  	for numNodes := 1; numNodes > 0; numNodes = len(current) {
    66  		// Start by reading the values of the current level of the graph
    67  		currentValues := make(map[hash.Hash]types.Value, len(current))
    68  		readValues := db.ReadManyValues(current)
    69  		for i, v := range readValues {
    70  			h := current[i]
    71  			currentValues[h] = v
    72  			visited[h] = true
    73  		}
    74  
    75  		// Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process:
    76  		// 1) An ordered list of ALL the children of the current level.
    77  		// 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children.
    78  		// We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph.
    79  		orderedChildren := hash.HashSlice{}
    80  		nextLevel := hash.HashSlice{}
    81  		for _, h := range current {
    82  			currentValues[h].WalkRefs(func(r types.Ref) {
    83  				target := r.TargetHash()
    84  				orderedChildren = append(orderedChildren, target)
    85  				if !visited[target] && r.Height() > 1 {
    86  					nextLevel = append(nextLevel, target)
    87  				}
    88  			})
    89  		}
    90  
    91  		// Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read.
    92  		numChildren := len(orderedChildren)
    93  		branchFactor := numChildren / numNodes
    94  		numGroups := numNodes
    95  		if numChildren%numNodes != 0 {
    96  			numGroups++
    97  		}
    98  		wg := &sync.WaitGroup{}
    99  		reads := make([]int, numGroups)
   100  		for i := 0; i < numGroups; i++ {
   101  			wg.Add(1)
   102  			if i+1 == numGroups { // last group
   103  				go func(i int) {
   104  					defer wg.Done()
   105  					reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:].HashSet(), 0)
   106  				}(i)
   107  				continue
   108  			}
   109  			go func(i int) {
   110  				defer wg.Done()
   111  				reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0)
   112  			}(i)
   113  		}
   114  
   115  		wg.Wait()
   116  
   117  		sumOfReads := sumInts(reads)
   118  		fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel))
   119  
   120  		sum += sumOfReads
   121  		optimal += numGroups
   122  		height--
   123  		current = nextLevel
   124  	}
   125  
   126  	fmt.Printf("\nVisited %d chunk groups\n", optimal)
   127  	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal))
   128  }
   129  
   130  func sumInts(nums []int) (sum int) {
   131  	for _, n := range nums {
   132  		sum += n
   133  	}
   134  	return
   135  }