github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/frag/main.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

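// frag walks the chunk graph of an NBS store from its root, level by level,
// and reports how many physical reads each level requires compared with the
// optimal number of chunk-group reads, as a rough measure of how fragmented
// the store is on disk.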
package main

import (
	"context"
	"fmt"
	"log"
	"os"
	"sync"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/dynamodb"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/dustin/go-humanize"
	flag "github.com/juju/gnuflag"

	"github.com/dolthub/dolt/go/store/d"
	"github.com/dolthub/dolt/go/store/datas"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/nbs"
	"github.com/dolthub/dolt/go/store/types"
	"github.com/dolthub/dolt/go/store/util/profile"
)
    45  
    46  var (
    47  	dir    = flag.String("dir", "", "Write to an NBS store in the given directory")
    48  	table  = flag.String("table", "", "Write to an NBS store in AWS, using this table")
    49  	bucket = flag.String("bucket", "", "Write to an NBS store in AWS, using this bucket")
    50  	dbName = flag.String("db", "", "Write to an NBS store in AWS, using this db name")
    51  )
    52  
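// memTableSize is the in-memory table size (128 MiB) required when opening the NBS store.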
const memTableSize = 128 * humanize.MiByte

func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n", os.Args[0])
		flag.PrintDefaults()
	}

	profile.RegisterProfileFlags(flag.CommandLine)
	flag.Parse(true)

	if flag.NArg() != 0 {
		flag.Usage()
		return
	}

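	// Open the NBS store either from a local directory (--dir) or from AWS S3 and
	// DynamoDB (--table, --bucket and --db).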
	var store *nbs.NomsBlockStore
	if *dir != "" {
		var err error
		store, err = nbs.NewLocalStore(context.Background(), types.Format_Default.VersionString(), *dir, memTableSize)
		d.PanicIfError(err)

		*dbName = *dir
	} else if *table != "" && *bucket != "" && *dbName != "" {
		sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))

		var err error
		store, err = nbs.NewAWSStore(context.Background(), types.Format_Default.VersionString(), *table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize)
		d.PanicIfError(err)
	} else {
		log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n")
	}

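	// Wrap the block store in a datas.Database so we can list datasets and read values.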
	db := datas.NewDatabase(store)
	defer db.Close()

	defer profile.MaybeStartProfile().Stop()

	dss, err := db.Datasets(context.Background())

	if err != nil {
		fmt.Fprintln(os.Stderr, "error: failed to get datasets")
		os.Exit(1)
	}

	ref, err := types.NewRef(dss, types.Format_7_18)
	d.PanicIfError(err)
	height := ref.Height()
	fmt.Println("Store is of height", height)
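	// Columns: Height of the current level, number of Nodes at that level, total Children
	// referenced, approximate Branching factor (children per node), number of read Groups,
	// physical Reads required, and children Pruned from the descent.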
	fmt.Println("| Height |   Nodes | Children | Branching | Groups | Reads | Pruned |")
	fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+")
	chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n"

	var optimal, sum int
	visited := map[hash.Hash]bool{}

	root, err := store.Root(context.Background())

	if err != nil {
		fmt.Fprintln(os.Stderr, "error: failed to get root")
		os.Exit(1)
	}

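	// Walk the chunk graph level by level, starting from the store's root chunk.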
	current := hash.HashSlice{root}
	for numNodes := 1; numNodes > 0; numNodes = len(current) {
		// Start by reading the values of the current level of the graph
		currentValues := make(map[hash.Hash]types.Value, len(current))
		readValues, err := db.ReadManyValues(context.Background(), current)
		d.PanicIfError(err)
		for i, v := range readValues {
			h := current[i]
			currentValues[h] = v
			visited[h] = true
		}

		// Iterate all the Values at the current level of the graph IN ORDER (as specified in
		// |current|) and gather up their embedded refs. We'll build two different lists of
		// hash.Hashes during this process:
		// 1) An ordered list of ALL the children of the current level.
		// 2) An ordered list of the child nodes that contain refs to chunks we haven't yet
		//    visited. This *excludes* already-visited nodes and nodes without children.
		// We'll use 1) to get an estimate of how good the locality is among the children of
		// the current level, and then 2) to descend to the next level of the graph.
		orderedChildren := hash.HashSlice{}
		nextLevel := hash.HashSlice{}
		for _, h := range current {
			_ = currentValues[h].WalkRefs(types.Format_7_18, func(r types.Ref) error {
				target := r.TargetHash()
				orderedChildren = append(orderedChildren, target)
				if !visited[target] && r.Height() > 1 {
					nextLevel = append(nextLevel, target)
				}

				return nil
			})
		}


		// Estimate locality among the members of |orderedChildren| by splitting them into
		// groups that are roughly |branchFactor| in size and calling CalcReads on each group.
		// With perfect locality, we'd expect that each group could be read in a single
		// physical read.
		numChildren := len(orderedChildren)
		branchFactor := numChildren / numNodes
		numGroups := numNodes
		if numChildren%numNodes != 0 {
			numGroups++
		}
		wg := &sync.WaitGroup{}
		reads := make([]int, numGroups)
		for i := 0; i < numGroups; i++ {
			wg.Add(1)
			if i+1 == numGroups { // last group
				go func(i int) {
					defer wg.Done()
					// Use a goroutine-local err to avoid racing on the enclosing err variable.
					var err error
					reads[i], _, err = store.CalcReads(orderedChildren[i*branchFactor:].HashSet(), 0)
					d.PanicIfError(err)
				}(i)
				continue
			}
			go func(i int) {
				defer wg.Done()
				var err error
				reads[i], _, err = store.CalcReads(orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0)
				d.PanicIfError(err)
			}(i)
		}

		wg.Wait()

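		// Report this level: actual reads vs. the optimal one read per group, plus how many
		// children were pruned from the descent (already visited, or refs of height <= 1).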
		sumOfReads := sumInts(reads)
		fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel))

		sum += sumOfReads
		optimal += numGroups
		height--
		current = nextLevel
	}

	fmt.Printf("\nVisited %d chunk groups\n", optimal)
	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal))
}

// sumInts returns the sum of the values in nums.
func sumInts(nums []int) (sum int) {
	for _, n := range nums {
		sum += n
	}
	return
}