github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/frag/main.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package main
    23  
    24  import (
    25  	"context"
    26  	"fmt"
    27  	"log"
    28  	"os"
    29  	"sync"
    30  
    31  	"github.com/aws/aws-sdk-go/aws"
    32  	"github.com/aws/aws-sdk-go/aws/session"
    33  	"github.com/aws/aws-sdk-go/service/dynamodb"
    34  	"github.com/aws/aws-sdk-go/service/s3"
    35  	"github.com/dustin/go-humanize"
    36  	flag "github.com/juju/gnuflag"
    37  
    38  	"github.com/dolthub/dolt/go/store/d"
    39  	"github.com/dolthub/dolt/go/store/hash"
    40  	"github.com/dolthub/dolt/go/store/nbs"
    41  	"github.com/dolthub/dolt/go/store/types"
    42  	"github.com/dolthub/dolt/go/store/util/profile"
    43  )
    44  
// Command-line flags selecting the NBS store to inspect. main accepts either
// --dir alone, or all three of --table, --bucket and --db together; --dir is
// checked first.
var (
	// dir is the path of a local NBS store directory.
	dir    = flag.String("dir", "", "Write to an NBS store in the given directory")
	// table is passed to nbs.NewAWSStore as the table argument.
	table  = flag.String("table", "", "Write to an NBS store in AWS, using this table")
	// bucket is passed to nbs.NewAWSStore as the bucket argument.
	bucket = flag.String("bucket", "", "Write to an NBS store in AWS, using this bucket")
	// dbName names the database for an AWS-backed store. When --dir is used,
	// main overwrites it with the directory path so the final report can
	// label the store.
	dbName = flag.String("db", "", "Write to an NBS store in AWS, using this db name")
)

// memTableSize is the mem-table size handed to the store constructors
// (128 MiB).
const memTableSize = 128 * humanize.MiByte
    53  
    54  func main() {
    55  	flag.Usage = func() {
    56  		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n", os.Args[0])
    57  		flag.PrintDefaults()
    58  	}
    59  
    60  	ctx := context.Background()
    61  
    62  	profile.RegisterProfileFlags(flag.CommandLine)
    63  	flag.Parse(true)
    64  
    65  	if flag.NArg() != 0 {
    66  		flag.Usage()
    67  		return
    68  	}
    69  
    70  	var store *nbs.NomsBlockStore
    71  	if *dir != "" {
    72  		var err error
    73  		store, err = nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), *dir, memTableSize, nbs.NewUnlimitedMemQuotaProvider())
    74  		d.PanicIfError(err)
    75  
    76  		*dbName = *dir
    77  	} else if *table != "" && *bucket != "" && *dbName != "" {
    78  		sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
    79  
    80  		var err error
    81  		store, err = nbs.NewAWSStore(context.Background(), types.Format_Default.VersionString(), *table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize, nbs.NewUnlimitedMemQuotaProvider())
    82  		d.PanicIfError(err)
    83  	} else {
    84  		log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n")
    85  	}
    86  
    87  	vrw := types.NewValueStore(store)
    88  
    89  	root, err := store.Root(ctx)
    90  	if err != nil {
    91  		fmt.Fprintf(os.Stderr, "error: failed to get root: %v\n", err)
    92  		os.Exit(1)
    93  	}
    94  
    95  	defer profile.MaybeStartProfile().Stop()
    96  
    97  	rootValue, err := vrw.ReadValue(ctx, root)
    98  	if err != nil {
    99  		fmt.Fprintf(os.Stderr, "error: failed to get root value: %v\n", err)
   100  		os.Exit(1)
   101  	}
   102  
   103  	ref, err := types.NewRef(rootValue, types.Format_Default)
   104  	d.PanicIfError(err)
   105  	height := ref.Height()
   106  	fmt.Println("Store is of height", height)
   107  	fmt.Println("| Height |   Nodes | Children | Branching | Groups | Reads | Pruned |")
   108  	fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+")
   109  	chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n"
   110  
   111  	var optimal, sum int
   112  	visited := make(map[hash.Hash]struct{})
   113  
   114  	current := hash.HashSlice{root}
   115  	for numNodes := 1; numNodes > 0; numNodes = len(current) {
   116  		// Start by reading the values of the current level of the graph
   117  		currentValues := make(map[hash.Hash]types.Value, len(current))
   118  		readValues, err := vrw.ReadManyValues(ctx, current)
   119  		d.PanicIfError(err)
   120  		for i, v := range readValues {
   121  			h := current[i]
   122  			currentValues[h] = v
   123  			visited[h] = struct{}{}
   124  		}
   125  
   126  		// Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process:
   127  		// 1) An ordered list of ALL the children of the current level.
   128  		// 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children.
   129  		// We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph.
   130  		orderedChildren := hash.HashSlice{}
   131  		nextLevel := hash.HashSlice{}
   132  		for _, h := range current {
   133  			_ = types.WalkAddrs(currentValues[h], types.Format_Default, func(h hash.Hash, isleaf bool) error {
   134  				orderedChildren = append(orderedChildren, h)
   135  				if _, ok := visited[h]; !ok && !isleaf {
   136  					nextLevel = append(nextLevel, h)
   137  				}
   138  				return nil
   139  			})
   140  		}
   141  
   142  		// Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read.
   143  		numChildren := len(orderedChildren)
   144  		branchFactor := numChildren / numNodes
   145  		numGroups := numNodes
   146  		if numChildren%numNodes != 0 {
   147  			numGroups++
   148  		}
   149  		wg := &sync.WaitGroup{}
   150  		reads := make([]int, numGroups)
   151  		for i := 0; i < numGroups; i++ {
   152  			wg.Add(1)
   153  			if i+1 == numGroups { // last group
   154  				go func(i int) {
   155  					defer wg.Done()
   156  					reads[i], _, err = nbs.CalcReads(store, orderedChildren[i*branchFactor:].HashSet(), 0)
   157  					d.PanicIfError(err)
   158  				}(i)
   159  				continue
   160  			}
   161  			go func(i int) {
   162  				defer wg.Done()
   163  				reads[i], _, err = nbs.CalcReads(store, orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0)
   164  				d.PanicIfError(err)
   165  			}(i)
   166  		}
   167  
   168  		wg.Wait()
   169  
   170  		sumOfReads := sumInts(reads)
   171  		fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel))
   172  
   173  		sum += sumOfReads
   174  		optimal += numGroups
   175  		height--
   176  		current = nextLevel
   177  	}
   178  
   179  	fmt.Printf("\nVisited %d chunk groups\n", optimal)
   180  	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal))
   181  }
   182  
   183  func sumInts(nums []int) (sum int) {
   184  	for _, n := range nums {
   185  		sum += n
   186  	}
   187  	return
   188  }