github.com/wrgl/wrgl@v0.14.0/cmd/perf/size_cmd.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"math"
     7  	"path/filepath"
     8  	"sort"
     9  
    10  	"github.com/dgraph-io/badger/v3"
    11  	"github.com/spf13/cobra"
    12  	"github.com/wrgl/wrgl/pkg/objects"
    13  )
    14  
    15  func sizeCmd() *cobra.Command {
    16  	cmd := &cobra.Command{
    17  		Use:   "size WRGL_DIRECTORY",
    18  		Short: "Measure size and statistics of different types of object in WRGL_DIRECTORY.",
    19  		Args:  cobra.ExactArgs(1),
    20  		RunE: func(cmd *cobra.Command, args []string) error {
    21  			db, err := badger.Open(
    22  				badger.DefaultOptions(filepath.Join(args[0], "kv")).
    23  					WithLoggingLevel(badger.ERROR),
    24  			)
    25  			if err != nil {
    26  				return err
    27  			}
    28  			defer db.Close()
    29  
    30  			prefixes := objects.Prefixes()
    31  
    32  			keySizes := []int64{}
    33  			valSizes := []int64{}
    34  			cumKeySizes := []int64{}
    35  			cumValSizes := []int64{}
    36  			statMap := map[string]*sizeStat{}
    37  			for _, prefix := range prefixes {
    38  				keySizes = keySizes[:0]
    39  				valSizes = valSizes[:0]
    40  
    41  				if err = db.View(func(txn *badger.Txn) error {
    42  					opt := badger.DefaultIteratorOptions
    43  					opt.Prefix = []byte(prefix)
    44  					it := txn.NewIterator(opt)
    45  					defer it.Close()
    46  					for it.Rewind(); it.Valid(); it.Next() {
    47  						item := it.Item()
    48  						keySizes = append(keySizes, item.KeySize())
    49  						valSizes = append(valSizes, item.ValueSize())
    50  					}
    51  					return nil
    52  				}); err != nil {
    53  					return err
    54  				}
    55  
    56  				statMap[prefix] = calculateSizeStat(keySizes, valSizes)
    57  
    58  				cumKeySizes = append(cumKeySizes, keySizes...)
    59  				cumValSizes = append(cumValSizes, valSizes...)
    60  			}
    61  
    62  			total := len(cumKeySizes)
    63  			for prefix, stat := range statMap {
    64  				cmd.Printf("Prefix %q: %d (%s) objects\n", prefix, stat.Count, percentage(uint64(stat.Count), uint64(total)))
    65  				stat.Print(cmd.OutOrStdout())
    66  				cmd.Println()
    67  			}
    68  			cummulativeStat := calculateSizeStat(cumKeySizes, cumValSizes)
    69  			cmd.Printf("Cummulative: %d objects\n", total)
    70  			cummulativeStat.Print(cmd.OutOrStdout())
    71  			return nil
    72  		},
    73  	}
    74  	return cmd
    75  }
    76  
    77  func percentage(count, total uint64) string {
    78  	return fmt.Sprintf("%d%%", uint64(math.Round(float64(count)/float64(total)*100)))
    79  }
    80  
    81  type sizeStat struct {
    82  	Count     int
    83  	KeyMedian uint64
    84  	KeyMean   uint64
    85  	KeyStdDev uint64
    86  	ValMedian uint64
    87  	ValMean   uint64
    88  	ValStdDev uint64
    89  }
    90  
    91  func calculateSizeStat(keySizes, valSizes []int64) *sizeStat {
    92  	s := &sizeStat{}
    93  	var totalKeySize uint64
    94  	var totalValSize uint64
    95  	for i, v := range keySizes {
    96  		totalKeySize += uint64(v)
    97  		totalValSize += uint64(valSizes[i])
    98  	}
    99  	s.Count = len(keySizes)
   100  	sort.Slice(keySizes, func(i, j int) bool { return keySizes[i] < keySizes[j] })
   101  	sort.Slice(valSizes, func(i, j int) bool { return valSizes[i] < valSizes[j] })
   102  	s.KeyMean = uint64(math.Round(float64(totalKeySize) / float64(s.Count)))
   103  	s.ValMean = uint64(math.Round(float64(totalValSize) / float64(s.Count)))
   104  	s.KeyStdDev = standardDeviation(keySizes, int64(s.KeyMean))
   105  	s.ValStdDev = standardDeviation(valSizes, int64(s.ValMean))
   106  	s.KeyMedian = uint64(keySizes[s.Count/2])
   107  	s.ValMedian = uint64(valSizes[s.Count/2])
   108  	return s
   109  }
   110  
   111  func (s *sizeStat) Print(out io.Writer) {
   112  	fmt.Fprintf(out, "Key mean: \t%s\t\tVal mean: \t%s\n", humanSize(s.KeyMean), humanSize(s.ValMean))
   113  	fmt.Fprintf(out, "Key median: \t%s\t\tVal median: \t%s\n", humanSize(s.KeyMedian), humanSize(s.ValMedian))
   114  	fmt.Fprintf(out, "Key stddev: \t%s (%s)\tVal stddev: \t%s (%s)\n",
   115  		humanSize(s.KeyStdDev),
   116  		percentage(s.KeyStdDev, s.KeyMean),
   117  		humanSize(s.ValStdDev),
   118  		percentage(s.ValStdDev, s.ValMean),
   119  	)
   120  }
   121  
   122  func humanSize(v uint64) string {
   123  	if v >= 1<<30 {
   124  		return fmt.Sprintf("%.2f Gb", float64(v)/float64(1<<30))
   125  	}
   126  	if v >= 1<<20 {
   127  		return fmt.Sprintf("%.2f Mb", float64(v)/float64(1<<20))
   128  	}
   129  	if v >= 1<<10 {
   130  		return fmt.Sprintf("%.2f Kb", float64(v)/float64(1<<10))
   131  	}
   132  	return fmt.Sprintf("%d B", v)
   133  }
   134  
   135  func standardDeviation(values []int64, mean int64) uint64 {
   136  	var sum uint64
   137  	for _, v := range values {
   138  		sum += uint64(math.Abs(float64((v - mean)))) ^ 2
   139  	}
   140  	return uint64(math.Round(math.Sqrt(float64(sum) / float64(len(values)))))
   141  }