github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/samples_test.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"sort"
    22  	"testing"
    23  
    24  	"github.com/stretchr/testify/require"
    25  	"gonum.org/v1/plot"
    26  	"gonum.org/v1/plot/plotter"
    27  	"gonum.org/v1/plot/vg"
    28  )
    29  
    30  type Samples []int
    31  
    32  func (s Samples) Summary() string {
    33  	f := "mean: %8.2f \t stddev: %8.2f \t p50: %5d \t p90: %5d \t p99: %5d \t p99.9: %5d \t p100: %5d"
    34  	p50, p90, p99, p999, p100 := s.percentiles()
    35  	return fmt.Sprintf(f, s.mean(), s.stdDev(), p50, p90, p99, p999, p100)
    36  }
    37  
    38  func (s Samples) count() float64 {
    39  	return float64(len(s))
    40  }
    41  
    42  func (s Samples) sum() (total float64) {
    43  	for _, v := range s {
    44  		total += float64(v)
    45  	}
    46  	return
    47  }
    48  
    49  func (s Samples) mean() float64 {
    50  	return s.sum() / float64(len(s))
    51  }
    52  
    53  func (s Samples) stdDev() float64 {
    54  	var acc float64
    55  	u := s.mean()
    56  	for _, v := range s {
    57  		d := float64(v) - u
    58  		acc += d * d
    59  	}
    60  	return math.Sqrt(acc / s.count())
    61  }
    62  
    63  func (s Samples) percentiles() (p50, p90, p99, p999, p100 int) {
    64  	sort.Ints(s)
    65  	l := len(s)
    66  	p50 = s[l/2]
    67  	p90 = s[(l*9)/10]
    68  	p99 = s[(l*99)/100]
    69  	p999 = s[(l*999)/1000]
    70  	p100 = s[l-1]
    71  	return
    72  }
    73  
    74  func PrintTreeSummaryByLevel(t *testing.T, nd Node, ns NodeStore) {
    75  	ctx := context.Background()
    76  
    77  	sizeByLevel := make([]Samples, nd.Level()+1)
    78  	cardByLevel := make([]Samples, nd.Level()+1)
    79  	err := WalkNodes(ctx, nd, ns, func(ctx context.Context, nd Node) error {
    80  		lvl := nd.Level()
    81  		sizeByLevel[lvl] = append(sizeByLevel[lvl], nd.Size())
    82  		cardByLevel[lvl] = append(cardByLevel[lvl], int(nd.count))
    83  		return nil
    84  	})
    85  	require.NoError(t, err)
    86  
    87  	fmt.Println("pre-edit map Summary: ")
    88  	fmt.Println("| Level | count | avg Size \t  p50 \t  p90 \t p100 | avg card \t  p50 \t  p90 \t p100 |")
    89  	for i := nd.Level(); i >= 0; i-- {
    90  		sizes, cards := sizeByLevel[i], cardByLevel[i]
    91  		sp50, _, sp90, _, sp100 := sizes.percentiles()
    92  		cp50, _, cp90, _, cp100 := cards.percentiles()
    93  		fmt.Printf("| %5d | %5d | %8.2f \t %4d \t %4d \t %4d | %8.2f \t %4d \t %4d \t %4d |\n",
    94  			i, len(cards), sizes.mean(), sp50, sp90, sp100, cards.mean(), cp50, cp90, cp100)
    95  	}
    96  	fmt.Println()
    97  }
    98  
    99  func plotNodeSizeDistribution(t *testing.T, name string, nd Node, ns NodeStore) {
   100  	data, err := measureTreeNodes(nd, ns)
   101  	require.NoError(t, err)
   102  	plotIntHistogram(name, data)
   103  }
   104  
   105  func measureTreeNodes(nd Node, ns NodeStore) (Samples, error) {
   106  	ctx := context.Background()
   107  	data := make(Samples, 0, 1024)
   108  	err := WalkNodes(ctx, nd, ns, func(ctx context.Context, nd Node) error {
   109  		data = append(data, nd.Size())
   110  		return nil
   111  	})
   112  	return data, err
   113  }
   114  
   115  func plotIntHistogram(name string, data []int) {
   116  	var values plotter.Values
   117  	for _, d := range data {
   118  		values = append(values, float64(d))
   119  	}
   120  
   121  	p := plot.New()
   122  	p.Title.Text = "histogram plot"
   123  
   124  	hist, err := plotter.NewHist(values, 50)
   125  	if err != nil {
   126  		panic(err)
   127  	}
   128  	p.Add(hist)
   129  
   130  	if err := p.Save(3*vg.Inch, 3*vg.Inch, name); err != nil {
   131  		panic(err)
   132  	}
   133  }