github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/samples_test.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "context" 19 "fmt" 20 "math" 21 "sort" 22 "testing" 23 24 "github.com/stretchr/testify/require" 25 "gonum.org/v1/plot" 26 "gonum.org/v1/plot/plotter" 27 "gonum.org/v1/plot/vg" 28 ) 29 30 type Samples []int 31 32 func (s Samples) Summary() string { 33 f := "mean: %8.2f \t stddev: %8.2f \t p50: %5d \t p90: %5d \t p99: %5d \t p99.9: %5d \t p100: %5d" 34 p50, p90, p99, p999, p100 := s.percentiles() 35 return fmt.Sprintf(f, s.mean(), s.stdDev(), p50, p90, p99, p999, p100) 36 } 37 38 func (s Samples) count() float64 { 39 return float64(len(s)) 40 } 41 42 func (s Samples) sum() (total float64) { 43 for _, v := range s { 44 total += float64(v) 45 } 46 return 47 } 48 49 func (s Samples) mean() float64 { 50 return s.sum() / float64(len(s)) 51 } 52 53 func (s Samples) stdDev() float64 { 54 var acc float64 55 u := s.mean() 56 for _, v := range s { 57 d := float64(v) - u 58 acc += d * d 59 } 60 return math.Sqrt(acc / s.count()) 61 } 62 63 func (s Samples) percentiles() (p50, p90, p99, p999, p100 int) { 64 sort.Ints(s) 65 l := len(s) 66 p50 = s[l/2] 67 p90 = s[(l*9)/10] 68 p99 = s[(l*99)/100] 69 p999 = s[(l*999)/1000] 70 p100 = s[l-1] 71 return 72 } 73 74 func PrintTreeSummaryByLevel(t *testing.T, nd Node, ns NodeStore) { 75 ctx := context.Background() 76 77 sizeByLevel := make([]Samples, nd.Level()+1) 78 cardByLevel := make([]Samples, nd.Level()+1) 79 err := WalkNodes(ctx, nd, ns, func(ctx context.Context, nd Node) error { 80 lvl := nd.Level() 81 sizeByLevel[lvl] = append(sizeByLevel[lvl], nd.Size()) 82 cardByLevel[lvl] = append(cardByLevel[lvl], int(nd.count)) 83 return nil 84 }) 85 require.NoError(t, err) 86 87 fmt.Println("pre-edit map Summary: ") 88 fmt.Println("| Level | count | avg Size \t p50 \t p90 \t p100 | avg card \t p50 \t p90 \t p100 |") 89 for i := nd.Level(); i >= 0; i-- { 90 sizes, cards := sizeByLevel[i], cardByLevel[i] 91 sp50, _, sp90, _, sp100 := sizes.percentiles() 92 cp50, _, cp90, _, cp100 := cards.percentiles() 93 fmt.Printf("| %5d | %5d | %8.2f \t %4d \t %4d \t %4d | %8.2f \t %4d \t %4d \t %4d |\n", 94 i, len(cards), sizes.mean(), sp50, sp90, sp100, cards.mean(), cp50, cp90, cp100) 95 } 96 fmt.Println() 97 } 98 99 func plotNodeSizeDistribution(t *testing.T, name string, nd Node, ns NodeStore) { 100 data, err := measureTreeNodes(nd, ns) 101 require.NoError(t, err) 102 plotIntHistogram(name, data) 103 } 104 105 func measureTreeNodes(nd Node, ns NodeStore) (Samples, error) { 106 ctx := context.Background() 107 data := make(Samples, 0, 1024) 108 err := WalkNodes(ctx, nd, ns, func(ctx context.Context, nd Node) error { 109 data = append(data, nd.Size()) 110 return nil 111 }) 112 return data, err 113 } 114 115 func plotIntHistogram(name string, data []int) { 116 var values plotter.Values 117 for _, d := range data { 118 values = append(values, float64(d)) 119 } 120 121 p := plot.New() 122 p.Title.Text = "histogram plot" 123 124 hist, err := plotter.NewHist(values, 50) 125 if err != nil { 126 panic(err) 127 } 128 p.Add(hist) 129 130 if err := p.Save(3*vg.Inch, 3*vg.Inch, name); err != nil { 131 panic(err) 132 } 133 }