// Source: github.com/cockroachdb/pebble, internal/mkbench/split.go (package main).

// increment is the distance, in ops/sec, between consecutive candidate split
// points examined by findOptimalSplit.
const increment = 50 // ops/sec

// findOptimalSplit computes and returns a value that separates the given pass
// and fail measurements optimally, such that the number of mis-classified
// passes (pass values that fall above the split) and fails (fail values that
// fall below the split) is minimized. A value exactly equal to the split is
// counted as correctly classified, whether it is a pass or a fail.
//
// The following gives a visual representation of the problem:
//
//	Optimal partition (=550) ------------------------------------> |
//	                                                               |
//	Passes:           o         o         o           o   o  o  oo |
//	Fails:                                 x             x         |x x     x    x x          x
//	        |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
//	        0        100       200       300       400       500   |   600       700       800       900
//
// The algorithm works by computing the error (i.e. the number of
// mis-classifications) at candidate points along the x-axis, starting at the
// smallest pass value and advancing by increment until the largest fail value
// is passed. If several candidates share the lowest error, the midpoint
// (integer division) between the smallest and the largest of them is
// returned.
//
// The input slices are not modified. findOptimalSplit returns -1 when no
// sensible split can be computed: either input is empty, or every pass value
// is greater than every fail value, in which case there is no candidate
// between the smallest pass and the largest fail.
func findOptimalSplit(pass, fail []int) int {
	// Not enough data to compute a sensible score.
	if len(pass) == 0 || len(fail) == 0 {
		return -1
	}

	// Work on sorted copies so that the caller's slices are left untouched.
	p, f := slices.Clone(pass), slices.Clone(fail)
	slices.Sort(p)
	slices.Sort(f)

	// Candidate split points run from the smallest pass to the largest fail.
	lo, hi := p[0], f[len(f)-1]
	if lo > hi {
		// The passes lie entirely above the fails, so the candidate range is
		// empty. Report "no sensible split" rather than indexing into an
		// empty result set.
		return -1
	}

	// Maintain counters for the number of incorrectly classified passes and
	// fails. Before the first candidate is examined every pass counts as
	// incorrect and no fail does; the counters are corrected as x advances.
	pWrong, fWrong := len(p), 0

	// bestErrs starts above the largest possible error count, so the first
	// candidate always becomes the initial best.
	bestErrs := len(p) + len(f) + 1
	splitMin, splitMax := lo, lo

	for x := lo; x <= hi; x += increment {
		// Fewer pass values are incorrect as x increases: every pass at or
		// below x is now correctly classified.
		for len(p) > 0 && p[0] <= x {
			pWrong--
			p = p[1:]
		}

		// More fail values are incorrect as x increases: every fail strictly
		// below x is now mis-classified.
		for len(f) > 0 && f[0] < x {
			fWrong++
			f = f[1:]
		}

		// Track the smallest and the largest x that achieve the lowest error
		// seen so far. Candidates are visited in ascending order, so a tie
		// only ever extends the upper end.
		errs := pWrong + fWrong
		switch cmp.Compare(errs, bestErrs) {
		case -1:
			bestErrs = errs
			splitMin, splitMax = x, x
		case 0:
			splitMax = x
		}
	}

	// If more than one candidate achieves the lowest error, split the
	// difference between the smallest and the largest of them.
	return (splitMin + splitMax) / 2
}