// increment is the step size, in ops/sec, used by findOptimalSplit when
// scanning candidate split points.
const increment = 50 // ops/sec

// findOptimalSplit computes and returns a value that separates the given pass
// and fail measurements optimally, such that the number of mis-classified
// passes (pass values that fall above the split) and fails (fail values that
// fall below the split) is minimized.
//
// The following gives a visual representation of the problem:
//
//	                               Optimal partition (=550) -----> |
//	                                                               |
//	Passes:      o         o      o       o      o      o     oo   |
//	Fails:                                   x             x       | x       x      x     x      x       x
//	        |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
//	        0         100       200       300       400       500  |    600       700       800       900
//
// The algorithm works by computing the error (i.e. the number of
// mis-classifications) at candidate points along the x-axis, starting at the
// smallest pass value and stepping by increment up to (and including) the
// largest fail value. If several candidates share the lowest error, the
// midpoint between the smallest and the largest such candidate is returned.
//
// It returns -1 when no candidate can be evaluated: either slice is empty, or
// the smallest pass value is greater than the largest fail value (so the
// scanned range is empty). The input slices are not modified.
func findOptimalSplit(pass, fail []int) int {
	// Not enough data to compute a sensible score.
	if len(pass) == 0 || len(fail) == 0 {
		return -1
	}

	// Work on sorted copies so that the caller's slices are left untouched.
	p := append([]int(nil), pass...)
	f := append([]int(nil), fail...)
	sort.Ints(p)
	sort.Ints(f)

	// The scan covers [smallest pass, largest fail]. Note that these are not
	// the global extremes of the combined data set.
	lo, hi := p[0], f[len(f)-1]
	if lo > hi {
		// Every pass lies above every fail, so the scan range is empty and
		// there is no candidate to score. Bail out rather than indexing into
		// an empty result set.
		return -1
	}

	// Counters for the number of incorrectly classified passes and fails.
	// Before the scan starts every pass counts as incorrect and no fail does;
	// the loops below adjust both as x advances.
	pCount, fCount := len(p), 0

	// best is the lowest error seen so far; it starts above any attainable
	// error so that the first candidate always wins. splitMin and splitMax are
	// the smallest and largest x that achieve best.
	best := len(p) + len(f) + 1
	var splitMin, splitMax int

	for x := lo; x <= hi; x += increment {
		// Passes at or below x are now correctly classified.
		for len(p) > 0 && p[0] <= x {
			pCount--
			p = p[1:]
		}

		// Fails strictly below x are now incorrectly classified.
		for len(f) > 0 && f[0] < x {
			fCount++
			f = f[1:]
		}

		// x is visited in ascending order, so the first x to reach a new
		// lowest error is splitMin and the most recent x to equal it is
		// splitMax.
		switch score := pCount + fCount; {
		case score < best:
			best, splitMin, splitMax = score, x, x
		case score == best:
			splitMax = x
		}
	}

	// If more than one candidate attains the lowest error, split the
	// difference between the smallest and the largest of them.
	return (splitMin + splitMax) / 2
}