github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/mkbench/split.go (about)

     1  package main
     2  
     3  import (
     4  	"cmp"
     5  	"slices"
     6  )
     7  
     8  const increment = 50 // ops/sec
     9  
    10  // findOptimalSplit computes and returns a value that separates the given pass
    11  // and fail measurements optimally, such that the number of mis-classified
    12  // passes (pass values that fall above the split) and fails (fail values that
    13  // fall below the split) is minimized.
    14  //
    15  // The following gives a visual representation of the problem:
    16  //
    17  //		                     Optimal partition (=550) -----> |
    18  //	                                                         |
    19  //	  Passes:   o          o        o              o o o oo  |
    20  //	  Fails:                         x             x         |x    x  x     x x        x
    21  //	  |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
    22  //	  0        100       200       300       400       500   |   600       700       800       900
    23  //
    24  // The algorithm works by computing the error (i.e. mis-classifications) at
    25  // various points along the x-axis, starting from the origin and increasing by
    26  // the given increment.
    27  func findOptimalSplit(pass, fail []int) int {
    28  	// Not enough data to compute a sensible score.
    29  	if len(pass) == 0 || len(fail) == 0 {
    30  		return -1
    31  	}
    32  
    33  	// Maintain counters for the number of incorrectly classified passes and
    34  	// fails. All passes are initially incorrect, as we start at 0. Conversely,
    35  	// no fails are incorrectly classified, as all scores are >= 0.
    36  	pCount, fCount := len(pass), 0
    37  	p, f := make([]int, len(pass)), make([]int, len(fail))
    38  	copy(p, pass)
    39  	copy(f, fail)
    40  
    41  	// Sort the inputs.
    42  	slices.Sort(p)
    43  	slices.Sort(f)
    44  
    45  	// Find the global min and max.
    46  	min, max := p[0], f[len(fail)-1]
    47  
    48  	// Iterate over the range in increments.
    49  	var result [][]int
    50  	for x := min; x <= max; x = x + increment {
    51  		// Reduce the count of incorrect passes as x increases (i.e. fewer pass
    52  		// values are incorrect as x increases).
    53  		for len(p) > 0 && p[0] <= x {
    54  			pCount--
    55  			p = p[1:]
    56  		}
    57  
    58  		// Increase the count of incorrect fails as x increases (i.e. more fail
    59  		// values are incorrect as x increases).
    60  		for len(f) > 0 && f[0] < x {
    61  			fCount++
    62  			f = f[1:]
    63  		}
    64  
    65  		// Add a (x, score) tuple to result slice.
    66  		result = append(result, []int{x, pCount + fCount})
    67  	}
    68  
    69  	// Sort the (x, score) result slice by score ascending. Tie-break by x
    70  	// ascending.
    71  	slices.SortFunc(result, func(a, b []int) int {
    72  		if v := cmp.Compare(a[1], b[1]); v != 0 {
    73  			return v
    74  		}
    75  		return cmp.Compare(a[0], b[0])
    76  	})
    77  
    78  	// If there is more than one interval, split the difference between the min
    79  	// and the max.
    80  	splitMin, splitMax := result[0][0], result[0][0]
    81  	for i := 1; i < len(result); i++ {
    82  		if result[i][1] != result[0][1] {
    83  			break
    84  		}
    85  		splitMax = result[i][0]
    86  	}
    87  
    88  	return (splitMin + splitMax) / 2
    89  }