gitee.com/quant1x/num@v0.3.2/internal/partial/topk.go (about)

     1  package partial
     2  
     3  import (
     4  	"cmp"
     5  	"math"
     6  )
     7  
     8  // TopK reorders a slice such that x[:k] contains the first k elements of the
     9  // slice when sorted in ascending order. Only the kth element x[k-1] is
    10  // guaranteed to be in sorted order. All elements in x[:k-1] are less than or
    11  // equal to the kth element, all elements in x[k:] are greater than or equal.
    12  // This is faster than using slices.Sort.
    13  func TopK[E cmp.Ordered](x []E, k int) {
    14  	k = __min(k, len(x))
    15  	if k > 0 {
    16  		floydRivest(x, 0, len(x)-1, k-1) // 0-indexed
    17  	}
    18  }
    19  
    20  // TopKFunc reorders a slice such that x[:k] contains the first k elements of
    21  // the slice when sorted in ascending order as determined by the less function.
    22  // Only the kth element x[k-1] is guaranteed to be in sorted order. All elements
    23  // in x[:k-1] are less than or equal to the kth element, all elements in x[k:]
    24  // are greater than or equal. This is faster than using slices.SortFunc.
    25  func TopKFunc[E any](x []E, k int, cmp func(E, E) int) {
    26  	k = __min(k, len(x))
    27  	if k > 0 {
    28  		floydRivestFunc(x, 0, len(x)-1, k-1, cmp)
    29  	}
    30  }
    31  
    32  // https://en.wikipedia.org/wiki/Floyd%E2%80%93Rivest_algorithm
    33  func floydRivest[E cmp.Ordered](x []E, left, right, k int) {
    34  	// left is the left index for the interval
    35  	// right is the right index for the interval
    36  	// k is the desired index value, where x[k] is the (k+1)th smallest element when left = 0
    37  	length := len(x)
    38  	for right > left {
    39  		// Use select recursively to sample a smaller set of size s
    40  		// the arbitrary constants 600 and 0.5 are used in the original
    41  		// version to minimize execution time.
    42  		if right-left > 600 {
    43  			var n = float64(right - left + 1)
    44  			var i = float64(k - left + 1)
    45  			var z = math.Log(n)
    46  			var s = 0.5 * math.Exp(2*z/3)
    47  			var sd = 0.5 * math.Sqrt(z*s*(n-s)/n) * float64(__sign(i-n/2))
    48  			var kf = float64(k)
    49  			var newLeft = __max(left, int(math.Floor(kf-i*s/n+sd)))
    50  			var newRight = __min(right, int(math.Floor(kf+(n-i)*s/n+sd)))
    51  			floydRivest(x, newLeft, newRight, k)
    52  		}
    53  		// partition the elements between left and right around t
    54  		var t = x[k]
    55  		var i = left
    56  		var j = right
    57  		x[left], x[k] = x[k], x[left]
    58  		if t < x[right] {
    59  			x[left], x[right] = x[right], x[left]
    60  		}
    61  		for i < j {
    62  			x[i], x[j] = x[j], x[i]
    63  			i++
    64  			j--
    65  			for i < length && x[i] < t {
    66  				i++
    67  			}
    68  			for j >= 0 && t < x[j] {
    69  				j--
    70  			}
    71  		}
    72  		if x[left] == t {
    73  			x[left], x[j] = x[j], x[left]
    74  		} else {
    75  			j++
    76  			x[j], x[right] = x[right], x[j]
    77  		}
    78  		// Adjust left and right towards the boundaries of the subset
    79  		// containing the (k − left + 1)th smallest element.
    80  		if j <= k {
    81  			left = j + 1
    82  		}
    83  		if k <= j {
    84  			right = j - 1
    85  		}
    86  	}
    87  }
    88  
    89  func floydRivestFunc[E any](x []E, left, right, k int, cmp func(E, E) int) {
    90  	// left is the left index for the interval
    91  	// right is the right index for the interval
    92  	// k is the desired index value, where x[k] is the (k+1)th smallest element when left = 0
    93  	length := len(x)
    94  	for right > left {
    95  		// Use select recursively to sample a smaller set of size s
    96  		// the arbitrary constants 600 and 0.5 are used in the original
    97  		// version to minimize execution time.
    98  		if right-left > 600 {
    99  			var n = float64(right - left + 1)
   100  			var i = float64(k - left + 1)
   101  			var z = math.Log(n)
   102  			var s = 0.5 * math.Exp(2*z/3)
   103  			var sd = 0.5 * math.Sqrt(z*s*(n-s)/n) * float64(__sign(i-n/2))
   104  			var kf = float64(k)
   105  			var newLeft = __max(left, int(math.Floor(kf-i*s/n+sd)))
   106  			var newRight = __min(right, int(math.Floor(kf+(n-i)*s/n+sd)))
   107  			floydRivestFunc(x, newLeft, newRight, k, cmp)
   108  		}
   109  		// partition the elements between left and right around t
   110  		var t = x[k]
   111  		var i = left
   112  		var j = right
   113  		x[left], x[k] = x[k], x[left]
   114  		if cmp(t, x[right]) < 0 {
   115  			x[left], x[right] = x[right], x[left]
   116  		}
   117  		for i < j {
   118  			x[i], x[j] = x[j], x[i]
   119  			i++
   120  			j--
   121  			for i < length && cmp(x[i], t) < 0 {
   122  				i++
   123  			}
   124  			for j >= 0 && cmp(t, x[j]) < 0 {
   125  				j--
   126  			}
   127  		}
   128  		if !(cmp(x[left], t) < 0 || cmp(t, x[left]) < 0) { // x[left] == t
   129  			x[left], x[j] = x[j], x[left]
   130  		} else {
   131  			j++
   132  			x[j], x[right] = x[right], x[j]
   133  		}
   134  		// Adjust left and right towards the boundaries of the subset
   135  		// containing the (k − left + 1)th smallest element.
   136  		if j <= k {
   137  			left = j + 1
   138  		}
   139  		if k <= j {
   140  			right = j - 1
   141  		}
   142  	}
   143  }
   144  
   145  func __min[E cmp.Ordered](x, y E) E {
   146  	if x < y {
   147  		return x
   148  	}
   149  	return y
   150  }
   151  
   152  func __max[E cmp.Ordered](x, y E) E {
   153  	if x > y {
   154  		return x
   155  	}
   156  	return y
   157  }
   158  
   159  func __sign(x float64) int {
   160  	if x < 0 {
   161  		return -1
   162  	}
   163  	return 1
   164  }