github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/topk.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package util
    12  
    13  import (
    14  	"fmt"
    15  	"math/rand"
    16  	"sort"
    17  )
    18  
    19  // moveTopKToFront swaps elements in the range [start, end) so that all elements
    20  // in the range [start, k) are <= than all elements in the range [k, end).
    21  func moveTopKToFront(data sort.Interface, start, end, k int, rng *rand.Rand) {
    22  	if k < start || k > end {
    23  		panic(fmt.Sprintf("k (%d) outside of range [%d, %d)", k, start, end))
    24  	}
    25  	if k == start || k == end {
    26  		return
    27  	}
    28  
    29  	// The strategy is to choose a random pivot and partition the data into
    30  	// three regions: elements < pivot, elements == pivot, elements > pivot.
    31  	//
    32  	// We first partition into two regions: elements <= pivot and
    33  	// elements > pivot and further refine the first region if necessary.
    34  
    35  	// Choose a random pivot and move it to the front.
    36  	data.Swap(start, start+rng.Intn(end-start))
    37  	pivot := start
    38  	l, r := start+1, end
    39  	for l < r {
    40  		// Invariants:
    41  		//  - elements in the range [start, l) are <= pivot
    42  		//  - elements in the range [r, end) are > pivot
    43  		if !data.Less(pivot, l) {
    44  			l++
    45  		} else if data.Less(pivot, r-1) {
    46  			r--
    47  		} else {
    48  			data.Swap(l, r-1)
    49  			l++
    50  			r--
    51  		}
    52  	}
    53  	mid := l
    54  	// Everything in the range [start, mid) is <= than the pivot.
    55  	// Everything in the range [mid, end) is > than the pivot.
    56  	if k >= mid {
    57  		// In this case, we eliminated at least the pivot (and all elements
    58  		// equal to it).
    59  		moveTopKToFront(data, mid, end, k, rng)
    60  		return
    61  	}
    62  
    63  	// If we eliminated a decent amount of elements, we can recurse on [0, mid).
    64  	// If the elements were distinct we would do this unconditionally, but in
    65  	// general we could have a lot of elements equal to the pivot.
    66  	if end-mid > (end-start)/4 {
    67  		moveTopKToFront(data, start, mid, k, rng)
    68  		return
    69  	}
    70  
    71  	// Now we work on the range [0, mid). Move everything that is equal to the
    72  	// pivot to the back.
    73  	data.Swap(pivot, mid-1)
    74  	pivot = mid - 1
    75  	for l, r = start, pivot-1; l <= r; {
    76  		if data.Less(l, pivot) {
    77  			l++
    78  		} else {
    79  			data.Swap(l, r)
    80  			r--
    81  		}
    82  	}
    83  	// Now everything in the range [start, l) is < than the pivot. Everything in the
    84  	// range [l, mid) is equal to the pivot. If k is in the [l, mid) range we
    85  	// are done, otherwise we recurse on [start, l).
    86  	if k <= l {
    87  		moveTopKToFront(data, start, l, k, rng)
    88  	}
    89  }
    90  
    91  // MoveTopKToFront moves the top K elements to the front. It makes O(n) calls to
    92  // data.Less and data.Swap (with very high probability). It uses Hoare's
    93  // selection algorithm (aka quickselect).
    94  func MoveTopKToFront(data sort.Interface, k int) {
    95  	if data.Len() <= k {
    96  		return
    97  	}
    98  	// We want the call to be deterministic so we use a predictable seed.
    99  	r := rand.New(rand.NewSource(int64(data.Len()*1000 + k)))
   100  	moveTopKToFront(data, 0, data.Len(), k, r)
   101  }