// moveTopKToFront rearranges the elements of data in the range [start, end)
// so that no element in [start, k) is greater than any element in [k, end).
// Pivots are drawn from rng. It panics if k is smaller than start or larger
// than end.
func moveTopKToFront(data sort.Interface, start, end, k int, rng *rand.Rand) {
	// [lo, hi) is the window that still has to be split around k. Every pass
	// either finishes or shrinks the window from one side.
	lo, hi := start, end
	for {
		if k < lo || k > hi {
			panic(fmt.Sprintf("k (%d) outside of range [%d, %d)", k, lo, hi))
		}
		if k == lo || k == hi {
			// The split point coincides with a window boundary: nothing left
			// to separate.
			return
		}

		// Pick a random pivot and park it in the first slot of the window.
		data.Swap(lo, lo+rng.Intn(hi-lo))

		// Two-way partition around the pivot stored at lo. While scanning:
		//  - everything in [lo, left) is <= pivot
		//  - everything in [right, hi) is > pivot
		left, right := lo+1, hi
		for left < right {
			switch {
			case !data.Less(lo, left):
				left++
			case data.Less(lo, right-1):
				right--
			default:
				data.Swap(left, right-1)
				left++
				right--
			}
		}
		// [lo, split) holds elements <= pivot, [split, hi) elements > pivot.
		split := left

		if k >= split {
			// k falls in the upper part; the pivot and everything not
			// greater than it are already on the correct side.
			lo = split
			continue
		}

		if hi-split > (hi-lo)/4 {
			// The upper part is large enough that simply dropping it makes
			// good progress. With distinct elements this would always be the
			// right move, but many elements equal to the pivot can leave the
			// upper part tiny, which is handled below.
			hi = split
			continue
		}

		// Refine [lo, split): gather the elements equal to the pivot at the
		// back of that range, starting with the pivot itself.
		last := split - 1
		data.Swap(lo, last)
		i, j := lo, last-1
		for i <= j {
			if data.Less(i, last) {
				i++
				continue
			}
			data.Swap(i, j)
			j--
		}
		// [lo, i) is now strictly below the pivot and [i, split) is equal to
		// it. A k inside the run of equal elements needs no further work.
		if k > i {
			return
		}
		hi = i
	}
}

// MoveTopKToFront moves the top K elements to the front: after the call, none
// of the first k elements is greater than any of the remaining ones. It is a
// no-op when data has k or fewer elements. It makes O(n) calls to data.Less
// and data.Swap (with very high probability), using Hoare's selection
// algorithm (aka quickselect).
func MoveTopKToFront(data sort.Interface, k int) {
	n := data.Len()
	if k >= n {
		return
	}
	// Derive the seed from the inputs only, so that repeated calls with the
	// same arguments behave deterministically.
	seed := int64(n*1000 + k)
	moveTopKToFront(data, 0, n, k, rand.New(rand.NewSource(seed)))
}