github.com/roboticscm/goman@v0.0.0-20210203095141-87c07b4a0a55/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sort provides primitives for sorting slices and user-defined 6 // collections. 7 package sort 8 9 // A type, typically a collection, that satisfies sort.Interface can be 10 // sorted by the routines in this package. The methods require that the 11 // elements of the collection be enumerated by an integer index. 12 type Interface interface { 13 // Len is the number of elements in the collection. 14 Len() int 15 // Less reports whether the element with 16 // index i should sort before the element with index j. 17 Less(i, j int) bool 18 // Swap swaps the elements with indexes i and j. 19 Swap(i, j int) 20 } 21 22 func min(a, b int) int { 23 if a < b { 24 return a 25 } 26 return b 27 } 28 29 // Insertion sort 30 func insertionSort(data Interface, a, b int) { 31 for i := a + 1; i < b; i++ { 32 for j := i; j > a && data.Less(j, j-1); j-- { 33 data.Swap(j, j-1) 34 } 35 } 36 } 37 38 // siftDown implements the heap property on data[lo, hi). 39 // first is an offset into the array where the root of the heap lies. 40 func siftDown(data Interface, lo, hi, first int) { 41 root := lo 42 for { 43 child := 2*root + 1 44 if child >= hi { 45 break 46 } 47 if child+1 < hi && data.Less(first+child, first+child+1) { 48 child++ 49 } 50 if !data.Less(first+root, first+child) { 51 return 52 } 53 data.Swap(first+root, first+child) 54 root = child 55 } 56 } 57 58 func heapSort(data Interface, a, b int) { 59 first := a 60 lo := 0 61 hi := b - a 62 63 // Build heap with greatest element at top. 64 for i := (hi - 1) / 2; i >= 0; i-- { 65 siftDown(data, i, hi, first) 66 } 67 68 // Pop elements, largest first, into end of data. 69 for i := hi - 1; i >= 0; i-- { 70 data.Swap(first, first+i) 71 siftDown(data, lo, i, first) 72 } 73 } 74 75 // Quicksort, following Bentley and McIlroy, 76 // ``Engineering a Sort Function,'' SP&E November 1993. 77 78 // medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a]. 79 func medianOfThree(data Interface, a, b, c int) { 80 m0 := b 81 m1 := a 82 m2 := c 83 // bubble sort on 3 elements 84 if data.Less(m1, m0) { 85 data.Swap(m1, m0) 86 } 87 if data.Less(m2, m1) { 88 data.Swap(m2, m1) 89 } 90 if data.Less(m1, m0) { 91 data.Swap(m1, m0) 92 } 93 // now data[m0] <= data[m1] <= data[m2] 94 } 95 96 func swapRange(data Interface, a, b, n int) { 97 for i := 0; i < n; i++ { 98 data.Swap(a+i, b+i) 99 } 100 } 101 102 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 103 m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. 104 if hi-lo > 40 { 105 // Tukey's ``Ninther,'' median of three medians of three. 106 s := (hi - lo) / 8 107 medianOfThree(data, lo, lo+s, lo+2*s) 108 medianOfThree(data, m, m-s, m+s) 109 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 110 } 111 medianOfThree(data, lo, m, hi-1) 112 113 // Invariants are: 114 // data[lo] = pivot (set up by ChoosePivot) 115 // data[lo <= i < a] = pivot 116 // data[a <= i < b] < pivot 117 // data[b <= i < c] is unexamined 118 // data[c <= i < d] > pivot 119 // data[d <= i < hi] = pivot 120 // 121 // Once b meets c, can swap the "= pivot" sections 122 // into the middle of the slice. 123 pivot := lo 124 a, b, c, d := lo+1, lo+1, hi, hi 125 for { 126 for b < c { 127 if data.Less(b, pivot) { // data[b] < pivot 128 b++ 129 } else if !data.Less(pivot, b) { // data[b] = pivot 130 data.Swap(a, b) 131 a++ 132 b++ 133 } else { 134 break 135 } 136 } 137 for b < c { 138 if data.Less(pivot, c-1) { // data[c-1] > pivot 139 c-- 140 } else if !data.Less(c-1, pivot) { // data[c-1] = pivot 141 data.Swap(c-1, d-1) 142 c-- 143 d-- 144 } else { 145 break 146 } 147 } 148 if b >= c { 149 break 150 } 151 // data[b] > pivot; data[c-1] < pivot 152 data.Swap(b, c-1) 153 b++ 154 c-- 155 } 156 157 n := min(b-a, a-lo) 158 swapRange(data, lo, b-n, n) 159 160 n = min(hi-d, d-c) 161 swapRange(data, c, hi-n, n) 162 163 return lo + b - a, hi - (d - c) 164 } 165 166 func quickSort(data Interface, a, b, maxDepth int) { 167 for b-a > 7 { 168 if maxDepth == 0 { 169 heapSort(data, a, b) 170 return 171 } 172 maxDepth-- 173 mlo, mhi := doPivot(data, a, b) 174 // Avoiding recursion on the larger subproblem guarantees 175 // a stack depth of at most lg(b-a). 176 if mlo-a < b-mhi { 177 quickSort(data, a, mlo, maxDepth) 178 a = mhi // i.e., quickSort(data, mhi, b) 179 } else { 180 quickSort(data, mhi, b, maxDepth) 181 b = mlo // i.e., quickSort(data, a, mlo) 182 } 183 } 184 if b-a > 1 { 185 insertionSort(data, a, b) 186 } 187 } 188 189 // Sort sorts data. 190 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 191 // data.Less and data.Swap. The sort is not guaranteed to be stable. 192 func Sort(data Interface) { 193 // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached. 194 n := data.Len() 195 maxDepth := 0 196 for i := n; i > 0; i >>= 1 { 197 maxDepth++ 198 } 199 maxDepth *= 2 200 quickSort(data, 0, n, maxDepth) 201 } 202 203 type reverse struct { 204 // This embedded Interface permits Reverse to use the methods of 205 // another Interface implementation. 206 Interface 207 } 208 209 // Less returns the opposite of the embedded implementation's Less method. 210 func (r reverse) Less(i, j int) bool { 211 return r.Interface.Less(j, i) 212 } 213 214 // Reverse returns the reverse order for data. 215 func Reverse(data Interface) Interface { 216 return &reverse{data} 217 } 218 219 // IsSorted reports whether data is sorted. 220 func IsSorted(data Interface) bool { 221 n := data.Len() 222 for i := n - 1; i > 0; i-- { 223 if data.Less(i, i-1) { 224 return false 225 } 226 } 227 return true 228 } 229 230 // Convenience types for common cases 231 232 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 233 type IntSlice []int 234 235 func (p IntSlice) Len() int { return len(p) } 236 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 237 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 238 239 // Sort is a convenience method. 240 func (p IntSlice) Sort() { Sort(p) } 241 242 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 243 type Float64Slice []float64 244 245 func (p Float64Slice) Len() int { return len(p) } 246 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 247 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 248 249 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 250 func isNaN(f float64) bool { 251 return f != f 252 } 253 254 // Sort is a convenience method. 255 func (p Float64Slice) Sort() { Sort(p) } 256 257 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 258 type StringSlice []string 259 260 func (p StringSlice) Len() int { return len(p) } 261 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 262 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 263 264 // Sort is a convenience method. 265 func (p StringSlice) Sort() { Sort(p) } 266 267 // Convenience wrappers for common cases 268 269 // Ints sorts a slice of ints in increasing order. 270 func Ints(a []int) { Sort(IntSlice(a)) } 271 272 // Float64s sorts a slice of float64s in increasing order. 273 func Float64s(a []float64) { Sort(Float64Slice(a)) } 274 275 // Strings sorts a slice of strings in increasing order. 276 func Strings(a []string) { Sort(StringSlice(a)) } 277 278 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 279 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 280 281 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 282 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 283 284 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 285 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 286 287 // Notes on stable sorting: 288 // The used algorithms are simple and provable correct on all input and use 289 // only logarithmic additional stack space. They perform well if compared 290 // experimentally to other stable in-place sorting algorithms. 291 // 292 // Remarks on other algorithms evaluated: 293 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 294 // Not faster. 295 // - GCC's __rotate for block rotations: Not faster. 296 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 297 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 298 // The given algorithms are in-place, number of Swap and Assignments 299 // grow as n log n but the algorithm is not stable. 300 // - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and 301 // V. Raman in Algorithmica (1996) 16, 115-160: 302 // This algorithm either needs additional 2n bits or works only if there 303 // are enough different elements available to encode some permutations 304 // which have to be undone later (so not stable an any input). 305 // - All the optimal in-place sorting/merging algorithms I found are either 306 // unstable or rely on enough different elements in each step to encode the 307 // performed block rearrangements. See also "In-Place Merging Algorithms", 308 // Denham Coates-Evely, Department of Computer Science, Kings College, 309 // January 2004 and the reverences in there. 310 // - Often "optimal" algorithms are optimal in the number of assignments 311 // but Interface has only Swap as operation. 312 313 // Stable sorts data while keeping the original order of equal elements. 314 // 315 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 316 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 317 func Stable(data Interface) { 318 n := data.Len() 319 blockSize := 20 320 a, b := 0, blockSize 321 for b <= n { 322 insertionSort(data, a, b) 323 a = b 324 b += blockSize 325 } 326 insertionSort(data, a, n) 327 328 for blockSize < n { 329 a, b = 0, 2*blockSize 330 for b <= n { 331 symMerge(data, a, a+blockSize, b) 332 a = b 333 b += 2 * blockSize 334 } 335 symMerge(data, a, a+blockSize, n) 336 blockSize *= 2 337 } 338 } 339 340 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 341 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 342 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 343 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 344 // Computer Science, pages 714-723. Springer, 2004. 345 // 346 // Let M = m-a and N = b-n. Wolog M < N. 347 // The recursion depth is bound by ceil(log(N+M)). 348 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 349 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 350 // 351 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 352 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 353 // in the paper carries through for Swap operations, especially as the block 354 // swapping rotate uses only O(M+N) Swaps. 355 func symMerge(data Interface, a, m, b int) { 356 if a >= m || m >= b { 357 return 358 } 359 360 mid := a + (b-a)/2 361 n := mid + m 362 start := 0 363 if m > mid { 364 start = n - b 365 r, p := mid, n-1 366 for start < r { 367 c := start + (r-start)/2 368 if !data.Less(p-c, c) { 369 start = c + 1 370 } else { 371 r = c 372 } 373 } 374 } else { 375 start = a 376 r, p := m, n-1 377 for start < r { 378 c := start + (r-start)/2 379 if !data.Less(p-c, c) { 380 start = c + 1 381 } else { 382 r = c 383 } 384 } 385 } 386 end := n - start 387 rotate(data, start, m, end) 388 symMerge(data, a, start, mid) 389 symMerge(data, mid, end, b) 390 } 391 392 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 393 // Data of the form 'x u v y' is changed to 'x v u y'. 394 // Rotate performs at most b-a many calls to data.Swap. 395 func rotate(data Interface, a, m, b int) { 396 i := m - a 397 if i == 0 { 398 return 399 } 400 j := b - m 401 if j == 0 { 402 return 403 } 404 405 if i == j { 406 swapRange(data, a, m, i) 407 return 408 } 409 410 p := a + i 411 for i != j { 412 if i > j { 413 swapRange(data, p-i, p, j) 414 i -= j 415 } else { 416 swapRange(data, p-i, p+j-i, i) 417 j -= i 418 } 419 } 420 swapRange(data, p-i, p, i) 421 } 422 423 /* 424 Complexity of Stable Sorting 425 426 427 Complexity of block swapping rotation 428 429 Each Swap puts one new element into its correct, final position. 430 Elements which reach their final position are no longer moved. 431 Thus block swapping rotation needs |u|+|v| calls to Swaps. 432 This is best possible as each element might need a move. 433 434 Pay attention when comparing to other optimal algorithms which 435 typically count the number of assignments instead of swaps: 436 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 437 rotations uses O(u + v + gcd(u,v)) assignments which is 438 better than our O(3 * (u+v)) as gcd(u,v) <= u. 439 440 441 Stable sorting by SymMerge and BlockSwap rotations 442 443 SymMerg complexity for same size input M = N: 444 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 445 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 446 447 (The following argument does not fuzz over a missing -1 or 448 other stuff which does not impact the final result). 449 450 Let n = data.Len(). Assume n = 2^k. 451 452 Plain merge sort performs log(n) = k iterations. 453 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 454 455 Thus iteration i of merge sort performs: 456 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 457 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 458 459 In total k = log(n) iterations are performed; so in total: 460 Calls to Less O(log(n) * n) 461 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 462 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 463 464 465 Above results should generalize to arbitrary n = 2^k + p 466 and should not be influenced by the initial insertion sort phase: 467 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 468 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 469 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 470 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 471 = O(n * log(n)) 472 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 473 474 */