github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run genzfunc.go 6 7 // Package sort provides primitives for sorting slices and user-defined 8 // collections. 9 package sort 10 11 import "reflect" 12 13 // A type, typically a collection, that satisfies sort.Interface can be 14 // sorted by the routines in this package. The methods require that the 15 // elements of the collection be enumerated by an integer index. 16 type Interface interface { 17 // Len is the number of elements in the collection. 18 Len() int 19 // Less reports whether the element with 20 // index i should sort before the element with index j. 21 Less(i, j int) bool 22 // Swap swaps the elements with indexes i and j. 23 Swap(i, j int) 24 } 25 26 // Insertion sort 27 func insertionSort(data Interface, a, b int) { 28 for i := a + 1; i < b; i++ { 29 for j := i; j > a && data.Less(j, j-1); j-- { 30 data.Swap(j, j-1) 31 } 32 } 33 } 34 35 // siftDown implements the heap property on data[lo, hi). 36 // first is an offset into the array where the root of the heap lies. 37 func siftDown(data Interface, lo, hi, first int) { 38 root := lo 39 for { 40 child := 2*root + 1 41 if child >= hi { 42 break 43 } 44 if child+1 < hi && data.Less(first+child, first+child+1) { 45 child++ 46 } 47 if !data.Less(first+root, first+child) { 48 return 49 } 50 data.Swap(first+root, first+child) 51 root = child 52 } 53 } 54 55 func heapSort(data Interface, a, b int) { 56 first := a 57 lo := 0 58 hi := b - a 59 60 // Build heap with greatest element at top. 61 for i := (hi - 1) / 2; i >= 0; i-- { 62 siftDown(data, i, hi, first) 63 } 64 65 // Pop elements, largest first, into end of data. 66 for i := hi - 1; i >= 0; i-- { 67 data.Swap(first, first+i) 68 siftDown(data, lo, i, first) 69 } 70 } 71 72 // Quicksort, loosely following Bentley and McIlroy, 73 // ``Engineering a Sort Function,'' SP&E November 1993. 74 75 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. 76 func medianOfThree(data Interface, m1, m0, m2 int) { 77 // sort 3 elements 78 if data.Less(m1, m0) { 79 data.Swap(m1, m0) 80 } 81 // data[m0] <= data[m1] 82 if data.Less(m2, m1) { 83 data.Swap(m2, m1) 84 // data[m0] <= data[m2] && data[m1] < data[m2] 85 if data.Less(m1, m0) { 86 data.Swap(m1, m0) 87 } 88 } 89 // now data[m0] <= data[m1] <= data[m2] 90 } 91 92 func swapRange(data Interface, a, b, n int) { 93 for i := 0; i < n; i++ { 94 data.Swap(a+i, b+i) 95 } 96 } 97 98 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 99 m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. 100 if hi-lo > 40 { 101 // Tukey's ``Ninther,'' median of three medians of three. 102 s := (hi - lo) / 8 103 medianOfThree(data, lo, lo+s, lo+2*s) 104 medianOfThree(data, m, m-s, m+s) 105 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 106 } 107 medianOfThree(data, lo, m, hi-1) 108 109 // Invariants are: 110 // data[lo] = pivot (set up by ChoosePivot) 111 // data[lo < i < a] < pivot 112 // data[a <= i < b] <= pivot 113 // data[b <= i < c] unexamined 114 // data[c <= i < hi-1] > pivot 115 // data[hi-1] >= pivot 116 pivot := lo 117 a, c := lo+1, hi-1 118 119 for ; a < c && data.Less(a, pivot); a++ { 120 } 121 b := a 122 for { 123 for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot 124 } 125 for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot 126 } 127 if b >= c { 128 break 129 } 130 // data[b] > pivot; data[c-1] <= pivot 131 data.Swap(b, c-1) 132 b++ 133 c-- 134 } 135 // If hi-c<3 then there are duplicates (by property of median of nine). 136 // Let be a bit more conservative, and set border to 5. 137 protect := hi-c < 5 138 if !protect && hi-c < (hi-lo)/4 { 139 // Lets test some points for equality to pivot 140 dups := 0 141 if !data.Less(pivot, hi-1) { // data[hi-1] = pivot 142 data.Swap(c, hi-1) 143 c++ 144 dups++ 145 } 146 if !data.Less(b-1, pivot) { // data[b-1] = pivot 147 b-- 148 dups++ 149 } 150 // m-lo = (hi-lo)/2 > 6 151 // b-lo > (hi-lo)*3/4-1 > 8 152 // ==> m < b ==> data[m] <= pivot 153 if !data.Less(m, pivot) { // data[m] = pivot 154 data.Swap(m, b-1) 155 b-- 156 dups++ 157 } 158 // if at least 2 points are equal to pivot, assume skewed distribution 159 protect = dups > 1 160 } 161 if protect { 162 // Protect against a lot of duplicates 163 // Add invariant: 164 // data[a <= i < b] unexamined 165 // data[b <= i < c] = pivot 166 for { 167 for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot 168 } 169 for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot 170 } 171 if a >= b { 172 break 173 } 174 // data[a] == pivot; data[b-1] < pivot 175 data.Swap(a, b-1) 176 a++ 177 b-- 178 } 179 } 180 // Swap pivot into middle 181 data.Swap(pivot, b-1) 182 return b - 1, c 183 } 184 185 func quickSort(data Interface, a, b, maxDepth int) { 186 for b-a > 12 { // Use ShellSort for slices <= 12 elements 187 if maxDepth == 0 { 188 heapSort(data, a, b) 189 return 190 } 191 maxDepth-- 192 mlo, mhi := doPivot(data, a, b) 193 // Avoiding recursion on the larger subproblem guarantees 194 // a stack depth of at most lg(b-a). 195 if mlo-a < b-mhi { 196 quickSort(data, a, mlo, maxDepth) 197 a = mhi // i.e., quickSort(data, mhi, b) 198 } else { 199 quickSort(data, mhi, b, maxDepth) 200 b = mlo // i.e., quickSort(data, a, mlo) 201 } 202 } 203 if b-a > 1 { 204 // Do ShellSort pass with gap 6 205 // It could be written in this simplified form cause b-a <= 12 206 for i := a + 6; i < b; i++ { 207 if data.Less(i, i-6) { 208 data.Swap(i, i-6) 209 } 210 } 211 insertionSort(data, a, b) 212 } 213 } 214 215 // Sort sorts data. 216 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 217 // data.Less and data.Swap. The sort is not guaranteed to be stable. 218 func Sort(data Interface) { 219 n := data.Len() 220 quickSort(data, 0, n, maxDepth(n)) 221 } 222 223 // maxDepth returns a threshold at which quicksort should switch 224 // to heapsort. It returns 2*ceil(lg(n+1)). 225 func maxDepth(n int) int { 226 var depth int 227 for i := n; i > 0; i >>= 1 { 228 depth++ 229 } 230 return depth * 2 231 } 232 233 // lessSwap is a pair of Less and Swap function for use with the 234 // auto-generated func-optimized variant of sort.go in 235 // zfuncversion.go. 236 type lessSwap struct { 237 Less func(i, j int) bool 238 Swap func(i, j int) 239 } 240 241 // Slice sorts the provided slice given the provided less function. 242 // 243 // The sort is not guaranteed to be stable. For a stable sort, use 244 // SliceStable. 245 // 246 // The function panics if the provided interface is not a slice. 247 func Slice(slice interface{}, less func(i, j int) bool) { 248 rv := reflect.ValueOf(slice) 249 swap := reflect.Swapper(slice) 250 length := rv.Len() 251 quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length)) 252 } 253 254 // SliceStable sorts the provided slice given the provided less 255 // function while keeping the original order of equal elements. 256 // 257 // The function panics if the provided interface is not a slice. 258 func SliceStable(slice interface{}, less func(i, j int) bool) { 259 rv := reflect.ValueOf(slice) 260 swap := reflect.Swapper(slice) 261 stable_func(lessSwap{less, swap}, rv.Len()) 262 } 263 264 // SliceIsSorted tests whether a slice is sorted. 265 // 266 // The function panics if the provided interface is not a slice. 267 func SliceIsSorted(slice interface{}, less func(i, j int) bool) bool { 268 rv := reflect.ValueOf(slice) 269 n := rv.Len() 270 for i := n - 1; i > 0; i-- { 271 if less(i, i-1) { 272 return false 273 } 274 } 275 return true 276 } 277 278 type reverse struct { 279 // This embedded Interface permits Reverse to use the methods of 280 // another Interface implementation. 281 Interface 282 } 283 284 // Less returns the opposite of the embedded implementation's Less method. 285 func (r reverse) Less(i, j int) bool { 286 return r.Interface.Less(j, i) 287 } 288 289 // Reverse returns the reverse order for data. 290 func Reverse(data Interface) Interface { 291 return &reverse{data} 292 } 293 294 // IsSorted reports whether data is sorted. 295 func IsSorted(data Interface) bool { 296 n := data.Len() 297 for i := n - 1; i > 0; i-- { 298 if data.Less(i, i-1) { 299 return false 300 } 301 } 302 return true 303 } 304 305 // Convenience types for common cases 306 307 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 308 type IntSlice []int 309 310 func (p IntSlice) Len() int { return len(p) } 311 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 312 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 313 314 // Sort is a convenience method. 315 func (p IntSlice) Sort() { Sort(p) } 316 317 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 318 type Float64Slice []float64 319 320 func (p Float64Slice) Len() int { return len(p) } 321 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 322 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 323 324 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 325 func isNaN(f float64) bool { 326 return f != f 327 } 328 329 // Sort is a convenience method. 330 func (p Float64Slice) Sort() { Sort(p) } 331 332 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 333 type StringSlice []string 334 335 func (p StringSlice) Len() int { return len(p) } 336 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 337 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 338 339 // Sort is a convenience method. 340 func (p StringSlice) Sort() { Sort(p) } 341 342 // Convenience wrappers for common cases 343 344 // Ints sorts a slice of ints in increasing order. 345 func Ints(a []int) { Sort(IntSlice(a)) } 346 347 // Float64s sorts a slice of float64s in increasing order. 348 func Float64s(a []float64) { Sort(Float64Slice(a)) } 349 350 // Strings sorts a slice of strings in increasing order. 351 func Strings(a []string) { Sort(StringSlice(a)) } 352 353 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 354 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 355 356 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 357 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 358 359 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 360 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 361 362 // Notes on stable sorting: 363 // The used algorithms are simple and provable correct on all input and use 364 // only logarithmic additional stack space. They perform well if compared 365 // experimentally to other stable in-place sorting algorithms. 366 // 367 // Remarks on other algorithms evaluated: 368 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 369 // Not faster. 370 // - GCC's __rotate for block rotations: Not faster. 371 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 372 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 373 // The given algorithms are in-place, number of Swap and Assignments 374 // grow as n log n but the algorithm is not stable. 375 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 376 // V. Raman in Algorithmica (1996) 16, 115-160: 377 // This algorithm either needs additional 2n bits or works only if there 378 // are enough different elements available to encode some permutations 379 // which have to be undone later (so not stable on any input). 380 // - All the optimal in-place sorting/merging algorithms I found are either 381 // unstable or rely on enough different elements in each step to encode the 382 // performed block rearrangements. See also "In-Place Merging Algorithms", 383 // Denham Coates-Evely, Department of Computer Science, Kings College, 384 // January 2004 and the references in there. 385 // - Often "optimal" algorithms are optimal in the number of assignments 386 // but Interface has only Swap as operation. 387 388 // Stable sorts data while keeping the original order of equal elements. 389 // 390 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 391 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 392 func Stable(data Interface) { 393 stable(data, data.Len()) 394 } 395 396 func stable(data Interface, n int) { 397 blockSize := 20 // must be > 0 398 a, b := 0, blockSize 399 for b <= n { 400 insertionSort(data, a, b) 401 a = b 402 b += blockSize 403 } 404 insertionSort(data, a, n) 405 406 for blockSize < n { 407 a, b = 0, 2*blockSize 408 for b <= n { 409 symMerge(data, a, a+blockSize, b) 410 a = b 411 b += 2 * blockSize 412 } 413 if m := a + blockSize; m < n { 414 symMerge(data, a, m, n) 415 } 416 blockSize *= 2 417 } 418 } 419 420 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 421 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 422 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 423 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 424 // Computer Science, pages 714-723. Springer, 2004. 425 // 426 // Let M = m-a and N = b-n. Wolog M < N. 427 // The recursion depth is bound by ceil(log(N+M)). 428 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 429 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 430 // 431 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 432 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 433 // in the paper carries through for Swap operations, especially as the block 434 // swapping rotate uses only O(M+N) Swaps. 435 // 436 // symMerge assumes non-degenerate arguments: a < m && m < b. 437 // Having the caller check this condition eliminates many leaf recursion calls, 438 // which improves performance. 439 func symMerge(data Interface, a, m, b int) { 440 // Avoid unnecessary recursions of symMerge 441 // by direct insertion of data[a] into data[m:b] 442 // if data[a:m] only contains one element. 443 if m-a == 1 { 444 // Use binary search to find the lowest index i 445 // such that data[i] >= data[a] for m <= i < b. 446 // Exit the search loop with i == b in case no such index exists. 447 i := m 448 j := b 449 for i < j { 450 h := int(uint(i+j) >> 1) 451 if data.Less(h, a) { 452 i = h + 1 453 } else { 454 j = h 455 } 456 } 457 // Swap values until data[a] reaches the position before i. 458 for k := a; k < i-1; k++ { 459 data.Swap(k, k+1) 460 } 461 return 462 } 463 464 // Avoid unnecessary recursions of symMerge 465 // by direct insertion of data[m] into data[a:m] 466 // if data[m:b] only contains one element. 467 if b-m == 1 { 468 // Use binary search to find the lowest index i 469 // such that data[i] > data[m] for a <= i < m. 470 // Exit the search loop with i == m in case no such index exists. 471 i := a 472 j := m 473 for i < j { 474 h := int(uint(i+j) >> 1) 475 if !data.Less(m, h) { 476 i = h + 1 477 } else { 478 j = h 479 } 480 } 481 // Swap values until data[m] reaches the position i. 482 for k := m; k > i; k-- { 483 data.Swap(k, k-1) 484 } 485 return 486 } 487 488 mid := int(uint(a+b) >> 1) 489 n := mid + m 490 var start, r int 491 if m > mid { 492 start = n - b 493 r = mid 494 } else { 495 start = a 496 r = m 497 } 498 p := n - 1 499 500 for start < r { 501 c := int(uint(start+r) >> 1) 502 if !data.Less(p-c, c) { 503 start = c + 1 504 } else { 505 r = c 506 } 507 } 508 509 end := n - start 510 if start < m && m < end { 511 rotate(data, start, m, end) 512 } 513 if a < start && start < mid { 514 symMerge(data, a, start, mid) 515 } 516 if mid < end && end < b { 517 symMerge(data, mid, end, b) 518 } 519 } 520 521 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 522 // Data of the form 'x u v y' is changed to 'x v u y'. 523 // Rotate performs at most b-a many calls to data.Swap. 524 // Rotate assumes non-degenerate arguments: a < m && m < b. 525 func rotate(data Interface, a, m, b int) { 526 i := m - a 527 j := b - m 528 529 for i != j { 530 if i > j { 531 swapRange(data, m-i, m, j) 532 i -= j 533 } else { 534 swapRange(data, m-i, m+j-i, i) 535 j -= i 536 } 537 } 538 // i == j 539 swapRange(data, m-i, m, i) 540 } 541 542 /* 543 Complexity of Stable Sorting 544 545 546 Complexity of block swapping rotation 547 548 Each Swap puts one new element into its correct, final position. 549 Elements which reach their final position are no longer moved. 550 Thus block swapping rotation needs |u|+|v| calls to Swaps. 551 This is best possible as each element might need a move. 552 553 Pay attention when comparing to other optimal algorithms which 554 typically count the number of assignments instead of swaps: 555 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 556 rotations uses O(u + v + gcd(u,v)) assignments which is 557 better than our O(3 * (u+v)) as gcd(u,v) <= u. 558 559 560 Stable sorting by SymMerge and BlockSwap rotations 561 562 SymMerg complexity for same size input M = N: 563 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 564 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 565 566 (The following argument does not fuzz over a missing -1 or 567 other stuff which does not impact the final result). 568 569 Let n = data.Len(). Assume n = 2^k. 570 571 Plain merge sort performs log(n) = k iterations. 572 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 573 574 Thus iteration i of merge sort performs: 575 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 576 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 577 578 In total k = log(n) iterations are performed; so in total: 579 Calls to Less O(log(n) * n) 580 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 581 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 582 583 584 Above results should generalize to arbitrary n = 2^k + p 585 and should not be influenced by the initial insertion sort phase: 586 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 587 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 588 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 589 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 590 = O(n * log(n)) 591 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 592 593 */