// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run genzfunc.go

// Package sort provides primitives for sorting slices and user-defined
// collections.
//
// Listing preamble at this point: package clause `package sort`, followed by
// `import "reflect"`.

// An implementation of Interface can be sorted by the routines in this
// package. The methods refer to the elements of the underlying collection
// by integer index.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less reports whether the element with
	// index i should sort before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}

// insertionSort sorts data[a:b] using insertion sort.
func insertionSort(data Interface, a, b int) {
	for i := a + 1; i < b; i++ {
		// Move data[i] towards a until its left neighbour is not greater.
		for j := i; j > a && data.Less(j, j-1); j-- {
			data.Swap(j, j-1)
		}
	}
}

// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data Interface, lo, hi, first int) {
	root := lo
	for {
		child := 2*root + 1
		if child >= hi {
			break
		}
		// Pick the larger of the two children, if there are two.
		if child+1 < hi && data.Less(first+child, first+child+1) {
			child++
		}
		// Stop as soon as the root is not smaller than its larger child.
		if !data.Less(first+root, first+child) {
			return
		}
		data.Swap(first+root, first+child)
		root = child
	}
}

// heapSort sorts data[a:b]. It first arranges the range as a heap with the
// greatest element at the top and then repeatedly swaps the top element to
// the end of the shrinking heap.
func heapSort(data Interface, a, b int) {
	first := a
	lo := 0
	hi := b - a

	// Build heap with greatest element at top.
	for i := (hi - 1) / 2; i >= 0; i-- {
		siftDown(data, i, hi, first)
	}

	// Pop elements, largest first, into end of data.
	for i := hi - 1; i >= 0; i-- {
		data.Swap(first, first+i)
		siftDown(data, lo, i, first)
	}
}

// Quicksort, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.

// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data Interface, m1, m0, m2 int) {
	// sort 3 elements
	if data.Less(m1, m0) {
		data.Swap(m1, m0)
	}
	// data[m0] <= data[m1]
	if data.Less(m2, m1) {
		data.Swap(m2, m1)
		// data[m0] <= data[m2] && data[m1] < data[m2]
		if data.Less(m1, m0) {
			data.Swap(m1, m0)
		}
	}
	// now data[m0] <= data[m1] <= data[m2]
}

// swapRange swaps the n elements starting at index a with the n elements
// starting at index b, pairwise and in increasing index order.
func swapRange(data Interface, a, b, n int) {
	for i := 0; i < n; i++ {
		data.Swap(a+i, b+i)
	}
}

// doPivot selects a pivot for data[lo:hi] and partitions the range around it.
// On return the elements in data[lo:midlo] are not greater than the pivot and
// the elements in data[midhi:hi] are not less than it; the caller (quickSort)
// only has to sort those two sub-ranges.
func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's ``Ninther,'' median of three medians of three.
		s := (hi - lo) / 8
		medianOfThree(data, lo, lo+s, lo+2*s)
		medianOfThree(data, m, m-s, m+s)
		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	}
	medianOfThree(data, lo, m, hi-1)

	// Invariants are:
	//	data[lo] = pivot (set up by the median selection above)
	//	data[lo < i < a] < pivot
	//	data[a <= i < b] <= pivot
	//	data[b <= i < c] unexamined
	//	data[c <= i < hi-1] > pivot
	//	data[hi-1] >= pivot
	pivot := lo
	a, c := lo+1, hi-1

	for ; a < c && data.Less(a, pivot); a++ {
	}
	b := a
	for {
		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
		}
		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
		}
		if b >= c {
			break
		}
		// data[b] > pivot; data[c-1] <= pivot
		data.Swap(b, c-1)
		b++
		c--
	}
	// If hi-c<3 then there are duplicates (by property of median of nine).
	// Let's be a bit more conservative, and set border to 5.
	protect := hi-c < 5
	if !protect && hi-c < (hi-lo)/4 {
		// Let's test some points for equality to pivot
		dups := 0
		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
			data.Swap(c, hi-1)
			c++
			dups++
		}
		if !data.Less(b-1, pivot) { // data[b-1] = pivot
			b--
			dups++
		}
		// m-lo = (hi-lo)/2 > 6
		// b-lo > (hi-lo)*3/4-1 > 8
		// ==> m < b ==> data[m] <= pivot
		if !data.Less(m, pivot) { // data[m] = pivot
			data.Swap(m, b-1)
			b--
			dups++
		}
		// if at least 2 points are equal to pivot, assume skewed distribution
		protect = dups > 1
	}
	if protect {
		// Protect against a lot of duplicates
		// Add invariant:
		//	data[a <= i < b] unexamined
		//	data[b <= i < c] = pivot
		for {
			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
			}
			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
			}
			if a >= b {
				break
			}
			// data[a] == pivot; data[b-1] < pivot
			data.Swap(a, b-1)
			a++
			b--
		}
	}
	// Swap pivot into middle
	data.Swap(pivot, b-1)
	return b - 1, c
}

// quickSort sorts data[a:b]. Each partitioning round decrements maxDepth;
// once it reaches zero the remaining range is handed to heapSort. Ranges of
// at most 12 elements are finished with a single gap-6 Shell sort pass
// followed by insertionSort.
func quickSort(data Interface, a, b, maxDepth int) {
	for b-a > 12 { // Use ShellSort for slices <= 12 elements
		if maxDepth == 0 {
			heapSort(data, a, b)
			return
		}
		maxDepth--
		mlo, mhi := doPivot(data, a, b)
		// Avoiding recursion on the larger subproblem guarantees
		// a stack depth of at most lg(b-a).
		if mlo-a < b-mhi {
			quickSort(data, a, mlo, maxDepth)
			a = mhi // i.e., quickSort(data, mhi, b)
		} else {
			quickSort(data, mhi, b, maxDepth)
			b = mlo // i.e., quickSort(data, a, mlo)
		}
	}
	if b-a > 1 {
		// Do ShellSort pass with gap 6
		// It could be written in this simplified form cause b-a <= 12
		for i := a + 6; i < b; i++ {
			if data.Less(i, i-6) {
				data.Swap(i, i-6)
			}
		}
		insertionSort(data, a, b)
	}
}

// Sort sorts data.
216 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 217 // data.Less and data.Swap. The sort is not guaranteed to be stable. 218 func Sort(data Interface) { 219 n := data.Len() 220 quickSort(data, 0, n, maxDepth(n)) 221 } 222 223 // maxDepth returns a threshold at which quicksort should switch 224 // to heapsort. It returns 2*ceil(lg(n+1)). 225 func maxDepth(n int) int { 226 var depth int 227 for i := n; i > 0; i >>= 1 { 228 depth++ 229 } 230 return depth * 2 231 } 232 233 // lessSwap is a pair of Less and Swap function for use with the 234 // auto-generated func-optimized variant of sort.go in 235 // zfuncversion.go. 236 type lessSwap struct { 237 Less func(i, j int) bool 238 Swap func(i, j int) 239 } 240 241 // Slice sorts the provided slice given the provided less function. 242 // 243 // The sort is not guaranteed to be stable. For a stable sort, use 244 // SliceStable. 245 // 246 // The function panics if the provided interface is not a slice. 247 func Slice(slice interface{}, less func(i, j int) bool) { 248 rv := reflect.ValueOf(slice) 249 swap := reflect.Swapper(slice) 250 length := rv.Len() 251 quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length)) 252 } 253 254 // SliceStable sorts the provided slice given the provided less 255 // function while keeping the original order of equal elements. 256 // 257 // The function panics if the provided interface is not a slice. 258 func SliceStable(slice interface{}, less func(i, j int) bool) { 259 rv := reflect.ValueOf(slice) 260 swap := reflect.Swapper(slice) 261 stable_func(lessSwap{less, swap}, rv.Len()) 262 } 263 264 // SliceIsSorted tests whether a slice is sorted. 265 // 266 // The function panics if the provided interface is not a slice. 
267 func SliceIsSorted(slice interface{}, less func(i, j int) bool) bool { 268 rv := reflect.ValueOf(slice) 269 n := rv.Len() 270 for i := n - 1; i > 0; i-- { 271 if less(i, i-1) { 272 return false 273 } 274 } 275 return true 276 } 277 278 type reverse struct { 279 // This embedded Interface permits Reverse to use the methods of 280 // another Interface implementation. 281 Interface 282 } 283 284 // Less returns the opposite of the embedded implementation's Less method. 285 func (r reverse) Less(i, j int) bool { 286 return r.Interface.Less(j, i) 287 } 288 289 // Reverse returns the reverse order for data. 290 func Reverse(data Interface) Interface { 291 return &reverse{data} 292 } 293 294 // IsSorted reports whether data is sorted. 295 func IsSorted(data Interface) bool { 296 n := data.Len() 297 for i := n - 1; i > 0; i-- { 298 if data.Less(i, i-1) { 299 return false 300 } 301 } 302 return true 303 } 304 305 // Convenience types for common cases 306 307 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 308 type IntSlice []int 309 310 func (p IntSlice) Len() int { return len(p) } 311 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 312 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 313 314 // Sort is a convenience method. 315 func (p IntSlice) Sort() { Sort(p) } 316 317 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order 318 // (not-a-number values are treated as less than other values). 319 type Float64Slice []float64 320 321 func (p Float64Slice) Len() int { return len(p) } 322 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 323 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 324 325 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 326 func isNaN(f float64) bool { 327 return f != f 328 } 329 330 // Sort is a convenience method. 
331 func (p Float64Slice) Sort() { Sort(p) } 332 333 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 334 type StringSlice []string 335 336 func (p StringSlice) Len() int { return len(p) } 337 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 338 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 339 340 // Sort is a convenience method. 341 func (p StringSlice) Sort() { Sort(p) } 342 343 // Convenience wrappers for common cases 344 345 // Ints sorts a slice of ints in increasing order. 346 func Ints(a []int) { Sort(IntSlice(a)) } 347 348 // Float64s sorts a slice of float64s in increasing order 349 // (not-a-number values are treated as less than other values). 350 func Float64s(a []float64) { Sort(Float64Slice(a)) } 351 352 // Strings sorts a slice of strings in increasing order. 353 func Strings(a []string) { Sort(StringSlice(a)) } 354 355 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 356 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 357 358 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order 359 // (not-a-number values are treated as less than other values). 360 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 361 362 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 363 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 364 365 // Notes on stable sorting: 366 // The used algorithms are simple and provable correct on all input and use 367 // only logarithmic additional stack space. They perform well if compared 368 // experimentally to other stable in-place sorting algorithms. 369 // 370 // Remarks on other algorithms evaluated: 371 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 372 // Not faster. 373 // - GCC's __rotate for block rotations: Not faster. 
374 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 375 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 376 // The given algorithms are in-place, number of Swap and Assignments 377 // grow as n log n but the algorithm is not stable. 378 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 379 // V. Raman in Algorithmica (1996) 16, 115-160: 380 // This algorithm either needs additional 2n bits or works only if there 381 // are enough different elements available to encode some permutations 382 // which have to be undone later (so not stable on any input). 383 // - All the optimal in-place sorting/merging algorithms I found are either 384 // unstable or rely on enough different elements in each step to encode the 385 // performed block rearrangements. See also "In-Place Merging Algorithms", 386 // Denham Coates-Evely, Department of Computer Science, Kings College, 387 // January 2004 and the references in there. 388 // - Often "optimal" algorithms are optimal in the number of assignments 389 // but Interface has only Swap as operation. 390 391 // Stable sorts data while keeping the original order of equal elements. 392 // 393 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 394 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 
395 func Stable(data Interface) { 396 stable(data, data.Len()) 397 } 398 399 func stable(data Interface, n int) { 400 blockSize := 20 // must be > 0 401 a, b := 0, blockSize 402 for b <= n { 403 insertionSort(data, a, b) 404 a = b 405 b += blockSize 406 } 407 insertionSort(data, a, n) 408 409 for blockSize < n { 410 a, b = 0, 2*blockSize 411 for b <= n { 412 symMerge(data, a, a+blockSize, b) 413 a = b 414 b += 2 * blockSize 415 } 416 if m := a + blockSize; m < n { 417 symMerge(data, a, m, n) 418 } 419 blockSize *= 2 420 } 421 } 422 423 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 424 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 425 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 426 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 427 // Computer Science, pages 714-723. Springer, 2004. 428 // 429 // Let M = m-a and N = b-n. Wolog M < N. 430 // The recursion depth is bound by ceil(log(N+M)). 431 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 432 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 433 // 434 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 435 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 436 // in the paper carries through for Swap operations, especially as the block 437 // swapping rotate uses only O(M+N) Swaps. 438 // 439 // symMerge assumes non-degenerate arguments: a < m && m < b. 440 // Having the caller check this condition eliminates many leaf recursion calls, 441 // which improves performance. 442 func symMerge(data Interface, a, m, b int) { 443 // Avoid unnecessary recursions of symMerge 444 // by direct insertion of data[a] into data[m:b] 445 // if data[a:m] only contains one element. 446 if m-a == 1 { 447 // Use binary search to find the lowest index i 448 // such that data[i] >= data[a] for m <= i < b. 
449 // Exit the search loop with i == b in case no such index exists. 450 i := m 451 j := b 452 for i < j { 453 h := int(uint(i+j) >> 1) 454 if data.Less(h, a) { 455 i = h + 1 456 } else { 457 j = h 458 } 459 } 460 // Swap values until data[a] reaches the position before i. 461 for k := a; k < i-1; k++ { 462 data.Swap(k, k+1) 463 } 464 return 465 } 466 467 // Avoid unnecessary recursions of symMerge 468 // by direct insertion of data[m] into data[a:m] 469 // if data[m:b] only contains one element. 470 if b-m == 1 { 471 // Use binary search to find the lowest index i 472 // such that data[i] > data[m] for a <= i < m. 473 // Exit the search loop with i == m in case no such index exists. 474 i := a 475 j := m 476 for i < j { 477 h := int(uint(i+j) >> 1) 478 if !data.Less(m, h) { 479 i = h + 1 480 } else { 481 j = h 482 } 483 } 484 // Swap values until data[m] reaches the position i. 485 for k := m; k > i; k-- { 486 data.Swap(k, k-1) 487 } 488 return 489 } 490 491 mid := int(uint(a+b) >> 1) 492 n := mid + m 493 var start, r int 494 if m > mid { 495 start = n - b 496 r = mid 497 } else { 498 start = a 499 r = m 500 } 501 p := n - 1 502 503 for start < r { 504 c := int(uint(start+r) >> 1) 505 if !data.Less(p-c, c) { 506 start = c + 1 507 } else { 508 r = c 509 } 510 } 511 512 end := n - start 513 if start < m && m < end { 514 rotate(data, start, m, end) 515 } 516 if a < start && start < mid { 517 symMerge(data, a, start, mid) 518 } 519 if mid < end && end < b { 520 symMerge(data, mid, end, b) 521 } 522 } 523 524 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 525 // Data of the form 'x u v y' is changed to 'x v u y'. 526 // Rotate performs at most b-a many calls to data.Swap. 527 // Rotate assumes non-degenerate arguments: a < m && m < b. 
528 func rotate(data Interface, a, m, b int) { 529 i := m - a 530 j := b - m 531 532 for i != j { 533 if i > j { 534 swapRange(data, m-i, m, j) 535 i -= j 536 } else { 537 swapRange(data, m-i, m+j-i, i) 538 j -= i 539 } 540 } 541 // i == j 542 swapRange(data, m-i, m, i) 543 } 544 545 /* 546 Complexity of Stable Sorting 547 548 549 Complexity of block swapping rotation 550 551 Each Swap puts one new element into its correct, final position. 552 Elements which reach their final position are no longer moved. 553 Thus block swapping rotation needs |u|+|v| calls to Swaps. 554 This is best possible as each element might need a move. 555 556 Pay attention when comparing to other optimal algorithms which 557 typically count the number of assignments instead of swaps: 558 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 559 rotations uses O(u + v + gcd(u,v)) assignments which is 560 better than our O(3 * (u+v)) as gcd(u,v) <= u. 561 562 563 Stable sorting by SymMerge and BlockSwap rotations 564 565 SymMerg complexity for same size input M = N: 566 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 567 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 568 569 (The following argument does not fuzz over a missing -1 or 570 other stuff which does not impact the final result). 571 572 Let n = data.Len(). Assume n = 2^k. 573 574 Plain merge sort performs log(n) = k iterations. 575 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 576 577 Thus iteration i of merge sort performs: 578 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 579 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 580 581 In total k = log(n) iterations are performed; so in total: 582 Calls to Less O(log(n) * n) 583 Calls to Swap O(n + 2*n + 3*n + ... 
+ (k-1)*n + k*n) 584 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 585 586 587 Above results should generalize to arbitrary n = 2^k + p 588 and should not be influenced by the initial insertion sort phase: 589 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 590 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 591 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 592 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 593 = O(n * log(n)) 594 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 595 596 */