github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run genzfunc.go 6 7 // Package sort provides primitives for sorting slices and user-defined 8 // collections. 9 package sort 10 11 // A type, typically a collection, that satisfies sort.Interface can be 12 // sorted by the routines in this package. The methods require that the 13 // elements of the collection be enumerated by an integer index. 14 type Interface interface { 15 // Len is the number of elements in the collection. 16 Len() int 17 // Less reports whether the element with 18 // index i should sort before the element with index j. 19 Less(i, j int) bool 20 // Swap swaps the elements with indexes i and j. 21 Swap(i, j int) 22 } 23 24 // Insertion sort 25 func insertionSort(data Interface, a, b int) { 26 for i := a + 1; i < b; i++ { 27 for j := i; j > a && data.Less(j, j-1); j-- { 28 data.Swap(j, j-1) 29 } 30 } 31 } 32 33 // siftDown implements the heap property on data[lo, hi). 34 // first is an offset into the array where the root of the heap lies. 35 func siftDown(data Interface, lo, hi, first int) { 36 root := lo 37 for { 38 child := 2*root + 1 39 if child >= hi { 40 break 41 } 42 if child+1 < hi && data.Less(first+child, first+child+1) { 43 child++ 44 } 45 if !data.Less(first+root, first+child) { 46 return 47 } 48 data.Swap(first+root, first+child) 49 root = child 50 } 51 } 52 53 func heapSort(data Interface, a, b int) { 54 first := a 55 lo := 0 56 hi := b - a 57 58 // Build heap with greatest element at top. 59 for i := (hi - 1) / 2; i >= 0; i-- { 60 siftDown(data, i, hi, first) 61 } 62 63 // Pop elements, largest first, into end of data. 64 for i := hi - 1; i >= 0; i-- { 65 data.Swap(first, first+i) 66 siftDown(data, lo, i, first) 67 } 68 } 69 70 // Quicksort, loosely following Bentley and McIlroy, 71 // ``Engineering a Sort Function,'' SP&E November 1993. 72 73 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. 74 func medianOfThree(data Interface, m1, m0, m2 int) { 75 // sort 3 elements 76 if data.Less(m1, m0) { 77 data.Swap(m1, m0) 78 } 79 // data[m0] <= data[m1] 80 if data.Less(m2, m1) { 81 data.Swap(m2, m1) 82 // data[m0] <= data[m2] && data[m1] < data[m2] 83 if data.Less(m1, m0) { 84 data.Swap(m1, m0) 85 } 86 } 87 // now data[m0] <= data[m1] <= data[m2] 88 } 89 90 func swapRange(data Interface, a, b, n int) { 91 for i := 0; i < n; i++ { 92 data.Swap(a+i, b+i) 93 } 94 } 95 96 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 97 m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. 98 if hi-lo > 40 { 99 // Tukey's ``Ninther,'' median of three medians of three. 100 s := (hi - lo) / 8 101 medianOfThree(data, lo, lo+s, lo+2*s) 102 medianOfThree(data, m, m-s, m+s) 103 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 104 } 105 medianOfThree(data, lo, m, hi-1) 106 107 // Invariants are: 108 // data[lo] = pivot (set up by ChoosePivot) 109 // data[lo < i < a] < pivot 110 // data[a <= i < b] <= pivot 111 // data[b <= i < c] unexamined 112 // data[c <= i < hi-1] > pivot 113 // data[hi-1] >= pivot 114 pivot := lo 115 a, c := lo+1, hi-1 116 117 for ; a < c && data.Less(a, pivot); a++ { 118 } 119 b := a 120 for { 121 for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot 122 } 123 for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot 124 } 125 if b >= c { 126 break 127 } 128 // data[b] > pivot; data[c-1] <= pivot 129 data.Swap(b, c-1) 130 b++ 131 c-- 132 } 133 // If hi-c<3 then there are duplicates (by property of median of nine). 134 // Let be a bit more conservative, and set border to 5. 135 protect := hi-c < 5 136 if !protect && hi-c < (hi-lo)/4 { 137 // Lets test some points for equality to pivot 138 dups := 0 139 if !data.Less(pivot, hi-1) { // data[hi-1] = pivot 140 data.Swap(c, hi-1) 141 c++ 142 dups++ 143 } 144 if !data.Less(b-1, pivot) { // data[b-1] = pivot 145 b-- 146 dups++ 147 } 148 // m-lo = (hi-lo)/2 > 6 149 // b-lo > (hi-lo)*3/4-1 > 8 150 // ==> m < b ==> data[m] <= pivot 151 if !data.Less(m, pivot) { // data[m] = pivot 152 data.Swap(m, b-1) 153 b-- 154 dups++ 155 } 156 // if at least 2 points are equal to pivot, assume skewed distribution 157 protect = dups > 1 158 } 159 if protect { 160 // Protect against a lot of duplicates 161 // Add invariant: 162 // data[a <= i < b] unexamined 163 // data[b <= i < c] = pivot 164 for { 165 for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot 166 } 167 for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot 168 } 169 if a >= b { 170 break 171 } 172 // data[a] == pivot; data[b-1] < pivot 173 data.Swap(a, b-1) 174 a++ 175 b-- 176 } 177 } 178 // Swap pivot into middle 179 data.Swap(pivot, b-1) 180 return b - 1, c 181 } 182 183 func quickSort(data Interface, a, b, maxDepth int) { 184 for b-a > 12 { // Use ShellSort for slices <= 12 elements 185 if maxDepth == 0 { 186 heapSort(data, a, b) 187 return 188 } 189 maxDepth-- 190 mlo, mhi := doPivot(data, a, b) 191 // Avoiding recursion on the larger subproblem guarantees 192 // a stack depth of at most lg(b-a). 193 if mlo-a < b-mhi { 194 quickSort(data, a, mlo, maxDepth) 195 a = mhi // i.e., quickSort(data, mhi, b) 196 } else { 197 quickSort(data, mhi, b, maxDepth) 198 b = mlo // i.e., quickSort(data, a, mlo) 199 } 200 } 201 if b-a > 1 { 202 // Do ShellSort pass with gap 6 203 // It could be written in this simplified form cause b-a <= 12 204 for i := a + 6; i < b; i++ { 205 if data.Less(i, i-6) { 206 data.Swap(i, i-6) 207 } 208 } 209 insertionSort(data, a, b) 210 } 211 } 212 213 // Sort sorts data. 214 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 215 // data.Less and data.Swap. The sort is not guaranteed to be stable. 216 func Sort(data Interface) { 217 n := data.Len() 218 quickSort(data, 0, n, maxDepth(n)) 219 } 220 221 // maxDepth returns a threshold at which quicksort should switch 222 // to heapsort. It returns 2*ceil(lg(n+1)). 223 func maxDepth(n int) int { 224 var depth int 225 for i := n; i > 0; i >>= 1 { 226 depth++ 227 } 228 return depth * 2 229 } 230 231 // lessSwap is a pair of Less and Swap function for use with the 232 // auto-generated func-optimized variant of sort.go in 233 // zfuncversion.go. 234 type lessSwap struct { 235 Less func(i, j int) bool 236 Swap func(i, j int) 237 } 238 239 type reverse struct { 240 // This embedded Interface permits Reverse to use the methods of 241 // another Interface implementation. 242 Interface 243 } 244 245 // Less returns the opposite of the embedded implementation's Less method. 246 func (r reverse) Less(i, j int) bool { 247 return r.Interface.Less(j, i) 248 } 249 250 // Reverse returns the reverse order for data. 251 func Reverse(data Interface) Interface { 252 return &reverse{data} 253 } 254 255 // IsSorted reports whether data is sorted. 256 func IsSorted(data Interface) bool { 257 n := data.Len() 258 for i := n - 1; i > 0; i-- { 259 if data.Less(i, i-1) { 260 return false 261 } 262 } 263 return true 264 } 265 266 // Convenience types for common cases 267 268 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 269 type IntSlice []int 270 271 func (p IntSlice) Len() int { return len(p) } 272 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 273 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 274 275 // Sort is a convenience method. 276 func (p IntSlice) Sort() { Sort(p) } 277 278 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order 279 // (not-a-number values are treated as less than other values). 280 type Float64Slice []float64 281 282 func (p Float64Slice) Len() int { return len(p) } 283 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 284 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 285 286 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 287 func isNaN(f float64) bool { 288 return f != f 289 } 290 291 // Sort is a convenience method. 292 func (p Float64Slice) Sort() { Sort(p) } 293 294 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 295 type StringSlice []string 296 297 func (p StringSlice) Len() int { return len(p) } 298 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 299 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 300 301 // Sort is a convenience method. 302 func (p StringSlice) Sort() { Sort(p) } 303 304 // Convenience wrappers for common cases 305 306 // Ints sorts a slice of ints in increasing order. 307 func Ints(a []int) { Sort(IntSlice(a)) } 308 309 // Float64s sorts a slice of float64s in increasing order 310 // (not-a-number values are treated as less than other values). 311 func Float64s(a []float64) { Sort(Float64Slice(a)) } 312 313 // Strings sorts a slice of strings in increasing order. 314 func Strings(a []string) { Sort(StringSlice(a)) } 315 316 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 317 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 318 319 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order 320 // (not-a-number values are treated as less than other values). 321 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 322 323 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 324 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 325 326 // Notes on stable sorting: 327 // The used algorithms are simple and provable correct on all input and use 328 // only logarithmic additional stack space. They perform well if compared 329 // experimentally to other stable in-place sorting algorithms. 330 // 331 // Remarks on other algorithms evaluated: 332 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 333 // Not faster. 334 // - GCC's __rotate for block rotations: Not faster. 335 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 336 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 337 // The given algorithms are in-place, number of Swap and Assignments 338 // grow as n log n but the algorithm is not stable. 339 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 340 // V. Raman in Algorithmica (1996) 16, 115-160: 341 // This algorithm either needs additional 2n bits or works only if there 342 // are enough different elements available to encode some permutations 343 // which have to be undone later (so not stable on any input). 344 // - All the optimal in-place sorting/merging algorithms I found are either 345 // unstable or rely on enough different elements in each step to encode the 346 // performed block rearrangements. See also "In-Place Merging Algorithms", 347 // Denham Coates-Evely, Department of Computer Science, Kings College, 348 // January 2004 and the references in there. 349 // - Often "optimal" algorithms are optimal in the number of assignments 350 // but Interface has only Swap as operation. 351 352 // Stable sorts data while keeping the original order of equal elements. 353 // 354 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 355 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 356 func Stable(data Interface) { 357 stable(data, data.Len()) 358 } 359 360 func stable(data Interface, n int) { 361 blockSize := 20 // must be > 0 362 a, b := 0, blockSize 363 for b <= n { 364 insertionSort(data, a, b) 365 a = b 366 b += blockSize 367 } 368 insertionSort(data, a, n) 369 370 for blockSize < n { 371 a, b = 0, 2*blockSize 372 for b <= n { 373 symMerge(data, a, a+blockSize, b) 374 a = b 375 b += 2 * blockSize 376 } 377 if m := a + blockSize; m < n { 378 symMerge(data, a, m, n) 379 } 380 blockSize *= 2 381 } 382 } 383 384 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 385 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 386 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 387 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 388 // Computer Science, pages 714-723. Springer, 2004. 389 // 390 // Let M = m-a and N = b-n. Wolog M < N. 391 // The recursion depth is bound by ceil(log(N+M)). 392 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 393 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 394 // 395 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 396 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 397 // in the paper carries through for Swap operations, especially as the block 398 // swapping rotate uses only O(M+N) Swaps. 399 // 400 // symMerge assumes non-degenerate arguments: a < m && m < b. 401 // Having the caller check this condition eliminates many leaf recursion calls, 402 // which improves performance. 403 func symMerge(data Interface, a, m, b int) { 404 // Avoid unnecessary recursions of symMerge 405 // by direct insertion of data[a] into data[m:b] 406 // if data[a:m] only contains one element. 407 if m-a == 1 { 408 // Use binary search to find the lowest index i 409 // such that data[i] >= data[a] for m <= i < b. 410 // Exit the search loop with i == b in case no such index exists. 411 i := m 412 j := b 413 for i < j { 414 h := int(uint(i+j) >> 1) 415 if data.Less(h, a) { 416 i = h + 1 417 } else { 418 j = h 419 } 420 } 421 // Swap values until data[a] reaches the position before i. 422 for k := a; k < i-1; k++ { 423 data.Swap(k, k+1) 424 } 425 return 426 } 427 428 // Avoid unnecessary recursions of symMerge 429 // by direct insertion of data[m] into data[a:m] 430 // if data[m:b] only contains one element. 431 if b-m == 1 { 432 // Use binary search to find the lowest index i 433 // such that data[i] > data[m] for a <= i < m. 434 // Exit the search loop with i == m in case no such index exists. 435 i := a 436 j := m 437 for i < j { 438 h := int(uint(i+j) >> 1) 439 if !data.Less(m, h) { 440 i = h + 1 441 } else { 442 j = h 443 } 444 } 445 // Swap values until data[m] reaches the position i. 446 for k := m; k > i; k-- { 447 data.Swap(k, k-1) 448 } 449 return 450 } 451 452 mid := int(uint(a+b) >> 1) 453 n := mid + m 454 var start, r int 455 if m > mid { 456 start = n - b 457 r = mid 458 } else { 459 start = a 460 r = m 461 } 462 p := n - 1 463 464 for start < r { 465 c := int(uint(start+r) >> 1) 466 if !data.Less(p-c, c) { 467 start = c + 1 468 } else { 469 r = c 470 } 471 } 472 473 end := n - start 474 if start < m && m < end { 475 rotate(data, start, m, end) 476 } 477 if a < start && start < mid { 478 symMerge(data, a, start, mid) 479 } 480 if mid < end && end < b { 481 symMerge(data, mid, end, b) 482 } 483 } 484 485 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 486 // Data of the form 'x u v y' is changed to 'x v u y'. 487 // Rotate performs at most b-a many calls to data.Swap. 488 // Rotate assumes non-degenerate arguments: a < m && m < b. 489 func rotate(data Interface, a, m, b int) { 490 i := m - a 491 j := b - m 492 493 for i != j { 494 if i > j { 495 swapRange(data, m-i, m, j) 496 i -= j 497 } else { 498 swapRange(data, m-i, m+j-i, i) 499 j -= i 500 } 501 } 502 // i == j 503 swapRange(data, m-i, m, i) 504 } 505 506 /* 507 Complexity of Stable Sorting 508 509 510 Complexity of block swapping rotation 511 512 Each Swap puts one new element into its correct, final position. 513 Elements which reach their final position are no longer moved. 514 Thus block swapping rotation needs |u|+|v| calls to Swaps. 515 This is best possible as each element might need a move. 516 517 Pay attention when comparing to other optimal algorithms which 518 typically count the number of assignments instead of swaps: 519 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 520 rotations uses O(u + v + gcd(u,v)) assignments which is 521 better than our O(3 * (u+v)) as gcd(u,v) <= u. 522 523 524 Stable sorting by SymMerge and BlockSwap rotations 525 526 SymMerg complexity for same size input M = N: 527 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 528 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 529 530 (The following argument does not fuzz over a missing -1 or 531 other stuff which does not impact the final result). 532 533 Let n = data.Len(). Assume n = 2^k. 534 535 Plain merge sort performs log(n) = k iterations. 536 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 537 538 Thus iteration i of merge sort performs: 539 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 540 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 541 542 In total k = log(n) iterations are performed; so in total: 543 Calls to Less O(log(n) * n) 544 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 545 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 546 547 548 Above results should generalize to arbitrary n = 2^k + p 549 and should not be influenced by the initial insertion sort phase: 550 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 551 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 552 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 553 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 554 = O(n * log(n)) 555 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 556 557 */