github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sort provides primitives for sorting slices and user-defined 6 // collections. 7 package sort 8 9 // A type, typically a collection, that satisfies sort.Interface can be 10 // sorted by the routines in this package. The methods require that the 11 // elements of the collection be enumerated by an integer index. 12 type Interface interface { 13 // Len is the number of elements in the collection. 14 Len() int 15 // Less reports whether the element with 16 // index i should sort before the element with index j. 17 Less(i, j int) bool 18 // Swap swaps the elements with indexes i and j. 19 Swap(i, j int) 20 } 21 22 // Insertion sort 23 func insertionSort(data Interface, a, b int) { 24 for i := a + 1; i < b; i++ { 25 for j := i; j > a && data.Less(j, j-1); j-- { 26 data.Swap(j, j-1) 27 } 28 } 29 } 30 31 // siftDown implements the heap property on data[lo, hi). 32 // first is an offset into the array where the root of the heap lies. 33 func siftDown(data Interface, lo, hi, first int) { 34 root := lo 35 for { 36 child := 2*root + 1 37 if child >= hi { 38 break 39 } 40 if child+1 < hi && data.Less(first+child, first+child+1) { 41 child++ 42 } 43 if !data.Less(first+root, first+child) { 44 return 45 } 46 data.Swap(first+root, first+child) 47 root = child 48 } 49 } 50 51 func heapSort(data Interface, a, b int) { 52 first := a 53 lo := 0 54 hi := b - a 55 56 // Build heap with greatest element at top. 57 for i := (hi - 1) / 2; i >= 0; i-- { 58 siftDown(data, i, hi, first) 59 } 60 61 // Pop elements, largest first, into end of data. 62 for i := hi - 1; i >= 0; i-- { 63 data.Swap(first, first+i) 64 siftDown(data, lo, i, first) 65 } 66 } 67 68 // Quicksort, loosely following Bentley and McIlroy, 69 // ``Engineering a Sort Function,'' SP&E November 1993. 70 71 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. 72 func medianOfThree(data Interface, m1, m0, m2 int) { 73 // sort 3 elements 74 if data.Less(m1, m0) { 75 data.Swap(m1, m0) 76 } 77 // data[m0] <= data[m1] 78 if data.Less(m2, m1) { 79 data.Swap(m2, m1) 80 // data[m0] <= data[m2] && data[m1] < data[m2] 81 if data.Less(m1, m0) { 82 data.Swap(m1, m0) 83 } 84 } 85 // now data[m0] <= data[m1] <= data[m2] 86 } 87 88 func swapRange(data Interface, a, b, n int) { 89 for i := 0; i < n; i++ { 90 data.Swap(a+i, b+i) 91 } 92 } 93 94 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 95 m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. 96 if hi-lo > 40 { 97 // Tukey's ``Ninther,'' median of three medians of three. 98 s := (hi - lo) / 8 99 medianOfThree(data, lo, lo+s, lo+2*s) 100 medianOfThree(data, m, m-s, m+s) 101 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 102 } 103 medianOfThree(data, lo, m, hi-1) 104 105 // Invariants are: 106 // data[lo] = pivot (set up by ChoosePivot) 107 // data[lo < i < a] < pivot 108 // data[a <= i < b] <= pivot 109 // data[b <= i < c] unexamined 110 // data[c <= i < hi-1] > pivot 111 // data[hi-1] >= pivot 112 pivot := lo 113 a, c := lo+1, hi-1 114 115 for ; a < c && data.Less(a, pivot); a++ { 116 } 117 b := a 118 for { 119 for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot 120 } 121 for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot 122 } 123 if b >= c { 124 break 125 } 126 // data[b] > pivot; data[c-1] <= pivot 127 data.Swap(b, c-1) 128 b++ 129 c-- 130 } 131 // If hi-c<3 then there are duplicates (by property of median of nine). 132 // Let be a bit more conservative, and set border to 5. 133 protect := hi-c < 5 134 if !protect && hi-c < (hi-lo)/4 { 135 // Lets test some points for equality to pivot 136 dups := 0 137 if !data.Less(pivot, hi-1) { // data[hi-1] = pivot 138 data.Swap(c, hi-1) 139 c++ 140 dups++ 141 } 142 if !data.Less(b-1, pivot) { // data[b-1] = pivot 143 b-- 144 dups++ 145 } 146 // m-lo = (hi-lo)/2 > 6 147 // b-lo > (hi-lo)*3/4-1 > 8 148 // ==> m < b ==> data[m] <= pivot 149 if !data.Less(m, pivot) { // data[m] = pivot 150 data.Swap(m, b-1) 151 b-- 152 dups++ 153 } 154 // if at least 2 points are equal to pivot, assume skewed distribution 155 protect = dups > 1 156 } 157 if protect { 158 // Protect against a lot of duplicates 159 // Add invariant: 160 // data[a <= i < b] unexamined 161 // data[b <= i < c] = pivot 162 for { 163 for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot 164 } 165 for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot 166 } 167 if a >= b { 168 break 169 } 170 // data[a] == pivot; data[b-1] < pivot 171 data.Swap(a, b-1) 172 a++ 173 b-- 174 } 175 } 176 // Swap pivot into middle 177 data.Swap(pivot, b-1) 178 return b - 1, c 179 } 180 181 func quickSort(data Interface, a, b, maxDepth int) { 182 for b-a > 12 { // Use ShellSort for slices <= 12 elements 183 if maxDepth == 0 { 184 heapSort(data, a, b) 185 return 186 } 187 maxDepth-- 188 mlo, mhi := doPivot(data, a, b) 189 // Avoiding recursion on the larger subproblem guarantees 190 // a stack depth of at most lg(b-a). 191 if mlo-a < b-mhi { 192 quickSort(data, a, mlo, maxDepth) 193 a = mhi // i.e., quickSort(data, mhi, b) 194 } else { 195 quickSort(data, mhi, b, maxDepth) 196 b = mlo // i.e., quickSort(data, a, mlo) 197 } 198 } 199 if b-a > 1 { 200 // Do ShellSort pass with gap 6 201 // It could be written in this simplified form cause b-a <= 12 202 for i := a + 6; i < b; i++ { 203 if data.Less(i, i-6) { 204 data.Swap(i, i-6) 205 } 206 } 207 insertionSort(data, a, b) 208 } 209 } 210 211 // Sort sorts data. 212 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 213 // data.Less and data.Swap. The sort is not guaranteed to be stable. 214 func Sort(data Interface) { 215 // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached. 216 n := data.Len() 217 maxDepth := 0 218 for i := n; i > 0; i >>= 1 { 219 maxDepth++ 220 } 221 maxDepth *= 2 222 quickSort(data, 0, n, maxDepth) 223 } 224 225 type reverse struct { 226 // This embedded Interface permits Reverse to use the methods of 227 // another Interface implementation. 228 Interface 229 } 230 231 // Less returns the opposite of the embedded implementation's Less method. 232 func (r reverse) Less(i, j int) bool { 233 return r.Interface.Less(j, i) 234 } 235 236 // Reverse returns the reverse order for data. 237 func Reverse(data Interface) Interface { 238 return &reverse{data} 239 } 240 241 // IsSorted reports whether data is sorted. 242 func IsSorted(data Interface) bool { 243 n := data.Len() 244 for i := n - 1; i > 0; i-- { 245 if data.Less(i, i-1) { 246 return false 247 } 248 } 249 return true 250 } 251 252 // Convenience types for common cases 253 254 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 255 type IntSlice []int 256 257 func (p IntSlice) Len() int { return len(p) } 258 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 259 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 260 261 // Sort is a convenience method. 262 func (p IntSlice) Sort() { Sort(p) } 263 264 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 265 type Float64Slice []float64 266 267 func (p Float64Slice) Len() int { return len(p) } 268 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 269 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 270 271 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 272 func isNaN(f float64) bool { 273 return f != f 274 } 275 276 // Sort is a convenience method. 277 func (p Float64Slice) Sort() { Sort(p) } 278 279 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 280 type StringSlice []string 281 282 func (p StringSlice) Len() int { return len(p) } 283 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 284 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 285 286 // Sort is a convenience method. 287 func (p StringSlice) Sort() { Sort(p) } 288 289 // Convenience wrappers for common cases 290 291 // Ints sorts a slice of ints in increasing order. 292 func Ints(a []int) { Sort(IntSlice(a)) } 293 294 // Float64s sorts a slice of float64s in increasing order. 295 func Float64s(a []float64) { Sort(Float64Slice(a)) } 296 297 // Strings sorts a slice of strings in increasing order. 298 func Strings(a []string) { Sort(StringSlice(a)) } 299 300 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 301 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 302 303 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 304 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 305 306 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 307 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 308 309 // Notes on stable sorting: 310 // The used algorithms are simple and provable correct on all input and use 311 // only logarithmic additional stack space. They perform well if compared 312 // experimentally to other stable in-place sorting algorithms. 313 // 314 // Remarks on other algorithms evaluated: 315 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 316 // Not faster. 317 // - GCC's __rotate for block rotations: Not faster. 318 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 319 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 320 // The given algorithms are in-place, number of Swap and Assignments 321 // grow as n log n but the algorithm is not stable. 322 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 323 // V. Raman in Algorithmica (1996) 16, 115-160: 324 // This algorithm either needs additional 2n bits or works only if there 325 // are enough different elements available to encode some permutations 326 // which have to be undone later (so not stable on any input). 327 // - All the optimal in-place sorting/merging algorithms I found are either 328 // unstable or rely on enough different elements in each step to encode the 329 // performed block rearrangements. See also "In-Place Merging Algorithms", 330 // Denham Coates-Evely, Department of Computer Science, Kings College, 331 // January 2004 and the references in there. 332 // - Often "optimal" algorithms are optimal in the number of assignments 333 // but Interface has only Swap as operation. 334 335 // Stable sorts data while keeping the original order of equal elements. 336 // 337 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 338 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 339 func Stable(data Interface) { 340 n := data.Len() 341 blockSize := 20 // must be > 0 342 a, b := 0, blockSize 343 for b <= n { 344 insertionSort(data, a, b) 345 a = b 346 b += blockSize 347 } 348 insertionSort(data, a, n) 349 350 for blockSize < n { 351 a, b = 0, 2*blockSize 352 for b <= n { 353 symMerge(data, a, a+blockSize, b) 354 a = b 355 b += 2 * blockSize 356 } 357 if m := a + blockSize; m < n { 358 symMerge(data, a, m, n) 359 } 360 blockSize *= 2 361 } 362 } 363 364 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 365 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 366 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 367 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 368 // Computer Science, pages 714-723. Springer, 2004. 369 // 370 // Let M = m-a and N = b-n. Wolog M < N. 371 // The recursion depth is bound by ceil(log(N+M)). 372 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 373 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 374 // 375 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 376 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 377 // in the paper carries through for Swap operations, especially as the block 378 // swapping rotate uses only O(M+N) Swaps. 379 // 380 // symMerge assumes non-degenerate arguments: a < m && m < b. 381 // Having the caller check this condition eliminates many leaf recursion calls, 382 // which improves performance. 383 func symMerge(data Interface, a, m, b int) { 384 // Avoid unnecessary recursions of symMerge 385 // by direct insertion of data[a] into data[m:b] 386 // if data[a:m] only contains one element. 387 if m-a == 1 { 388 // Use binary search to find the lowest index i 389 // such that data[i] >= data[a] for m <= i < b. 390 // Exit the search loop with i == b in case no such index exists. 391 i := m 392 j := b 393 for i < j { 394 h := i + (j-i)/2 395 if data.Less(h, a) { 396 i = h + 1 397 } else { 398 j = h 399 } 400 } 401 // Swap values until data[a] reaches the position before i. 402 for k := a; k < i-1; k++ { 403 data.Swap(k, k+1) 404 } 405 return 406 } 407 408 // Avoid unnecessary recursions of symMerge 409 // by direct insertion of data[m] into data[a:m] 410 // if data[m:b] only contains one element. 411 if b-m == 1 { 412 // Use binary search to find the lowest index i 413 // such that data[i] > data[m] for a <= i < m. 414 // Exit the search loop with i == m in case no such index exists. 415 i := a 416 j := m 417 for i < j { 418 h := i + (j-i)/2 419 if !data.Less(m, h) { 420 i = h + 1 421 } else { 422 j = h 423 } 424 } 425 // Swap values until data[m] reaches the position i. 426 for k := m; k > i; k-- { 427 data.Swap(k, k-1) 428 } 429 return 430 } 431 432 mid := a + (b-a)/2 433 n := mid + m 434 var start, r int 435 if m > mid { 436 start = n - b 437 r = mid 438 } else { 439 start = a 440 r = m 441 } 442 p := n - 1 443 444 for start < r { 445 c := start + (r-start)/2 446 if !data.Less(p-c, c) { 447 start = c + 1 448 } else { 449 r = c 450 } 451 } 452 453 end := n - start 454 if start < m && m < end { 455 rotate(data, start, m, end) 456 } 457 if a < start && start < mid { 458 symMerge(data, a, start, mid) 459 } 460 if mid < end && end < b { 461 symMerge(data, mid, end, b) 462 } 463 } 464 465 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 466 // Data of the form 'x u v y' is changed to 'x v u y'. 467 // Rotate performs at most b-a many calls to data.Swap. 468 // Rotate assumes non-degenerate arguments: a < m && m < b. 469 func rotate(data Interface, a, m, b int) { 470 i := m - a 471 j := b - m 472 473 for i != j { 474 if i > j { 475 swapRange(data, m-i, m, j) 476 i -= j 477 } else { 478 swapRange(data, m-i, m+j-i, i) 479 j -= i 480 } 481 } 482 // i == j 483 swapRange(data, m-i, m, i) 484 } 485 486 /* 487 Complexity of Stable Sorting 488 489 490 Complexity of block swapping rotation 491 492 Each Swap puts one new element into its correct, final position. 493 Elements which reach their final position are no longer moved. 494 Thus block swapping rotation needs |u|+|v| calls to Swaps. 495 This is best possible as each element might need a move. 496 497 Pay attention when comparing to other optimal algorithms which 498 typically count the number of assignments instead of swaps: 499 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 500 rotations uses O(u + v + gcd(u,v)) assignments which is 501 better than our O(3 * (u+v)) as gcd(u,v) <= u. 502 503 504 Stable sorting by SymMerge and BlockSwap rotations 505 506 SymMerg complexity for same size input M = N: 507 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 508 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 509 510 (The following argument does not fuzz over a missing -1 or 511 other stuff which does not impact the final result). 512 513 Let n = data.Len(). Assume n = 2^k. 514 515 Plain merge sort performs log(n) = k iterations. 516 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 517 518 Thus iteration i of merge sort performs: 519 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 520 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 521 522 In total k = log(n) iterations are performed; so in total: 523 Calls to Less O(log(n) * n) 524 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 525 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 526 527 528 Above results should generalize to arbitrary n = 2^k + p 529 and should not be influenced by the initial insertion sort phase: 530 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 531 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 532 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 533 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 534 = O(n * log(n)) 535 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 536 537 */