github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sort provides primitives for sorting slices and user-defined 6 // collections. 7 package sort 8 9 // A type, typically a collection, that satisfies sort.Interface can be 10 // sorted by the routines in this package. The methods require that the 11 // elements of the collection be enumerated by an integer index. 12 type Interface interface { 13 // Len is the number of elements in the collection. 14 Len() int 15 // Less reports whether the element with 16 // index i should sort before the element with index j. 17 Less(i, j int) bool 18 // Swap swaps the elements with indexes i and j. 19 Swap(i, j int) 20 } 21 22 func min(a, b int) int { 23 if a < b { 24 return a 25 } 26 return b 27 } 28 29 // Insertion sort 30 func insertionSort(data Interface, a, b int) { 31 for i := a + 1; i < b; i++ { 32 for j := i; j > a && data.Less(j, j-1); j-- { 33 data.Swap(j, j-1) 34 } 35 } 36 } 37 38 // siftDown implements the heap property on data[lo, hi). 39 // first is an offset into the array where the root of the heap lies. 40 func siftDown(data Interface, lo, hi, first int) { 41 root := lo 42 for { 43 child := 2*root + 1 44 if child >= hi { 45 break 46 } 47 if child+1 < hi && data.Less(first+child, first+child+1) { 48 child++ 49 } 50 if !data.Less(first+root, first+child) { 51 return 52 } 53 data.Swap(first+root, first+child) 54 root = child 55 } 56 } 57 58 func heapSort(data Interface, a, b int) { 59 first := a 60 lo := 0 61 hi := b - a 62 63 // Build heap with greatest element at top. 64 for i := (hi - 1) / 2; i >= 0; i-- { 65 siftDown(data, i, hi, first) 66 } 67 68 // Pop elements, largest first, into end of data. 69 for i := hi - 1; i >= 0; i-- { 70 data.Swap(first, first+i) 71 siftDown(data, lo, i, first) 72 } 73 } 74 75 // Quicksort, following Bentley and McIlroy, 76 // ``Engineering a Sort Function,'' SP&E November 1993. 77 78 // medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a]. 79 func medianOfThree(data Interface, a, b, c int) { 80 m0 := b 81 m1 := a 82 m2 := c 83 // bubble sort on 3 elements 84 if data.Less(m1, m0) { 85 data.Swap(m1, m0) 86 } 87 if data.Less(m2, m1) { 88 data.Swap(m2, m1) 89 } 90 if data.Less(m1, m0) { 91 data.Swap(m1, m0) 92 } 93 // now data[m0] <= data[m1] <= data[m2] 94 } 95 96 func swapRange(data Interface, a, b, n int) { 97 for i := 0; i < n; i++ { 98 data.Swap(a+i, b+i) 99 } 100 } 101 102 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 103 m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. 104 if hi-lo > 40 { 105 // Tukey's ``Ninther,'' median of three medians of three. 106 s := (hi - lo) / 8 107 medianOfThree(data, lo, lo+s, lo+2*s) 108 medianOfThree(data, m, m-s, m+s) 109 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 110 } 111 medianOfThree(data, lo, m, hi-1) 112 113 // Invariants are: 114 // data[lo] = pivot (set up by ChoosePivot) 115 // data[lo <= i < a] = pivot 116 // data[a <= i < b] < pivot 117 // data[b <= i < c] is unexamined 118 // data[c <= i < d] > pivot 119 // data[d <= i < hi] = pivot 120 // 121 // Once b meets c, can swap the "= pivot" sections 122 // into the middle of the slice. 123 pivot := lo 124 a, b, c, d := lo+1, lo+1, hi, hi 125 for { 126 for b < c { 127 if data.Less(b, pivot) { // data[b] < pivot 128 b++ 129 } else if !data.Less(pivot, b) { // data[b] = pivot 130 data.Swap(a, b) 131 a++ 132 b++ 133 } else { 134 break 135 } 136 } 137 for b < c { 138 if data.Less(pivot, c-1) { // data[c-1] > pivot 139 c-- 140 } else if !data.Less(c-1, pivot) { // data[c-1] = pivot 141 data.Swap(c-1, d-1) 142 c-- 143 d-- 144 } else { 145 break 146 } 147 } 148 if b >= c { 149 break 150 } 151 // data[b] > pivot; data[c-1] < pivot 152 data.Swap(b, c-1) 153 b++ 154 c-- 155 } 156 157 n := min(b-a, a-lo) 158 swapRange(data, lo, b-n, n) 159 160 n = min(hi-d, d-c) 161 swapRange(data, c, hi-n, n) 162 163 return lo + b - a, hi - (d - c) 164 } 165 166 func quickSort(data Interface, a, b, maxDepth int) { 167 for b-a > 7 { 168 if maxDepth == 0 { 169 heapSort(data, a, b) 170 return 171 } 172 maxDepth-- 173 mlo, mhi := doPivot(data, a, b) 174 // Avoiding recursion on the larger subproblem guarantees 175 // a stack depth of at most lg(b-a). 176 if mlo-a < b-mhi { 177 quickSort(data, a, mlo, maxDepth) 178 a = mhi // i.e., quickSort(data, mhi, b) 179 } else { 180 quickSort(data, mhi, b, maxDepth) 181 b = mlo // i.e., quickSort(data, a, mlo) 182 } 183 } 184 if b-a > 1 { 185 insertionSort(data, a, b) 186 } 187 } 188 189 // Sort sorts data. 190 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 191 // data.Less and data.Swap. The sort is not guaranteed to be stable. 192 func Sort(data Interface) { 193 // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached. 194 n := data.Len() 195 maxDepth := 0 196 for i := n; i > 0; i >>= 1 { 197 maxDepth++ 198 } 199 maxDepth *= 2 200 quickSort(data, 0, n, maxDepth) 201 } 202 203 type reverse struct { 204 // This embedded Interface permits Reverse to use the methods of 205 // another Interface implementation. 206 Interface 207 } 208 209 // Less returns the opposite of the embedded implementation's Less method. 210 func (r reverse) Less(i, j int) bool { 211 return r.Interface.Less(j, i) 212 } 213 214 // Reverse returns the reverse order for data. 215 func Reverse(data Interface) Interface { 216 return &reverse{data} 217 } 218 219 // IsSorted reports whether data is sorted. 220 func IsSorted(data Interface) bool { 221 n := data.Len() 222 for i := n - 1; i > 0; i-- { 223 if data.Less(i, i-1) { 224 return false 225 } 226 } 227 return true 228 } 229 230 // Convenience types for common cases 231 232 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 233 type IntSlice []int 234 235 func (p IntSlice) Len() int { return len(p) } 236 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 237 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 238 239 // Sort is a convenience method. 240 func (p IntSlice) Sort() { Sort(p) } 241 242 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 243 type Float64Slice []float64 244 245 func (p Float64Slice) Len() int { return len(p) } 246 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 247 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 248 249 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 250 func isNaN(f float64) bool { 251 return f != f 252 } 253 254 // Sort is a convenience method. 255 func (p Float64Slice) Sort() { Sort(p) } 256 257 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 258 type StringSlice []string 259 260 func (p StringSlice) Len() int { return len(p) } 261 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 262 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 263 264 // Sort is a convenience method. 265 func (p StringSlice) Sort() { Sort(p) } 266 267 // Convenience wrappers for common cases 268 269 // Ints sorts a slice of ints in increasing order. 270 func Ints(a []int) { Sort(IntSlice(a)) } 271 272 // Float64s sorts a slice of float64s in increasing order. 273 func Float64s(a []float64) { Sort(Float64Slice(a)) } 274 275 // Strings sorts a slice of strings in increasing order. 276 func Strings(a []string) { Sort(StringSlice(a)) } 277 278 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 279 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 280 281 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 282 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 283 284 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 285 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 286 287 // Notes on stable sorting: 288 // The used algorithms are simple and provable correct on all input and use 289 // only logarithmic additional stack space. They perform well if compared 290 // experimentally to other stable in-place sorting algorithms. 291 // 292 // Remarks on other algorithms evaluated: 293 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 294 // Not faster. 295 // - GCC's __rotate for block rotations: Not faster. 296 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 297 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 298 // The given algorithms are in-place, number of Swap and Assignments 299 // grow as n log n but the algorithm is not stable. 300 // - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and 301 // V. Raman in Algorithmica (1996) 16, 115-160: 302 // This algorithm either needs additional 2n bits or works only if there 303 // are enough different elements available to encode some permutations 304 // which have to be undone later (so not stable an any input). 305 // - All the optimal in-place sorting/merging algorithms I found are either 306 // unstable or rely on enough different elements in each step to encode the 307 // performed block rearrangements. See also "In-Place Merging Algorithms", 308 // Denham Coates-Evely, Department of Computer Science, Kings College, 309 // January 2004 and the reverences in there. 310 // - Often "optimal" algorithms are optimal in the number of assignments 311 // but Interface has only Swap as operation. 312 313 // Stable sorts data while keeping the original order of equal elements. 314 // 315 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 316 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 317 func Stable(data Interface) { 318 n := data.Len() 319 blockSize := 20 // must be > 0 320 a, b := 0, blockSize 321 for b <= n { 322 insertionSort(data, a, b) 323 a = b 324 b += blockSize 325 } 326 insertionSort(data, a, n) 327 328 for blockSize < n { 329 a, b = 0, 2*blockSize 330 for b <= n { 331 symMerge(data, a, a+blockSize, b) 332 a = b 333 b += 2 * blockSize 334 } 335 if m := a + blockSize; m < n { 336 symMerge(data, a, m, n) 337 } 338 blockSize *= 2 339 } 340 } 341 342 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 343 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 344 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 345 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 346 // Computer Science, pages 714-723. Springer, 2004. 347 // 348 // Let M = m-a and N = b-n. Wolog M < N. 349 // The recursion depth is bound by ceil(log(N+M)). 350 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 351 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 352 // 353 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 354 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 355 // in the paper carries through for Swap operations, especially as the block 356 // swapping rotate uses only O(M+N) Swaps. 357 // 358 // symMerge assumes non-degenerate arguments: a < m && m < b. 359 // Having the caller check this condition eliminates many leaf recursion calls, 360 // which improves performance. 361 func symMerge(data Interface, a, m, b int) { 362 // Avoid unnecessary recursions of symMerge 363 // by direct insertion of data[a] into data[m:b] 364 // if data[a:m] only contains one element. 365 if m-a == 1 { 366 // Use binary search to find the lowest index i 367 // such that data[i] >= data[a] for m <= i < b. 368 // Exit the search loop with i == b in case no such index exists. 369 i := m 370 j := b 371 for i < j { 372 h := i + (j-i)/2 373 if data.Less(h, a) { 374 i = h + 1 375 } else { 376 j = h 377 } 378 } 379 // Swap values until data[a] reaches the position before i. 380 for k := a; k < i-1; k++ { 381 data.Swap(k, k+1) 382 } 383 return 384 } 385 386 // Avoid unnecessary recursions of symMerge 387 // by direct insertion of data[m] into data[a:m] 388 // if data[m:b] only contains one element. 389 if b-m == 1 { 390 // Use binary search to find the lowest index i 391 // such that data[i] > data[m] for a <= i < m. 392 // Exit the search loop with i == m in case no such index exists. 393 i := a 394 j := m 395 for i < j { 396 h := i + (j-i)/2 397 if !data.Less(m, h) { 398 i = h + 1 399 } else { 400 j = h 401 } 402 } 403 // Swap values until data[m] reaches the position i. 404 for k := m; k > i; k-- { 405 data.Swap(k, k-1) 406 } 407 return 408 } 409 410 mid := a + (b-a)/2 411 n := mid + m 412 var start, r int 413 if m > mid { 414 start = n - b 415 r = mid 416 } else { 417 start = a 418 r = m 419 } 420 p := n - 1 421 422 for start < r { 423 c := start + (r-start)/2 424 if !data.Less(p-c, c) { 425 start = c + 1 426 } else { 427 r = c 428 } 429 } 430 431 end := n - start 432 if start < m && m < end { 433 rotate(data, start, m, end) 434 } 435 if a < start && start < mid { 436 symMerge(data, a, start, mid) 437 } 438 if mid < end && end < b { 439 symMerge(data, mid, end, b) 440 } 441 } 442 443 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 444 // Data of the form 'x u v y' is changed to 'x v u y'. 445 // Rotate performs at most b-a many calls to data.Swap. 446 // Rotate assumes non-degenerate arguments: a < m && m < b. 447 func rotate(data Interface, a, m, b int) { 448 i := m - a 449 j := b - m 450 451 for i != j { 452 if i > j { 453 swapRange(data, m-i, m, j) 454 i -= j 455 } else { 456 swapRange(data, m-i, m+j-i, i) 457 j -= i 458 } 459 } 460 // i == j 461 swapRange(data, m-i, m, i) 462 } 463 464 /* 465 Complexity of Stable Sorting 466 467 468 Complexity of block swapping rotation 469 470 Each Swap puts one new element into its correct, final position. 471 Elements which reach their final position are no longer moved. 472 Thus block swapping rotation needs |u|+|v| calls to Swaps. 473 This is best possible as each element might need a move. 474 475 Pay attention when comparing to other optimal algorithms which 476 typically count the number of assignments instead of swaps: 477 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 478 rotations uses O(u + v + gcd(u,v)) assignments which is 479 better than our O(3 * (u+v)) as gcd(u,v) <= u. 480 481 482 Stable sorting by SymMerge and BlockSwap rotations 483 484 SymMerg complexity for same size input M = N: 485 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 486 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 487 488 (The following argument does not fuzz over a missing -1 or 489 other stuff which does not impact the final result). 490 491 Let n = data.Len(). Assume n = 2^k. 492 493 Plain merge sort performs log(n) = k iterations. 494 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 495 496 Thus iteration i of merge sort performs: 497 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 498 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 499 500 In total k = log(n) iterations are performed; so in total: 501 Calls to Less O(log(n) * n) 502 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 503 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 504 505 506 Above results should generalize to arbitrary n = 2^k + p 507 and should not be influenced by the initial insertion sort phase: 508 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 509 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 510 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 511 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 512 = O(n * log(n)) 513 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 514 515 */