github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sort provides primitives for sorting slices and user-defined 6 // collections. 7 package sort 8 9 // A type, typically a collection, that satisfies sort.Interface can be 10 // sorted by the routines in this package. The methods require that the 11 // elements of the collection be enumerated by an integer index. 12 type Interface interface { 13 // Len is the number of elements in the collection. 14 Len() int 15 // Less reports whether the element with 16 // index i should sort before the element with index j. 17 Less(i, j int) bool 18 // Swap swaps the elements with indexes i and j. 19 Swap(i, j int) 20 } 21 22 func min(a, b int) int { 23 if a < b { 24 return a 25 } 26 return b 27 } 28 29 // Insertion sort 30 func insertionSort(data Interface, a, b int) { 31 for i := a + 1; i < b; i++ { 32 for j := i; j > a && data.Less(j, j-1); j-- { 33 data.Swap(j, j-1) 34 } 35 } 36 } 37 38 // siftDown implements the heap property on data[lo, hi). 39 // first is an offset into the array where the root of the heap lies. 40 func siftDown(data Interface, lo, hi, first int) { 41 root := lo 42 for { 43 child := 2*root + 1 44 if child >= hi { 45 break 46 } 47 if child+1 < hi && data.Less(first+child, first+child+1) { 48 child++ 49 } 50 if !data.Less(first+root, first+child) { 51 return 52 } 53 data.Swap(first+root, first+child) 54 root = child 55 } 56 } 57 58 func heapSort(data Interface, a, b int) { 59 first := a 60 lo := 0 61 hi := b - a 62 63 // Build heap with greatest element at top. 64 for i := (hi - 1) / 2; i >= 0; i-- { 65 siftDown(data, i, hi, first) 66 } 67 68 // Pop elements, largest first, into end of data. 69 for i := hi - 1; i >= 0; i-- { 70 data.Swap(first, first+i) 71 siftDown(data, lo, i, first) 72 } 73 } 74 75 // Quicksort, following Bentley and McIlroy, 76 // ``Engineering a Sort Function,'' SP&E November 1993. 77 78 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. 79 func medianOfThree(data Interface, m1, m0, m2 int) { 80 // sort 3 elements 81 if data.Less(m1, m0) { 82 data.Swap(m1, m0) 83 } 84 // data[m0] <= data[m1] 85 if data.Less(m2, m1) { 86 data.Swap(m2, m1) 87 // data[m0] <= data[m2] && data[m1] < data[m2] 88 if data.Less(m1, m0) { 89 data.Swap(m1, m0) 90 } 91 } 92 // now data[m0] <= data[m1] <= data[m2] 93 } 94 95 func swapRange(data Interface, a, b, n int) { 96 for i := 0; i < n; i++ { 97 data.Swap(a+i, b+i) 98 } 99 } 100 101 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 102 m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. 103 if hi-lo > 40 { 104 // Tukey's ``Ninther,'' median of three medians of three. 105 s := (hi - lo) / 8 106 medianOfThree(data, lo, lo+s, lo+2*s) 107 medianOfThree(data, m, m-s, m+s) 108 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 109 } 110 medianOfThree(data, lo, m, hi-1) 111 112 // Invariants are: 113 // data[lo] = pivot (set up by ChoosePivot) 114 // data[lo <= i < a] = pivot 115 // data[a <= i < b] < pivot 116 // data[b <= i < c] is unexamined 117 // data[c <= i < d] > pivot 118 // data[d <= i < hi] = pivot 119 // 120 // Once b meets c, can swap the "= pivot" sections 121 // into the middle of the slice. 122 pivot := lo 123 a, b, c, d := lo+1, lo+1, hi, hi 124 for { 125 for b < c { 126 if data.Less(b, pivot) { // data[b] < pivot 127 b++ 128 } else if !data.Less(pivot, b) { // data[b] = pivot 129 data.Swap(a, b) 130 a++ 131 b++ 132 } else { 133 break 134 } 135 } 136 for b < c { 137 if data.Less(pivot, c-1) { // data[c-1] > pivot 138 c-- 139 } else if !data.Less(c-1, pivot) { // data[c-1] = pivot 140 data.Swap(c-1, d-1) 141 c-- 142 d-- 143 } else { 144 break 145 } 146 } 147 if b >= c { 148 break 149 } 150 // data[b] > pivot; data[c-1] < pivot 151 data.Swap(b, c-1) 152 b++ 153 c-- 154 } 155 156 n := min(b-a, a-lo) 157 swapRange(data, lo, b-n, n) 158 159 n = min(hi-d, d-c) 160 swapRange(data, c, hi-n, n) 161 162 return lo + b - a, hi - (d - c) 163 } 164 165 func quickSort(data Interface, a, b, maxDepth int) { 166 for b-a > 7 { 167 if maxDepth == 0 { 168 heapSort(data, a, b) 169 return 170 } 171 maxDepth-- 172 mlo, mhi := doPivot(data, a, b) 173 // Avoiding recursion on the larger subproblem guarantees 174 // a stack depth of at most lg(b-a). 175 if mlo-a < b-mhi { 176 quickSort(data, a, mlo, maxDepth) 177 a = mhi // i.e., quickSort(data, mhi, b) 178 } else { 179 quickSort(data, mhi, b, maxDepth) 180 b = mlo // i.e., quickSort(data, a, mlo) 181 } 182 } 183 if b-a > 1 { 184 insertionSort(data, a, b) 185 } 186 } 187 188 // Sort sorts data. 189 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 190 // data.Less and data.Swap. The sort is not guaranteed to be stable. 191 func Sort(data Interface) { 192 // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached. 193 n := data.Len() 194 maxDepth := 0 195 for i := n; i > 0; i >>= 1 { 196 maxDepth++ 197 } 198 maxDepth *= 2 199 quickSort(data, 0, n, maxDepth) 200 } 201 202 type reverse struct { 203 // This embedded Interface permits Reverse to use the methods of 204 // another Interface implementation. 205 Interface 206 } 207 208 // Less returns the opposite of the embedded implementation's Less method. 209 func (r reverse) Less(i, j int) bool { 210 return r.Interface.Less(j, i) 211 } 212 213 // Reverse returns the reverse order for data. 214 func Reverse(data Interface) Interface { 215 return &reverse{data} 216 } 217 218 // IsSorted reports whether data is sorted. 219 func IsSorted(data Interface) bool { 220 n := data.Len() 221 for i := n - 1; i > 0; i-- { 222 if data.Less(i, i-1) { 223 return false 224 } 225 } 226 return true 227 } 228 229 // Convenience types for common cases 230 231 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 232 type IntSlice []int 233 234 func (p IntSlice) Len() int { return len(p) } 235 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 236 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 237 238 // Sort is a convenience method. 239 func (p IntSlice) Sort() { Sort(p) } 240 241 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 242 type Float64Slice []float64 243 244 func (p Float64Slice) Len() int { return len(p) } 245 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 246 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 247 248 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 249 func isNaN(f float64) bool { 250 return f != f 251 } 252 253 // Sort is a convenience method. 254 func (p Float64Slice) Sort() { Sort(p) } 255 256 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 257 type StringSlice []string 258 259 func (p StringSlice) Len() int { return len(p) } 260 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 261 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 262 263 // Sort is a convenience method. 264 func (p StringSlice) Sort() { Sort(p) } 265 266 // Convenience wrappers for common cases 267 268 // Ints sorts a slice of ints in increasing order. 269 func Ints(a []int) { Sort(IntSlice(a)) } 270 271 // Float64s sorts a slice of float64s in increasing order. 272 func Float64s(a []float64) { Sort(Float64Slice(a)) } 273 274 // Strings sorts a slice of strings in increasing order. 275 func Strings(a []string) { Sort(StringSlice(a)) } 276 277 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 278 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 279 280 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 281 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 282 283 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 284 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 285 286 // Notes on stable sorting: 287 // The used algorithms are simple and provable correct on all input and use 288 // only logarithmic additional stack space. They perform well if compared 289 // experimentally to other stable in-place sorting algorithms. 290 // 291 // Remarks on other algorithms evaluated: 292 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 293 // Not faster. 294 // - GCC's __rotate for block rotations: Not faster. 295 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 296 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 297 // The given algorithms are in-place, number of Swap and Assignments 298 // grow as n log n but the algorithm is not stable. 299 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 300 // V. Raman in Algorithmica (1996) 16, 115-160: 301 // This algorithm either needs additional 2n bits or works only if there 302 // are enough different elements available to encode some permutations 303 // which have to be undone later (so not stable on any input). 304 // - All the optimal in-place sorting/merging algorithms I found are either 305 // unstable or rely on enough different elements in each step to encode the 306 // performed block rearrangements. See also "In-Place Merging Algorithms", 307 // Denham Coates-Evely, Department of Computer Science, Kings College, 308 // January 2004 and the reverences in there. 309 // - Often "optimal" algorithms are optimal in the number of assignments 310 // but Interface has only Swap as operation. 311 312 // Stable sorts data while keeping the original order of equal elements. 313 // 314 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 315 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 316 func Stable(data Interface) { 317 n := data.Len() 318 blockSize := 20 // must be > 0 319 a, b := 0, blockSize 320 for b <= n { 321 insertionSort(data, a, b) 322 a = b 323 b += blockSize 324 } 325 insertionSort(data, a, n) 326 327 for blockSize < n { 328 a, b = 0, 2*blockSize 329 for b <= n { 330 symMerge(data, a, a+blockSize, b) 331 a = b 332 b += 2 * blockSize 333 } 334 if m := a + blockSize; m < n { 335 symMerge(data, a, m, n) 336 } 337 blockSize *= 2 338 } 339 } 340 341 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 342 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 343 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 344 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 345 // Computer Science, pages 714-723. Springer, 2004. 346 // 347 // Let M = m-a and N = b-n. Wolog M < N. 348 // The recursion depth is bound by ceil(log(N+M)). 349 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 350 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 351 // 352 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 353 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 354 // in the paper carries through for Swap operations, especially as the block 355 // swapping rotate uses only O(M+N) Swaps. 356 // 357 // symMerge assumes non-degenerate arguments: a < m && m < b. 358 // Having the caller check this condition eliminates many leaf recursion calls, 359 // which improves performance. 360 func symMerge(data Interface, a, m, b int) { 361 // Avoid unnecessary recursions of symMerge 362 // by direct insertion of data[a] into data[m:b] 363 // if data[a:m] only contains one element. 364 if m-a == 1 { 365 // Use binary search to find the lowest index i 366 // such that data[i] >= data[a] for m <= i < b. 367 // Exit the search loop with i == b in case no such index exists. 368 i := m 369 j := b 370 for i < j { 371 h := i + (j-i)/2 372 if data.Less(h, a) { 373 i = h + 1 374 } else { 375 j = h 376 } 377 } 378 // Swap values until data[a] reaches the position before i. 379 for k := a; k < i-1; k++ { 380 data.Swap(k, k+1) 381 } 382 return 383 } 384 385 // Avoid unnecessary recursions of symMerge 386 // by direct insertion of data[m] into data[a:m] 387 // if data[m:b] only contains one element. 388 if b-m == 1 { 389 // Use binary search to find the lowest index i 390 // such that data[i] > data[m] for a <= i < m. 391 // Exit the search loop with i == m in case no such index exists. 392 i := a 393 j := m 394 for i < j { 395 h := i + (j-i)/2 396 if !data.Less(m, h) { 397 i = h + 1 398 } else { 399 j = h 400 } 401 } 402 // Swap values until data[m] reaches the position i. 403 for k := m; k > i; k-- { 404 data.Swap(k, k-1) 405 } 406 return 407 } 408 409 mid := a + (b-a)/2 410 n := mid + m 411 var start, r int 412 if m > mid { 413 start = n - b 414 r = mid 415 } else { 416 start = a 417 r = m 418 } 419 p := n - 1 420 421 for start < r { 422 c := start + (r-start)/2 423 if !data.Less(p-c, c) { 424 start = c + 1 425 } else { 426 r = c 427 } 428 } 429 430 end := n - start 431 if start < m && m < end { 432 rotate(data, start, m, end) 433 } 434 if a < start && start < mid { 435 symMerge(data, a, start, mid) 436 } 437 if mid < end && end < b { 438 symMerge(data, mid, end, b) 439 } 440 } 441 442 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 443 // Data of the form 'x u v y' is changed to 'x v u y'. 444 // Rotate performs at most b-a many calls to data.Swap. 445 // Rotate assumes non-degenerate arguments: a < m && m < b. 446 func rotate(data Interface, a, m, b int) { 447 i := m - a 448 j := b - m 449 450 for i != j { 451 if i > j { 452 swapRange(data, m-i, m, j) 453 i -= j 454 } else { 455 swapRange(data, m-i, m+j-i, i) 456 j -= i 457 } 458 } 459 // i == j 460 swapRange(data, m-i, m, i) 461 } 462 463 /* 464 Complexity of Stable Sorting 465 466 467 Complexity of block swapping rotation 468 469 Each Swap puts one new element into its correct, final position. 470 Elements which reach their final position are no longer moved. 471 Thus block swapping rotation needs |u|+|v| calls to Swaps. 472 This is best possible as each element might need a move. 473 474 Pay attention when comparing to other optimal algorithms which 475 typically count the number of assignments instead of swaps: 476 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 477 rotations uses O(u + v + gcd(u,v)) assignments which is 478 better than our O(3 * (u+v)) as gcd(u,v) <= u. 479 480 481 Stable sorting by SymMerge and BlockSwap rotations 482 483 SymMerg complexity for same size input M = N: 484 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 485 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 486 487 (The following argument does not fuzz over a missing -1 or 488 other stuff which does not impact the final result). 489 490 Let n = data.Len(). Assume n = 2^k. 491 492 Plain merge sort performs log(n) = k iterations. 493 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 494 495 Thus iteration i of merge sort performs: 496 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 497 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 498 499 In total k = log(n) iterations are performed; so in total: 500 Calls to Less O(log(n) * n) 501 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 502 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 503 504 505 Above results should generalize to arbitrary n = 2^k + p 506 and should not be influenced by the initial insertion sort phase: 507 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 508 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 509 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 510 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 511 = O(n * log(n)) 512 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 513 514 */