github.com/go4org/go4@v0.0.0-20200104003542-c7e774b10ea0/sort/sort.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run genzfunc.go 6 7 // Package sort provides primitives for sorting slices and user-defined 8 // collections. 9 // 10 // This is a copy of the Go standard library's sort package with the 11 // addition of some helpers for sorting slices and using func literals 12 // to sort, rather than having to create a sorter type. See the 13 // additional MakeInterface, SliceSorter, and Slice functions. 14 // Discussion of moving such helpers into the standard library is 15 // at: 16 // 17 // https://golang.org/issue/16721 18 // 19 // Per Go's "no +1 policy", please only leave a comment on that issue 20 // if you have something unique to add. Use Github's emoji reactions 21 // otherwise. 22 package sort 23 24 import ( 25 "reflect" 26 27 "go4.org/reflectutil" 28 ) 29 30 // A type, typically a collection, that satisfies sort.Interface can be 31 // sorted by the routines in this package. The methods require that the 32 // elements of the collection be enumerated by an integer index. 33 type Interface interface { 34 // Len is the number of elements in the collection. 35 Len() int 36 // Less reports whether the element with 37 // index i should sort before the element with index j. 38 Less(i, j int) bool 39 // Swap swaps the elements with indexes i and j. 40 Swap(i, j int) 41 } 42 43 // lessSwap is a pair of Less and Swap function for use with the 44 // auto-generated func-optimized variant of sort.go in 45 // zfuncversion.go. 46 type lessSwap struct { 47 Less func(i, j int) bool 48 Swap func(i, j int) 49 } 50 51 // MakeInterface returns a sort Interface using the provided length 52 // and pair of swap and less functions. 53 func MakeInterface(length int, swap func(i, j int), less func(i, j int) bool) Interface { 54 return &funcs{length, lessSwap{less, swap}} 55 } 56 57 // SliceSorter returns a sort.Interface to sort the provided slice 58 // using the provided less function. 59 // If the provided interface is not a slice, the function panics. 60 func SliceSorter(slice interface{}, less func(i, j int) bool) Interface { 61 return MakeInterface(reflect.ValueOf(slice).Len(), reflectutil.Swapper(slice), less) 62 } 63 64 // Slice sorts the provided slice using less. 65 // If the provided interface is not a slice, the function panics. 66 // The sort is not stable. For a stable sort, use sort.Stable with sort.SliceSorter. 67 func Slice(slice interface{}, less func(i, j int) bool) { 68 Sort(SliceSorter(slice, less)) 69 } 70 71 // funcs implements Interface, but is recognized by Sort and Stable 72 // which use its lessSwap field with the non-interface sorting 73 // routines in zfuncversion.go. 74 type funcs struct { 75 length int 76 lessSwap 77 } 78 79 func (f *funcs) Len() int { return f.length } 80 func (f *funcs) Swap(i, j int) { f.lessSwap.Swap(i, j) } 81 func (f *funcs) Less(i, j int) bool { return f.lessSwap.Less(i, j) } 82 83 // Insertion sort 84 func insertionSort(data Interface, a, b int) { 85 for i := a + 1; i < b; i++ { 86 for j := i; j > a && data.Less(j, j-1); j-- { 87 data.Swap(j, j-1) 88 } 89 } 90 } 91 92 // siftDown implements the heap property on data[lo, hi). 93 // first is an offset into the array where the root of the heap lies. 94 func siftDown(data Interface, lo, hi, first int) { 95 root := lo 96 for { 97 child := 2*root + 1 98 if child >= hi { 99 break 100 } 101 if child+1 < hi && data.Less(first+child, first+child+1) { 102 child++ 103 } 104 if !data.Less(first+root, first+child) { 105 return 106 } 107 data.Swap(first+root, first+child) 108 root = child 109 } 110 } 111 112 func heapSort(data Interface, a, b int) { 113 first := a 114 lo := 0 115 hi := b - a 116 117 // Build heap with greatest element at top. 118 for i := (hi - 1) / 2; i >= 0; i-- { 119 siftDown(data, i, hi, first) 120 } 121 122 // Pop elements, largest first, into end of data. 123 for i := hi - 1; i >= 0; i-- { 124 data.Swap(first, first+i) 125 siftDown(data, lo, i, first) 126 } 127 } 128 129 // Quicksort, loosely following Bentley and McIlroy, 130 // ``Engineering a Sort Function,'' SP&E November 1993. 131 132 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. 133 func medianOfThree(data Interface, m1, m0, m2 int) { 134 // sort 3 elements 135 if data.Less(m1, m0) { 136 data.Swap(m1, m0) 137 } 138 // data[m0] <= data[m1] 139 if data.Less(m2, m1) { 140 data.Swap(m2, m1) 141 // data[m0] <= data[m2] && data[m1] < data[m2] 142 if data.Less(m1, m0) { 143 data.Swap(m1, m0) 144 } 145 } 146 // now data[m0] <= data[m1] <= data[m2] 147 } 148 149 func swapRange(data Interface, a, b, n int) { 150 for i := 0; i < n; i++ { 151 data.Swap(a+i, b+i) 152 } 153 } 154 155 func doPivot(data Interface, lo, hi int) (midlo, midhi int) { 156 m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. 157 if hi-lo > 40 { 158 // Tukey's ``Ninther,'' median of three medians of three. 159 s := (hi - lo) / 8 160 medianOfThree(data, lo, lo+s, lo+2*s) 161 medianOfThree(data, m, m-s, m+s) 162 medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) 163 } 164 medianOfThree(data, lo, m, hi-1) 165 166 // Invariants are: 167 // data[lo] = pivot (set up by ChoosePivot) 168 // data[lo < i < a] < pivot 169 // data[a <= i < b] <= pivot 170 // data[b <= i < c] unexamined 171 // data[c <= i < hi-1] > pivot 172 // data[hi-1] >= pivot 173 pivot := lo 174 a, c := lo+1, hi-1 175 176 for ; a < c && data.Less(a, pivot); a++ { 177 } 178 b := a 179 for { 180 for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot 181 } 182 for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot 183 } 184 if b >= c { 185 break 186 } 187 // data[b] > pivot; data[c-1] <= pivot 188 data.Swap(b, c-1) 189 b++ 190 c-- 191 } 192 // If hi-c<3 then there are duplicates (by property of median of nine). 193 // Let be a bit more conservative, and set border to 5. 194 protect := hi-c < 5 195 if !protect && hi-c < (hi-lo)/4 { 196 // Lets test some points for equality to pivot 197 dups := 0 198 if !data.Less(pivot, hi-1) { // data[hi-1] = pivot 199 data.Swap(c, hi-1) 200 c++ 201 dups++ 202 } 203 if !data.Less(b-1, pivot) { // data[b-1] = pivot 204 b-- 205 dups++ 206 } 207 // m-lo = (hi-lo)/2 > 6 208 // b-lo > (hi-lo)*3/4-1 > 8 209 // ==> m < b ==> data[m] <= pivot 210 if !data.Less(m, pivot) { // data[m] = pivot 211 data.Swap(m, b-1) 212 b-- 213 dups++ 214 } 215 // if at least 2 points are equal to pivot, assume skewed distribution 216 protect = dups > 1 217 } 218 if protect { 219 // Protect against a lot of duplicates 220 // Add invariant: 221 // data[a <= i < b] unexamined 222 // data[b <= i < c] = pivot 223 for { 224 for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot 225 } 226 for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot 227 } 228 if a >= b { 229 break 230 } 231 // data[a] == pivot; data[b-1] < pivot 232 data.Swap(a, b-1) 233 a++ 234 b-- 235 } 236 } 237 // Swap pivot into middle 238 data.Swap(pivot, b-1) 239 return b - 1, c 240 } 241 242 func quickSort(data Interface, a, b, maxDepth int) { 243 for b-a > 12 { // Use ShellSort for slices <= 12 elements 244 if maxDepth == 0 { 245 heapSort(data, a, b) 246 return 247 } 248 maxDepth-- 249 mlo, mhi := doPivot(data, a, b) 250 // Avoiding recursion on the larger subproblem guarantees 251 // a stack depth of at most lg(b-a). 252 if mlo-a < b-mhi { 253 quickSort(data, a, mlo, maxDepth) 254 a = mhi // i.e., quickSort(data, mhi, b) 255 } else { 256 quickSort(data, mhi, b, maxDepth) 257 b = mlo // i.e., quickSort(data, a, mlo) 258 } 259 } 260 if b-a > 1 { 261 // Do ShellSort pass with gap 6 262 // It could be written in this simplified form cause b-a <= 12 263 for i := a + 6; i < b; i++ { 264 if data.Less(i, i-6) { 265 data.Swap(i, i-6) 266 } 267 } 268 insertionSort(data, a, b) 269 } 270 } 271 272 // Sort sorts data. 273 // 274 // It makes one call to data.Len to determine n, and O(n*log(n)) calls to 275 // data.Less and data.Swap. The sort is not guaranteed to be stable. 276 // 277 // To sort slices without creating a type, see Slice. 278 func Sort(data Interface) { 279 n := data.Len() 280 if fs, ok := data.(*funcs); ok { 281 quickSort_func(fs.lessSwap, 0, n, maxDepth(n)) 282 } else { 283 quickSort(data, 0, n, maxDepth(n)) 284 } 285 } 286 287 // With sorts data given the provided length, swap, and less 288 // functions. 289 // The sort is not guaranteed to be stable. 290 func With(length int, swap func(i, j int), less func(i, j int) bool) { 291 quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length)) 292 } 293 294 // maxDepth returns a threshold at which quicksort should switch 295 // to heapsort. It returns 2*ceil(lg(n+1)). 296 func maxDepth(n int) int { 297 var depth int 298 for i := n; i > 0; i >>= 1 { 299 depth++ 300 } 301 return depth * 2 302 } 303 304 type reverse struct { 305 // This embedded Interface permits Reverse to use the methods of 306 // another Interface implementation. 307 Interface 308 } 309 310 // Less returns the opposite of the embedded implementation's Less method. 311 func (r reverse) Less(i, j int) bool { 312 return r.Interface.Less(j, i) 313 } 314 315 // Reverse returns the reverse order for data. 316 func Reverse(data Interface) Interface { 317 return &reverse{data} 318 } 319 320 // IsSorted reports whether data is sorted. 321 func IsSorted(data Interface) bool { 322 n := data.Len() 323 for i := n - 1; i > 0; i-- { 324 if data.Less(i, i-1) { 325 return false 326 } 327 } 328 return true 329 } 330 331 // Convenience types for common cases 332 333 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 334 type IntSlice []int 335 336 func (p IntSlice) Len() int { return len(p) } 337 func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } 338 func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 339 340 // Sort is a convenience method. 341 func (p IntSlice) Sort() { Sort(p) } 342 343 // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. 344 type Float64Slice []float64 345 346 func (p Float64Slice) Len() int { return len(p) } 347 func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } 348 func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 349 350 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 351 func isNaN(f float64) bool { 352 return f != f 353 } 354 355 // Sort is a convenience method. 356 func (p Float64Slice) Sort() { Sort(p) } 357 358 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 359 type StringSlice []string 360 361 func (p StringSlice) Len() int { return len(p) } 362 func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } 363 func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 364 365 // Sort is a convenience method. 366 func (p StringSlice) Sort() { Sort(p) } 367 368 // Convenience wrappers for common cases 369 370 // Ints sorts a slice of ints in increasing order. 371 func Ints(a []int) { Sort(IntSlice(a)) } 372 373 // Float64s sorts a slice of float64s in increasing order. 374 func Float64s(a []float64) { Sort(Float64Slice(a)) } 375 376 // Strings sorts a slice of strings in increasing order. 377 func Strings(a []string) { Sort(StringSlice(a)) } 378 379 // IntsAreSorted tests whether a slice of ints is sorted in increasing order. 380 func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } 381 382 // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. 383 func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } 384 385 // StringsAreSorted tests whether a slice of strings is sorted in increasing order. 386 func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } 387 388 // Notes on stable sorting: 389 // The used algorithms are simple and provable correct on all input and use 390 // only logarithmic additional stack space. They perform well if compared 391 // experimentally to other stable in-place sorting algorithms. 392 // 393 // Remarks on other algorithms evaluated: 394 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 395 // Not faster. 396 // - GCC's __rotate for block rotations: Not faster. 397 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 398 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 399 // The given algorithms are in-place, number of Swap and Assignments 400 // grow as n log n but the algorithm is not stable. 401 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 402 // V. Raman in Algorithmica (1996) 16, 115-160: 403 // This algorithm either needs additional 2n bits or works only if there 404 // are enough different elements available to encode some permutations 405 // which have to be undone later (so not stable on any input). 406 // - All the optimal in-place sorting/merging algorithms I found are either 407 // unstable or rely on enough different elements in each step to encode the 408 // performed block rearrangements. See also "In-Place Merging Algorithms", 409 // Denham Coates-Evely, Department of Computer Science, Kings College, 410 // January 2004 and the references in there. 411 // - Often "optimal" algorithms are optimal in the number of assignments 412 // but Interface has only Swap as operation. 413 414 // Stable sorts data while keeping the original order of equal elements. 415 // 416 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 417 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 418 func Stable(data Interface) { 419 if fs, ok := data.(*funcs); ok { 420 stable_func(fs.lessSwap, fs.length) 421 } else { 422 stable(data, data.Len()) 423 } 424 } 425 426 func stable(data Interface, n int) { 427 blockSize := 20 // must be > 0 428 a, b := 0, blockSize 429 for b <= n { 430 insertionSort(data, a, b) 431 a = b 432 b += blockSize 433 } 434 insertionSort(data, a, n) 435 436 for blockSize < n { 437 a, b = 0, 2*blockSize 438 for b <= n { 439 symMerge(data, a, a+blockSize, b) 440 a = b 441 b += 2 * blockSize 442 } 443 if m := a + blockSize; m < n { 444 symMerge(data, a, m, n) 445 } 446 blockSize *= 2 447 } 448 } 449 450 // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using 451 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 452 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 453 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 454 // Computer Science, pages 714-723. Springer, 2004. 455 // 456 // Let M = m-a and N = b-n. Wolog M < N. 457 // The recursion depth is bound by ceil(log(N+M)). 458 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 459 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 460 // 461 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 462 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 463 // in the paper carries through for Swap operations, especially as the block 464 // swapping rotate uses only O(M+N) Swaps. 465 // 466 // symMerge assumes non-degenerate arguments: a < m && m < b. 467 // Having the caller check this condition eliminates many leaf recursion calls, 468 // which improves performance. 469 func symMerge(data Interface, a, m, b int) { 470 // Avoid unnecessary recursions of symMerge 471 // by direct insertion of data[a] into data[m:b] 472 // if data[a:m] only contains one element. 473 if m-a == 1 { 474 // Use binary search to find the lowest index i 475 // such that data[i] >= data[a] for m <= i < b. 476 // Exit the search loop with i == b in case no such index exists. 477 i := m 478 j := b 479 for i < j { 480 h := i + (j-i)/2 481 if data.Less(h, a) { 482 i = h + 1 483 } else { 484 j = h 485 } 486 } 487 // Swap values until data[a] reaches the position before i. 488 for k := a; k < i-1; k++ { 489 data.Swap(k, k+1) 490 } 491 return 492 } 493 494 // Avoid unnecessary recursions of symMerge 495 // by direct insertion of data[m] into data[a:m] 496 // if data[m:b] only contains one element. 497 if b-m == 1 { 498 // Use binary search to find the lowest index i 499 // such that data[i] > data[m] for a <= i < m. 500 // Exit the search loop with i == m in case no such index exists. 501 i := a 502 j := m 503 for i < j { 504 h := i + (j-i)/2 505 if !data.Less(m, h) { 506 i = h + 1 507 } else { 508 j = h 509 } 510 } 511 // Swap values until data[m] reaches the position i. 512 for k := m; k > i; k-- { 513 data.Swap(k, k-1) 514 } 515 return 516 } 517 518 mid := a + (b-a)/2 519 n := mid + m 520 var start, r int 521 if m > mid { 522 start = n - b 523 r = mid 524 } else { 525 start = a 526 r = m 527 } 528 p := n - 1 529 530 for start < r { 531 c := start + (r-start)/2 532 if !data.Less(p-c, c) { 533 start = c + 1 534 } else { 535 r = c 536 } 537 } 538 539 end := n - start 540 if start < m && m < end { 541 rotate(data, start, m, end) 542 } 543 if a < start && start < mid { 544 symMerge(data, a, start, mid) 545 } 546 if mid < end && end < b { 547 symMerge(data, mid, end, b) 548 } 549 } 550 551 // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: 552 // Data of the form 'x u v y' is changed to 'x v u y'. 553 // Rotate performs at most b-a many calls to data.Swap. 554 // Rotate assumes non-degenerate arguments: a < m && m < b. 555 func rotate(data Interface, a, m, b int) { 556 i := m - a 557 j := b - m 558 559 for i != j { 560 if i > j { 561 swapRange(data, m-i, m, j) 562 i -= j 563 } else { 564 swapRange(data, m-i, m+j-i, i) 565 j -= i 566 } 567 } 568 // i == j 569 swapRange(data, m-i, m, i) 570 } 571 572 /* 573 Complexity of Stable Sorting 574 575 576 Complexity of block swapping rotation 577 578 Each Swap puts one new element into its correct, final position. 579 Elements which reach their final position are no longer moved. 580 Thus block swapping rotation needs |u|+|v| calls to Swaps. 581 This is best possible as each element might need a move. 582 583 Pay attention when comparing to other optimal algorithms which 584 typically count the number of assignments instead of swaps: 585 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 586 rotations uses O(u + v + gcd(u,v)) assignments which is 587 better than our O(3 * (u+v)) as gcd(u,v) <= u. 588 589 590 Stable sorting by SymMerge and BlockSwap rotations 591 592 SymMerg complexity for same size input M = N: 593 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 594 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 595 596 (The following argument does not fuzz over a missing -1 or 597 other stuff which does not impact the final result). 598 599 Let n = data.Len(). Assume n = 2^k. 600 601 Plain merge sort performs log(n) = k iterations. 602 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 603 604 Thus iteration i of merge sort performs: 605 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 606 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 607 608 In total k = log(n) iterations are performed; so in total: 609 Calls to Less O(log(n) * n) 610 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 611 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 612 613 614 Above results should generalize to arbitrary n = 2^k + p 615 and should not be influenced by the initial insertion sort phase: 616 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 617 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 618 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 619 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 620 = O(n * log(n)) 621 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 622 623 */