github.com/dorkamotorka/go/src@v0.0.0-20230614113921-187095f0e316/sort/gen_sort_variants.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 // +build ignore 7 8 // This program is run via "go generate" (via a directive in sort.go) 9 // to generate implementation variants of the underlying sorting algorithm. 10 // When passed the -generic flag it generates generic variants of sorting; 11 // otherwise it generates the non-generic variants used by the sort package. 12 13 package main 14 15 import ( 16 "bytes" 17 "flag" 18 "fmt" 19 "go/format" 20 "log" 21 "os" 22 "text/template" 23 ) 24 25 type Variant struct { 26 // Name is the variant name: should be unique among variants. 27 Name string 28 29 // Path is the file path into which the generator will emit the code for this 30 // variant. 31 Path string 32 33 // Package is the package this code will be emitted into. 34 Package string 35 36 // Imports is the imports needed for this package. 37 Imports string 38 39 // FuncSuffix is appended to all function names in this variant's code. All 40 // suffixes should be unique within a package. 41 FuncSuffix string 42 43 // DataType is the type of the data parameter of functions in this variant's 44 // code. 45 DataType string 46 47 // TypeParam is the optional type parameter for the function. 48 TypeParam string 49 50 // ExtraParam is an extra parameter to pass to the function. Should begin with 51 // ", " to separate from other params. 52 ExtraParam string 53 54 // ExtraArg is an extra argument to pass to calls between functions; typically 55 // it invokes ExtraParam. Should begin with ", " to separate from other args. 56 ExtraArg string 57 58 // Funcs is a map of functions used from within the template. The following 59 // functions are expected to exist: 60 // 61 // Less (name, i, j): 62 // emits a comparison expression that checks if the value `name` at 63 // index `i` is smaller than at index `j`. 64 // 65 // Swap (name, i, j): 66 // emits a statement that performs a data swap between elements `i` and 67 // `j` of the value `name`. 68 Funcs template.FuncMap 69 } 70 71 func main() { 72 genGeneric := flag.Bool("generic", false, "generate generic versions") 73 flag.Parse() 74 75 if *genGeneric { 76 generate(&Variant{ 77 Name: "generic_ordered", 78 Path: "zsortordered.go", 79 Package: "slices", 80 Imports: "import \"cmp\"\n", 81 FuncSuffix: "Ordered", 82 TypeParam: "[E cmp.Ordered]", 83 ExtraParam: "", 84 ExtraArg: "", 85 DataType: "[]E", 86 Funcs: template.FuncMap{ 87 "Less": func(name, i, j string) string { 88 return fmt.Sprintf("cmp.Less(%s[%s], %s[%s])", name, i, name, j) 89 }, 90 "Swap": func(name, i, j string) string { 91 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i) 92 }, 93 }, 94 }) 95 96 generate(&Variant{ 97 Name: "generic_func", 98 Path: "zsortanyfunc.go", 99 Package: "slices", 100 FuncSuffix: "CmpFunc", 101 TypeParam: "[E any]", 102 ExtraParam: ", cmp func(a, b E) int", 103 ExtraArg: ", cmp", 104 DataType: "[]E", 105 Funcs: template.FuncMap{ 106 "Less": func(name, i, j string) string { 107 return fmt.Sprintf("(cmp(%s[%s], %s[%s]) < 0)", name, i, name, j) 108 }, 109 "Swap": func(name, i, j string) string { 110 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i) 111 }, 112 }, 113 }) 114 } else { 115 generate(&Variant{ 116 Name: "interface", 117 Path: "zsortinterface.go", 118 Package: "sort", 119 Imports: "", 120 FuncSuffix: "", 121 TypeParam: "", 122 ExtraParam: "", 123 ExtraArg: "", 124 DataType: "Interface", 125 Funcs: template.FuncMap{ 126 "Less": func(name, i, j string) string { 127 return fmt.Sprintf("%s.Less(%s, %s)", name, i, j) 128 }, 129 "Swap": func(name, i, j string) string { 130 return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j) 131 }, 132 }, 133 }) 134 135 generate(&Variant{ 136 Name: "func", 137 Path: "zsortfunc.go", 138 Package: "sort", 139 Imports: "", 140 FuncSuffix: "_func", 141 TypeParam: "", 142 ExtraParam: "", 143 ExtraArg: "", 144 DataType: "lessSwap", 145 Funcs: template.FuncMap{ 146 "Less": func(name, i, j string) string { 147 return fmt.Sprintf("%s.Less(%s, %s)", name, i, j) 148 }, 149 "Swap": func(name, i, j string) string { 150 return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j) 151 }, 152 }, 153 }) 154 } 155 } 156 157 // generate generates the code for variant `v` into a file named by `v.Path`. 158 func generate(v *Variant) { 159 // Parse templateCode anew for each variant because Parse requires Funcs to be 160 // registered, and it helps type-check the funcs. 161 tmpl, err := template.New("gen").Funcs(v.Funcs).Parse(templateCode) 162 if err != nil { 163 log.Fatal("template Parse:", err) 164 } 165 166 var out bytes.Buffer 167 err = tmpl.Execute(&out, v) 168 if err != nil { 169 log.Fatal("template Execute:", err) 170 } 171 172 formatted, err := format.Source(out.Bytes()) 173 if err != nil { 174 log.Fatal("format:", err) 175 } 176 177 if err := os.WriteFile(v.Path, formatted, 0644); err != nil { 178 log.Fatal("WriteFile:", err) 179 } 180 } 181 182 var templateCode = `// Code generated by gen_sort_variants.go; DO NOT EDIT. 183 184 // Copyright 2022 The Go Authors. All rights reserved. 185 // Use of this source code is governed by a BSD-style 186 // license that can be found in the LICENSE file. 187 188 package {{.Package}} 189 190 {{.Imports}} 191 192 // insertionSort{{.FuncSuffix}} sorts data[a:b] using insertion sort. 193 func insertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { 194 for i := a + 1; i < b; i++ { 195 for j := i; j > a && {{Less "data" "j" "j-1"}}; j-- { 196 {{Swap "data" "j" "j-1"}} 197 } 198 } 199 } 200 201 // siftDown{{.FuncSuffix}} implements the heap property on data[lo:hi]. 202 // first is an offset into the array where the root of the heap lies. 203 func siftDown{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, lo, hi, first int {{.ExtraParam}}) { 204 root := lo 205 for { 206 child := 2*root + 1 207 if child >= hi { 208 break 209 } 210 if child+1 < hi && {{Less "data" "first+child" "first+child+1"}} { 211 child++ 212 } 213 if !{{Less "data" "first+root" "first+child"}} { 214 return 215 } 216 {{Swap "data" "first+root" "first+child"}} 217 root = child 218 } 219 } 220 221 func heapSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { 222 first := a 223 lo := 0 224 hi := b - a 225 226 // Build heap with greatest element at top. 227 for i := (hi - 1) / 2; i >= 0; i-- { 228 siftDown{{.FuncSuffix}}(data, i, hi, first {{.ExtraArg}}) 229 } 230 231 // Pop elements, largest first, into end of data. 232 for i := hi - 1; i >= 0; i-- { 233 {{Swap "data" "first" "first+i"}} 234 siftDown{{.FuncSuffix}}(data, lo, i, first {{.ExtraArg}}) 235 } 236 } 237 238 // pdqsort{{.FuncSuffix}} sorts data[a:b]. 239 // The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort. 240 // pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf 241 // C++ implementation: https://github.com/orlp/pdqsort 242 // Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/ 243 // limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort. 244 func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, limit int {{.ExtraParam}}) { 245 const maxInsertion = 12 246 247 var ( 248 wasBalanced = true // whether the last partitioning was reasonably balanced 249 wasPartitioned = true // whether the slice was already partitioned 250 ) 251 252 for { 253 length := b - a 254 255 if length <= maxInsertion { 256 insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 257 return 258 } 259 260 // Fall back to heapsort if too many bad choices were made. 261 if limit == 0 { 262 heapSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 263 return 264 } 265 266 // If the last partitioning was imbalanced, we need to breaking patterns. 267 if !wasBalanced { 268 breakPatterns{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 269 limit-- 270 } 271 272 pivot, hint := choosePivot{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 273 if hint == decreasingHint { 274 reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 275 // The chosen pivot was pivot-a elements after the start of the array. 276 // After reversing it is pivot-a elements before the end of the array. 277 // The idea came from Rust's implementation. 278 pivot = (b - 1) - (pivot - a) 279 hint = increasingHint 280 } 281 282 // The slice is likely already sorted. 283 if wasBalanced && wasPartitioned && hint == increasingHint { 284 if partialInsertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) { 285 return 286 } 287 } 288 289 // Probably the slice contains many duplicate elements, partition the slice into 290 // elements equal to and elements greater than the pivot. 291 if a > 0 && !{{Less "data" "a-1" "pivot"}} { 292 mid := partitionEqual{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}}) 293 a = mid 294 continue 295 } 296 297 mid, alreadyPartitioned := partition{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}}) 298 wasPartitioned = alreadyPartitioned 299 300 leftLen, rightLen := mid-a, b-mid 301 balanceThreshold := length / 8 302 if leftLen < rightLen { 303 wasBalanced = leftLen >= balanceThreshold 304 pdqsort{{.FuncSuffix}}(data, a, mid, limit {{.ExtraArg}}) 305 a = mid + 1 306 } else { 307 wasBalanced = rightLen >= balanceThreshold 308 pdqsort{{.FuncSuffix}}(data, mid+1, b, limit {{.ExtraArg}}) 309 b = mid 310 } 311 } 312 } 313 314 // partition{{.FuncSuffix}} does one quicksort partition. 315 // Let p = data[pivot] 316 // Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot. 317 // On return, data[newpivot] = p 318 func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int, alreadyPartitioned bool) { 319 {{Swap "data" "a" "pivot"}} 320 i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned 321 322 for i <= j && {{Less "data" "i" "a"}} { 323 i++ 324 } 325 for i <= j && !{{Less "data" "j" "a"}} { 326 j-- 327 } 328 if i > j { 329 {{Swap "data" "j" "a"}} 330 return j, true 331 } 332 {{Swap "data" "i" "j"}} 333 i++ 334 j-- 335 336 for { 337 for i <= j && {{Less "data" "i" "a"}} { 338 i++ 339 } 340 for i <= j && !{{Less "data" "j" "a"}} { 341 j-- 342 } 343 if i > j { 344 break 345 } 346 {{Swap "data" "i" "j"}} 347 i++ 348 j-- 349 } 350 {{Swap "data" "j" "a"}} 351 return j, false 352 } 353 354 // partitionEqual{{.FuncSuffix}} partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot]. 355 // It assumed that data[a:b] does not contain elements smaller than the data[pivot]. 356 func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int) { 357 {{Swap "data" "a" "pivot"}} 358 i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned 359 360 for { 361 for i <= j && !{{Less "data" "a" "i"}} { 362 i++ 363 } 364 for i <= j && {{Less "data" "a" "j"}} { 365 j-- 366 } 367 if i > j { 368 break 369 } 370 {{Swap "data" "i" "j"}} 371 i++ 372 j-- 373 } 374 return i 375 } 376 377 // partialInsertionSort{{.FuncSuffix}} partially sorts a slice, returns true if the slice is sorted at the end. 378 func partialInsertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) bool { 379 const ( 380 maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted 381 shortestShifting = 50 // don't shift any elements on short arrays 382 ) 383 i := a + 1 384 for j := 0; j < maxSteps; j++ { 385 for i < b && !{{Less "data" "i" "i-1"}} { 386 i++ 387 } 388 389 if i == b { 390 return true 391 } 392 393 if b-a < shortestShifting { 394 return false 395 } 396 397 {{Swap "data" "i" "i-1"}} 398 399 // Shift the smaller one to the left. 400 if i-a >= 2 { 401 for j := i - 1; j >= 1; j-- { 402 if !{{Less "data" "j" "j-1"}} { 403 break 404 } 405 {{Swap "data" "j" "j-1"}} 406 } 407 } 408 // Shift the greater one to the right. 409 if b-i >= 2 { 410 for j := i + 1; j < b; j++ { 411 if !{{Less "data" "j" "j-1"}} { 412 break 413 } 414 {{Swap "data" "j" "j-1"}} 415 } 416 } 417 } 418 return false 419 } 420 421 // breakPatterns{{.FuncSuffix}} scatters some elements around in an attempt to break some patterns 422 // that might cause imbalanced partitions in quicksort. 423 func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { 424 length := b - a 425 if length >= 8 { 426 random := xorshift(length) 427 modulus := nextPowerOfTwo(length) 428 429 for idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++ { 430 other := int(uint(random.Next()) & (modulus - 1)) 431 if other >= length { 432 other -= length 433 } 434 {{Swap "data" "idx" "a+other"}} 435 } 436 } 437 } 438 439 // choosePivot{{.FuncSuffix}} chooses a pivot in data[a:b]. 440 // 441 // [0,8): chooses a static pivot. 442 // [8,shortestNinther): uses the simple median-of-three method. 443 // [shortestNinther,∞): uses the Tukey ninther method. 444 func choosePivot{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) (pivot int, hint sortedHint) { 445 const ( 446 shortestNinther = 50 447 maxSwaps = 4 * 3 448 ) 449 450 l := b - a 451 452 var ( 453 swaps int 454 i = a + l/4*1 455 j = a + l/4*2 456 k = a + l/4*3 457 ) 458 459 if l >= 8 { 460 if l >= shortestNinther { 461 // Tukey ninther method, the idea came from Rust's implementation. 462 i = medianAdjacent{{.FuncSuffix}}(data, i, &swaps {{.ExtraArg}}) 463 j = medianAdjacent{{.FuncSuffix}}(data, j, &swaps {{.ExtraArg}}) 464 k = medianAdjacent{{.FuncSuffix}}(data, k, &swaps {{.ExtraArg}}) 465 } 466 // Find the median among i, j, k and stores it into j. 467 j = median{{.FuncSuffix}}(data, i, j, k, &swaps {{.ExtraArg}}) 468 } 469 470 switch swaps { 471 case 0: 472 return j, increasingHint 473 case maxSwaps: 474 return j, decreasingHint 475 default: 476 return j, unknownHint 477 } 478 } 479 480 // order2{{.FuncSuffix}} returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a. 481 func order2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) { 482 if {{Less "data" "b" "a"}} { 483 *swaps++ 484 return b, a 485 } 486 return a, b 487 } 488 489 // median{{.FuncSuffix}} returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c. 490 func median{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, c int, swaps *int {{.ExtraParam}}) int { 491 a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}}) 492 b, c = order2{{.FuncSuffix}}(data, b, c, swaps {{.ExtraArg}}) 493 a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}}) 494 return b 495 } 496 497 // medianAdjacent{{.FuncSuffix}} finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a. 498 func medianAdjacent{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a int, swaps *int {{.ExtraParam}}) int { 499 return median{{.FuncSuffix}}(data, a-1, a, a+1, swaps {{.ExtraArg}}) 500 } 501 502 func reverseRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) { 503 i := a 504 j := b - 1 505 for i < j { 506 {{Swap "data" "i" "j"}} 507 i++ 508 j-- 509 } 510 } 511 512 func swapRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, n int {{.ExtraParam}}) { 513 for i := 0; i < n; i++ { 514 {{Swap "data" "a+i" "b+i"}} 515 } 516 } 517 518 func stable{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, n int {{.ExtraParam}}) { 519 blockSize := 20 // must be > 0 520 a, b := 0, blockSize 521 for b <= n { 522 insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) 523 a = b 524 b += blockSize 525 } 526 insertionSort{{.FuncSuffix}}(data, a, n {{.ExtraArg}}) 527 528 for blockSize < n { 529 a, b = 0, 2*blockSize 530 for b <= n { 531 symMerge{{.FuncSuffix}}(data, a, a+blockSize, b {{.ExtraArg}}) 532 a = b 533 b += 2 * blockSize 534 } 535 if m := a + blockSize; m < n { 536 symMerge{{.FuncSuffix}}(data, a, m, n {{.ExtraArg}}) 537 } 538 blockSize *= 2 539 } 540 } 541 542 // symMerge{{.FuncSuffix}} merges the two sorted subsequences data[a:m] and data[m:b] using 543 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum 544 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz 545 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in 546 // Computer Science, pages 714-723. Springer, 2004. 547 // 548 // Let M = m-a and N = b-n. Wolog M < N. 549 // The recursion depth is bound by ceil(log(N+M)). 550 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. 551 // The algorithm needs O((M+N)*log(M)) calls to data.Swap. 552 // 553 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a 554 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation 555 // in the paper carries through for Swap operations, especially as the block 556 // swapping rotate uses only O(M+N) Swaps. 557 // 558 // symMerge assumes non-degenerate arguments: a < m && m < b. 559 // Having the caller check this condition eliminates many leaf recursion calls, 560 // which improves performance. 561 func symMerge{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) { 562 // Avoid unnecessary recursions of symMerge 563 // by direct insertion of data[a] into data[m:b] 564 // if data[a:m] only contains one element. 565 if m-a == 1 { 566 // Use binary search to find the lowest index i 567 // such that data[i] >= data[a] for m <= i < b. 568 // Exit the search loop with i == b in case no such index exists. 569 i := m 570 j := b 571 for i < j { 572 h := int(uint(i+j) >> 1) 573 if {{Less "data" "h" "a"}} { 574 i = h + 1 575 } else { 576 j = h 577 } 578 } 579 // Swap values until data[a] reaches the position before i. 580 for k := a; k < i-1; k++ { 581 {{Swap "data" "k" "k+1"}} 582 } 583 return 584 } 585 586 // Avoid unnecessary recursions of symMerge 587 // by direct insertion of data[m] into data[a:m] 588 // if data[m:b] only contains one element. 589 if b-m == 1 { 590 // Use binary search to find the lowest index i 591 // such that data[i] > data[m] for a <= i < m. 592 // Exit the search loop with i == m in case no such index exists. 593 i := a 594 j := m 595 for i < j { 596 h := int(uint(i+j) >> 1) 597 if !{{Less "data" "m" "h"}} { 598 i = h + 1 599 } else { 600 j = h 601 } 602 } 603 // Swap values until data[m] reaches the position i. 604 for k := m; k > i; k-- { 605 {{Swap "data" "k" "k-1"}} 606 } 607 return 608 } 609 610 mid := int(uint(a+b) >> 1) 611 n := mid + m 612 var start, r int 613 if m > mid { 614 start = n - b 615 r = mid 616 } else { 617 start = a 618 r = m 619 } 620 p := n - 1 621 622 for start < r { 623 c := int(uint(start+r) >> 1) 624 if !{{Less "data" "p-c" "c"}} { 625 start = c + 1 626 } else { 627 r = c 628 } 629 } 630 631 end := n - start 632 if start < m && m < end { 633 rotate{{.FuncSuffix}}(data, start, m, end {{.ExtraArg}}) 634 } 635 if a < start && start < mid { 636 symMerge{{.FuncSuffix}}(data, a, start, mid {{.ExtraArg}}) 637 } 638 if mid < end && end < b { 639 symMerge{{.FuncSuffix}}(data, mid, end, b {{.ExtraArg}}) 640 } 641 } 642 643 // rotate{{.FuncSuffix}} rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data: 644 // Data of the form 'x u v y' is changed to 'x v u y'. 645 // rotate performs at most b-a many calls to data.Swap, 646 // and it assumes non-degenerate arguments: a < m && m < b. 647 func rotate{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) { 648 i := m - a 649 j := b - m 650 651 for i != j { 652 if i > j { 653 swapRange{{.FuncSuffix}}(data, m-i, m, j {{.ExtraArg}}) 654 i -= j 655 } else { 656 swapRange{{.FuncSuffix}}(data, m-i, m+j-i, i {{.ExtraArg}}) 657 j -= i 658 } 659 } 660 // i == j 661 swapRange{{.FuncSuffix}}(data, m-i, m, i {{.ExtraArg}}) 662 } 663 `