github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/sort.go (about) 1 package util 2 3 import ( 4 "fmt" 5 "sort" 6 ) 7 8 // util/Sorter.java 9 10 const SORTER_THRESHOLD = 20 11 12 // Base class for sorting algorithms implementations. 13 type Sorter struct { 14 sort.Interface 15 } 16 17 func newSorter(arr sort.Interface) *Sorter { 18 return &Sorter{ 19 Interface: arr, 20 } 21 } 22 23 func (sorter *Sorter) checkRange(from, to int) { 24 assert2(from <= to, fmt.Sprintf("'to' must be >= 'from', got from=%v, and to=%v", from, to)) 25 } 26 27 func assert2(ok bool, msg string, args ...interface{}) { 28 if !ok { 29 panic(fmt.Sprintf(msg, args...)) 30 } 31 } 32 33 func (s *Sorter) mergeInPlace(from, mid, to int) { 34 if from == mid || mid == to || !s.Less(mid, mid-1) { 35 return 36 } 37 if to-from == 2 { 38 s.Swap(mid-1, mid) 39 return 40 } 41 for !s.Less(mid, from) { 42 from++ 43 } 44 for !s.Less(to-1, mid-1) { 45 to-- 46 } 47 var first_cut, second_cut int 48 var len11, len22 int 49 if mid-from > to-mid { 50 len11 = int(uint(mid-from) >> 1) 51 first_cut = from + len11 52 second_cut = s.lower(mid, to, first_cut) 53 len22 = second_cut - mid 54 } else { 55 len22 = int(uint(to-mid) >> 1) 56 second_cut = mid + len22 57 first_cut = s.upper(from, mid, second_cut) 58 // len11 = first_cut - from 59 } 60 s.rotate(first_cut, mid, second_cut) 61 new_mid := first_cut + len22 62 s.mergeInPlace(from, first_cut, new_mid) 63 s.mergeInPlace(new_mid, second_cut, to) 64 } 65 66 func (s *Sorter) lower(from, to, val int) int { 67 size := to - from 68 for size > 0 { 69 half := int(uint(size) >> 1) 70 mid := from + half 71 if s.Less(mid, val) { 72 from = mid + 1 73 size = size - half - 1 74 } else { 75 size = half 76 } 77 } 78 return from 79 } 80 81 func (s *Sorter) upper(from, to, val int) int { 82 size := to - from 83 for size > 0 { 84 half := int(uint(size) >> 1) 85 mid := from + half 86 if s.Less(val, mid) { 87 size = half 88 } else { 89 from = mid + 1 90 size = size - half - 1 91 } 92 } 93 return from 94 } 95 96 func (s *Sorter) rotate(lo, mid, hi int) { 97 assert(lo <= mid && mid <= hi) 98 if lo == mid || mid == hi { 99 return 100 } 101 s.doRotate(lo, mid, hi) 102 } 103 104 func (s *Sorter) doRotate(lo, mid, hi int) { 105 if mid-lo == hi-mid { 106 // happens rarely but saves n/2 swaps 107 for mid < hi { 108 s.Swap(lo, mid) 109 lo++ 110 mid++ 111 } 112 } else { 113 s.reverse(lo, mid) 114 s.reverse(mid, hi) 115 s.reverse(lo, hi) 116 } 117 } 118 119 func (sorter *Sorter) reverse(from, to int) { 120 for to--; from < to; from, to = from+1, to-1 { 121 sorter.Swap(from, to) 122 } 123 } 124 125 func (sorter *Sorter) insertionSort(from, to int) { 126 for i := from + 1; i < to; i++ { 127 for j := i; j > from; j-- { 128 if sorter.Less(j, j-1) { 129 sorter.Swap(j-1, j) 130 } else { 131 break 132 } 133 } 134 } 135 } 136 137 func (sorter *Sorter) binarySort(from, to, i int) { 138 // log.Printf("Binary sort [%v,%v] at %v", from, to, i) 139 for ; i < to; i++ { 140 l, h := from, i-1 141 for l <= h { 142 mid := int(uint(l+h) >> 1) 143 if sorter.Less(i, mid) { 144 h = mid - 1 145 } else { 146 l = mid + 1 147 } 148 } 149 switch i - l { 150 case 2: 151 sorter.Swap(l+1, l+2) 152 sorter.Swap(l, l+1) 153 case 1: 154 sorter.Swap(l, l+1) 155 case 0: 156 default: 157 for j := i; j > l; j-- { 158 sorter.Swap(j-1, j) 159 } 160 } 161 } 162 } 163 164 func (s *Sorter) heapSort(from, to int) { 165 if to-from <= 1 { 166 return 167 } 168 s.heapify(from, to) 169 for end := to - 1; end > from; end-- { 170 s.Swap(from, end) 171 s.siftDown(from, from, end) 172 } 173 // TODO remove this 174 // for i := from; i < to-1; i++ { 175 // assert(!s.Less(i+1, i)) 176 // } 177 } 178 179 func (s *Sorter) heapify(from, to int) { 180 for i := s.heapParent(from, to-1); i >= from; i-- { 181 s.siftDown(i, from, to) 182 } 183 } 184 185 func (s *Sorter) siftDown(i, from, to int) { 186 for leftChild := s.heapChild(from, i); leftChild < to; leftChild = s.heapChild(from, i) { 187 rightChild := leftChild + 1 188 if s.Less(i, leftChild) { 189 if rightChild < to && s.Less(leftChild, rightChild) { 190 s.Swap(i, rightChild) 191 i = rightChild 192 } else { 193 s.Swap(i, leftChild) 194 i = leftChild 195 } 196 } else if rightChild < to && s.Less(i, rightChild) { 197 s.Swap(i, rightChild) 198 i = rightChild 199 } else { 200 break 201 } 202 } 203 } 204 205 func (s *Sorter) heapParent(from, i int) int { 206 return int(uint(i-1-from)>>1) + from 207 } 208 209 func (s *Sorter) heapChild(from, i int) int { 210 return ((i - from) << 1) + 1 + from 211 } 212 213 // util/TimSorter.java 214 215 const ( 216 MINRUN = 32 217 RUN_THRESHOLD = 64 218 STACKSIZE = 40 // depends on MINRUN 219 MIN_GALLOP = 7 220 ) 221 222 /* 223 Sorter implementation based on [TimSorter](http://svn.python.org/projects/python/trunk/Objects/listsort.txt) algorithm. 224 225 This implementation is especially good at sorting partially-sorted 226 arrays and sorts small arrays with binary sort. 227 228 NOTE: There are a few differences with the original implementation: 229 230 1. The extra amount of memory to perform merges is configurable. This 231 allows small merges to be very fast while large merges will be 232 performed in-place (slightly slower). You can make sure that the fast 233 merge routine will always be used by having maxTempSlots equal to 234 half of the length of the slice of data to sort. 235 236 2. Only the fast merge routine can gallop (the one that doesn't 237 in-place) and it only gallops on the longest slice. 238 */ 239 type TimSorter struct { 240 *Sorter 241 maxTempSlots int 242 minRun int 243 to int 244 stackSize int 245 runEnds []int 246 } 247 248 // Create a new TimSorter 249 func newTimSorter(arr sort.Interface, maxTempSlots int) *TimSorter { 250 return &TimSorter{ 251 Sorter: newSorter(arr), 252 runEnds: make([]int, 1+STACKSIZE), 253 maxTempSlots: maxTempSlots, 254 } 255 } 256 257 // Minimum run length for an array of given length. 258 func minRun(length int) int { 259 assert2(length >= MINRUN, fmt.Sprintf("length=%v", length)) 260 n := length 261 r := 0 262 for n >= 64 { 263 r = (r | (n & 1)) 264 n = int(uint(n) >> 1) 265 } 266 minRun := n + r 267 assert(minRun >= MINRUN && minRun <= RUN_THRESHOLD) 268 return minRun 269 } 270 271 func (sorter *TimSorter) runEnd(i int) int { 272 return sorter.runEnds[sorter.stackSize-i] 273 } 274 275 func (sorter *TimSorter) pushRunLen(length int) { 276 sorter.runEnds[sorter.stackSize+1] = sorter.runEnds[sorter.stackSize] + length 277 sorter.stackSize++ 278 } 279 280 // Compute the length of the next run, make the run sorted and return its length 281 func (sorter *TimSorter) nextRun() int { 282 runBase := sorter.runEnd(0) 283 assert2(runBase < sorter.to, fmt.Sprintf("runBase=%v to=%v", runBase, sorter.to)) 284 if runBase == sorter.to-1 { 285 return 1 286 } 287 o := runBase + 2 288 if sorter.Less(runBase+1, runBase) { 289 // run must be strictly descending 290 for o < sorter.to && sorter.Less(o, o-1) { 291 o++ 292 } 293 sorter.reverse(runBase, o) 294 } else { 295 // run must be non-descending 296 for o < sorter.to && !sorter.Less(o, o-1) { 297 o++ 298 } 299 } 300 runHi := runBase + sorter.minRun 301 if sorter.to < runHi { 302 runHi = sorter.to 303 } 304 if o > runHi { 305 runHi = o 306 } 307 sorter.binarySort(runBase, runHi, o) 308 for i := runBase; i < runHi-1; i++ { 309 assert(!sorter.Less(i+1, i)) 310 } 311 return runHi - runBase 312 } 313 314 func assert(ok bool) { 315 if !ok { 316 panic("assert fail") 317 } 318 } 319 320 func (sorter *TimSorter) ensureInvariants() { 321 for sorter.stackSize > 1 { 322 panic("not implemented yet") 323 } 324 } 325 326 func (sorter *TimSorter) exhaustStack() { 327 for sorter.stackSize > 1 { 328 panic("not implemented yet") 329 } 330 } 331 332 func (sorter *TimSorter) reset(from, to int) { 333 sorter.stackSize = 0 334 for i, _ := range sorter.runEnds { 335 sorter.runEnds[i] = 0 336 } 337 sorter.runEnds[0] = from 338 sorter.to = to 339 if length := to - from; length <= RUN_THRESHOLD { 340 sorter.minRun = length 341 } else { 342 sorter.minRun = minRun(length) 343 } 344 } 345 346 func (sorter *TimSorter) sort(from, to int) { 347 sorter.checkRange(from, to) 348 if to-from <= 1 { 349 return 350 } 351 sorter.reset(from, to) 352 for { 353 sorter.ensureInvariants() 354 sorter.pushRunLen(sorter.nextRun()) 355 if sorter.runEnd(0) >= to { 356 break 357 } 358 } 359 sorter.exhaustStack() 360 assert(sorter.runEnd(0) == to) 361 } 362 363 // util/IntroSorter.java 364 365 type IntroSorterSPI interface { 366 // Save the value at slot i so that it can later be used as a pivot. 367 SetPivot(int) 368 // Compare the pivot with the slot at j, similarly to Less(int,int). 369 PivotLess(int) bool 370 } 371 372 /* 373 Sorter implementation based on a variant of the quicksort algorithm 374 called introsort: when the recursion level exceeds the log of the 375 length of the array to sort, it falls back to heapsort. This prevents 376 quicksort from running into its worst-case quadratic runtime. Small 377 arrays are sorted with insertion sort. 378 */ 379 type IntroSorter struct { 380 spi IntroSorterSPI 381 *Sorter 382 } 383 384 func NewIntroSorter(spi IntroSorterSPI, arr sort.Interface) *IntroSorter { 385 return &IntroSorter{spi, newSorter(arr)} 386 } 387 388 // 32 - leadingZero(n-1) 389 func ceilLog2(n int) int { 390 assert(n >= 1) 391 if n == 1 { 392 return 0 393 } 394 n-- 395 ans := 0 396 for n > 0 { 397 n >>= 1 398 ans++ 399 } 400 return ans 401 } 402 403 func (s *IntroSorter) Sort(from, to int) { 404 s.checkRange(from, to) 405 s.quicksort(from, to, ceilLog2(to-from)) 406 } 407 408 func (s *IntroSorter) quicksort(from, to, maxDepth int) { 409 if to-from < SORTER_THRESHOLD { 410 s.insertionSort(from, to) 411 // for i := from; i < to-1; i++ { 412 // assert(!s.Less(i+1, i)) 413 // } 414 return 415 } 416 if maxDepth--; maxDepth < 0 { 417 s.heapSort(from, to) 418 // for i := from; i < to-1; i++ { 419 // assert(!s.Less(i+1, i)) 420 // } 421 return 422 } 423 424 mid := (from + to) >> 1 425 426 if s.Less(mid, from) { 427 s.Swap(from, mid) 428 } 429 430 if s.Less(to-1, mid) { 431 s.Swap(mid, to-1) 432 if s.Less(mid, from) { 433 s.Swap(from, mid) 434 } 435 } 436 437 left := from + 1 438 right := to - 2 439 440 s.spi.SetPivot(mid) 441 for { 442 for s.spi.PivotLess(right) { 443 right-- 444 } 445 446 for left < right && !s.spi.PivotLess(left) { 447 left++ 448 } 449 450 if left < right { 451 s.Swap(left, right) 452 right-- 453 } else { 454 break 455 } 456 } 457 458 s.quicksort(from, left+1, maxDepth) 459 s.quicksort(left+1, to, maxDepth) 460 // for i := from; i < to-1; i++ { 461 // assert(!s.Less(i+1, i)) 462 // } 463 } 464 465 // util/InPlaceMergeSorter.java 466 467 /* 468 Sorter implementation absed on the merge-sort algorithm that merges 469 in place (no extra memory will be allocated). Small arrays are sorter 470 with insertion sort. 471 */ 472 type InPlaceMergeSorter struct { 473 *Sorter 474 } 475 476 func NewInPlaceMergeSorter(impl sort.Interface) *InPlaceMergeSorter { 477 return &InPlaceMergeSorter{ 478 Sorter: newSorter(impl), 479 } 480 } 481 482 func (s *InPlaceMergeSorter) Sort(from, to int) { 483 s.checkRange(from, to) 484 s.mergeSort(from, to) 485 } 486 487 func (s *InPlaceMergeSorter) mergeSort(from, to int) { 488 if to-from < SORTER_THRESHOLD { 489 s.insertionSort(from, to) 490 } else { 491 mid := int((uint(from) + uint(to)) >> 1) 492 s.mergeSort(from, mid) 493 s.mergeSort(mid, to) 494 s.mergeInPlace(from, mid, to) 495 } 496 }