github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/sort.go (about)

     1  package util
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  )
     7  
     8  // util/Sorter.java
     9  
    10  const SORTER_THRESHOLD = 20
    11  
    12  // Base class for sorting algorithms implementations.
    13  type Sorter struct {
    14  	sort.Interface
    15  }
    16  
    17  func newSorter(arr sort.Interface) *Sorter {
    18  	return &Sorter{
    19  		Interface: arr,
    20  	}
    21  }
    22  
    23  func (sorter *Sorter) checkRange(from, to int) {
    24  	assert2(from <= to, fmt.Sprintf("'to' must be >= 'from', got from=%v, and to=%v", from, to))
    25  }
    26  
    27  func assert2(ok bool, msg string, args ...interface{}) {
    28  	if !ok {
    29  		panic(fmt.Sprintf(msg, args...))
    30  	}
    31  }
    32  
    33  func (s *Sorter) mergeInPlace(from, mid, to int) {
    34  	if from == mid || mid == to || !s.Less(mid, mid-1) {
    35  		return
    36  	}
    37  	if to-from == 2 {
    38  		s.Swap(mid-1, mid)
    39  		return
    40  	}
    41  	for !s.Less(mid, from) {
    42  		from++
    43  	}
    44  	for !s.Less(to-1, mid-1) {
    45  		to--
    46  	}
    47  	var first_cut, second_cut int
    48  	var len11, len22 int
    49  	if mid-from > to-mid {
    50  		len11 = int(uint(mid-from) >> 1)
    51  		first_cut = from + len11
    52  		second_cut = s.lower(mid, to, first_cut)
    53  		len22 = second_cut - mid
    54  	} else {
    55  		len22 = int(uint(to-mid) >> 1)
    56  		second_cut = mid + len22
    57  		first_cut = s.upper(from, mid, second_cut)
    58  		// len11 = first_cut - from
    59  	}
    60  	s.rotate(first_cut, mid, second_cut)
    61  	new_mid := first_cut + len22
    62  	s.mergeInPlace(from, first_cut, new_mid)
    63  	s.mergeInPlace(new_mid, second_cut, to)
    64  }
    65  
    66  func (s *Sorter) lower(from, to, val int) int {
    67  	size := to - from
    68  	for size > 0 {
    69  		half := int(uint(size) >> 1)
    70  		mid := from + half
    71  		if s.Less(mid, val) {
    72  			from = mid + 1
    73  			size = size - half - 1
    74  		} else {
    75  			size = half
    76  		}
    77  	}
    78  	return from
    79  }
    80  
    81  func (s *Sorter) upper(from, to, val int) int {
    82  	size := to - from
    83  	for size > 0 {
    84  		half := int(uint(size) >> 1)
    85  		mid := from + half
    86  		if s.Less(val, mid) {
    87  			size = half
    88  		} else {
    89  			from = mid + 1
    90  			size = size - half - 1
    91  		}
    92  	}
    93  	return from
    94  }
    95  
    96  func (s *Sorter) rotate(lo, mid, hi int) {
    97  	assert(lo <= mid && mid <= hi)
    98  	if lo == mid || mid == hi {
    99  		return
   100  	}
   101  	s.doRotate(lo, mid, hi)
   102  }
   103  
   104  func (s *Sorter) doRotate(lo, mid, hi int) {
   105  	if mid-lo == hi-mid {
   106  		// happens rarely but saves n/2 swaps
   107  		for mid < hi {
   108  			s.Swap(lo, mid)
   109  			lo++
   110  			mid++
   111  		}
   112  	} else {
   113  		s.reverse(lo, mid)
   114  		s.reverse(mid, hi)
   115  		s.reverse(lo, hi)
   116  	}
   117  }
   118  
   119  func (sorter *Sorter) reverse(from, to int) {
   120  	for to--; from < to; from, to = from+1, to-1 {
   121  		sorter.Swap(from, to)
   122  	}
   123  }
   124  
   125  func (sorter *Sorter) insertionSort(from, to int) {
   126  	for i := from + 1; i < to; i++ {
   127  		for j := i; j > from; j-- {
   128  			if sorter.Less(j, j-1) {
   129  				sorter.Swap(j-1, j)
   130  			} else {
   131  				break
   132  			}
   133  		}
   134  	}
   135  }
   136  
   137  func (sorter *Sorter) binarySort(from, to, i int) {
   138  	// log.Printf("Binary sort [%v,%v] at %v", from, to, i)
   139  	for ; i < to; i++ {
   140  		l, h := from, i-1
   141  		for l <= h {
   142  			mid := int(uint(l+h) >> 1)
   143  			if sorter.Less(i, mid) {
   144  				h = mid - 1
   145  			} else {
   146  				l = mid + 1
   147  			}
   148  		}
   149  		switch i - l {
   150  		case 2:
   151  			sorter.Swap(l+1, l+2)
   152  			sorter.Swap(l, l+1)
   153  		case 1:
   154  			sorter.Swap(l, l+1)
   155  		case 0:
   156  		default:
   157  			for j := i; j > l; j-- {
   158  				sorter.Swap(j-1, j)
   159  			}
   160  		}
   161  	}
   162  }
   163  
   164  func (s *Sorter) heapSort(from, to int) {
   165  	if to-from <= 1 {
   166  		return
   167  	}
   168  	s.heapify(from, to)
   169  	for end := to - 1; end > from; end-- {
   170  		s.Swap(from, end)
   171  		s.siftDown(from, from, end)
   172  	}
   173  	// TODO remove this
   174  	// for i := from; i < to-1; i++ {
   175  	// 	assert(!s.Less(i+1, i))
   176  	// }
   177  }
   178  
   179  func (s *Sorter) heapify(from, to int) {
   180  	for i := s.heapParent(from, to-1); i >= from; i-- {
   181  		s.siftDown(i, from, to)
   182  	}
   183  }
   184  
   185  func (s *Sorter) siftDown(i, from, to int) {
   186  	for leftChild := s.heapChild(from, i); leftChild < to; leftChild = s.heapChild(from, i) {
   187  		rightChild := leftChild + 1
   188  		if s.Less(i, leftChild) {
   189  			if rightChild < to && s.Less(leftChild, rightChild) {
   190  				s.Swap(i, rightChild)
   191  				i = rightChild
   192  			} else {
   193  				s.Swap(i, leftChild)
   194  				i = leftChild
   195  			}
   196  		} else if rightChild < to && s.Less(i, rightChild) {
   197  			s.Swap(i, rightChild)
   198  			i = rightChild
   199  		} else {
   200  			break
   201  		}
   202  	}
   203  }
   204  
   205  func (s *Sorter) heapParent(from, i int) int {
   206  	return int(uint(i-1-from)>>1) + from
   207  }
   208  
   209  func (s *Sorter) heapChild(from, i int) int {
   210  	return ((i - from) << 1) + 1 + from
   211  }
   212  
   213  // util/TimSorter.java
   214  
   215  const (
   216  	MINRUN        = 32
   217  	RUN_THRESHOLD = 64
   218  	STACKSIZE     = 40 // depends on MINRUN
   219  	MIN_GALLOP    = 7
   220  )
   221  
   222  /*
   223  Sorter implementation based on [TimSorter](http://svn.python.org/projects/python/trunk/Objects/listsort.txt) algorithm.
   224  
   225  This implementation is especially good at sorting partially-sorted
   226  arrays and sorts small arrays with binary sort.
   227  
   228  NOTE: There are a few differences with the original implementation:
   229  
   230  1. The extra amount of memory to perform merges is configurable. This
   231  allows small merges to be very fast while large merges will be
   232  performed in-place (slightly slower). You can make sure that the fast
   233  merge routine will always be used by having maxTempSlots equal to
   234  half of the length of the slice of data to sort.
   235  
   236  2. Only the fast merge routine can gallop (the one that doesn't
   237  in-place) and it only gallops on the longest slice.
   238  */
   239  type TimSorter struct {
   240  	*Sorter
   241  	maxTempSlots int
   242  	minRun       int
   243  	to           int
   244  	stackSize    int
   245  	runEnds      []int
   246  }
   247  
   248  // Create a new TimSorter
   249  func newTimSorter(arr sort.Interface, maxTempSlots int) *TimSorter {
   250  	return &TimSorter{
   251  		Sorter:       newSorter(arr),
   252  		runEnds:      make([]int, 1+STACKSIZE),
   253  		maxTempSlots: maxTempSlots,
   254  	}
   255  }
   256  
   257  // Minimum run length for an array of given length.
   258  func minRun(length int) int {
   259  	assert2(length >= MINRUN, fmt.Sprintf("length=%v", length))
   260  	n := length
   261  	r := 0
   262  	for n >= 64 {
   263  		r = (r | (n & 1))
   264  		n = int(uint(n) >> 1)
   265  	}
   266  	minRun := n + r
   267  	assert(minRun >= MINRUN && minRun <= RUN_THRESHOLD)
   268  	return minRun
   269  }
   270  
   271  func (sorter *TimSorter) runEnd(i int) int {
   272  	return sorter.runEnds[sorter.stackSize-i]
   273  }
   274  
   275  func (sorter *TimSorter) pushRunLen(length int) {
   276  	sorter.runEnds[sorter.stackSize+1] = sorter.runEnds[sorter.stackSize] + length
   277  	sorter.stackSize++
   278  }
   279  
   280  // Compute the length of the next run, make the run sorted and return its length
   281  func (sorter *TimSorter) nextRun() int {
   282  	runBase := sorter.runEnd(0)
   283  	assert2(runBase < sorter.to, fmt.Sprintf("runBase=%v to=%v", runBase, sorter.to))
   284  	if runBase == sorter.to-1 {
   285  		return 1
   286  	}
   287  	o := runBase + 2
   288  	if sorter.Less(runBase+1, runBase) {
   289  		// run must be strictly descending
   290  		for o < sorter.to && sorter.Less(o, o-1) {
   291  			o++
   292  		}
   293  		sorter.reverse(runBase, o)
   294  	} else {
   295  		// run must be non-descending
   296  		for o < sorter.to && !sorter.Less(o, o-1) {
   297  			o++
   298  		}
   299  	}
   300  	runHi := runBase + sorter.minRun
   301  	if sorter.to < runHi {
   302  		runHi = sorter.to
   303  	}
   304  	if o > runHi {
   305  		runHi = o
   306  	}
   307  	sorter.binarySort(runBase, runHi, o)
   308  	for i := runBase; i < runHi-1; i++ {
   309  		assert(!sorter.Less(i+1, i))
   310  	}
   311  	return runHi - runBase
   312  }
   313  
   314  func assert(ok bool) {
   315  	if !ok {
   316  		panic("assert fail")
   317  	}
   318  }
   319  
   320  func (sorter *TimSorter) ensureInvariants() {
   321  	for sorter.stackSize > 1 {
   322  		panic("not implemented yet")
   323  	}
   324  }
   325  
   326  func (sorter *TimSorter) exhaustStack() {
   327  	for sorter.stackSize > 1 {
   328  		panic("not implemented yet")
   329  	}
   330  }
   331  
   332  func (sorter *TimSorter) reset(from, to int) {
   333  	sorter.stackSize = 0
   334  	for i, _ := range sorter.runEnds {
   335  		sorter.runEnds[i] = 0
   336  	}
   337  	sorter.runEnds[0] = from
   338  	sorter.to = to
   339  	if length := to - from; length <= RUN_THRESHOLD {
   340  		sorter.minRun = length
   341  	} else {
   342  		sorter.minRun = minRun(length)
   343  	}
   344  }
   345  
   346  func (sorter *TimSorter) sort(from, to int) {
   347  	sorter.checkRange(from, to)
   348  	if to-from <= 1 {
   349  		return
   350  	}
   351  	sorter.reset(from, to)
   352  	for {
   353  		sorter.ensureInvariants()
   354  		sorter.pushRunLen(sorter.nextRun())
   355  		if sorter.runEnd(0) >= to {
   356  			break
   357  		}
   358  	}
   359  	sorter.exhaustStack()
   360  	assert(sorter.runEnd(0) == to)
   361  }
   362  
   363  // util/IntroSorter.java
   364  
   365  type IntroSorterSPI interface {
   366  	// Save the value at slot i so that it can later be used as a pivot.
   367  	SetPivot(int)
   368  	// Compare the pivot with the slot at j, similarly to Less(int,int).
   369  	PivotLess(int) bool
   370  }
   371  
   372  /*
   373  Sorter implementation based on a variant of the quicksort algorithm
   374  called introsort: when the recursion level exceeds the log of the
   375  length of the array to sort, it falls back to heapsort. This prevents
   376  quicksort from running into its worst-case quadratic runtime. Small
   377  arrays are sorted with insertion sort.
   378  */
   379  type IntroSorter struct {
   380  	spi IntroSorterSPI
   381  	*Sorter
   382  }
   383  
   384  func NewIntroSorter(spi IntroSorterSPI, arr sort.Interface) *IntroSorter {
   385  	return &IntroSorter{spi, newSorter(arr)}
   386  }
   387  
   388  // 32 - leadingZero(n-1)
   389  func ceilLog2(n int) int {
   390  	assert(n >= 1)
   391  	if n == 1 {
   392  		return 0
   393  	}
   394  	n--
   395  	ans := 0
   396  	for n > 0 {
   397  		n >>= 1
   398  		ans++
   399  	}
   400  	return ans
   401  }
   402  
   403  func (s *IntroSorter) Sort(from, to int) {
   404  	s.checkRange(from, to)
   405  	s.quicksort(from, to, ceilLog2(to-from))
   406  }
   407  
   408  func (s *IntroSorter) quicksort(from, to, maxDepth int) {
   409  	if to-from < SORTER_THRESHOLD {
   410  		s.insertionSort(from, to)
   411  		// for i := from; i < to-1; i++ {
   412  		// 	assert(!s.Less(i+1, i))
   413  		// }
   414  		return
   415  	}
   416  	if maxDepth--; maxDepth < 0 {
   417  		s.heapSort(from, to)
   418  		// for i := from; i < to-1; i++ {
   419  		// 	assert(!s.Less(i+1, i))
   420  		// }
   421  		return
   422  	}
   423  
   424  	mid := (from + to) >> 1
   425  
   426  	if s.Less(mid, from) {
   427  		s.Swap(from, mid)
   428  	}
   429  
   430  	if s.Less(to-1, mid) {
   431  		s.Swap(mid, to-1)
   432  		if s.Less(mid, from) {
   433  			s.Swap(from, mid)
   434  		}
   435  	}
   436  
   437  	left := from + 1
   438  	right := to - 2
   439  
   440  	s.spi.SetPivot(mid)
   441  	for {
   442  		for s.spi.PivotLess(right) {
   443  			right--
   444  		}
   445  
   446  		for left < right && !s.spi.PivotLess(left) {
   447  			left++
   448  		}
   449  
   450  		if left < right {
   451  			s.Swap(left, right)
   452  			right--
   453  		} else {
   454  			break
   455  		}
   456  	}
   457  
   458  	s.quicksort(from, left+1, maxDepth)
   459  	s.quicksort(left+1, to, maxDepth)
   460  	// for i := from; i < to-1; i++ {
   461  	// 	assert(!s.Less(i+1, i))
   462  	// }
   463  }
   464  
   465  // util/InPlaceMergeSorter.java
   466  
   467  /*
   468  Sorter implementation absed on the merge-sort algorithm that merges
   469  in place (no extra memory will be allocated). Small arrays are sorter
   470  with insertion sort.
   471  */
   472  type InPlaceMergeSorter struct {
   473  	*Sorter
   474  }
   475  
   476  func NewInPlaceMergeSorter(impl sort.Interface) *InPlaceMergeSorter {
   477  	return &InPlaceMergeSorter{
   478  		Sorter: newSorter(impl),
   479  	}
   480  }
   481  
   482  func (s *InPlaceMergeSorter) Sort(from, to int) {
   483  	s.checkRange(from, to)
   484  	s.mergeSort(from, to)
   485  }
   486  
   487  func (s *InPlaceMergeSorter) mergeSort(from, to int) {
   488  	if to-from < SORTER_THRESHOLD {
   489  		s.insertionSort(from, to)
   490  	} else {
   491  		mid := int((uint(from) + uint(to)) >> 1)
   492  		s.mergeSort(from, mid)
   493  		s.mergeSort(mid, to)
   494  		s.mergeInPlace(from, mid, to)
   495  	}
   496  }