github.com/MangoDowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/sort/sort.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run genzfunc.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined
     8  // collections.
     9  package sort
    10  
    11  import "reflect"
    12  
// Interface is implemented by a type, typically a collection, so that it can
// be sorted by the routines in this package. The methods require that the
// elements of the collection be enumerated by an integer index.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less reports whether the element with
	// index i should sort before the element with index j.
	// The sorting routines treat two elements as equal when neither
	// Less(i, j) nor Less(j, i) holds.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
    25  
    26  // Insertion sort
    27  func insertionSort(data Interface, a, b int) {
    28  	for i := a + 1; i < b; i++ {
    29  		for j := i; j > a && data.Less(j, j-1); j-- {
    30  			data.Swap(j, j-1)
    31  		}
    32  	}
    33  }
    34  
    35  // siftDown implements the heap property on data[lo, hi).
    36  // first is an offset into the array where the root of the heap lies.
    37  func siftDown(data Interface, lo, hi, first int) {
    38  	root := lo
    39  	for {
    40  		child := 2*root + 1
    41  		if child >= hi {
    42  			break
    43  		}
    44  		if child+1 < hi && data.Less(first+child, first+child+1) {
    45  			child++
    46  		}
    47  		if !data.Less(first+root, first+child) {
    48  			return
    49  		}
    50  		data.Swap(first+root, first+child)
    51  		root = child
    52  	}
    53  }
    54  
    55  func heapSort(data Interface, a, b int) {
    56  	first := a
    57  	lo := 0
    58  	hi := b - a
    59  
    60  	// Build heap with greatest element at top.
    61  	for i := (hi - 1) / 2; i >= 0; i-- {
    62  		siftDown(data, i, hi, first)
    63  	}
    64  
    65  	// Pop elements, largest first, into end of data.
    66  	for i := hi - 1; i >= 0; i-- {
    67  		data.Swap(first, first+i)
    68  		siftDown(data, lo, i, first)
    69  	}
    70  }
    71  
    72  // Quicksort, loosely following Bentley and McIlroy,
    73  // ``Engineering a Sort Function,'' SP&E November 1993.
    74  
    75  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    76  func medianOfThree(data Interface, m1, m0, m2 int) {
    77  	// sort 3 elements
    78  	if data.Less(m1, m0) {
    79  		data.Swap(m1, m0)
    80  	}
    81  	// data[m0] <= data[m1]
    82  	if data.Less(m2, m1) {
    83  		data.Swap(m2, m1)
    84  		// data[m0] <= data[m2] && data[m1] < data[m2]
    85  		if data.Less(m1, m0) {
    86  			data.Swap(m1, m0)
    87  		}
    88  	}
    89  	// now data[m0] <= data[m1] <= data[m2]
    90  }
    91  
    92  func swapRange(data Interface, a, b, n int) {
    93  	for i := 0; i < n; i++ {
    94  		data.Swap(a+i, b+i)
    95  	}
    96  }
    97  
// doPivot selects a pivot for data[lo:hi) and partitions the range around it.
//
// The pivot is the median of data[lo], data[m] and data[hi-1], where m is the
// midpoint; when the range holds more than 40 elements, each of those three
// positions is first loaded with the median of three nearby samples
// (Tukey's ninther).
//
// It returns midlo and midhi such that, following the invariants maintained
// below, every element of data[lo:midlo) is <= pivot, every element of
// data[midlo:midhi) equals the pivot, and every element of data[midhi:hi)
// is >= pivot.
//
// NOTE(review): the bound comments in the duplicate check assume the range is
// not tiny; the only caller visible in this file passes hi-lo > 12.
func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's ``Ninther,'' median of three medians of three.
		s := (hi - lo) / 8
		medianOfThree(data, lo, lo+s, lo+2*s)
		medianOfThree(data, m, m-s, m+s)
		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	}
	// Leaves the median of the three samples in data[lo].
	medianOfThree(data, lo, m, hi-1)

	// Invariants are:
	//	data[lo] = pivot (set up by the medianOfThree call above)
	//	data[lo < i < a] < pivot
	//	data[a <= i < b] <= pivot
	//	data[b <= i < c] unexamined
	//	data[c <= i < hi-1] > pivot
	//	data[hi-1] >= pivot
	pivot := lo
	a, c := lo+1, hi-1

	// Skip the leading run of elements strictly smaller than the pivot.
	for ; a < c && data.Less(a, pivot); a++ {
	}
	b := a
	for {
		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
		}
		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
		}
		if b >= c {
			break
		}
		// data[b] > pivot; data[c-1] <= pivot
		data.Swap(b, c-1)
		b++
		c--
	}
	// If hi-c<3 then there are duplicates (by property of median of nine).
	// Let's be a bit more conservative, and set border to 5.
	protect := hi-c < 5
	if !protect && hi-c < (hi-lo)/4 {
		// Let's test some points for equality to pivot
		dups := 0
		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
			data.Swap(c, hi-1)
			c++
			dups++
		}
		if !data.Less(b-1, pivot) { // data[b-1] = pivot
			b--
			dups++
		}
		// m-lo = (hi-lo)/2 > 6
		// b-lo > (hi-lo)*3/4-1 > 8
		// ==> m < b ==> data[m] <= pivot
		if !data.Less(m, pivot) { // data[m] = pivot
			data.Swap(m, b-1)
			b--
			dups++
		}
		// if at least 2 points are equal to pivot, assume skewed distribution
		protect = dups > 1
	}
	if protect {
		// Protect against a lot of duplicates
		// Add invariant:
		//	data[a <= i < b] unexamined
		//	data[b <= i < c] = pivot
		for {
			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b-1] == pivot
			}
			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
			}
			if a >= b {
				break
			}
			// data[a] == pivot; data[b-1] < pivot
			data.Swap(a, b-1)
			a++
			b--
		}
	}
	// Swap pivot into middle
	data.Swap(pivot, b-1)
	return b - 1, c
}
   184  
   185  func quickSort(data Interface, a, b, maxDepth int) {
   186  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
   187  		if maxDepth == 0 {
   188  			heapSort(data, a, b)
   189  			return
   190  		}
   191  		maxDepth--
   192  		mlo, mhi := doPivot(data, a, b)
   193  		// Avoiding recursion on the larger subproblem guarantees
   194  		// a stack depth of at most lg(b-a).
   195  		if mlo-a < b-mhi {
   196  			quickSort(data, a, mlo, maxDepth)
   197  			a = mhi // i.e., quickSort(data, mhi, b)
   198  		} else {
   199  			quickSort(data, mhi, b, maxDepth)
   200  			b = mlo // i.e., quickSort(data, a, mlo)
   201  		}
   202  	}
   203  	if b-a > 1 {
   204  		// Do ShellSort pass with gap 6
   205  		// It could be written in this simplified form cause b-a <= 12
   206  		for i := a + 6; i < b; i++ {
   207  			if data.Less(i, i-6) {
   208  				data.Swap(i, i-6)
   209  			}
   210  		}
   211  		insertionSort(data, a, b)
   212  	}
   213  }
   214  
   215  // Sort sorts data.
   216  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   217  // data.Less and data.Swap. The sort is not guaranteed to be stable.
   218  func Sort(data Interface) {
   219  	n := data.Len()
   220  	quickSort(data, 0, n, maxDepth(n))
   221  }
   222  
   223  // maxDepth returns a threshold at which quicksort should switch
   224  // to heapsort. It returns 2*ceil(lg(n+1)).
   225  func maxDepth(n int) int {
   226  	var depth int
   227  	for i := n; i > 0; i >>= 1 {
   228  		depth++
   229  	}
   230  	return depth * 2
   231  }
   232  
// lessSwap is a pair of Less and Swap functions for use with the
// auto-generated func-optimized variant of sort.go in
// zfuncversion.go.
type lessSwap struct {
	// Less reports whether the element at index i should sort before the
	// element at index j.
	Less func(i, j int) bool
	// Swap exchanges the elements at indexes i and j.
	Swap func(i, j int)
}
   240  
   241  // Slice sorts the provided slice given the provided less function.
   242  //
   243  // The sort is not guaranteed to be stable. For a stable sort, use
   244  // SliceStable.
   245  //
   246  // The function panics if the provided interface is not a slice.
   247  func Slice(slice interface{}, less func(i, j int) bool) {
   248  	rv := reflect.ValueOf(slice)
   249  	swap := reflect.Swapper(slice)
   250  	length := rv.Len()
   251  	quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
   252  }
   253  
   254  // SliceStable sorts the provided slice given the provided less
   255  // function while keeping the original order of equal elements.
   256  //
   257  // The function panics if the provided interface is not a slice.
   258  func SliceStable(slice interface{}, less func(i, j int) bool) {
   259  	rv := reflect.ValueOf(slice)
   260  	swap := reflect.Swapper(slice)
   261  	stable_func(lessSwap{less, swap}, rv.Len())
   262  }
   263  
   264  // SliceIsSorted tests whether a slice is sorted.
   265  //
   266  // The function panics if the provided interface is not a slice.
   267  func SliceIsSorted(slice interface{}, less func(i, j int) bool) bool {
   268  	rv := reflect.ValueOf(slice)
   269  	n := rv.Len()
   270  	for i := n - 1; i > 0; i-- {
   271  		if less(i, i-1) {
   272  			return false
   273  		}
   274  	}
   275  	return true
   276  }
   277  
// reverse wraps an Interface so that it sorts in the opposite order.
// Len and Swap come from the embedded value; only Less is overridden.
type reverse struct {
	// This embedded Interface permits Reverse to use the methods of
	// another Interface implementation.
	Interface
}
   283  
   284  // Less returns the opposite of the embedded implementation's Less method.
   285  func (r reverse) Less(i, j int) bool {
   286  	return r.Interface.Less(j, i)
   287  }
   288  
   289  // Reverse returns the reverse order for data.
   290  func Reverse(data Interface) Interface {
   291  	return &reverse{data}
   292  }
   293  
   294  // IsSorted reports whether data is sorted.
   295  func IsSorted(data Interface) bool {
   296  	n := data.Len()
   297  	for i := n - 1; i > 0; i-- {
   298  		if data.Less(i, i-1) {
   299  			return false
   300  		}
   301  	}
   302  	return true
   303  }
   304  
   305  // Convenience types for common cases
   306  
   307  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   308  type IntSlice []int
   309  
   310  func (p IntSlice) Len() int           { return len(p) }
   311  func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   312  func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   313  
   314  // Sort is a convenience method.
   315  func (p IntSlice) Sort() { Sort(p) }
   316  
   317  // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order
   318  // (not-a-number values are treated as less than other values).
   319  type Float64Slice []float64
   320  
   321  func (p Float64Slice) Len() int           { return len(p) }
   322  func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   323  func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   324  
   325  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   326  func isNaN(f float64) bool {
   327  	return f != f
   328  }
   329  
   330  // Sort is a convenience method.
   331  func (p Float64Slice) Sort() { Sort(p) }
   332  
   333  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   334  type StringSlice []string
   335  
   336  func (p StringSlice) Len() int           { return len(p) }
   337  func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   338  func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   339  
   340  // Sort is a convenience method.
   341  func (p StringSlice) Sort() { Sort(p) }
   342  
   343  // Convenience wrappers for common cases
   344  
   345  // Ints sorts a slice of ints in increasing order.
   346  func Ints(a []int) { Sort(IntSlice(a)) }
   347  
   348  // Float64s sorts a slice of float64s in increasing order
   349  // (not-a-number values are treated as less than other values).
   350  func Float64s(a []float64) { Sort(Float64Slice(a)) }
   351  
   352  // Strings sorts a slice of strings in increasing order.
   353  func Strings(a []string) { Sort(StringSlice(a)) }
   354  
   355  // IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   356  func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   357  
   358  // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order
   359  // (not-a-number values are treated as less than other values).
   360  func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   361  
   362  // StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   363  func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   364  
   365  // Notes on stable sorting:
    366  // The algorithms used are simple and provably correct on all inputs and use
   367  // only logarithmic additional stack space. They perform well if compared
   368  // experimentally to other stable in-place sorting algorithms.
   369  //
   370  // Remarks on other algorithms evaluated:
   371  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   372  //    Not faster.
   373  //  - GCC's __rotate for block rotations: Not faster.
   374  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   375  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   376  //    The given algorithms are in-place, number of Swap and Assignments
   377  //    grow as n log n but the algorithm is not stable.
   378  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   379  //    V. Raman in Algorithmica (1996) 16, 115-160:
   380  //    This algorithm either needs additional 2n bits or works only if there
   381  //    are enough different elements available to encode some permutations
   382  //    which have to be undone later (so not stable on any input).
   383  //  - All the optimal in-place sorting/merging algorithms I found are either
   384  //    unstable or rely on enough different elements in each step to encode the
   385  //    performed block rearrangements. See also "In-Place Merging Algorithms",
    386  //    Denham Coates-Evelyn, Department of Computer Science, King's College,
   387  //    January 2004 and the references in there.
   388  //  - Often "optimal" algorithms are optimal in the number of assignments
   389  //    but Interface has only Swap as operation.
   390  
   391  // Stable sorts data while keeping the original order of equal elements.
   392  //
   393  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   394  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   395  func Stable(data Interface) {
   396  	stable(data, data.Len())
   397  }
   398  
   399  func stable(data Interface, n int) {
   400  	blockSize := 20 // must be > 0
   401  	a, b := 0, blockSize
   402  	for b <= n {
   403  		insertionSort(data, a, b)
   404  		a = b
   405  		b += blockSize
   406  	}
   407  	insertionSort(data, a, n)
   408  
   409  	for blockSize < n {
   410  		a, b = 0, 2*blockSize
   411  		for b <= n {
   412  			symMerge(data, a, a+blockSize, b)
   413  			a = b
   414  			b += 2 * blockSize
   415  		}
   416  		if m := a + blockSize; m < n {
   417  			symMerge(data, a, m, n)
   418  		}
   419  		blockSize *= 2
   420  	}
   421  }
   422  
// symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-m. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMerge(data Interface, a, m, b int) {
	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[a] into data[m:b]
	// if data[a:m] only contains one element.
	if m-a == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] >= data[a] for m <= i < b.
		// Exit the search loop with i == b in case no such index exists.
		i := m
		j := b
		for i < j {
			h := int(uint(i+j) >> 1) // midpoint, written to avoid integer overflow
			if data.Less(h, a) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[a] reaches the position before i.
		for k := a; k < i-1; k++ {
			data.Swap(k, k+1)
		}
		return
	}

	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[m] into data[a:m]
	// if data[m:b] only contains one element.
	if b-m == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] > data[m] for a <= i < m.
		// Exit the search loop with i == m in case no such index exists.
		i := a
		j := m
		for i < j {
			h := int(uint(i+j) >> 1) // midpoint, written to avoid integer overflow
			if !data.Less(m, h) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[m] reaches the position i.
		for k := m; k > i; k-- {
			data.Swap(k, k-1)
		}
		return
	}

	// General case: mid is the midpoint of [a, b) and n = mid+m is the
	// constant index sum used below to pair positions with each other.
	mid := int(uint(a+b) >> 1)
	n := mid + m
	var start, r int
	if m > mid {
		start = n - b
		r = mid
	} else {
		start = a
		r = m
	}
	p := n - 1

	// Binary search over [start, r): position c is always compared with its
	// partner position p-c.
	for start < r {
		c := int(uint(start+r) >> 1)
		if !data.Less(p-c, c) {
			start = c + 1
		} else {
			r = c
		}
	}

	// Exchange the blocks data[start:m] and data[m:end], then merge the two
	// halves on either side of mid recursively. Each call is guarded so that
	// rotate and symMerge only ever see non-degenerate arguments.
	end := n - start
	if start < m && m < end {
		rotate(data, start, m, end)
	}
	if a < start && start < mid {
		symMerge(data, a, start, mid)
	}
	if mid < end && end < b {
		symMerge(data, mid, end, b)
	}
}
   523  
   524  // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   525  // Data of the form 'x u v y' is changed to 'x v u y'.
   526  // Rotate performs at most b-a many calls to data.Swap.
   527  // Rotate assumes non-degenerate arguments: a < m && m < b.
   528  func rotate(data Interface, a, m, b int) {
   529  	i := m - a
   530  	j := b - m
   531  
   532  	for i != j {
   533  		if i > j {
   534  			swapRange(data, m-i, m, j)
   535  			i -= j
   536  		} else {
   537  			swapRange(data, m-i, m+j-i, i)
   538  			j -= i
   539  		}
   540  	}
   541  	// i == j
   542  	swapRange(data, m-i, m, i)
   543  }
   544  
   545  /*
   546  Complexity of Stable Sorting
   547  
   548  
   549  Complexity of block swapping rotation
   550  
   551  Each Swap puts one new element into its correct, final position.
   552  Elements which reach their final position are no longer moved.
   553  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   554  This is best possible as each element might need a move.
   555  
   556  Pay attention when comparing to other optimal algorithms which
   557  typically count the number of assignments instead of swaps:
   558  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   559  rotations uses O(u + v + gcd(u,v)) assignments which is
   560  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   561  
   562  
   563  Stable sorting by SymMerge and BlockSwap rotations
   564  
    565  SymMerge complexity for same size input M = N:
   566  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   567  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   568  
    569  (The following argument does not fuss over a missing -1 or
   570  other stuff which does not impact the final result).
   571  
   572  Let n = data.Len(). Assume n = 2^k.
   573  
   574  Plain merge sort performs log(n) = k iterations.
   575  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   576  
   577  Thus iteration i of merge sort performs:
   578  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   579  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   580  
   581  In total k = log(n) iterations are performed; so in total:
   582  Calls to Less O(log(n) * n)
   583  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   584     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   585  
   586  
   587  Above results should generalize to arbitrary n = 2^k + p
   588  and should not be influenced by the initial insertion sort phase:
   589  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   590  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   591  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   592  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   593     = O(n * log(n))
   594  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   595  
   596  */