github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/sort/sort.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sort provides primitives for sorting slices and user-defined
     6  // collections.
     7  package sort
     8  
// Interface is implemented by any type, typically a collection, that can be
// sorted by the routines in this package. The methods require that the
// elements of the collection be enumerated by an integer index.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less reports whether the element with
	// index i should sort before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
    21  
    22  // Insertion sort
    23  func insertionSort(data Interface, a, b int) {
    24  	for i := a + 1; i < b; i++ {
    25  		for j := i; j > a && data.Less(j, j-1); j-- {
    26  			data.Swap(j, j-1)
    27  		}
    28  	}
    29  }
    30  
    31  // siftDown implements the heap property on data[lo, hi).
    32  // first is an offset into the array where the root of the heap lies.
    33  func siftDown(data Interface, lo, hi, first int) {
    34  	root := lo
    35  	for {
    36  		child := 2*root + 1
    37  		if child >= hi {
    38  			break
    39  		}
    40  		if child+1 < hi && data.Less(first+child, first+child+1) {
    41  			child++
    42  		}
    43  		if !data.Less(first+root, first+child) {
    44  			return
    45  		}
    46  		data.Swap(first+root, first+child)
    47  		root = child
    48  	}
    49  }
    50  
    51  func heapSort(data Interface, a, b int) {
    52  	first := a
    53  	lo := 0
    54  	hi := b - a
    55  
    56  	// Build heap with greatest element at top.
    57  	for i := (hi - 1) / 2; i >= 0; i-- {
    58  		siftDown(data, i, hi, first)
    59  	}
    60  
    61  	// Pop elements, largest first, into end of data.
    62  	for i := hi - 1; i >= 0; i-- {
    63  		data.Swap(first, first+i)
    64  		siftDown(data, lo, i, first)
    65  	}
    66  }
    67  
    68  // Quicksort, loosely following Bentley and McIlroy,
    69  // ``Engineering a Sort Function,'' SP&E November 1993.
    70  
    71  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    72  func medianOfThree(data Interface, m1, m0, m2 int) {
    73  	// sort 3 elements
    74  	if data.Less(m1, m0) {
    75  		data.Swap(m1, m0)
    76  	}
    77  	// data[m0] <= data[m1]
    78  	if data.Less(m2, m1) {
    79  		data.Swap(m2, m1)
    80  		// data[m0] <= data[m2] && data[m1] < data[m2]
    81  		if data.Less(m1, m0) {
    82  			data.Swap(m1, m0)
    83  		}
    84  	}
    85  	// now data[m0] <= data[m1] <= data[m2]
    86  }
    87  
    88  func swapRange(data Interface, a, b, n int) {
    89  	for i := 0; i < n; i++ {
    90  		data.Swap(a+i, b+i)
    91  	}
    92  }
    93  
// doPivot chooses a pivot for data[lo:hi], partitions the range around it and
// returns the bounds midlo, midhi of the middle section that holds the pivot.
// quickSort then sorts data[lo:midlo] and data[midhi:hi] independently.
//
// The pivot is the median of data[lo], data[m] and data[hi-1]; for ranges of
// more than 40 elements each of those three values is first replaced by a
// median of three itself. The pivot is kept in data[lo] while partitioning
// and is swapped into its final position at the end.
func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
	m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's ``Ninther,'' median of three medians of three.
		s := (hi - lo) / 8
		medianOfThree(data, lo, lo+s, lo+2*s)
		medianOfThree(data, m, m-s, m+s)
		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	}
	// Leaves the median of the three candidates in data[lo].
	medianOfThree(data, lo, m, hi-1)

	// Invariants are:
	//	data[lo] = pivot (set up by the medianOfThree calls above)
	//	data[lo < i < a] < pivot
	//	data[a <= i < b] <= pivot
	//	data[b <= i < c] unexamined
	//	data[c <= i < hi-1] > pivot
	//	data[hi-1] >= pivot
	pivot := lo
	a, c := lo+1, hi-1

	// Skip the leading run of elements strictly less than the pivot.
	for ; a < c && data.Less(a, pivot); a++ {
	}
	b := a
	for {
		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
		}
		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
		}
		if b >= c {
			break
		}
		// data[b] > pivot; data[c-1] <= pivot
		data.Swap(b, c-1)
		b++
		c--
	}
	// If hi-c<3 then there are duplicates (by property of median of nine).
	// Let's be a bit more conservative, and set border to 5.
	protect := hi-c < 5
	if !protect && hi-c < (hi-lo)/4 {
		// Let's test some points for equality to pivot.
		dups := 0
		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
			data.Swap(c, hi-1)
			c++
			dups++
		}
		if !data.Less(b-1, pivot) { // data[b-1] = pivot
			b--
			dups++
		}
		// m-lo = (hi-lo)/2 > 6
		// b-lo > (hi-lo)*3/4-1 > 8
		// ==> m < b ==> data[m] <= pivot
		if !data.Less(m, pivot) { // data[m] = pivot
			data.Swap(m, b-1)
			b--
			dups++
		}
		// if at least 2 points are equal to pivot, assume skewed distribution
		protect = dups > 1
	}
	if protect {
		// Protect against a lot of duplicates
		// Add invariant:
		//	data[a <= i < b] unexamined
		//	data[b <= i < c] = pivot
		for {
			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
			}
			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
			}
			if a >= b {
				break
			}
			// data[a] == pivot; data[b-1] < pivot
			data.Swap(a, b-1)
			a++
			b--
		}
	}
	// Swap pivot into middle
	data.Swap(pivot, b-1)
	return b - 1, c
}
   180  
   181  func quickSort(data Interface, a, b, maxDepth int) {
   182  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
   183  		if maxDepth == 0 {
   184  			heapSort(data, a, b)
   185  			return
   186  		}
   187  		maxDepth--
   188  		mlo, mhi := doPivot(data, a, b)
   189  		// Avoiding recursion on the larger subproblem guarantees
   190  		// a stack depth of at most lg(b-a).
   191  		if mlo-a < b-mhi {
   192  			quickSort(data, a, mlo, maxDepth)
   193  			a = mhi // i.e., quickSort(data, mhi, b)
   194  		} else {
   195  			quickSort(data, mhi, b, maxDepth)
   196  			b = mlo // i.e., quickSort(data, a, mlo)
   197  		}
   198  	}
   199  	if b-a > 1 {
   200  		// Do ShellSort pass with gap 6
   201  		// It could be written in this simplified form cause b-a <= 12
   202  		for i := a + 6; i < b; i++ {
   203  			if data.Less(i, i-6) {
   204  				data.Swap(i, i-6)
   205  			}
   206  		}
   207  		insertionSort(data, a, b)
   208  	}
   209  }
   210  
   211  // Sort sorts data.
   212  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   213  // data.Less and data.Swap. The sort is not guaranteed to be stable.
   214  func Sort(data Interface) {
   215  	// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   216  	n := data.Len()
   217  	maxDepth := 0
   218  	for i := n; i > 0; i >>= 1 {
   219  		maxDepth++
   220  	}
   221  	maxDepth *= 2
   222  	quickSort(data, 0, n, maxDepth)
   223  }
   224  
   225  type reverse struct {
   226  	// This embedded Interface permits Reverse to use the methods of
   227  	// another Interface implementation.
   228  	Interface
   229  }
   230  
   231  // Less returns the opposite of the embedded implementation's Less method.
   232  func (r reverse) Less(i, j int) bool {
   233  	return r.Interface.Less(j, i)
   234  }
   235  
   236  // Reverse returns the reverse order for data.
   237  func Reverse(data Interface) Interface {
   238  	return &reverse{data}
   239  }
   240  
   241  // IsSorted reports whether data is sorted.
   242  func IsSorted(data Interface) bool {
   243  	n := data.Len()
   244  	for i := n - 1; i > 0; i-- {
   245  		if data.Less(i, i-1) {
   246  			return false
   247  		}
   248  	}
   249  	return true
   250  }
   251  
   252  // Convenience types for common cases
   253  
   254  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   255  type IntSlice []int
   256  
   257  func (p IntSlice) Len() int           { return len(p) }
   258  func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   259  func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   260  
   261  // Sort is a convenience method.
   262  func (p IntSlice) Sort() { Sort(p) }
   263  
   264  // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   265  type Float64Slice []float64
   266  
   267  func (p Float64Slice) Len() int           { return len(p) }
   268  func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   269  func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   270  
   271  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   272  func isNaN(f float64) bool {
   273  	return f != f
   274  }
   275  
   276  // Sort is a convenience method.
   277  func (p Float64Slice) Sort() { Sort(p) }
   278  
   279  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   280  type StringSlice []string
   281  
   282  func (p StringSlice) Len() int           { return len(p) }
   283  func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   284  func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   285  
   286  // Sort is a convenience method.
   287  func (p StringSlice) Sort() { Sort(p) }
   288  
   289  // Convenience wrappers for common cases
   290  
   291  // Ints sorts a slice of ints in increasing order.
   292  func Ints(a []int) { Sort(IntSlice(a)) }
   293  
   294  // Float64s sorts a slice of float64s in increasing order.
   295  func Float64s(a []float64) { Sort(Float64Slice(a)) }
   296  
   297  // Strings sorts a slice of strings in increasing order.
   298  func Strings(a []string) { Sort(StringSlice(a)) }
   299  
   300  // IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   301  func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   302  
   303  // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   304  func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   305  
   306  // StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   307  func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   308  
   309  // Notes on stable sorting:
// The algorithms used are simple and provably correct on all input and use
// only logarithmic additional stack space. They perform well when compared
// experimentally to other stable in-place sorting algorithms.
   313  //
   314  // Remarks on other algorithms evaluated:
   315  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   316  //    Not faster.
   317  //  - GCC's __rotate for block rotations: Not faster.
   318  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   319  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   320  //    The given algorithms are in-place, number of Swap and Assignments
   321  //    grow as n log n but the algorithm is not stable.
   322  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   323  //    V. Raman in Algorithmica (1996) 16, 115-160:
//    This algorithm either needs additional 2n bits or works only if there
//    are enough different elements available to encode some permutations
//    which have to be undone later (so it is not stable on every input).
   327  //  - All the optimal in-place sorting/merging algorithms I found are either
   328  //    unstable or rely on enough different elements in each step to encode the
   329  //    performed block rearrangements. See also "In-Place Merging Algorithms",
//    Denham Coates-Evelyn, Department of Computer Science, King's College,
//    January 2004 and the references therein.
   332  //  - Often "optimal" algorithms are optimal in the number of assignments
   333  //    but Interface has only Swap as operation.
   334  
   335  // Stable sorts data while keeping the original order of equal elements.
   336  //
   337  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   338  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   339  func Stable(data Interface) {
   340  	n := data.Len()
   341  	blockSize := 20 // must be > 0
   342  	a, b := 0, blockSize
   343  	for b <= n {
   344  		insertionSort(data, a, b)
   345  		a = b
   346  		b += blockSize
   347  	}
   348  	insertionSort(data, a, n)
   349  
   350  	for blockSize < n {
   351  		a, b = 0, 2*blockSize
   352  		for b <= n {
   353  			symMerge(data, a, a+blockSize, b)
   354  			a = b
   355  			b += 2 * blockSize
   356  		}
   357  		if m := a + blockSize; m < n {
   358  			symMerge(data, a, m, n)
   359  		}
   360  		blockSize *= 2
   361  	}
   362  }
   363  
// symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-m. Without loss of generality assume M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMerge(data Interface, a, m, b int) {
	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[a] into data[m:b]
	// if data[a:m] only contains one element.
	if m-a == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] >= data[a] for m <= i < b.
		// Exit the search loop with i == b in case no such index exists.
		i := m
		j := b
		for i < j {
			h := i + (j-i)/2
			if data.Less(h, a) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[a] reaches the position before i.
		for k := a; k < i-1; k++ {
			data.Swap(k, k+1)
		}
		return
	}

	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[m] into data[a:m]
	// if data[m:b] only contains one element.
	if b-m == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] > data[m] for a <= i < m.
		// Exit the search loop with i == m in case no such index exists.
		i := a
		j := m
		for i < j {
			h := i + (j-i)/2
			if !data.Less(m, h) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[m] reaches the position i.
		for k := m; k > i; k-- {
			data.Swap(k, k-1)
		}
		return
	}

	// General case: mid is the midpoint of data[a:b]. The code below picks
	// a section around m to rotate and then merges data[a:mid] and
	// data[mid:b] with two independent recursive calls.
	mid := a + (b-a)/2
	n := mid + m
	var start, r int
	if m > mid {
		start = n - b
		r = mid
	} else {
		start = a
		r = m
	}
	p := n - 1

	// Binary search over [start, r): each probe compares data[c] with
	// data[p-c].
	for start < r {
		c := start + (r-start)/2
		if !data.Less(p-c, c) {
			start = c + 1
		} else {
			r = c
		}
	}

	end := n - start
	// Exchange the blocks data[start:m] and data[m:end]; rotate requires
	// both of them to be non-empty.
	if start < m && m < end {
		rotate(data, start, m, end)
	}
	// Recurse on each half, skipping calls whose arguments would be
	// degenerate.
	if a < start && start < mid {
		symMerge(data, a, start, mid)
	}
	if mid < end && end < b {
		symMerge(data, mid, end, b)
	}
}
   464  
   465  // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   466  // Data of the form 'x u v y' is changed to 'x v u y'.
   467  // Rotate performs at most b-a many calls to data.Swap.
   468  // Rotate assumes non-degenerate arguments: a < m && m < b.
   469  func rotate(data Interface, a, m, b int) {
   470  	i := m - a
   471  	j := b - m
   472  
   473  	for i != j {
   474  		if i > j {
   475  			swapRange(data, m-i, m, j)
   476  			i -= j
   477  		} else {
   478  			swapRange(data, m-i, m+j-i, i)
   479  			j -= i
   480  		}
   481  	}
   482  	// i == j
   483  	swapRange(data, m-i, m, i)
   484  }
   485  
   486  /*
   487  Complexity of Stable Sorting
   488  
   489  
   490  Complexity of block swapping rotation
   491  
   492  Each Swap puts one new element into its correct, final position.
   493  Elements which reach their final position are no longer moved.
Thus block swapping rotation needs |u|+|v| calls to Swap.
   495  This is best possible as each element might need a move.
   496  
   497  Pay attention when comparing to other optimal algorithms which
   498  typically count the number of assignments instead of swaps:
   499  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   500  rotations uses O(u + v + gcd(u,v)) assignments which is
   501  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   502  
   503  
   504  Stable sorting by SymMerge and BlockSwap rotations
   505  
SymMerge complexity for same size input M = N:
   507  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   508  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   509  
(The following argument does not fuss over a missing -1 or
other details which do not impact the final result).
   512  
   513  Let n = data.Len(). Assume n = 2^k.
   514  
   515  Plain merge sort performs log(n) = k iterations.
   516  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   517  
   518  Thus iteration i of merge sort performs:
   519  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   520  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   521  
   522  In total k = log(n) iterations are performed; so in total:
   523  Calls to Less O(log(n) * n)
   524  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   525     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   526  
   527  
   528  Above results should generalize to arbitrary n = 2^k + p
   529  and should not be influenced by the initial insertion sort phase:
   530  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   531  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   532  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   533  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   534     = O(n * log(n))
   535  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   536  
   537  */