github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/sort/sort.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sort provides primitives for sorting slices and user-defined
     6  // collections.
     7  package sort
     8  
// Interface is implemented by a type, typically a collection, that can be
// sorted by the routines in this package.  The methods require that the
// elements of the collection be enumerated by an integer index.
//
// The routines in this package inspect and rearrange the collection only
// through these three methods.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less reports whether the element with
	// index i should sort before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
    21  
    22  func min(a, b int) int {
    23  	if a < b {
    24  		return a
    25  	}
    26  	return b
    27  }
    28  
    29  // Insertion sort
    30  func insertionSort(data Interface, a, b int) {
    31  	for i := a + 1; i < b; i++ {
    32  		for j := i; j > a && data.Less(j, j-1); j-- {
    33  			data.Swap(j, j-1)
    34  		}
    35  	}
    36  }
    37  
    38  // siftDown implements the heap property on data[lo, hi).
    39  // first is an offset into the array where the root of the heap lies.
    40  func siftDown(data Interface, lo, hi, first int) {
    41  	root := lo
    42  	for {
    43  		child := 2*root + 1
    44  		if child >= hi {
    45  			break
    46  		}
    47  		if child+1 < hi && data.Less(first+child, first+child+1) {
    48  			child++
    49  		}
    50  		if !data.Less(first+root, first+child) {
    51  			return
    52  		}
    53  		data.Swap(first+root, first+child)
    54  		root = child
    55  	}
    56  }
    57  
    58  func heapSort(data Interface, a, b int) {
    59  	first := a
    60  	lo := 0
    61  	hi := b - a
    62  
    63  	// Build heap with greatest element at top.
    64  	for i := (hi - 1) / 2; i >= 0; i-- {
    65  		siftDown(data, i, hi, first)
    66  	}
    67  
    68  	// Pop elements, largest first, into end of data.
    69  	for i := hi - 1; i >= 0; i-- {
    70  		data.Swap(first, first+i)
    71  		siftDown(data, lo, i, first)
    72  	}
    73  }
    74  
    75  // Quicksort, following Bentley and McIlroy,
    76  // ``Engineering a Sort Function,'' SP&E November 1993.
    77  
    78  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    79  func medianOfThree(data Interface, m1, m0, m2 int) {
    80  	// sort 3 elements
    81  	if data.Less(m1, m0) {
    82  		data.Swap(m1, m0)
    83  	}
    84  	// data[m0] <= data[m1]
    85  	if data.Less(m2, m1) {
    86  		data.Swap(m2, m1)
    87  		// data[m0] <= data[m2] && data[m1] < data[m2]
    88  		if data.Less(m1, m0) {
    89  			data.Swap(m1, m0)
    90  		}
    91  	}
    92  	// now data[m0] <= data[m1] <= data[m2]
    93  }
    94  
    95  func swapRange(data Interface, a, b, n int) {
    96  	for i := 0; i < n; i++ {
    97  		data.Swap(a+i, b+i)
    98  	}
    99  }
   100  
// doPivot partitions data[lo:hi] around a pivot element and returns the
// bounds of the block of elements equal to the pivot: on return,
// data[lo:midlo] < pivot, data[midlo:midhi] = pivot and data[midhi:hi] > pivot,
// where two elements count as equal when neither is Less than the other.
//
// The pivot is the median of data[lo], data[m] and data[hi-1]. For ranges of
// more than 40 elements each of those three positions first receives the
// median of three elements spaced s = (hi-lo)/8 apart.
func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
	m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's ``Ninther,'' median of three medians of three.
		s := (hi - lo) / 8
		medianOfThree(data, lo, lo+s, lo+2*s)
		medianOfThree(data, m, m-s, m+s)
		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	}
	// medianOfThree leaves the median in its first index argument, i.e. data[lo].
	medianOfThree(data, lo, m, hi-1)

	// Invariants are:
	//	data[lo] = pivot (set up by the medianOfThree calls above)
	//	data[lo <= i < a] = pivot
	//	data[a <= i < b] < pivot
	//	data[b <= i < c] is unexamined
	//	data[c <= i < d] > pivot
	//	data[d <= i < hi] = pivot
	//
	// Once b meets c, can swap the "= pivot" sections
	// into the middle of the slice.
	pivot := lo
	a, b, c, d := lo+1, lo+1, hi, hi
	for {
		// Advance b over elements that are < pivot or = pivot; elements
		// equal to the pivot are collected at the left end.
		for b < c {
			if data.Less(b, pivot) { // data[b] < pivot
				b++
			} else if !data.Less(pivot, b) { // data[b] = pivot
				data.Swap(a, b)
				a++
				b++
			} else {
				break
			}
		}
		// Retreat c over elements that are > pivot or = pivot; elements
		// equal to the pivot are collected at the right end.
		for b < c {
			if data.Less(pivot, c-1) { // data[c-1] > pivot
				c--
			} else if !data.Less(c-1, pivot) { // data[c-1] = pivot
				data.Swap(c-1, d-1)
				c--
				d--
			} else {
				break
			}
		}
		if b >= c {
			break
		}
		// data[b] > pivot; data[c-1] < pivot
		data.Swap(b, c-1)
		b++
		c--
	}

	// Exchange the left "= pivot" section with the tail of the "< pivot"
	// section; only the shorter of the two lengths needs to be swapped.
	n := min(b-a, a-lo)
	swapRange(data, lo, b-n, n)

	// Likewise exchange the right "= pivot" section with the head of the
	// "> pivot" section.
	n = min(hi-d, d-c)
	swapRange(data, c, hi-n, n)

	// b-a elements are < pivot and d-c elements are > pivot.
	return lo + b - a, hi - (d - c)
}
   164  
   165  func quickSort(data Interface, a, b, maxDepth int) {
   166  	for b-a > 7 {
   167  		if maxDepth == 0 {
   168  			heapSort(data, a, b)
   169  			return
   170  		}
   171  		maxDepth--
   172  		mlo, mhi := doPivot(data, a, b)
   173  		// Avoiding recursion on the larger subproblem guarantees
   174  		// a stack depth of at most lg(b-a).
   175  		if mlo-a < b-mhi {
   176  			quickSort(data, a, mlo, maxDepth)
   177  			a = mhi // i.e., quickSort(data, mhi, b)
   178  		} else {
   179  			quickSort(data, mhi, b, maxDepth)
   180  			b = mlo // i.e., quickSort(data, a, mlo)
   181  		}
   182  	}
   183  	if b-a > 1 {
   184  		insertionSort(data, a, b)
   185  	}
   186  }
   187  
   188  // Sort sorts data.
   189  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   190  // data.Less and data.Swap. The sort is not guaranteed to be stable.
   191  func Sort(data Interface) {
   192  	// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   193  	n := data.Len()
   194  	maxDepth := 0
   195  	for i := n; i > 0; i >>= 1 {
   196  		maxDepth++
   197  	}
   198  	maxDepth *= 2
   199  	quickSort(data, 0, n, maxDepth)
   200  }
   201  
   202  type reverse struct {
   203  	// This embedded Interface permits Reverse to use the methods of
   204  	// another Interface implementation.
   205  	Interface
   206  }
   207  
   208  // Less returns the opposite of the embedded implementation's Less method.
   209  func (r reverse) Less(i, j int) bool {
   210  	return r.Interface.Less(j, i)
   211  }
   212  
   213  // Reverse returns the reverse order for data.
   214  func Reverse(data Interface) Interface {
   215  	return &reverse{data}
   216  }
   217  
   218  // IsSorted reports whether data is sorted.
   219  func IsSorted(data Interface) bool {
   220  	n := data.Len()
   221  	for i := n - 1; i > 0; i-- {
   222  		if data.Less(i, i-1) {
   223  			return false
   224  		}
   225  	}
   226  	return true
   227  }
   228  
   229  // Convenience types for common cases
   230  
   231  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   232  type IntSlice []int
   233  
   234  func (p IntSlice) Len() int           { return len(p) }
   235  func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   236  func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   237  
// Sort is a convenience method: it sorts p in increasing order by calling Sort(p).
func (p IntSlice) Sort() { Sort(p) }
   240  
   241  // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   242  type Float64Slice []float64
   243  
   244  func (p Float64Slice) Len() int           { return len(p) }
   245  func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   246  func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   247  
   248  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   249  func isNaN(f float64) bool {
   250  	return f != f
   251  }
   252  
// Sort is a convenience method: it sorts p in increasing order by calling
// Sort(p). NaN values are ordered before all other values, as defined by Less.
func (p Float64Slice) Sort() { Sort(p) }
   255  
   256  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   257  type StringSlice []string
   258  
   259  func (p StringSlice) Len() int           { return len(p) }
   260  func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   261  func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   262  
// Sort is a convenience method: it sorts p in increasing order by calling Sort(p).
func (p StringSlice) Sort() { Sort(p) }
   265  
   266  // Convenience wrappers for common cases
   267  
// Ints sorts a slice of ints in increasing order.
// It is shorthand for Sort(IntSlice(a)) and rearranges a in place.
func Ints(a []int) { Sort(IntSlice(a)) }
   270  
// Float64s sorts a slice of float64s in increasing order.
// It is shorthand for Sort(Float64Slice(a)) and rearranges a in place;
// Float64Slice orders NaN values before all other values.
func Float64s(a []float64) { Sort(Float64Slice(a)) }
   273  
// Strings sorts a slice of strings in increasing order.
// It is shorthand for Sort(StringSlice(a)) and rearranges a in place.
func Strings(a []string) { Sort(StringSlice(a)) }
   276  
// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
// It is shorthand for IsSorted(IntSlice(a)).
func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   279  
// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
// It is shorthand for IsSorted(Float64Slice(a)), so NaN values are expected
// before all other values.
func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   282  
// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
// It is shorthand for IsSorted(StringSlice(a)).
func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   285  
   286  // Notes on stable sorting:
// The algorithms used are simple and provably correct on all inputs and use
   288  // only logarithmic additional stack space.  They perform well if compared
   289  // experimentally to other stable in-place sorting algorithms.
   290  //
   291  // Remarks on other algorithms evaluated:
   292  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   293  //    Not faster.
   294  //  - GCC's __rotate for block rotations: Not faster.
   295  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   296  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   297  //    The given algorithms are in-place, number of Swap and Assignments
   298  //    grow as n log n but the algorithm is not stable.
   299  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   300  //    V. Raman in Algorithmica (1996) 16, 115-160:
   301  //    This algorithm either needs additional 2n bits or works only if there
   302  //    are enough different elements available to encode some permutations
   303  //    which have to be undone later (so not stable on any input).
   304  //  - All the optimal in-place sorting/merging algorithms I found are either
   305  //    unstable or rely on enough different elements in each step to encode the
   306  //    performed block rearrangements. See also "In-Place Merging Algorithms",
//    Denham Coates-Evelyn, Department of Computer Science, Kings College,
//    January 2004 and the references in there.
   309  //  - Often "optimal" algorithms are optimal in the number of assignments
   310  //    but Interface has only Swap as operation.
   311  
   312  // Stable sorts data while keeping the original order of equal elements.
   313  //
   314  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   315  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   316  func Stable(data Interface) {
   317  	n := data.Len()
   318  	blockSize := 20 // must be > 0
   319  	a, b := 0, blockSize
   320  	for b <= n {
   321  		insertionSort(data, a, b)
   322  		a = b
   323  		b += blockSize
   324  	}
   325  	insertionSort(data, a, n)
   326  
   327  	for blockSize < n {
   328  		a, b = 0, 2*blockSize
   329  		for b <= n {
   330  			symMerge(data, a, a+blockSize, b)
   331  			a = b
   332  			b += 2 * blockSize
   333  		}
   334  		if m := a + blockSize; m < n {
   335  			symMerge(data, a, m, n)
   336  		}
   337  		blockSize *= 2
   338  	}
   339  }
   340  
// symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
// Computer Science, pages 714-723. Springer, 2004.
//
// Let M = m-a and N = b-m. Wolog M < N.
// The recursion depth is bound by ceil(log(N+M)).
// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
//
// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
// rotation algorithm which uses O(M+N+gcd(M,N)) assignments. The argumentation
// in the paper carries through for Swap operations, especially as the block
// swapping rotate uses only O(M+N) Swaps.
//
// symMerge assumes non-degenerate arguments: a < m && m < b.
// Having the caller check this condition eliminates many leaf recursion calls,
// which improves performance.
func symMerge(data Interface, a, m, b int) {
	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[a] into data[m:b]
	// if data[a:m] only contains one element.
	if m-a == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] >= data[a] for m <= i < b.
		// Exit the search loop with i == b in case no such index exists.
		i := m
		j := b
		for i < j {
			h := i + (j-i)/2
			if data.Less(h, a) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[a] reaches the position before i.
		for k := a; k < i-1; k++ {
			data.Swap(k, k+1)
		}
		return
	}

	// Avoid unnecessary recursions of symMerge
	// by direct insertion of data[m] into data[a:m]
	// if data[m:b] only contains one element.
	if b-m == 1 {
		// Use binary search to find the lowest index i
		// such that data[i] > data[m] for a <= i < m.
		// Exit the search loop with i == m in case no such index exists.
		i := a
		j := m
		for i < j {
			h := i + (j-i)/2
			if !data.Less(m, h) {
				i = h + 1
			} else {
				j = h
			}
		}
		// Swap values until data[m] reaches the position i.
		for k := m; k > i; k-- {
			data.Swap(k, k-1)
		}
		return
	}

	// General case. mid is the midpoint of data[a:b]; n = mid + m is the
	// constant that pairs an index c in the first run with its mirror index
	// p-c (p = n-1) in the second run.
	mid := a + (b-a)/2
	n := mid + m
	var start, r int
	if m > mid {
		start = n - b
		r = mid
	} else {
		start = a
		r = m
	}
	p := n - 1

	// Binary search over [start, r) for the first index c whose mirror
	// element data[p-c] sorts before data[c].
	for start < r {
		c := start + (r-start)/2
		if !data.Less(p-c, c) {
			start = c + 1
		} else {
			r = c
		}
	}

	// Exchange the blocks data[start:m] and data[m:end], then merge the two
	// halves on either side of mid independently. The guards skip rotations
	// and recursive calls whose arguments would be degenerate.
	end := n - start
	if start < m && m < end {
		rotate(data, start, m, end)
	}
	if a < start && start < mid {
		symMerge(data, a, start, mid)
	}
	if mid < end && end < b {
		symMerge(data, mid, end, b)
	}
}
   441  
   442  // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   443  // Data of the form 'x u v y' is changed to 'x v u y'.
   444  // Rotate performs at most b-a many calls to data.Swap.
   445  // Rotate assumes non-degenerate arguments: a < m && m < b.
   446  func rotate(data Interface, a, m, b int) {
   447  	i := m - a
   448  	j := b - m
   449  
   450  	for i != j {
   451  		if i > j {
   452  			swapRange(data, m-i, m, j)
   453  			i -= j
   454  		} else {
   455  			swapRange(data, m-i, m+j-i, i)
   456  			j -= i
   457  		}
   458  	}
   459  	// i == j
   460  	swapRange(data, m-i, m, i)
   461  }
   462  
   463  /*
   464  Complexity of Stable Sorting
   465  
   466  
   467  Complexity of block swapping rotation
   468  
   469  Each Swap puts one new element into its correct, final position.
   470  Elements which reach their final position are no longer moved.
Thus block swapping rotation needs |u|+|v| calls to Swap.
   472  This is best possible as each element might need a move.
   473  
   474  Pay attention when comparing to other optimal algorithms which
   475  typically count the number of assignments instead of swaps:
   476  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   477  rotations uses O(u + v + gcd(u,v)) assignments which is
   478  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   479  
   480  
   481  Stable sorting by SymMerge and BlockSwap rotations
   482  
SymMerge complexity for same size input M = N:
   484  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   485  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   486  
(The following argument does not fuss over a missing -1 or
other details which do not affect the final result.)
   489  
   490  Let n = data.Len(). Assume n = 2^k.
   491  
   492  Plain merge sort performs log(n) = k iterations.
   493  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   494  
   495  Thus iteration i of merge sort performs:
   496  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   497  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   498  
   499  In total k = log(n) iterations are performed; so in total:
   500  Calls to Less O(log(n) * n)
   501  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   502     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   503  
   504  
   505  Above results should generalize to arbitrary n = 2^k + p
   506  and should not be influenced by the initial insertion sort phase:
   507  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   508  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   509  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   510  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   511     = O(n * log(n))
   512  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   513  
   514  */