github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/sort/sort.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sort provides primitives for sorting slices and user-defined
     6  // collections.
     7  package sort
     8  
// Interface is the set of methods a type, typically a collection, must
// provide so that it can be sorted by the routines in this package.
// The methods require that the elements of the collection be enumerated
// by an integer index.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less reports whether the element with
	// index i should sort before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
    21  
    22  func min(a, b int) int {
    23  	if a < b {
    24  		return a
    25  	}
    26  	return b
    27  }
    28  
    29  // Insertion sort
    30  func insertionSort(data Interface, a, b int) {
    31  	for i := a + 1; i < b; i++ {
    32  		for j := i; j > a && data.Less(j, j-1); j-- {
    33  			data.Swap(j, j-1)
    34  		}
    35  	}
    36  }
    37  
    38  // siftDown implements the heap property on data[lo, hi).
    39  // first is an offset into the array where the root of the heap lies.
    40  func siftDown(data Interface, lo, hi, first int) {
    41  	root := lo
    42  	for {
    43  		child := 2*root + 1
    44  		if child >= hi {
    45  			break
    46  		}
    47  		if child+1 < hi && data.Less(first+child, first+child+1) {
    48  			child++
    49  		}
    50  		if !data.Less(first+root, first+child) {
    51  			return
    52  		}
    53  		data.Swap(first+root, first+child)
    54  		root = child
    55  	}
    56  }
    57  
    58  func heapSort(data Interface, a, b int) {
    59  	first := a
    60  	lo := 0
    61  	hi := b - a
    62  
    63  	// Build heap with greatest element at top.
    64  	for i := (hi - 1) / 2; i >= 0; i-- {
    65  		siftDown(data, i, hi, first)
    66  	}
    67  
    68  	// Pop elements, largest first, into end of data.
    69  	for i := hi - 1; i >= 0; i-- {
    70  		data.Swap(first, first+i)
    71  		siftDown(data, lo, i, first)
    72  	}
    73  }
    74  
    75  // Quicksort, following Bentley and McIlroy,
    76  // ``Engineering a Sort Function,'' SP&E November 1993.
    77  
    78  // medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
    79  func medianOfThree(data Interface, a, b, c int) {
    80  	m0 := b
    81  	m1 := a
    82  	m2 := c
    83  	// bubble sort on 3 elements
    84  	if data.Less(m1, m0) {
    85  		data.Swap(m1, m0)
    86  	}
    87  	if data.Less(m2, m1) {
    88  		data.Swap(m2, m1)
    89  	}
    90  	if data.Less(m1, m0) {
    91  		data.Swap(m1, m0)
    92  	}
    93  	// now data[m0] <= data[m1] <= data[m2]
    94  }
    95  
    96  func swapRange(data Interface, a, b, n int) {
    97  	for i := 0; i < n; i++ {
    98  		data.Swap(a+i, b+i)
    99  	}
   100  }
   101  
   102  func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
   103  	m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
   104  	if hi-lo > 40 {
   105  		// Tukey's ``Ninther,'' median of three medians of three.
   106  		s := (hi - lo) / 8
   107  		medianOfThree(data, lo, lo+s, lo+2*s)
   108  		medianOfThree(data, m, m-s, m+s)
   109  		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   110  	}
   111  	medianOfThree(data, lo, m, hi-1)
   112  
   113  	// Invariants are:
   114  	//	data[lo] = pivot (set up by ChoosePivot)
   115  	//	data[lo <= i < a] = pivot
   116  	//	data[a <= i < b] < pivot
   117  	//	data[b <= i < c] is unexamined
   118  	//	data[c <= i < d] > pivot
   119  	//	data[d <= i < hi] = pivot
   120  	//
   121  	// Once b meets c, can swap the "= pivot" sections
   122  	// into the middle of the slice.
   123  	pivot := lo
   124  	a, b, c, d := lo+1, lo+1, hi, hi
   125  	for {
   126  		for b < c {
   127  			if data.Less(b, pivot) { // data[b] < pivot
   128  				b++
   129  			} else if !data.Less(pivot, b) { // data[b] = pivot
   130  				data.Swap(a, b)
   131  				a++
   132  				b++
   133  			} else {
   134  				break
   135  			}
   136  		}
   137  		for b < c {
   138  			if data.Less(pivot, c-1) { // data[c-1] > pivot
   139  				c--
   140  			} else if !data.Less(c-1, pivot) { // data[c-1] = pivot
   141  				data.Swap(c-1, d-1)
   142  				c--
   143  				d--
   144  			} else {
   145  				break
   146  			}
   147  		}
   148  		if b >= c {
   149  			break
   150  		}
   151  		// data[b] > pivot; data[c-1] < pivot
   152  		data.Swap(b, c-1)
   153  		b++
   154  		c--
   155  	}
   156  
   157  	n := min(b-a, a-lo)
   158  	swapRange(data, lo, b-n, n)
   159  
   160  	n = min(hi-d, d-c)
   161  	swapRange(data, c, hi-n, n)
   162  
   163  	return lo + b - a, hi - (d - c)
   164  }
   165  
   166  func quickSort(data Interface, a, b, maxDepth int) {
   167  	for b-a > 7 {
   168  		if maxDepth == 0 {
   169  			heapSort(data, a, b)
   170  			return
   171  		}
   172  		maxDepth--
   173  		mlo, mhi := doPivot(data, a, b)
   174  		// Avoiding recursion on the larger subproblem guarantees
   175  		// a stack depth of at most lg(b-a).
   176  		if mlo-a < b-mhi {
   177  			quickSort(data, a, mlo, maxDepth)
   178  			a = mhi // i.e., quickSort(data, mhi, b)
   179  		} else {
   180  			quickSort(data, mhi, b, maxDepth)
   181  			b = mlo // i.e., quickSort(data, a, mlo)
   182  		}
   183  	}
   184  	if b-a > 1 {
   185  		insertionSort(data, a, b)
   186  	}
   187  }
   188  
   189  // Sort sorts data.
   190  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   191  // data.Less and data.Swap. The sort is not guaranteed to be stable.
   192  func Sort(data Interface) {
   193  	// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   194  	n := data.Len()
   195  	maxDepth := 0
   196  	for i := n; i > 0; i >>= 1 {
   197  		maxDepth++
   198  	}
   199  	maxDepth *= 2
   200  	quickSort(data, 0, n, maxDepth)
   201  }
   202  
   203  type reverse struct {
   204  	// This embedded Interface permits Reverse to use the methods of
   205  	// another Interface implementation.
   206  	Interface
   207  }
   208  
   209  // Less returns the opposite of the embedded implementation's Less method.
   210  func (r reverse) Less(i, j int) bool {
   211  	return r.Interface.Less(j, i)
   212  }
   213  
   214  // Reverse returns the reverse order for data.
   215  func Reverse(data Interface) Interface {
   216  	return &reverse{data}
   217  }
   218  
   219  // IsSorted reports whether data is sorted.
   220  func IsSorted(data Interface) bool {
   221  	n := data.Len()
   222  	for i := n - 1; i > 0; i-- {
   223  		if data.Less(i, i-1) {
   224  			return false
   225  		}
   226  	}
   227  	return true
   228  }
   229  
   230  // Convenience types for common cases
   231  
   232  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   233  type IntSlice []int
   234  
   235  func (p IntSlice) Len() int           { return len(p) }
   236  func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   237  func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   238  
   239  // Sort is a convenience method.
   240  func (p IntSlice) Sort() { Sort(p) }
   241  
   242  // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   243  type Float64Slice []float64
   244  
   245  func (p Float64Slice) Len() int           { return len(p) }
   246  func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   247  func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   248  
   249  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   250  func isNaN(f float64) bool {
   251  	return f != f
   252  }
   253  
   254  // Sort is a convenience method.
   255  func (p Float64Slice) Sort() { Sort(p) }
   256  
   257  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   258  type StringSlice []string
   259  
   260  func (p StringSlice) Len() int           { return len(p) }
   261  func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   262  func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   263  
   264  // Sort is a convenience method.
   265  func (p StringSlice) Sort() { Sort(p) }
   266  
   267  // Convenience wrappers for common cases
   268  
   269  // Ints sorts a slice of ints in increasing order.
   270  func Ints(a []int) { Sort(IntSlice(a)) }
   271  
   272  // Float64s sorts a slice of float64s in increasing order.
   273  func Float64s(a []float64) { Sort(Float64Slice(a)) }
   274  
   275  // Strings sorts a slice of strings in increasing order.
   276  func Strings(a []string) { Sort(StringSlice(a)) }
   277  
   278  // IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   279  func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   280  
   281  // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   282  func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   283  
   284  // StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   285  func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   286  
   287  // Notes on stable sorting:
   288  // The used algorithms are simple and provably correct on all input and use
   289  // only logarithmic additional stack space.  They perform well if compared
   290  // experimentally to other stable in-place sorting algorithms.
   291  //
   292  // Remarks on other algorithms evaluated:
   293  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   294  //    Not faster.
   295  //  - GCC's __rotate for block rotations: Not faster.
   296  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   297  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   298  //    The given algorithms are in-place, number of Swap and Assignments
   299  //    grow as n log n but the algorithm is not stable.
   300  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   301  //    V. Raman in Algorithmica (1996) 16, 115-160:
   302  //    This algorithm either needs additional 2n bits or works only if there
   303  //    are enough different elements available to encode some permutations
   304  //    which have to be undone later (so not stable on any input).
   305  //  - All the optimal in-place sorting/merging algorithms I found are either
   306  //    unstable or rely on enough different elements in each step to encode the
   307  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   308  //    Denham Coates-Evelyn, Department of Computer Science, King's College,
   309  //    January 2004 and the references in there.
   310  //  - Often "optimal" algorithms are optimal in the number of assignments
   311  //    but Interface has only Swap as operation.
   312  
   313  // Stable sorts data while keeping the original order of equal elements.
   314  //
   315  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   316  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   317  func Stable(data Interface) {
   318  	n := data.Len()
   319  	blockSize := 20 // must be > 0
   320  	a, b := 0, blockSize
   321  	for b <= n {
   322  		insertionSort(data, a, b)
   323  		a = b
   324  		b += blockSize
   325  	}
   326  	insertionSort(data, a, n)
   327  
   328  	for blockSize < n {
   329  		a, b = 0, 2*blockSize
   330  		for b <= n {
   331  			symMerge(data, a, a+blockSize, b)
   332  			a = b
   333  			b += 2 * blockSize
   334  		}
   335  		if m := a + blockSize; m < n {
   336  			symMerge(data, a, m, n)
   337  		}
   338  		blockSize *= 2
   339  	}
   340  }
   341  
   342  // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   343  // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   344  // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   345  // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   346  // Computer Science, pages 714-723. Springer, 2004.
   347  //
   348  // Let M = m-a and N = b-m. Wolog M < N.
   349  // The recursion depth is bound by ceil(log(N+M)).
   350  // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   351  // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   352  //
   353  // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   354  // rotation algorithm which uses O(M+N+gcd(M,N)) assignments. The argumentation
   355  // in the paper carries through for Swap operations, especially as the block
   356  // swapping rotate uses only O(M+N) Swaps.
   357  //
   358  // symMerge assumes non-degenerate arguments: a < m && m < b.
   359  // Having the caller check this condition eliminates many leaf recursion calls,
   360  // which improves performance.
   361  func symMerge(data Interface, a, m, b int) {
   362  	// Avoid unnecessary recursions of symMerge
   363  	// by direct insertion of data[a] into data[m:b]
   364  	// if data[a:m] only contains one element.
   365  	if m-a == 1 {
   366  		// Use binary search to find the lowest index i
   367  		// such that data[i] >= data[a] for m <= i < b.
   368  		// Exit the search loop with i == b in case no such index exists.
   369  		i := m
   370  		j := b
   371  		for i < j {
   372  			h := i + (j-i)/2
   373  			if data.Less(h, a) {
   374  				i = h + 1
   375  			} else {
   376  				j = h
   377  			}
   378  		}
   379  		// Swap values until data[a] reaches the position before i.
   380  		for k := a; k < i-1; k++ {
   381  			data.Swap(k, k+1)
   382  		}
   383  		return
   384  	}
   385  
   386  	// Avoid unnecessary recursions of symMerge
   387  	// by direct insertion of data[m] into data[a:m]
   388  	// if data[m:b] only contains one element.
   389  	if b-m == 1 {
   390  		// Use binary search to find the lowest index i
   391  		// such that data[i] > data[m] for a <= i < m.
   392  		// Exit the search loop with i == m in case no such index exists.
   393  		i := a
   394  		j := m
   395  		for i < j {
   396  			h := i + (j-i)/2
   397  			if !data.Less(m, h) {
   398  				i = h + 1
   399  			} else {
   400  				j = h
   401  			}
   402  		}
   403  		// Swap values until data[m] reaches the position i.
   404  		for k := m; k > i; k-- {
   405  			data.Swap(k, k-1)
   406  		}
   407  		return
   408  	}
   409  
   410  	mid := a + (b-a)/2
   411  	n := mid + m
   412  	var start, r int
   413  	if m > mid {
   414  		start = n - b
   415  		r = mid
   416  	} else {
   417  		start = a
   418  		r = m
   419  	}
   420  	p := n - 1
   421  
   422  	for start < r {
   423  		c := start + (r-start)/2
   424  		if !data.Less(p-c, c) {
   425  			start = c + 1
   426  		} else {
   427  			r = c
   428  		}
   429  	}
   430  
   431  	end := n - start
   432  	if start < m && m < end {
   433  		rotate(data, start, m, end)
   434  	}
   435  	if a < start && start < mid {
   436  		symMerge(data, a, start, mid)
   437  	}
   438  	if mid < end && end < b {
   439  		symMerge(data, mid, end, b)
   440  	}
   441  }
   442  
   443  // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   444  // Data of the form 'x u v y' is changed to 'x v u y'.
   445  // Rotate performs at most b-a many calls to data.Swap.
   446  // Rotate assumes non-degenerate arguments: a < m && m < b.
   447  func rotate(data Interface, a, m, b int) {
   448  	i := m - a
   449  	j := b - m
   450  
   451  	for i != j {
   452  		if i > j {
   453  			swapRange(data, m-i, m, j)
   454  			i -= j
   455  		} else {
   456  			swapRange(data, m-i, m+j-i, i)
   457  			j -= i
   458  		}
   459  	}
   460  	// i == j
   461  	swapRange(data, m-i, m, i)
   462  }
   463  
   464  /*
   465  Complexity of Stable Sorting
   466  
   467  
   468  Complexity of block swapping rotation
   469  
   470  Each Swap puts one new element into its correct, final position.
   471  Elements which reach their final position are no longer moved.
   472  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   473  This is best possible as each element might need a move.
   474  
   475  Pay attention when comparing to other optimal algorithms which
   476  typically count the number of assignments instead of swaps:
   477  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   478  rotations uses O(u + v + gcd(u,v)) assignments which is
   479  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   480  
   481  
   482  Stable sorting by SymMerge and BlockSwap rotations
   483  
   484  SymMerge complexity for same size input M = N:
   485  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   486  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   487  
   488  (The following argument does not fuss over a missing -1 or
   489  other stuff which does not impact the final result).
   490  
   491  Let n = data.Len(). Assume n = 2^k.
   492  
   493  Plain merge sort performs log(n) = k iterations.
   494  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   495  
   496  Thus iteration i of merge sort performs:
   497  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   498  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   499  
   500  In total k = log(n) iterations are performed; so in total:
   501  Calls to Less O(log(n) * n)
   502  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   503     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   504  
   505  
   506  Above results should generalize to arbitrary n = 2^k + p
   507  and should not be influenced by the initial insertion sort phase:
   508  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   509  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   510  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   511  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   512     = O(n * log(n))
   513  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   514  
   515  */