github.com/primecitizens/pcz/std@v0.2.1/algo/sort/algo.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2022 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  package sort
     9  
    10  import (
    11  	"github.com/primecitizens/pcz/std/core/bits"
    12  )
    13  
    14  type sortedHint int // hint for pdqsort when choosing the pivot
    15  
    16  const (
    17  	unknownHint sortedHint = iota
    18  	increasingHint
    19  	decreasingHint
    20  )
    21  
    22  // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
    23  type xorshift uint64
    24  
    25  func (r *xorshift) next() uint64 {
    26  	*r ^= *r << 13
    27  	*r ^= *r >> 17
    28  	*r ^= *r << 5
    29  	return uint64(*r)
    30  }
    31  
    32  func nextPowerOfTwo(length int) uint {
    33  	return 1 << bits.Len(uint(length))
    34  }
    35  
    36  // insertionSort sorts data[a:b] using insertion sort.
    37  func insertionSort[T Interface](data T, a, b int) {
    38  	for i := a + 1; i < b; i++ {
    39  		for j := i; j > a && data.Less(j, j-1); j-- {
    40  			data.Swap(j, j-1)
    41  		}
    42  	}
    43  }
    44  
    45  // siftDown implements the heap property on data[lo:hi].
    46  // first is an offset into the array where the root of the heap lies.
    47  func siftDown[T Interface](data T, lo, hi, first int) {
    48  	root := lo
    49  	for {
    50  		child := 2*root + 1
    51  		if child >= hi {
    52  			break
    53  		}
    54  		if child+1 < hi && data.Less(first+child, first+child+1) {
    55  			child++
    56  		}
    57  		if !data.Less(first+root, first+child) {
    58  			return
    59  		}
    60  		data.Swap(first+root, first+child)
    61  		root = child
    62  	}
    63  }
    64  
    65  func heapSort[T Interface](data T, a, b int) {
    66  	first := a
    67  	lo := 0
    68  	hi := b - a
    69  
    70  	// Build heap with greatest element at top.
    71  	for i := (hi - 1) / 2; i >= 0; i-- {
    72  		siftDown(data, i, hi, first)
    73  	}
    74  
    75  	// Pop elements, largest first, into end of data.
    76  	for i := hi - 1; i >= 0; i-- {
    77  		data.Swap(first, first+i)
    78  		siftDown(data, lo, i, first)
    79  	}
    80  }
    81  
    82  // pdqsort sorts data[a:b].
    83  // The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
    84  // pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
    85  // C++ implementation: https://github.com/orlp/pdqsort
    86  // Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
    87  // limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
    88  func pdqsort[T Interface](data T, a, b, limit int) {
    89  	const maxInsertion = 12
    90  
    91  	var (
    92  		wasBalanced    = true // whether the last partitioning was reasonably balanced
    93  		wasPartitioned = true // whether the slice was already partitioned
    94  	)
    95  
    96  	for {
    97  		length := b - a
    98  
    99  		if length <= maxInsertion {
   100  			insertionSort(data, a, b)
   101  			return
   102  		}
   103  
   104  		// Fall back to heapsort if too many bad choices were made.
   105  		if limit == 0 {
   106  			heapSort(data, a, b)
   107  			return
   108  		}
   109  
   110  		// If the last partitioning was imbalanced, we need to breaking patterns.
   111  		if !wasBalanced {
   112  			breakPatterns(data, a, b)
   113  			limit--
   114  		}
   115  
   116  		pivot, hint := choosePivot(data, a, b)
   117  		if hint == decreasingHint {
   118  			reverseRange(data, a, b)
   119  			// The chosen pivot was pivot-a elements after the start of the array.
   120  			// After reversing it is pivot-a elements before the end of the array.
   121  			// The idea came from Rust's implementation.
   122  			pivot = (b - 1) - (pivot - a)
   123  			hint = increasingHint
   124  		}
   125  
   126  		// The slice is likely already sorted.
   127  		if wasBalanced && wasPartitioned && hint == increasingHint {
   128  			if partialInsertionSort(data, a, b) {
   129  				return
   130  			}
   131  		}
   132  
   133  		// Probably the slice contains many duplicate elements, partition the slice into
   134  		// elements equal to and elements greater than the pivot.
   135  		if a > 0 && !data.Less(a-1, pivot) {
   136  			mid := partitionEqual(data, a, b, pivot)
   137  			a = mid
   138  			continue
   139  		}
   140  
   141  		mid, alreadyPartitioned := partition(data, a, b, pivot)
   142  		wasPartitioned = alreadyPartitioned
   143  
   144  		leftLen, rightLen := mid-a, b-mid
   145  		balanceThreshold := length / 8
   146  		if leftLen < rightLen {
   147  			wasBalanced = leftLen >= balanceThreshold
   148  			pdqsort(data, a, mid, limit)
   149  			a = mid + 1
   150  		} else {
   151  			wasBalanced = rightLen >= balanceThreshold
   152  			pdqsort(data, mid+1, b, limit)
   153  			b = mid
   154  		}
   155  	}
   156  }
   157  
   158  // partition does one quicksort partition.
   159  // Let p = data[pivot]
   160  // Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
   161  // On return, data[newpivot] = p
   162  func partition[T Interface](data T, a, b, pivot int) (newpivot int, alreadyPartitioned bool) {
   163  	data.Swap(a, pivot)
   164  	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
   165  
   166  	for i <= j && data.Less(i, a) {
   167  		i++
   168  	}
   169  	for i <= j && !data.Less(j, a) {
   170  		j--
   171  	}
   172  	if i > j {
   173  		data.Swap(j, a)
   174  		return j, true
   175  	}
   176  	data.Swap(i, j)
   177  	i++
   178  	j--
   179  
   180  	for {
   181  		for i <= j && data.Less(i, a) {
   182  			i++
   183  		}
   184  		for i <= j && !data.Less(j, a) {
   185  			j--
   186  		}
   187  		if i > j {
   188  			break
   189  		}
   190  		data.Swap(i, j)
   191  		i++
   192  		j--
   193  	}
   194  	data.Swap(j, a)
   195  	return j, false
   196  }
   197  
   198  // partitionEqual partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
   199  // It assumed that data[a:b] does not contain elements smaller than the data[pivot].
   200  func partitionEqual[T Interface](data T, a, b, pivot int) (newpivot int) {
   201  	data.Swap(a, pivot)
   202  	i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
   203  
   204  	for {
   205  		for i <= j && !data.Less(a, i) {
   206  			i++
   207  		}
   208  		for i <= j && data.Less(a, j) {
   209  			j--
   210  		}
   211  		if i > j {
   212  			break
   213  		}
   214  		data.Swap(i, j)
   215  		i++
   216  		j--
   217  	}
   218  	return i
   219  }
   220  
   221  // partialInsertionSort partially sorts a slice, returns true if the slice is sorted at the end.
   222  func partialInsertionSort[T Interface](data T, a, b int) bool {
   223  	const (
   224  		maxSteps         = 5  // maximum number of adjacent out-of-order pairs that will get shifted
   225  		shortestShifting = 50 // don't shift any elements on short arrays
   226  	)
   227  	i := a + 1
   228  	for j := 0; j < maxSteps; j++ {
   229  		for i < b && !data.Less(i, i-1) {
   230  			i++
   231  		}
   232  
   233  		if i == b {
   234  			return true
   235  		}
   236  
   237  		if b-a < shortestShifting {
   238  			return false
   239  		}
   240  
   241  		data.Swap(i, i-1)
   242  
   243  		// Shift the smaller one to the left.
   244  		if i-a >= 2 {
   245  			for j := i - 1; j >= 1; j-- {
   246  				if !data.Less(j, j-1) {
   247  					break
   248  				}
   249  				data.Swap(j, j-1)
   250  			}
   251  		}
   252  		// Shift the greater one to the right.
   253  		if b-i >= 2 {
   254  			for j := i + 1; j < b; j++ {
   255  				if !data.Less(j, j-1) {
   256  					break
   257  				}
   258  				data.Swap(j, j-1)
   259  			}
   260  		}
   261  	}
   262  	return false
   263  }
   264  
   265  // breakPatterns scatters some elements around in an attempt to break some patterns
   266  // that might cause imbalanced partitions in quicksort.
   267  func breakPatterns[T Interface](data T, a, b int) {
   268  	length := b - a
   269  	if length >= 8 {
   270  		random := xorshift(length)
   271  		modulus := nextPowerOfTwo(length)
   272  
   273  		for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
   274  			other := int(uint(random.next()) & (modulus - 1))
   275  			if other >= length {
   276  				other -= length
   277  			}
   278  			data.Swap(idx, a+other)
   279  		}
   280  	}
   281  }
   282  
   283  // choosePivot chooses a pivot in data[a:b].
   284  //
   285  // [0,8): chooses a static pivot.
   286  // [8,shortestNinther): uses the simple median-of-three method.
   287  // [shortestNinther,∞): uses the Tukey ninther method.
   288  func choosePivot[T Interface](data T, a, b int) (pivot int, hint sortedHint) {
   289  	const (
   290  		shortestNinther = 50
   291  		maxSwaps        = 4 * 3
   292  	)
   293  
   294  	l := b - a
   295  
   296  	var (
   297  		swaps int
   298  		i     = a + l/4*1
   299  		j     = a + l/4*2
   300  		k     = a + l/4*3
   301  	)
   302  
   303  	if l >= 8 {
   304  		if l >= shortestNinther {
   305  			// Tukey ninther method, the idea came from Rust's implementation.
   306  			i = medianAdjacent(data, i, &swaps)
   307  			j = medianAdjacent(data, j, &swaps)
   308  			k = medianAdjacent(data, k, &swaps)
   309  		}
   310  		// Find the median among i, j, k and stores it into j.
   311  		j = median(data, i, j, k, &swaps)
   312  	}
   313  
   314  	switch swaps {
   315  	case 0:
   316  		return j, increasingHint
   317  	case maxSwaps:
   318  		return j, decreasingHint
   319  	default:
   320  		return j, unknownHint
   321  	}
   322  }
   323  
   324  // order2 returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
   325  func order2[T Interface](data T, a, b int, swaps *int) (int, int) {
   326  	if data.Less(b, a) {
   327  		*swaps++
   328  		return b, a
   329  	}
   330  	return a, b
   331  }
   332  
   333  // median returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
   334  func median[T Interface](data T, a, b, c int, swaps *int) int {
   335  	a, b = order2(data, a, b, swaps)
   336  	b, c = order2(data, b, c, swaps)
   337  	a, b = order2(data, a, b, swaps)
   338  	return b
   339  }
   340  
   341  // medianAdjacent finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
   342  func medianAdjacent[T Interface](data T, a int, swaps *int) int {
   343  	return median(data, a-1, a, a+1, swaps)
   344  }
   345  
   346  func reverseRange[T Interface](data T, a, b int) {
   347  	i := a
   348  	j := b - 1
   349  	for i < j {
   350  		data.Swap(i, j)
   351  		i++
   352  		j--
   353  	}
   354  }
   355  
   356  func swapRange[T Interface](data T, a, b, n int) {
   357  	for i := 0; i < n; i++ {
   358  		data.Swap(a+i, b+i)
   359  	}
   360  }
   361  
   362  func stable[T Interface](data T, n int) {
   363  	blockSize := 20 // must be > 0
   364  	a, b := 0, blockSize
   365  	for b <= n {
   366  		insertionSort(data, a, b)
   367  		a = b
   368  		b += blockSize
   369  	}
   370  	insertionSort(data, a, n)
   371  
   372  	for blockSize < n {
   373  		a, b = 0, 2*blockSize
   374  		for b <= n {
   375  			symMerge(data, a, a+blockSize, b)
   376  			a = b
   377  			b += 2 * blockSize
   378  		}
   379  		if m := a + blockSize; m < n {
   380  			symMerge(data, a, m, n)
   381  		}
   382  		blockSize *= 2
   383  	}
   384  }
   385  
   386  // symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   387  // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   388  // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   389  // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   390  // Computer Science, pages 714-723. Springer, 2004.
   391  //
   392  // Let M = m-a and N = b-n. Wolog M < N.
   393  // The recursion depth is bound by ceil(log(N+M)).
   394  // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   395  // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   396  //
   397  // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   398  // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   399  // in the paper carries through for Swap operations, especially as the block
   400  // swapping rotate uses only O(M+N) Swaps.
   401  //
   402  // symMerge assumes non-degenerate arguments: a < m && m < b.
   403  // Having the caller check this condition eliminates many leaf recursion calls,
   404  // which improves performance.
   405  func symMerge[T Interface](data T, a, m, b int) {
   406  	// Avoid unnecessary recursions of symMerge
   407  	// by direct insertion of data[a] into data[m:b]
   408  	// if data[a:m] only contains one element.
   409  	if m-a == 1 {
   410  		// Use binary search to find the lowest index i
   411  		// such that data[i] >= data[a] for m <= i < b.
   412  		// Exit the search loop with i == b in case no such index exists.
   413  		i := m
   414  		j := b
   415  		for i < j {
   416  			h := int(uint(i+j) >> 1)
   417  			if data.Less(h, a) {
   418  				i = h + 1
   419  			} else {
   420  				j = h
   421  			}
   422  		}
   423  		// Swap values until data[a] reaches the position before i.
   424  		for k := a; k < i-1; k++ {
   425  			data.Swap(k, k+1)
   426  		}
   427  		return
   428  	}
   429  
   430  	// Avoid unnecessary recursions of symMerge
   431  	// by direct insertion of data[m] into data[a:m]
   432  	// if data[m:b] only contains one element.
   433  	if b-m == 1 {
   434  		// Use binary search to find the lowest index i
   435  		// such that data[i] > data[m] for a <= i < m.
   436  		// Exit the search loop with i == m in case no such index exists.
   437  		i := a
   438  		j := m
   439  		for i < j {
   440  			h := int(uint(i+j) >> 1)
   441  			if !data.Less(m, h) {
   442  				i = h + 1
   443  			} else {
   444  				j = h
   445  			}
   446  		}
   447  		// Swap values until data[m] reaches the position i.
   448  		for k := m; k > i; k-- {
   449  			data.Swap(k, k-1)
   450  		}
   451  		return
   452  	}
   453  
   454  	mid := int(uint(a+b) >> 1)
   455  	n := mid + m
   456  	var start, r int
   457  	if m > mid {
   458  		start = n - b
   459  		r = mid
   460  	} else {
   461  		start = a
   462  		r = m
   463  	}
   464  	p := n - 1
   465  
   466  	for start < r {
   467  		c := int(uint(start+r) >> 1)
   468  		if !data.Less(p-c, c) {
   469  			start = c + 1
   470  		} else {
   471  			r = c
   472  		}
   473  	}
   474  
   475  	end := n - start
   476  	if start < m && m < end {
   477  		rotate(data, start, m, end)
   478  	}
   479  	if a < start && start < mid {
   480  		symMerge(data, a, start, mid)
   481  	}
   482  	if mid < end && end < b {
   483  		symMerge(data, mid, end, b)
   484  	}
   485  }
   486  
   487  // rotate rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
   488  // Data of the form 'x u v y' is changed to 'x v u y'.
   489  // rotate performs at most b-a many calls to data.Swap,
   490  // and it assumes non-degenerate arguments: a < m && m < b.
   491  func rotate[T Interface](data T, a, m, b int) {
   492  	i := m - a
   493  	j := b - m
   494  
   495  	for i != j {
   496  		if i > j {
   497  			swapRange(data, m-i, m, j)
   498  			i -= j
   499  		} else {
   500  			swapRange(data, m-i, m+j-i, i)
   501  			j -= i
   502  		}
   503  	}
   504  	// i == j
   505  	swapRange(data, m-i, m, i)
   506  }