github.com/jfcg/sorty@v1.2.0/sortyU8.go (about)

     1  /*	Copyright (c) 2019, Serhat Şevki Dinçer.
     2  	This Source Code Form is subject to the terms of the Mozilla Public
     3  	License, v. 2.0. If a copy of the MPL was not distributed with this
     4  	file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5  */
     6  
     7  package sorty
     8  
     9  import "sync/atomic"
    10  
    11  // IsSortedU8 returns 0 if ar is sorted in ascending order,
    12  // otherwise it returns i > 0 with ar[i] < ar[i-1]
    13  func IsSortedU8(ar []uint64) int {
    14  	for i := len(ar) - 1; i > 0; i-- {
    15  		if ar[i] < ar[i-1] {
    16  			return i
    17  		}
    18  	}
    19  	return 0
    20  }
    21  
    22  // insertion sort, assumes len(ar) >= 2
    23  func insertionU8(ar []uint64) {
    24  	hi := len(ar) - 1
    25  	for l, h := (hi-3)>>1, hi; l >= 0; {
    26  		if ar[h] < ar[l] {
    27  			ar[l], ar[h] = ar[h], ar[l]
    28  		}
    29  		l--
    30  		h--
    31  	}
    32  	for h := 0; ; {
    33  		l := h
    34  		h++
    35  		v := ar[h]
    36  		if v < ar[l] {
    37  			for {
    38  				ar[l+1] = ar[l]
    39  				l--
    40  				if l < 0 || v >= ar[l] {
    41  					break
    42  				}
    43  			}
    44  			ar[l+1] = v
    45  		}
    46  		if h >= hi {
    47  			break
    48  		}
    49  	}
    50  }
    51  
    52  // pivotU8 divides ar into 2n+1 equal intervals, sorts mid-points of them
    53  // to find median-of-2n+1 pivot. ensures lo/hi ranges have at least n elements by
    54  // moving 2n of mid-points to n positions at lo/hi ends.
    55  // assumes n > 0, len(ar) > 4n+2. returns remaining slice,pivot for partitioning.
    56  func pivotU8(ar []uint64, n int) ([]uint64, uint64) {
    57  	m := len(ar) >> 1
    58  	s := len(ar) / (2*n + 1) // step > 1
    59  	l, h := m-n*s, m+n*s
    60  
    61  	for q, k := h, m-2*s; k >= l; { // insertion sort ar[m+i*s], i=-n..n
    62  		if ar[q] < ar[k] {
    63  			ar[k], ar[q] = ar[q], ar[k]
    64  		}
    65  		q -= s
    66  		k -= s
    67  	}
    68  	for q := l; ; {
    69  		k := q
    70  		q += s
    71  		v := ar[q]
    72  		if v < ar[k] {
    73  			for {
    74  				ar[k+s] = ar[k]
    75  				k -= s
    76  				if k < l || v >= ar[k] {
    77  					break
    78  				}
    79  			}
    80  			ar[k+s] = v
    81  		}
    82  		if q >= h {
    83  			break
    84  		}
    85  	}
    86  
    87  	lo, hi := 0, len(ar)
    88  
    89  	// move lo/hi mid-points to lo/hi ends
    90  	for {
    91  		hi--
    92  		ar[l], ar[lo] = ar[lo], ar[l]
    93  		ar[h], ar[hi] = ar[hi], ar[h]
    94  		l += s
    95  		h -= s
    96  		lo++
    97  		if h <= m {
    98  			break
    99  		}
   100  	}
   101  
   102  	return ar[lo:hi:hi], ar[m] // lo <= m-s+1, m+s-1 < hi
   103  }
   104  
   105  // partition ar into <= and >= pivot, assumes len(ar) >= 2
   106  // returns k with ar[:k] <= pivot, ar[k:] >= pivot
   107  func partition1U8(ar []uint64, pv uint64) int {
   108  	l, h := 0, len(ar)-1
   109  	for {
   110  		if ar[h] < pv { // avoid unnecessary comparisons
   111  			for {
   112  				if pv < ar[l] {
   113  					ar[l], ar[h] = ar[h], ar[l]
   114  					break
   115  				}
   116  				l++
   117  				if l >= h {
   118  					return l + 1
   119  				}
   120  			}
   121  		} else if pv < ar[l] { // extend ranges in balance
   122  			for {
   123  				h--
   124  				if l >= h {
   125  					return l
   126  				}
   127  				if ar[h] < pv {
   128  					ar[l], ar[h] = ar[h], ar[l]
   129  					break
   130  				}
   131  			}
   132  		}
   133  		l++
   134  		h--
   135  		if l >= h {
   136  			break
   137  		}
   138  	}
   139  	if l == h && ar[h] < pv { // classify mid element
   140  		l++
   141  	}
   142  	return l
   143  }
   144  
   145  // rearrange ar[:a] and ar[b:] into <= and >= pivot, assumes 0 < a < b < len(ar)
   146  // gap (a,b) expands until one of the intervals is fully consumed
   147  func partition2U8(ar []uint64, a, b int, pv uint64) (int, int) {
   148  	a--
   149  	for {
   150  		if ar[b] < pv { // avoid unnecessary comparisons
   151  			for {
   152  				if pv < ar[a] {
   153  					ar[a], ar[b] = ar[b], ar[a]
   154  					break
   155  				}
   156  				a--
   157  				if a < 0 {
   158  					return a, b
   159  				}
   160  			}
   161  		} else if pv < ar[a] { // extend ranges in balance
   162  			for {
   163  				b++
   164  				if b >= len(ar) {
   165  					return a, b
   166  				}
   167  				if ar[b] < pv {
   168  					ar[a], ar[b] = ar[b], ar[a]
   169  					break
   170  				}
   171  			}
   172  		}
   173  		a--
   174  		b++
   175  		if a < 0 || b >= len(ar) {
   176  			return a, b
   177  		}
   178  	}
   179  }
   180  
   181  // new-goroutine partition
   182  func gpart1U8(ar []uint64, pv uint64, ch chan int) {
   183  	ch <- partition1U8(ar, pv)
   184  }
   185  
   186  // concurrent dual partitioning of ar
   187  // returns k with ar[:k] <= pivot, ar[k:] >= pivot
   188  func cdualparU8(ar []uint64, ch chan int) int {
   189  
   190  	aq, pv := pivotU8(ar, 4) // median-of-9
   191  	k := len(aq) >> 1
   192  	a, b := k>>1, mid(k, len(aq))
   193  
   194  	go gpart1U8(aq[a:b:b], pv, ch) // mid half range
   195  
   196  	t := a
   197  	a, b = partition2U8(aq, a, b, pv) // left/right quarter ranges
   198  	k = <-ch
   199  	k += t // convert k indice to aq
   200  
   201  	// only one gap is possible
   202  	for ; 0 <= a; a-- { // gap left in low range?
   203  		if pv < aq[a] {
   204  			k--
   205  			aq[a], aq[k] = aq[k], aq[a]
   206  		}
   207  	}
   208  	for ; b < len(aq); b++ { // gap left in high range?
   209  		if aq[b] < pv {
   210  			aq[b], aq[k] = aq[k], aq[b]
   211  			k++
   212  		}
   213  	}
   214  	return k + 4 // convert k indice to ar
   215  }
   216  
   217  // short range sort function, assumes Mli < len(ar) <= Mlr
   218  func shortU8(ar []uint64) {
   219  start:
   220  	aq, pv := pivotU8(ar, 2)
   221  	k := partition1U8(aq, pv) // median-of-5 partitioning
   222  
   223  	k += 2 // convert k indice from aq to ar
   224  
   225  	if k < len(ar)-k {
   226  		aq = ar[:k:k]
   227  		ar = ar[k:] // ar is the longer range
   228  	} else {
   229  		aq = ar[k:]
   230  		ar = ar[:k:k]
   231  	}
   232  
   233  	if len(aq) > Mli {
   234  		shortU8(aq) // recurse on the shorter range
   235  		goto start
   236  	}
   237  	insertionU8(aq) // at least one insertion range
   238  
   239  	if len(ar) > Mli {
   240  		goto start
   241  	}
   242  	insertionU8(ar) // two insertion ranges
   243  }
   244  
   245  // long range sort function (single goroutine), assumes len(ar) > Mlr
   246  func slongU8(ar []uint64) {
   247  start:
   248  	aq, pv := pivotU8(ar, 3)
   249  	k := partition1U8(aq, pv) // median-of-7 partitioning
   250  
   251  	k += 3 // convert k indice from aq to ar
   252  
   253  	if k < len(ar)-k {
   254  		aq = ar[:k:k]
   255  		ar = ar[k:] // ar is the longer range
   256  	} else {
   257  		aq = ar[k:]
   258  		ar = ar[:k:k]
   259  	}
   260  
   261  	if len(aq) > Mlr { // at least one not-long range?
   262  		slongU8(aq) // recurse on the shorter range
   263  		goto start
   264  	}
   265  
   266  	if len(aq) > Mli {
   267  		shortU8(aq)
   268  	} else {
   269  		insertionU8(aq)
   270  	}
   271  
   272  	if len(ar) > Mlr { // two not-long ranges?
   273  		goto start
   274  	}
   275  	shortU8(ar) // we know len(ar) > Mli
   276  }
   277  
   278  // new-goroutine sort function
   279  func glongU8(ar []uint64, sv *syncVar) {
   280  	longU8(ar, sv)
   281  
   282  	if atomic.AddUint32(&sv.ngr, ^uint32(0)) == 0 { // decrease goroutine counter
   283  		sv.done <- 0 // we are the last, all done
   284  	}
   285  }
   286  
   287  // long range sort function, assumes len(ar) > Mlr
   288  func longU8(ar []uint64, sv *syncVar) {
   289  start:
   290  	aq, pv := pivotU8(ar, 3)
   291  	k := partition1U8(aq, pv) // median-of-7 partitioning
   292  
   293  	k += 3 // convert k indice from aq to ar
   294  
   295  	if k < len(ar)-k {
   296  		aq = ar[:k:k]
   297  		ar = ar[k:] // ar is the longer range
   298  	} else {
   299  		aq = ar[k:]
   300  		ar = ar[:k:k]
   301  	}
   302  
   303  	// branches below are optimal for fewer total jumps
   304  	if len(aq) <= Mlr { // at least one not-long range?
   305  
   306  		if len(aq) > Mli {
   307  			shortU8(aq)
   308  		} else {
   309  			insertionU8(aq)
   310  		}
   311  
   312  		if len(ar) > Mlr { // two not-long ranges?
   313  			goto start
   314  		}
   315  		shortU8(ar) // we know len(ar) > Mli
   316  		return
   317  	}
   318  
   319  	// max goroutines? not atomic but good enough
   320  	if sv.ngr >= Mxg {
   321  		longU8(aq, sv) // recurse on the shorter range
   322  		goto start
   323  	}
   324  
   325  	if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter
   326  		panic("sorty: longU8: counter overflow")
   327  	}
   328  	// new-goroutine sort on the longer range only when
   329  	// both ranges are big and max goroutines is not exceeded
   330  	go glongU8(ar, sv)
   331  	ar = aq
   332  	goto start
   333  }
   334  
   335  // SortU8 concurrently sorts ar in ascending order.
   336  func SortU8(ar []uint64) {
   337  
   338  	if len(ar) < 2*(Mlr+1) || Mxg <= 1 {
   339  
   340  		// single-goroutine sorting
   341  		if len(ar) > Mlr {
   342  			slongU8(ar)
   343  		} else if len(ar) > Mli {
   344  			shortU8(ar)
   345  		} else if len(ar) > 1 {
   346  			insertionU8(ar)
   347  		}
   348  		return
   349  	}
   350  
   351  	// create channel only when concurrent partitioning & sorting
   352  	sv := syncVar{1, // number of goroutines including this
   353  		make(chan int)} // end signal
   354  	for {
   355  		// median-of-9 concurrent dual partitioning with done
   356  		k := cdualparU8(ar, sv.done)
   357  		var aq []uint64
   358  
   359  		if k < len(ar)-k {
   360  			aq = ar[:k:k]
   361  			ar = ar[k:] // ar is the longer range
   362  		} else {
   363  			aq = ar[k:]
   364  			ar = ar[:k:k]
   365  		}
   366  
   367  		// handle shorter range
   368  		if len(aq) > Mlr {
   369  			if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter
   370  				panic("sorty: SortU8: counter overflow")
   371  			}
   372  			go glongU8(aq, &sv)
   373  
   374  		} else if len(aq) > Mli {
   375  			shortU8(aq)
   376  		} else {
   377  			insertionU8(aq)
   378  		}
   379  
   380  		// longer range big enough? max goroutines?
   381  		if len(ar) < 2*(Mlr+1) || sv.ngr >= Mxg {
   382  			break
   383  		}
   384  		// dual partition longer range
   385  	}
   386  
   387  	longU8(ar, &sv) // we know len(ar) > Mlr
   388  
   389  	if atomic.AddUint32(&sv.ngr, ^uint32(0)) != 0 { // decrease goroutine counter
   390  		<-sv.done // we are not the last, wait
   391  	}
   392  }