github.com/jfcg/sorty@v1.2.0/sortyB.go (about)

     1  /*	Copyright (c) 2021, Serhat Şevki Dinçer.
     2  	This Source Code Form is subject to the terms of the Mozilla Public
     3  	License, v. 2.0. If a copy of the MPL was not distributed with this
     4  	file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5  */
     6  
     7  package sorty
     8  
     9  import (
    10  	"sync/atomic"
    11  
    12  	"github.com/jfcg/sixb"
    13  )
    14  
    15  // IsSortedB returns 0 if ar is sorted in ascending lexicographical order,
    16  // otherwise it returns i > 0 with ar[i] < ar[i-1]
    17  func IsSortedB(ar [][]byte) int {
    18  	for i := len(ar) - 1; i > 0; i-- {
    19  		if sixb.BtoS(ar[i]) < sixb.BtoS(ar[i-1]) {
    20  			return i
    21  		}
    22  	}
    23  	return 0
    24  }
    25  
    26  // insertion sort, assumes len(ar) >= 2
    27  func insertionB(ar [][]byte) {
    28  	hi := len(ar) - 1
    29  	for l, h := (hi-3)>>1, hi; l >= 0; {
    30  		if sixb.BtoS(ar[h]) < sixb.BtoS(ar[l]) {
    31  			ar[l], ar[h] = ar[h], ar[l]
    32  		}
    33  		l--
    34  		h--
    35  	}
    36  	for h := 0; ; {
    37  		l := h
    38  		h++
    39  		x := ar[h]
    40  		v := sixb.BtoS(x)
    41  		if v < sixb.BtoS(ar[l]) {
    42  			for {
    43  				ar[l+1] = ar[l]
    44  				l--
    45  				if l < 0 || v >= sixb.BtoS(ar[l]) {
    46  					break
    47  				}
    48  			}
    49  			ar[l+1] = x
    50  		}
    51  		if h >= hi {
    52  			break
    53  		}
    54  	}
    55  }
    56  
    57  // pivotB divides ar into 2n+1 equal intervals, sorts mid-points of them
    58  // to find median-of-2n+1 pivot. ensures lo/hi ranges have at least n elements by
    59  // moving 2n of mid-points to n positions at lo/hi ends.
    60  // assumes n > 0, len(ar) > 4n+2. returns remaining slice,pivot for partitioning.
    61  func pivotB(ar [][]byte, n int) ([][]byte, string) {
    62  	m := len(ar) >> 1
    63  	s := len(ar) / (2*n + 1) // step > 1
    64  	l, h := m-n*s, m+n*s
    65  
    66  	for q, k := h, m-2*s; k >= l; { // insertion sort ar[m+i*s], i=-n..n
    67  		if sixb.BtoS(ar[q]) < sixb.BtoS(ar[k]) {
    68  			ar[k], ar[q] = ar[q], ar[k]
    69  		}
    70  		q -= s
    71  		k -= s
    72  	}
    73  	for q := l; ; {
    74  		k := q
    75  		q += s
    76  		x := ar[q]
    77  		v := sixb.BtoS(x)
    78  		if v < sixb.BtoS(ar[k]) {
    79  			for {
    80  				ar[k+s] = ar[k]
    81  				k -= s
    82  				if k < l || v >= sixb.BtoS(ar[k]) {
    83  					break
    84  				}
    85  			}
    86  			ar[k+s] = x
    87  		}
    88  		if q >= h {
    89  			break
    90  		}
    91  	}
    92  
    93  	lo, hi := 0, len(ar)
    94  
    95  	// move lo/hi mid-points to lo/hi ends
    96  	for {
    97  		hi--
    98  		ar[l], ar[lo] = ar[lo], ar[l]
    99  		ar[h], ar[hi] = ar[hi], ar[h]
   100  		l += s
   101  		h -= s
   102  		lo++
   103  		if h <= m {
   104  			break
   105  		}
   106  	}
   107  
   108  	return ar[lo:hi:hi], sixb.BtoS(ar[m]) // lo <= m-s+1, m+s-1 < hi
   109  }
   110  
   111  // partition ar into <= and >= pivot, assumes len(ar) >= 2
   112  // returns k with ar[:k] <= pivot, ar[k:] >= pivot
   113  func partition1B(ar [][]byte, pv string) int {
   114  	l, h := 0, len(ar)-1
   115  	for {
   116  		if sixb.BtoS(ar[h]) < pv { // avoid unnecessary comparisons
   117  			for {
   118  				if pv < sixb.BtoS(ar[l]) {
   119  					ar[l], ar[h] = ar[h], ar[l]
   120  					break
   121  				}
   122  				l++
   123  				if l >= h {
   124  					return l + 1
   125  				}
   126  			}
   127  		} else if pv < sixb.BtoS(ar[l]) { // extend ranges in balance
   128  			for {
   129  				h--
   130  				if l >= h {
   131  					return l
   132  				}
   133  				if sixb.BtoS(ar[h]) < pv {
   134  					ar[l], ar[h] = ar[h], ar[l]
   135  					break
   136  				}
   137  			}
   138  		}
   139  		l++
   140  		h--
   141  		if l >= h {
   142  			break
   143  		}
   144  	}
   145  	if l == h && sixb.BtoS(ar[h]) < pv { // classify mid element
   146  		l++
   147  	}
   148  	return l
   149  }
   150  
   151  // rearrange ar[:a] and ar[b:] into <= and >= pivot, assumes 0 < a < b < len(ar)
   152  // gap (a,b) expands until one of the intervals is fully consumed
   153  func partition2B(ar [][]byte, a, b int, pv string) (int, int) {
   154  	a--
   155  	for {
   156  		if sixb.BtoS(ar[b]) < pv { // avoid unnecessary comparisons
   157  			for {
   158  				if pv < sixb.BtoS(ar[a]) {
   159  					ar[a], ar[b] = ar[b], ar[a]
   160  					break
   161  				}
   162  				a--
   163  				if a < 0 {
   164  					return a, b
   165  				}
   166  			}
   167  		} else if pv < sixb.BtoS(ar[a]) { // extend ranges in balance
   168  			for {
   169  				b++
   170  				if b >= len(ar) {
   171  					return a, b
   172  				}
   173  				if sixb.BtoS(ar[b]) < pv {
   174  					ar[a], ar[b] = ar[b], ar[a]
   175  					break
   176  				}
   177  			}
   178  		}
   179  		a--
   180  		b++
   181  		if a < 0 || b >= len(ar) {
   182  			return a, b
   183  		}
   184  	}
   185  }
   186  
   187  // new-goroutine partition
   188  func gpart1B(ar [][]byte, pv string, ch chan int) {
   189  	ch <- partition1B(ar, pv)
   190  }
   191  
   192  // concurrent dual partitioning of ar
   193  // returns k with ar[:k] <= pivot, ar[k:] >= pivot
   194  func cdualparB(ar [][]byte, ch chan int) int {
   195  
   196  	aq, pv := pivotB(ar, 4) // median-of-9
   197  	k := len(aq) >> 1
   198  	a, b := k>>1, mid(k, len(aq))
   199  
   200  	go gpart1B(aq[a:b:b], pv, ch) // mid half range
   201  
   202  	t := a
   203  	a, b = partition2B(aq, a, b, pv) // left/right quarter ranges
   204  	k = <-ch
   205  	k += t // convert k indice to aq
   206  
   207  	// only one gap is possible
   208  	for ; 0 <= a; a-- { // gap left in low range?
   209  		if pv < sixb.BtoS(aq[a]) {
   210  			k--
   211  			aq[a], aq[k] = aq[k], aq[a]
   212  		}
   213  	}
   214  	for ; b < len(aq); b++ { // gap left in high range?
   215  		if sixb.BtoS(aq[b]) < pv {
   216  			aq[b], aq[k] = aq[k], aq[b]
   217  			k++
   218  		}
   219  	}
   220  	return k + 4 // convert k indice to ar
   221  }
   222  
   223  // short range sort function, assumes Hmli < len(ar) <= Mlr
   224  func shortB(ar [][]byte) {
   225  start:
   226  	aq, pv := pivotB(ar, 2)
   227  	k := partition1B(aq, pv) // median-of-5 partitioning
   228  
   229  	k += 2 // convert k indice from aq to ar
   230  
   231  	if k < len(ar)-k {
   232  		aq = ar[:k:k]
   233  		ar = ar[k:] // ar is the longer range
   234  	} else {
   235  		aq = ar[k:]
   236  		ar = ar[:k:k]
   237  	}
   238  
   239  	if len(aq) > Hmli {
   240  		shortB(aq) // recurse on the shorter range
   241  		goto start
   242  	}
   243  	insertionB(aq) // at least one insertion range
   244  
   245  	if len(ar) > Hmli {
   246  		goto start
   247  	}
   248  	insertionB(ar) // two insertion ranges
   249  }
   250  
   251  // long range sort function (single goroutine), assumes len(ar) > Mlr
   252  func slongB(ar [][]byte) {
   253  start:
   254  	aq, pv := pivotB(ar, 3)
   255  	k := partition1B(aq, pv) // median-of-7 partitioning
   256  
   257  	k += 3 // convert k indice from aq to ar
   258  
   259  	if k < len(ar)-k {
   260  		aq = ar[:k:k]
   261  		ar = ar[k:] // ar is the longer range
   262  	} else {
   263  		aq = ar[k:]
   264  		ar = ar[:k:k]
   265  	}
   266  
   267  	if len(aq) > Mlr { // at least one not-long range?
   268  		slongB(aq) // recurse on the shorter range
   269  		goto start
   270  	}
   271  
   272  	if len(aq) > Hmli {
   273  		shortB(aq)
   274  	} else {
   275  		insertionB(aq)
   276  	}
   277  
   278  	if len(ar) > Mlr { // two not-long ranges?
   279  		goto start
   280  	}
   281  	shortB(ar) // we know len(ar) > Hmli
   282  }
   283  
   284  // new-goroutine sort function
   285  func glongB(ar [][]byte, sv *syncVar) {
   286  	longB(ar, sv)
   287  
   288  	if atomic.AddUint32(&sv.ngr, ^uint32(0)) == 0 { // decrease goroutine counter
   289  		sv.done <- 0 // we are the last, all done
   290  	}
   291  }
   292  
   293  // long range sort function, assumes len(ar) > Mlr
   294  func longB(ar [][]byte, sv *syncVar) {
   295  start:
   296  	aq, pv := pivotB(ar, 3)
   297  	k := partition1B(aq, pv) // median-of-7 partitioning
   298  
   299  	k += 3 // convert k indice from aq to ar
   300  
   301  	if k < len(ar)-k {
   302  		aq = ar[:k:k]
   303  		ar = ar[k:] // ar is the longer range
   304  	} else {
   305  		aq = ar[k:]
   306  		ar = ar[:k:k]
   307  	}
   308  
   309  	// branches below are optimal for fewer total jumps
   310  	if len(aq) <= Mlr { // at least one not-long range?
   311  
   312  		if len(aq) > Hmli {
   313  			shortB(aq)
   314  		} else {
   315  			insertionB(aq)
   316  		}
   317  
   318  		if len(ar) > Mlr { // two not-long ranges?
   319  			goto start
   320  		}
   321  		shortB(ar) // we know len(ar) > Hmli
   322  		return
   323  	}
   324  
   325  	// max goroutines? not atomic but good enough
   326  	if sv.ngr >= Mxg {
   327  		longB(aq, sv) // recurse on the shorter range
   328  		goto start
   329  	}
   330  
   331  	if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter
   332  		panic("sorty: longB: counter overflow")
   333  	}
   334  	// new-goroutine sort on the longer range only when
   335  	// both ranges are big and max goroutines is not exceeded
   336  	go glongB(ar, sv)
   337  	ar = aq
   338  	goto start
   339  }
   340  
   341  // SortB concurrently sorts ar in ascending lexicographical order.
   342  func SortB(ar [][]byte) {
   343  
   344  	if len(ar) < 2*(Mlr+1) || Mxg <= 1 {
   345  
   346  		// single-goroutine sorting
   347  		if len(ar) > Mlr {
   348  			slongB(ar)
   349  		} else if len(ar) > Hmli {
   350  			shortB(ar)
   351  		} else if len(ar) > 1 {
   352  			insertionB(ar)
   353  		}
   354  		return
   355  	}
   356  
   357  	// create channel only when concurrent partitioning & sorting
   358  	sv := syncVar{1, // number of goroutines including this
   359  		make(chan int)} // end signal
   360  	for {
   361  		// median-of-9 concurrent dual partitioning with done
   362  		k := cdualparB(ar, sv.done)
   363  		var aq [][]byte
   364  
   365  		if k < len(ar)-k {
   366  			aq = ar[:k:k]
   367  			ar = ar[k:] // ar is the longer range
   368  		} else {
   369  			aq = ar[k:]
   370  			ar = ar[:k:k]
   371  		}
   372  
   373  		// handle shorter range
   374  		if len(aq) > Mlr {
   375  			if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter
   376  				panic("sorty: SortB: counter overflow")
   377  			}
   378  			go glongB(aq, &sv)
   379  
   380  		} else if len(aq) > Hmli {
   381  			shortB(aq)
   382  		} else {
   383  			insertionB(aq)
   384  		}
   385  
   386  		// longer range big enough? max goroutines?
   387  		if len(ar) < 2*(Mlr+1) || sv.ngr >= Mxg {
   388  			break
   389  		}
   390  		// dual partition longer range
   391  	}
   392  
   393  	longB(ar, &sv) // we know len(ar) > Mlr
   394  
   395  	if atomic.AddUint32(&sv.ngr, ^uint32(0)) != 0 { // decrease goroutine counter
   396  		<-sv.done // we are not the last, wait
   397  	}
   398  }