github.com/jfcg/sorty@v1.2.0/sortyB.go (about) 1 /* Copyright (c) 2021, Serhat Şevki Dinçer. 2 This Source Code Form is subject to the terms of the Mozilla Public 3 License, v. 2.0. If a copy of the MPL was not distributed with this 4 file, You can obtain one at http://mozilla.org/MPL/2.0/. 5 */ 6 7 package sorty 8 9 import ( 10 "sync/atomic" 11 12 "github.com/jfcg/sixb" 13 ) 14 15 // IsSortedB returns 0 if ar is sorted in ascending lexicographical order, 16 // otherwise it returns i > 0 with ar[i] < ar[i-1] 17 func IsSortedB(ar [][]byte) int { 18 for i := len(ar) - 1; i > 0; i-- { 19 if sixb.BtoS(ar[i]) < sixb.BtoS(ar[i-1]) { 20 return i 21 } 22 } 23 return 0 24 } 25 26 // insertion sort, assumes len(ar) >= 2 27 func insertionB(ar [][]byte) { 28 hi := len(ar) - 1 29 for l, h := (hi-3)>>1, hi; l >= 0; { 30 if sixb.BtoS(ar[h]) < sixb.BtoS(ar[l]) { 31 ar[l], ar[h] = ar[h], ar[l] 32 } 33 l-- 34 h-- 35 } 36 for h := 0; ; { 37 l := h 38 h++ 39 x := ar[h] 40 v := sixb.BtoS(x) 41 if v < sixb.BtoS(ar[l]) { 42 for { 43 ar[l+1] = ar[l] 44 l-- 45 if l < 0 || v >= sixb.BtoS(ar[l]) { 46 break 47 } 48 } 49 ar[l+1] = x 50 } 51 if h >= hi { 52 break 53 } 54 } 55 } 56 57 // pivotB divides ar into 2n+1 equal intervals, sorts mid-points of them 58 // to find median-of-2n+1 pivot. ensures lo/hi ranges have at least n elements by 59 // moving 2n of mid-points to n positions at lo/hi ends. 60 // assumes n > 0, len(ar) > 4n+2. returns remaining slice,pivot for partitioning. 61 func pivotB(ar [][]byte, n int) ([][]byte, string) { 62 m := len(ar) >> 1 63 s := len(ar) / (2*n + 1) // step > 1 64 l, h := m-n*s, m+n*s 65 66 for q, k := h, m-2*s; k >= l; { // insertion sort ar[m+i*s], i=-n..n 67 if sixb.BtoS(ar[q]) < sixb.BtoS(ar[k]) { 68 ar[k], ar[q] = ar[q], ar[k] 69 } 70 q -= s 71 k -= s 72 } 73 for q := l; ; { 74 k := q 75 q += s 76 x := ar[q] 77 v := sixb.BtoS(x) 78 if v < sixb.BtoS(ar[k]) { 79 for { 80 ar[k+s] = ar[k] 81 k -= s 82 if k < l || v >= sixb.BtoS(ar[k]) { 83 break 84 } 85 } 86 ar[k+s] = x 87 } 88 if q >= h { 89 break 90 } 91 } 92 93 lo, hi := 0, len(ar) 94 95 // move lo/hi mid-points to lo/hi ends 96 for { 97 hi-- 98 ar[l], ar[lo] = ar[lo], ar[l] 99 ar[h], ar[hi] = ar[hi], ar[h] 100 l += s 101 h -= s 102 lo++ 103 if h <= m { 104 break 105 } 106 } 107 108 return ar[lo:hi:hi], sixb.BtoS(ar[m]) // lo <= m-s+1, m+s-1 < hi 109 } 110 111 // partition ar into <= and >= pivot, assumes len(ar) >= 2 112 // returns k with ar[:k] <= pivot, ar[k:] >= pivot 113 func partition1B(ar [][]byte, pv string) int { 114 l, h := 0, len(ar)-1 115 for { 116 if sixb.BtoS(ar[h]) < pv { // avoid unnecessary comparisons 117 for { 118 if pv < sixb.BtoS(ar[l]) { 119 ar[l], ar[h] = ar[h], ar[l] 120 break 121 } 122 l++ 123 if l >= h { 124 return l + 1 125 } 126 } 127 } else if pv < sixb.BtoS(ar[l]) { // extend ranges in balance 128 for { 129 h-- 130 if l >= h { 131 return l 132 } 133 if sixb.BtoS(ar[h]) < pv { 134 ar[l], ar[h] = ar[h], ar[l] 135 break 136 } 137 } 138 } 139 l++ 140 h-- 141 if l >= h { 142 break 143 } 144 } 145 if l == h && sixb.BtoS(ar[h]) < pv { // classify mid element 146 l++ 147 } 148 return l 149 } 150 151 // rearrange ar[:a] and ar[b:] into <= and >= pivot, assumes 0 < a < b < len(ar) 152 // gap (a,b) expands until one of the intervals is fully consumed 153 func partition2B(ar [][]byte, a, b int, pv string) (int, int) { 154 a-- 155 for { 156 if sixb.BtoS(ar[b]) < pv { // avoid unnecessary comparisons 157 for { 158 if pv < sixb.BtoS(ar[a]) { 159 ar[a], ar[b] = ar[b], ar[a] 160 break 161 } 162 a-- 163 if a < 0 { 164 return a, b 165 } 166 } 167 } else if pv < sixb.BtoS(ar[a]) { // extend ranges in balance 168 for { 169 b++ 170 if b >= len(ar) { 171 return a, b 172 } 173 if sixb.BtoS(ar[b]) < pv { 174 ar[a], ar[b] = ar[b], ar[a] 175 break 176 } 177 } 178 } 179 a-- 180 b++ 181 if a < 0 || b >= len(ar) { 182 return a, b 183 } 184 } 185 } 186 187 // new-goroutine partition 188 func gpart1B(ar [][]byte, pv string, ch chan int) { 189 ch <- partition1B(ar, pv) 190 } 191 192 // concurrent dual partitioning of ar 193 // returns k with ar[:k] <= pivot, ar[k:] >= pivot 194 func cdualparB(ar [][]byte, ch chan int) int { 195 196 aq, pv := pivotB(ar, 4) // median-of-9 197 k := len(aq) >> 1 198 a, b := k>>1, mid(k, len(aq)) 199 200 go gpart1B(aq[a:b:b], pv, ch) // mid half range 201 202 t := a 203 a, b = partition2B(aq, a, b, pv) // left/right quarter ranges 204 k = <-ch 205 k += t // convert k indice to aq 206 207 // only one gap is possible 208 for ; 0 <= a; a-- { // gap left in low range? 209 if pv < sixb.BtoS(aq[a]) { 210 k-- 211 aq[a], aq[k] = aq[k], aq[a] 212 } 213 } 214 for ; b < len(aq); b++ { // gap left in high range? 215 if sixb.BtoS(aq[b]) < pv { 216 aq[b], aq[k] = aq[k], aq[b] 217 k++ 218 } 219 } 220 return k + 4 // convert k indice to ar 221 } 222 223 // short range sort function, assumes Hmli < len(ar) <= Mlr 224 func shortB(ar [][]byte) { 225 start: 226 aq, pv := pivotB(ar, 2) 227 k := partition1B(aq, pv) // median-of-5 partitioning 228 229 k += 2 // convert k indice from aq to ar 230 231 if k < len(ar)-k { 232 aq = ar[:k:k] 233 ar = ar[k:] // ar is the longer range 234 } else { 235 aq = ar[k:] 236 ar = ar[:k:k] 237 } 238 239 if len(aq) > Hmli { 240 shortB(aq) // recurse on the shorter range 241 goto start 242 } 243 insertionB(aq) // at least one insertion range 244 245 if len(ar) > Hmli { 246 goto start 247 } 248 insertionB(ar) // two insertion ranges 249 } 250 251 // long range sort function (single goroutine), assumes len(ar) > Mlr 252 func slongB(ar [][]byte) { 253 start: 254 aq, pv := pivotB(ar, 3) 255 k := partition1B(aq, pv) // median-of-7 partitioning 256 257 k += 3 // convert k indice from aq to ar 258 259 if k < len(ar)-k { 260 aq = ar[:k:k] 261 ar = ar[k:] // ar is the longer range 262 } else { 263 aq = ar[k:] 264 ar = ar[:k:k] 265 } 266 267 if len(aq) > Mlr { // at least one not-long range? 268 slongB(aq) // recurse on the shorter range 269 goto start 270 } 271 272 if len(aq) > Hmli { 273 shortB(aq) 274 } else { 275 insertionB(aq) 276 } 277 278 if len(ar) > Mlr { // two not-long ranges? 279 goto start 280 } 281 shortB(ar) // we know len(ar) > Hmli 282 } 283 284 // new-goroutine sort function 285 func glongB(ar [][]byte, sv *syncVar) { 286 longB(ar, sv) 287 288 if atomic.AddUint32(&sv.ngr, ^uint32(0)) == 0 { // decrease goroutine counter 289 sv.done <- 0 // we are the last, all done 290 } 291 } 292 293 // long range sort function, assumes len(ar) > Mlr 294 func longB(ar [][]byte, sv *syncVar) { 295 start: 296 aq, pv := pivotB(ar, 3) 297 k := partition1B(aq, pv) // median-of-7 partitioning 298 299 k += 3 // convert k indice from aq to ar 300 301 if k < len(ar)-k { 302 aq = ar[:k:k] 303 ar = ar[k:] // ar is the longer range 304 } else { 305 aq = ar[k:] 306 ar = ar[:k:k] 307 } 308 309 // branches below are optimal for fewer total jumps 310 if len(aq) <= Mlr { // at least one not-long range? 311 312 if len(aq) > Hmli { 313 shortB(aq) 314 } else { 315 insertionB(aq) 316 } 317 318 if len(ar) > Mlr { // two not-long ranges? 319 goto start 320 } 321 shortB(ar) // we know len(ar) > Hmli 322 return 323 } 324 325 // max goroutines? not atomic but good enough 326 if sv.ngr >= Mxg { 327 longB(aq, sv) // recurse on the shorter range 328 goto start 329 } 330 331 if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter 332 panic("sorty: longB: counter overflow") 333 } 334 // new-goroutine sort on the longer range only when 335 // both ranges are big and max goroutines is not exceeded 336 go glongB(ar, sv) 337 ar = aq 338 goto start 339 } 340 341 // SortB concurrently sorts ar in ascending lexicographical order. 342 func SortB(ar [][]byte) { 343 344 if len(ar) < 2*(Mlr+1) || Mxg <= 1 { 345 346 // single-goroutine sorting 347 if len(ar) > Mlr { 348 slongB(ar) 349 } else if len(ar) > Hmli { 350 shortB(ar) 351 } else if len(ar) > 1 { 352 insertionB(ar) 353 } 354 return 355 } 356 357 // create channel only when concurrent partitioning & sorting 358 sv := syncVar{1, // number of goroutines including this 359 make(chan int)} // end signal 360 for { 361 // median-of-9 concurrent dual partitioning with done 362 k := cdualparB(ar, sv.done) 363 var aq [][]byte 364 365 if k < len(ar)-k { 366 aq = ar[:k:k] 367 ar = ar[k:] // ar is the longer range 368 } else { 369 aq = ar[k:] 370 ar = ar[:k:k] 371 } 372 373 // handle shorter range 374 if len(aq) > Mlr { 375 if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter 376 panic("sorty: SortB: counter overflow") 377 } 378 go glongB(aq, &sv) 379 380 } else if len(aq) > Hmli { 381 shortB(aq) 382 } else { 383 insertionB(aq) 384 } 385 386 // longer range big enough? max goroutines? 387 if len(ar) < 2*(Mlr+1) || sv.ngr >= Mxg { 388 break 389 } 390 // dual partition longer range 391 } 392 393 longB(ar, &sv) // we know len(ar) > Mlr 394 395 if atomic.AddUint32(&sv.ngr, ^uint32(0)) != 0 { // decrease goroutine counter 396 <-sv.done // we are not the last, wait 397 } 398 }