/* Copyright (c) 2019, Serhat Şevki Dinçer.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

// IsSortedI4 returns 0 if ar is sorted in ascending order,
// otherwise it returns i > 0 with ar[i] < ar[i-1].
// The scan runs from the end of ar towards the start, so the
// returned i is the highest index that is out of order.
func IsSortedI4(ar []int32) int {
	for i := len(ar) - 1; i > 0; i-- {
		if ar[i] < ar[i-1] {
			return i
		}
	}
	return 0
}

// insertionI4 sorts ar in ascending order in place with insertion sort.
// Slices with fewer than two elements are already sorted and are
// returned untouched.
func insertionI4(ar []int32) {
	hi := len(ar) - 1
	if hi < 1 {
		return // nothing to sort, and ar[1] below would be out of range
	}

	// pre-sort pass: compare-and-swap pairs (l, h) that lie about half
	// of the slice apart, so that the insertion loop below has to shift
	// elements over shorter distances
	for l, h := (hi-3)>>1, hi; l >= 0; {
		if ar[h] < ar[l] {
			ar[l], ar[h] = ar[h], ar[l]
		}
		l--
		h--
	}

	// plain insertion sort: ar[:h] is sorted, insert ar[h] into it
	for h := 0; ; {
		l := h
		h++
		v := ar[h]
		if v < ar[l] {
			// shift larger elements one step right to open a slot for v
			for {
				ar[l+1] = ar[l]
				l--
				if l < 0 || v >= ar[l] {
					break
				}
			}
			ar[l+1] = v
		}
		if h >= hi {
			break
		}
	}
}
56 func pivotI4(ar []int32, n int) ([]int32, int32) { 57 m := len(ar) >> 1 58 s := len(ar) / (2*n + 1) // step > 1 59 l, h := m-n*s, m+n*s 60 61 for q, k := h, m-2*s; k >= l; { // insertion sort ar[m+i*s], i=-n..n 62 if ar[q] < ar[k] { 63 ar[k], ar[q] = ar[q], ar[k] 64 } 65 q -= s 66 k -= s 67 } 68 for q := l; ; { 69 k := q 70 q += s 71 v := ar[q] 72 if v < ar[k] { 73 for { 74 ar[k+s] = ar[k] 75 k -= s 76 if k < l || v >= ar[k] { 77 break 78 } 79 } 80 ar[k+s] = v 81 } 82 if q >= h { 83 break 84 } 85 } 86 87 lo, hi := 0, len(ar) 88 89 // move lo/hi mid-points to lo/hi ends 90 for { 91 hi-- 92 ar[l], ar[lo] = ar[lo], ar[l] 93 ar[h], ar[hi] = ar[hi], ar[h] 94 l += s 95 h -= s 96 lo++ 97 if h <= m { 98 break 99 } 100 } 101 102 return ar[lo:hi:hi], ar[m] // lo <= m-s+1, m+s-1 < hi 103 } 104 105 // partition ar into <= and >= pivot, assumes len(ar) >= 2 106 // returns k with ar[:k] <= pivot, ar[k:] >= pivot 107 func partition1I4(ar []int32, pv int32) int { 108 l, h := 0, len(ar)-1 109 for { 110 if ar[h] < pv { // avoid unnecessary comparisons 111 for { 112 if pv < ar[l] { 113 ar[l], ar[h] = ar[h], ar[l] 114 break 115 } 116 l++ 117 if l >= h { 118 return l + 1 119 } 120 } 121 } else if pv < ar[l] { // extend ranges in balance 122 for { 123 h-- 124 if l >= h { 125 return l 126 } 127 if ar[h] < pv { 128 ar[l], ar[h] = ar[h], ar[l] 129 break 130 } 131 } 132 } 133 l++ 134 h-- 135 if l >= h { 136 break 137 } 138 } 139 if l == h && ar[h] < pv { // classify mid element 140 l++ 141 } 142 return l 143 } 144 145 // rearrange ar[:a] and ar[b:] into <= and >= pivot, assumes 0 < a < b < len(ar) 146 // gap (a,b) expands until one of the intervals is fully consumed 147 func partition2I4(ar []int32, a, b int, pv int32) (int, int) { 148 a-- 149 for { 150 if ar[b] < pv { // avoid unnecessary comparisons 151 for { 152 if pv < ar[a] { 153 ar[a], ar[b] = ar[b], ar[a] 154 break 155 } 156 a-- 157 if a < 0 { 158 return a, b 159 } 160 } 161 } else if pv < ar[a] { // extend ranges in balance 162 
for { 163 b++ 164 if b >= len(ar) { 165 return a, b 166 } 167 if ar[b] < pv { 168 ar[a], ar[b] = ar[b], ar[a] 169 break 170 } 171 } 172 } 173 a-- 174 b++ 175 if a < 0 || b >= len(ar) { 176 return a, b 177 } 178 } 179 } 180 181 // new-goroutine partition 182 func gpart1I4(ar []int32, pv int32, ch chan int) { 183 ch <- partition1I4(ar, pv) 184 } 185 186 // concurrent dual partitioning of ar 187 // returns k with ar[:k] <= pivot, ar[k:] >= pivot 188 func cdualparI4(ar []int32, ch chan int) int { 189 190 aq, pv := pivotI4(ar, 4) // median-of-9 191 k := len(aq) >> 1 192 a, b := k>>1, mid(k, len(aq)) 193 194 go gpart1I4(aq[a:b:b], pv, ch) // mid half range 195 196 t := a 197 a, b = partition2I4(aq, a, b, pv) // left/right quarter ranges 198 k = <-ch 199 k += t // convert k indice to aq 200 201 // only one gap is possible 202 for ; 0 <= a; a-- { // gap left in low range? 203 if pv < aq[a] { 204 k-- 205 aq[a], aq[k] = aq[k], aq[a] 206 } 207 } 208 for ; b < len(aq); b++ { // gap left in high range? 
209 if aq[b] < pv { 210 aq[b], aq[k] = aq[k], aq[b] 211 k++ 212 } 213 } 214 return k + 4 // convert k indice to ar 215 } 216 217 // short range sort function, assumes Mli < len(ar) <= Mlr 218 func shortI4(ar []int32) { 219 start: 220 aq, pv := pivotI4(ar, 2) 221 k := partition1I4(aq, pv) // median-of-5 partitioning 222 223 k += 2 // convert k indice from aq to ar 224 225 if k < len(ar)-k { 226 aq = ar[:k:k] 227 ar = ar[k:] // ar is the longer range 228 } else { 229 aq = ar[k:] 230 ar = ar[:k:k] 231 } 232 233 if len(aq) > Mli { 234 shortI4(aq) // recurse on the shorter range 235 goto start 236 } 237 insertionI4(aq) // at least one insertion range 238 239 if len(ar) > Mli { 240 goto start 241 } 242 insertionI4(ar) // two insertion ranges 243 } 244 245 // long range sort function (single goroutine), assumes len(ar) > Mlr 246 func slongI4(ar []int32) { 247 start: 248 aq, pv := pivotI4(ar, 3) 249 k := partition1I4(aq, pv) // median-of-7 partitioning 250 251 k += 3 // convert k indice from aq to ar 252 253 if k < len(ar)-k { 254 aq = ar[:k:k] 255 ar = ar[k:] // ar is the longer range 256 } else { 257 aq = ar[k:] 258 ar = ar[:k:k] 259 } 260 261 if len(aq) > Mlr { // at least one not-long range? 262 slongI4(aq) // recurse on the shorter range 263 goto start 264 } 265 266 if len(aq) > Mli { 267 shortI4(aq) 268 } else { 269 insertionI4(aq) 270 } 271 272 if len(ar) > Mlr { // two not-long ranges? 
273 goto start 274 } 275 shortI4(ar) // we know len(ar) > Mli 276 } 277 278 // new-goroutine sort function 279 func glongI4(ar []int32, sv *syncVar) { 280 longI4(ar, sv) 281 282 if atomic.AddUint32(&sv.ngr, ^uint32(0)) == 0 { // decrease goroutine counter 283 sv.done <- 0 // we are the last, all done 284 } 285 } 286 287 // long range sort function, assumes len(ar) > Mlr 288 func longI4(ar []int32, sv *syncVar) { 289 start: 290 aq, pv := pivotI4(ar, 3) 291 k := partition1I4(aq, pv) // median-of-7 partitioning 292 293 k += 3 // convert k indice from aq to ar 294 295 if k < len(ar)-k { 296 aq = ar[:k:k] 297 ar = ar[k:] // ar is the longer range 298 } else { 299 aq = ar[k:] 300 ar = ar[:k:k] 301 } 302 303 // branches below are optimal for fewer total jumps 304 if len(aq) <= Mlr { // at least one not-long range? 305 306 if len(aq) > Mli { 307 shortI4(aq) 308 } else { 309 insertionI4(aq) 310 } 311 312 if len(ar) > Mlr { // two not-long ranges? 313 goto start 314 } 315 shortI4(ar) // we know len(ar) > Mli 316 return 317 } 318 319 // max goroutines? not atomic but good enough 320 if sv.ngr >= Mxg { 321 longI4(aq, sv) // recurse on the shorter range 322 goto start 323 } 324 325 if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter 326 panic("sorty: longI4: counter overflow") 327 } 328 // new-goroutine sort on the longer range only when 329 // both ranges are big and max goroutines is not exceeded 330 go glongI4(ar, sv) 331 ar = aq 332 goto start 333 } 334 335 // SortI4 concurrently sorts ar in ascending order. 
336 func SortI4(ar []int32) { 337 338 if len(ar) < 2*(Mlr+1) || Mxg <= 1 { 339 340 // single-goroutine sorting 341 if len(ar) > Mlr { 342 slongI4(ar) 343 } else if len(ar) > Mli { 344 shortI4(ar) 345 } else if len(ar) > 1 { 346 insertionI4(ar) 347 } 348 return 349 } 350 351 // create channel only when concurrent partitioning & sorting 352 sv := syncVar{1, // number of goroutines including this 353 make(chan int)} // end signal 354 for { 355 // median-of-9 concurrent dual partitioning with done 356 k := cdualparI4(ar, sv.done) 357 var aq []int32 358 359 if k < len(ar)-k { 360 aq = ar[:k:k] 361 ar = ar[k:] // ar is the longer range 362 } else { 363 aq = ar[k:] 364 ar = ar[:k:k] 365 } 366 367 // handle shorter range 368 if len(aq) > Mlr { 369 if atomic.AddUint32(&sv.ngr, 1) == 0 { // increase goroutine counter 370 panic("sorty: SortI4: counter overflow") 371 } 372 go glongI4(aq, &sv) 373 374 } else if len(aq) > Mli { 375 shortI4(aq) 376 } else { 377 insertionI4(aq) 378 } 379 380 // longer range big enough? max goroutines? 381 if len(ar) < 2*(Mlr+1) || sv.ngr >= Mxg { 382 break 383 } 384 // dual partition longer range 385 } 386 387 longI4(ar, &sv) // we know len(ar) > Mlr 388 389 if atomic.AddUint32(&sv.ngr, ^uint32(0)) != 0 { // decrease goroutine counter 390 <-sv.done // we are not the last, wait 391 } 392 }