github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/roaring/parallel.go

package roaring

import (
	"container/heap"
	"fmt"
	"runtime"
	"sync"
)

var defaultWorkerCount = runtime.NumCPU()

type bitmapContainerKey struct {
	key    uint16
	idx    int
	bitmap *Bitmap
}

type multipleContainers struct {
	key        uint16
	containers []container
	idx        int
}

type keyedContainer struct {
	key       uint16
	container container
	idx       int
}

type bitmapContainerHeap []bitmapContainerKey

func (h bitmapContainerHeap) Len() int           { return len(h) }
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
func (h bitmapContainerHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *bitmapContainerHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	*h = append(*h, x.(bitmapContainerKey))
}

func (h *bitmapContainerHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}

func (h bitmapContainerHeap) Peek() bitmapContainerKey {
	return h[0]
}

func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
	k := h.Peek()
	key = k.key
	container = k.bitmap.highlowcontainer.containers[k.idx]

	newIdx := k.idx + 1
	if newIdx < k.bitmap.highlowcontainer.size() {
		k = bitmapContainerKey{
			k.bitmap.highlowcontainer.keys[newIdx],
			newIdx,
			k.bitmap,
		}
		(*h)[0] = k
		heap.Fix(h, 0)
	} else {
		heap.Pop(h)
	}

	return
}

func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
	if h.Len() == 0 {
		return multipleContainers{}
	}

	key, container := h.popIncrementing()
	containers = append(containers, container)

	for h.Len() > 0 && key == h.Peek().key {
		_, container = h.popIncrementing()
		containers = append(containers, container)
	}

	return multipleContainers{
		key,
		containers,
		-1,
	}
}

func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
	// Initialize heap
	var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
	for _, bitmap := range bitmaps {
		if !bitmap.IsEmpty() {
			key := bitmapContainerKey{
				bitmap.highlowcontainer.keys[0],
				0,
				bitmap,
			}
			h = append(h, key)
		}
	}

	heap.Init(&h)

	return h
}

func repairAfterLazy(c container) container {
	switch t := c.(type) {
	case *bitmapContainer:
		if t.cardinality == invalidCardinality {
			t.computeCardinality()
		}

		if t.getCardinality() <= arrayDefaultMaxSize {
			return t.toArrayContainer()
		} else if c.(*bitmapContainer).isFull() {
			return newRunContainer16Range(0, MaxUint16)
		}
	}

	return c
}

func toBitmapContainer(c container) container {
	switch t := c.(type) {
	case *arrayContainer:
		return t.toBitmapContainer()
	case *runContainer16:
		if !t.isFull() {
			return t.toBitmapContainer()
		}
	}
	return c
}
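
// keyGroupSizes is an illustrative sketch and not part of the original file:
// it shows how the container heap above performs a k-way walk over all input
// bitmaps in key order, grouping the containers that share the same high
// 16-bit key. ParHeapOr below drives its workers with exactly this pattern.
func keyGroupSizes(bitmaps ...*Bitmap) map[uint16]int {
	sizes := make(map[uint16]int)
	h := newBitmapContainerHeap(bitmaps...)
	for h.Len() > 0 {
		// Next pops every container whose key equals the current minimum key.
		group := h.Next(make([]container, 0, len(bitmaps)))
		sizes[group.key] = len(group.containers)
	}
	return sizes
}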

// appenderRoutine collects keyed containers from resultChan and, once it has
// received the expected number of results, assembles them in index order into
// the final bitmap that is sent on bitmapChan.
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
	expectedKeys := -1
	appendedKeys := 0
	var keys []uint16
	var containers []container
	for appendedKeys != expectedKeys {
		select {
		case item := <-resultChan:
			if len(keys) <= item.idx {
				keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
				containers = append(containers, make([]container, item.idx-len(containers)+1)...)
			}
			keys[item.idx] = item.key
			containers[item.idx] = item.container

			appendedKeys++
		case msg := <-expectedKeysChan:
			expectedKeys = msg
		}
	}
	answer := &Bitmap{
		roaringArray{
			make([]uint16, 0, expectedKeys),
			make([]container, 0, expectedKeys),
			make([]bool, 0, expectedKeys),
			false,
		},
	}
	for i := range keys {
		if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
			answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
		}
	}

	bitmapChan <- answer
}

// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen).
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr.
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
	bitmapCount := len(bitmaps)
	if bitmapCount == 0 {
		return NewBitmap()
	} else if bitmapCount == 1 {
		return bitmaps[0].Clone()
	}

	if parallelism == 0 {
		parallelism = defaultWorkerCount
	}

	h := newBitmapContainerHeap(bitmaps...)

	bitmapChan := make(chan *Bitmap)
	inputChan := make(chan multipleContainers, 128)
	resultChan := make(chan keyedContainer, 32)
	expectedKeysChan := make(chan int)

	pool := sync.Pool{
		New: func() interface{} {
			return make([]container, 0, len(bitmaps))
		},
	}

	orFunc := func() {
		// Assumes only structs with >=2 containers are passed
		for input := range inputChan {
			c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
			for _, next := range input.containers[2:] {
				c = c.lazyIOR(next)
			}
			c = repairAfterLazy(c)
			kx := keyedContainer{
				input.key,
				c,
				input.idx,
			}
			resultChan <- kx
			pool.Put(input.containers[:0])
		}
	}

	go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)

	for i := 0; i < parallelism; i++ {
		go orFunc()
	}

	idx := 0
	for h.Len() > 0 {
		ck := h.Next(pool.Get().([]container))
		if len(ck.containers) == 1 {
			resultChan <- keyedContainer{
				ck.key,
				ck.containers[0],
				idx,
			}
			pool.Put(ck.containers[:0])
		} else {
			ck.idx = idx
			inputChan <- ck
		}
		idx++
	}
	expectedKeysChan <- idx

	bitmap := <-bitmapChan

	close(inputChan)
	close(resultChan)
	close(expectedKeysChan)

	return bitmap
}
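
// exampleParHeapOrUsage is an illustrative sketch and not part of the original
// file; in the upstream RoaringBitmap project such usage examples live in
// *_test.go files. It assumes this fork keeps the upstream BitmapOf
// constructor and GetCardinality accessor.
func exampleParHeapOrUsage() uint64 {
	a := BitmapOf(1, 2, 3, 1000000)
	b := BitmapOf(3, 4, 5)
	c := BitmapOf(5, 6, 7)
	// A parallelism of 0 selects defaultWorkerCount, i.e. runtime.NumCPU().
	union := ParHeapOr(0, a, b, c)
	return union.GetCardinality() // 8 distinct values
}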

// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen).
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
	bitmapCount := len(bitmaps)
	if bitmapCount == 0 {
		return NewBitmap()
	} else if bitmapCount == 1 {
		return bitmaps[0].Clone()
	}

	if parallelism == 0 {
		parallelism = defaultWorkerCount
	}

	h := newBitmapContainerHeap(bitmaps...)

	bitmapChan := make(chan *Bitmap)
	inputChan := make(chan multipleContainers, 128)
	resultChan := make(chan keyedContainer, 32)
	expectedKeysChan := make(chan int)

	andFunc := func() {
		// Assumes only structs with >=2 containers are passed
		for input := range inputChan {
			c := input.containers[0].and(input.containers[1])
			for _, next := range input.containers[2:] {
				if c.isEmpty() {
					break
				}
				c = c.iand(next)
			}

			// Send a nil explicitly if the result of the intersection is an empty container
			if c.isEmpty() {
				c = nil
			}

			kx := keyedContainer{
				input.key,
				c,
				input.idx,
			}
			resultChan <- kx
		}
	}

	go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)

	for i := 0; i < parallelism; i++ {
		go andFunc()
	}

	idx := 0
	for h.Len() > 0 {
		ck := h.Next(make([]container, 0, 4))
		if len(ck.containers) == bitmapCount {
			ck.idx = idx
			inputChan <- ck
			idx++
		}
	}
	expectedKeysChan <- idx

	bitmap := <-bitmapChan

	close(inputChan)
	close(resultChan)
	close(expectedKeysChan)

	return bitmap
}
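
// exampleParAndUsage is an illustrative sketch and not part of the original
// file. ParAnd only dispatches a key when it appears in every input bitmap,
// and appenderRoutine drops the nil containers produced when an intersection
// at a shared key turns out empty. Assumes the upstream BitmapOf constructor
// and GetCardinality accessor are available in this fork.
func exampleParAndUsage() uint64 {
	a := BitmapOf(1, 2, 3, 100, 1<<20)
	b := BitmapOf(2, 3, 4, 1<<20)
	inter := ParAnd(0, a, b) // {2, 3, 1 << 20}
	return inter.GetCardinality()
}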

// ParOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen).
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
	var lKey uint16 = MaxUint16
	var hKey uint16

	bitmapsFiltered := bitmaps[:0]
	for _, b := range bitmaps {
		if !b.IsEmpty() {
			bitmapsFiltered = append(bitmapsFiltered, b)
		}
	}
	bitmaps = bitmapsFiltered

	for _, b := range bitmaps {
		lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
		hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
	}

	if lKey == MaxUint16 && hKey == 0 {
		return New()
	} else if len(bitmaps) == 1 {
		return bitmaps[0].Clone()
	}

	keyRange := int(hKey) - int(lKey) + 1
	if keyRange == 1 {
		// revert to FastOr. Since there is only a single key,
		// no container-level aggregation parallelism is achievable.
		return FastOr(bitmaps...)
	}

	if parallelism == 0 {
		parallelism = defaultWorkerCount
	}

	var chunkSize int
	var chunkCount int
	if parallelism*4 > int(keyRange) {
		chunkSize = 1
		chunkCount = int(keyRange)
	} else {
		chunkCount = parallelism * 4
		chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
	}

	if chunkCount*chunkSize < int(keyRange) {
		// it's fine to panic to indicate an implementation error
		panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
	}

	chunks := make([]*roaringArray, chunkCount)

	chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
	chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))

	orFunc := func() {
		for spec := range chunkSpecChan {
			ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
			for _, b := range bitmaps[2:] {
				ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
			}

			for i, c := range ra.containers {
				ra.containers[i] = repairAfterLazy(c)
			}

			chunkChan <- parChunk{ra, spec.idx}
		}
	}

	for i := 0; i < parallelism; i++ {
		go orFunc()
	}

	go func() {
		for i := 0; i < chunkCount; i++ {
			spec := parChunkSpec{
				start: uint16(int(lKey) + i*chunkSize),
				end:   uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
				idx:   int(i),
			}
			chunkSpecChan <- spec
		}
	}()

	chunksRemaining := chunkCount
	for chunk := range chunkChan {
		chunks[chunk.idx] = chunk.ra
		chunksRemaining--
		if chunksRemaining == 0 {
			break
		}
	}
	close(chunkChan)
	close(chunkSpecChan)

	containerCount := 0
	for _, chunk := range chunks {
		containerCount += chunk.size()
	}

	result := Bitmap{
		roaringArray{
			containers:      make([]container, containerCount),
			keys:            make([]uint16, containerCount),
			needCopyOnWrite: make([]bool, containerCount),
		},
	}

	resultOffset := 0
	for _, chunk := range chunks {
		copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
		copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
		copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
		resultOffset += chunk.size()
	}

	return &result
}

type parChunkSpec struct {
	start uint16
	end   uint16
	idx   int
}

type parChunk struct {
	ra  *roaringArray
	idx int
}

func (c parChunk) size() int {
	return c.ra.size()
}

// parNaiveStartAt returns the index of the first key in ra that lies inside
// [start, last], or ra.size() if no key falls in that range.
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
	for idx, key := range ra.keys {
		if key >= start && key <= last {
			return idx
		} else if key > last {
			break
		}
	}
	return ra.size()
}
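
// lazyOrOnRange merges into a new roaringArray the containers of ra1 and ra2
// whose keys fall within [start, last], applying a lazy OR where a key is
// present in both; lazyIOrOnRange below does the same in place on ra1. ParOr
// invokes them once per chunk of the key space. Worked example (illustrative,
// not part of the original file): with ra1 keys {2, 5, 9}, ra2 keys {5, 7, 12}
// and range [4, 10], the result holds key 5 (lazy OR of both containers),
// key 7 (copied from ra2) and key 9 (copied from ra1).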
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
	answer := newRoaringArray()
	length1 := ra1.size()
	length2 := ra2.size()

	idx1 := parNaiveStartAt(ra1, start, last)
	idx2 := parNaiveStartAt(ra2, start, last)

	var key1 uint16
	var key2 uint16
	if idx1 < length1 && idx2 < length2 {
		key1 = ra1.getKeyAtIndex(idx1)
		key2 = ra2.getKeyAtIndex(idx2)

		for key1 <= last && key2 <= last {
			if key1 < key2 {
				answer.appendCopy(*ra1, idx1)
				idx1++
				if idx1 == length1 {
					break
				}
				key1 = ra1.getKeyAtIndex(idx1)
			} else if key1 > key2 {
				answer.appendCopy(*ra2, idx2)
				idx2++
				if idx2 == length2 {
					break
				}
				key2 = ra2.getKeyAtIndex(idx2)
			} else {
				c1 := ra1.getFastContainerAtIndex(idx1, false)

				answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
				idx1++
				idx2++
				if idx1 == length1 || idx2 == length2 {
					break
				}

				key1 = ra1.getKeyAtIndex(idx1)
				key2 = ra2.getKeyAtIndex(idx2)
			}
		}
	}

	if idx2 < length2 {
		key2 = ra2.getKeyAtIndex(idx2)
		for key2 <= last {
			answer.appendCopy(*ra2, idx2)
			idx2++
			if idx2 == length2 {
				break
			}
			key2 = ra2.getKeyAtIndex(idx2)
		}
	}

	if idx1 < length1 {
		key1 = ra1.getKeyAtIndex(idx1)
		for key1 <= last {
			answer.appendCopy(*ra1, idx1)
			idx1++
			if idx1 == length1 {
				break
			}
			key1 = ra1.getKeyAtIndex(idx1)
		}
	}
	return answer
}

func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
	length1 := ra1.size()
	length2 := ra2.size()

	idx1 := 0
	idx2 := parNaiveStartAt(ra2, start, last)

	var key1 uint16
	var key2 uint16
	if idx1 < length1 && idx2 < length2 {
		key1 = ra1.getKeyAtIndex(idx1)
		key2 = ra2.getKeyAtIndex(idx2)

		for key1 <= last && key2 <= last {
			if key1 < key2 {
				idx1++
				if idx1 >= length1 {
					break
				}
				key1 = ra1.getKeyAtIndex(idx1)
			} else if key1 > key2 {
				ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
				ra1.needCopyOnWrite[idx1] = true
				idx2++
				idx1++
				length1++
				if idx2 >= length2 {
					break
				}
				key2 = ra2.getKeyAtIndex(idx2)
			} else {
				c1 := ra1.getFastContainerAtIndex(idx1, true)

				ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
				ra1.needCopyOnWrite[idx1] = false
				idx1++
				idx2++
				if idx1 >= length1 || idx2 >= length2 {
					break
				}

				key1 = ra1.getKeyAtIndex(idx1)
				key2 = ra2.getKeyAtIndex(idx2)
			}
		}
	}
	if idx2 < length2 {
		key2 = ra2.getKeyAtIndex(idx2)
		for key2 <= last {
			ra1.appendCopy(*ra2, idx2)
			idx2++
			if idx2 >= length2 {
				break
			}
			key2 = ra2.getKeyAtIndex(idx2)
		}
	}
	return ra1
}
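
// exampleParOrUsage is an illustrative sketch and not part of the original
// file. ParOr splits the key space into chunks instead of using the container
// heap, so it is usually the faster parallel union when the inputs span many
// keys. Assumes the upstream BitmapOf constructor is available in this fork.
func exampleParOrUsage() *Bitmap {
	inputs := make([]*Bitmap, 0, 8)
	for i := uint32(0); i < 8; i++ {
		// Give each input its own high-16-bit key so several chunks have work to do.
		inputs = append(inputs, BitmapOf(i<<16, i<<16+1, i<<16+2))
	}
	return ParOr(0, inputs...) // union of 24 values spread over 8 keys
}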