github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/roaring/parallel.go (about)

     1  package roaring
     2  
     3  import (
     4  	"container/heap"
     5  	"fmt"
     6  	"runtime"
     7  	"sync"
     8  )
     9  
// defaultWorkerCount is the number of worker goroutines used by the
// parallel aggregation functions when the caller passes parallelism == 0.
var defaultWorkerCount = runtime.NumCPU()

// bitmapContainerKey identifies one container inside a bitmap: the
// container's 16-bit high key, its index within the bitmap's container
// array, and the owning bitmap.
type bitmapContainerKey struct {
	key    uint16
	idx    int
	bitmap *Bitmap
}

// multipleContainers groups all containers that share the same key across
// the input bitmaps, plus the position (idx) the merged result must occupy
// in the final bitmap's container array.
type multipleContainers struct {
	key        uint16
	containers []container
	idx        int
}

// keyedContainer is a single aggregated container together with its key
// and its output position in the final bitmap.
type keyedContainer struct {
	key       uint16
	container container
	idx       int
}

// bitmapContainerHeap is a min-heap of bitmapContainerKey ordered by key,
// used to visit the containers of several bitmaps in ascending key order.
type bitmapContainerHeap []bitmapContainerKey
    31  
// Len, Less and Swap implement the sort.Interface part of heap.Interface;
// the heap is ordered by ascending container key.
func (h bitmapContainerHeap) Len() int           { return len(h) }
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
func (h bitmapContainerHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
    35  
// Push appends x to the heap's backing slice; called by package heap.
func (h *bitmapContainerHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	*h = append(*h, x.(bitmapContainerKey))
}
    41  
    42  func (h *bitmapContainerHeap) Pop() interface{} {
    43  	old := *h
    44  	n := len(old)
    45  	x := old[n-1]
    46  	*h = old[0 : n-1]
    47  	return x
    48  }
    49  
// Peek returns the minimum element (the heap root) without removing it.
// It must not be called on an empty heap.
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
	return h[0]
}
    53  
    54  func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
    55  	k := h.Peek()
    56  	key = k.key
    57  	container = k.bitmap.highlowcontainer.containers[k.idx]
    58  
    59  	newIdx := k.idx + 1
    60  	if newIdx < k.bitmap.highlowcontainer.size() {
    61  		k = bitmapContainerKey{
    62  			k.bitmap.highlowcontainer.keys[newIdx],
    63  			newIdx,
    64  			k.bitmap,
    65  		}
    66  		(*h)[0] = k
    67  		heap.Fix(h, 0)
    68  	} else {
    69  		heap.Pop(h)
    70  	}
    71  
    72  	return
    73  }
    74  
    75  func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
    76  	if h.Len() == 0 {
    77  		return multipleContainers{}
    78  	}
    79  
    80  	key, container := h.popIncrementing()
    81  	containers = append(containers, container)
    82  
    83  	for h.Len() > 0 && key == h.Peek().key {
    84  		_, container = h.popIncrementing()
    85  		containers = append(containers, container)
    86  	}
    87  
    88  	return multipleContainers{
    89  		key,
    90  		containers,
    91  		-1,
    92  	}
    93  }
    94  
    95  func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
    96  	// Initialize heap
    97  	var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
    98  	for _, bitmap := range bitmaps {
    99  		if !bitmap.IsEmpty() {
   100  			key := bitmapContainerKey{
   101  				bitmap.highlowcontainer.keys[0],
   102  				0,
   103  				bitmap,
   104  			}
   105  			h = append(h, key)
   106  		}
   107  	}
   108  
   109  	heap.Init(&h)
   110  
   111  	return h
   112  }
   113  
   114  func repairAfterLazy(c container) container {
   115  	switch t := c.(type) {
   116  	case *bitmapContainer:
   117  		if t.cardinality == invalidCardinality {
   118  			t.computeCardinality()
   119  		}
   120  
   121  		if t.getCardinality() <= arrayDefaultMaxSize {
   122  			return t.toArrayContainer()
   123  		} else if c.(*bitmapContainer).isFull() {
   124  			return newRunContainer16Range(0, MaxUint16)
   125  		}
   126  	}
   127  
   128  	return c
   129  }
   130  
   131  func toBitmapContainer(c container) container {
   132  	switch t := c.(type) {
   133  	case *arrayContainer:
   134  		return t.toBitmapContainer()
   135  	case *runContainer16:
   136  		if !t.isFull() {
   137  			return t.toBitmapContainer()
   138  		}
   139  	}
   140  	return c
   141  }
   142  
   143  func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
   144  	expectedKeys := -1
   145  	appendedKeys := 0
   146  	var keys []uint16
   147  	var containers []container
   148  	for appendedKeys != expectedKeys {
   149  		select {
   150  		case item := <-resultChan:
   151  			if len(keys) <= item.idx {
   152  				keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
   153  				containers = append(containers, make([]container, item.idx-len(containers)+1)...)
   154  			}
   155  			keys[item.idx] = item.key
   156  			containers[item.idx] = item.container
   157  
   158  			appendedKeys++
   159  		case msg := <-expectedKeysChan:
   160  			expectedKeys = msg
   161  		}
   162  	}
   163  	answer := &Bitmap{
   164  		roaringArray{
   165  			make([]uint16, 0, expectedKeys),
   166  			make([]container, 0, expectedKeys),
   167  			make([]bool, 0, expectedKeys),
   168  			false,
   169  		},
   170  	}
   171  	for i := range keys {
   172  		if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
   173  			answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
   174  		}
   175  	}
   176  
   177  	bitmapChan <- answer
   178  }
   179  
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {

	bitmapCount := len(bitmaps)
	if bitmapCount == 0 {
		return NewBitmap()
	} else if bitmapCount == 1 {
		return bitmaps[0].Clone()
	}

	if parallelism == 0 {
		parallelism = defaultWorkerCount
	}

	// The heap yields the containers of all input bitmaps in ascending key order.
	h := newBitmapContainerHeap(bitmaps...)

	bitmapChan := make(chan *Bitmap)
	inputChan := make(chan multipleContainers, 128)
	resultChan := make(chan keyedContainer, 32)
	expectedKeysChan := make(chan int)

	// Pool of scratch container slices handed to h.Next; recycled by the
	// workers and by the dispatch loop (single-container keys).
	pool := sync.Pool{
		New: func() interface{} {
			return make([]container, 0, len(bitmaps))
		},
	}

	orFunc := func() {
		// Assumes only structs with >=2 containers are passed
		for input := range inputChan {
			// Lazy ORs defer cardinality maintenance; repairAfterLazy
			// restores a canonical container at the end.
			c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
			for _, next := range input.containers[2:] {
				c = c.lazyIOR(next)
			}
			c = repairAfterLazy(c)
			kx := keyedContainer{
				input.key,
				c,
				input.idx,
			}
			resultChan <- kx
			pool.Put(input.containers[:0])
		}
	}

	go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)

	for i := 0; i < parallelism; i++ {
		go orFunc()
	}

	// Dispatch loop: keys backed by a single container need no merging and
	// bypass the workers. The running idx fixes each key's position in the
	// final bitmap regardless of worker completion order.
	idx := 0
	for h.Len() > 0 {
		ck := h.Next(pool.Get().([]container))
		if len(ck.containers) == 1 {
			resultChan <- keyedContainer{
				ck.key,
				ck.containers[0],
				idx,
			}
			pool.Put(ck.containers[:0])
		} else {
			ck.idx = idx
			inputChan <- ck
		}
		idx++
	}
	// Tell the appender how many results to wait for in total.
	expectedKeysChan <- idx

	bitmap := <-bitmapChan

	// All results have been consumed by now; closing the channels releases
	// the worker goroutines blocked on inputChan.
	close(inputChan)
	close(resultChan)
	close(expectedKeysChan)

	return bitmap
}
   260  
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
	bitmapCount := len(bitmaps)
	if bitmapCount == 0 {
		return NewBitmap()
	} else if bitmapCount == 1 {
		return bitmaps[0].Clone()
	}

	if parallelism == 0 {
		parallelism = defaultWorkerCount
	}

	// The heap yields the containers of all input bitmaps in ascending key order.
	h := newBitmapContainerHeap(bitmaps...)

	bitmapChan := make(chan *Bitmap)
	inputChan := make(chan multipleContainers, 128)
	resultChan := make(chan keyedContainer, 32)
	expectedKeysChan := make(chan int)

	andFunc := func() {
		// Assumes only structs with >=2 containers are passed
		for input := range inputChan {
			c := input.containers[0].and(input.containers[1])
			for _, next := range input.containers[2:] {
				// The intersection can only shrink; stop as soon as it is empty.
				if c.isEmpty() {
					break
				}
				c = c.iand(next)
			}

			// Send a nil explicitly if the result of the intersection is an empty container
			if c.isEmpty() {
				c = nil
			}

			kx := keyedContainer{
				input.key,
				c,
				input.idx,
			}
			resultChan <- kx
		}
	}

	go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)

	for i := 0; i < parallelism; i++ {
		go andFunc()
	}

	// Only keys present in every input bitmap can survive the intersection,
	// so anything with fewer than bitmapCount containers is dropped here.
	idx := 0
	for h.Len() > 0 {
		ck := h.Next(make([]container, 0, 4))
		if len(ck.containers) == bitmapCount {
			ck.idx = idx
			inputChan <- ck
			idx++
		}
	}
	expectedKeysChan <- idx

	bitmap := <-bitmapChan

	// All results have been consumed by now; closing the channels releases
	// the worker goroutines blocked on inputChan.
	close(inputChan)
	close(resultChan)
	close(expectedKeysChan)

	return bitmap
}
   333  
   334  // ParOr computes the union (OR) of all provided bitmaps in parallel,
   335  // where the parameter "parallelism" determines how many workers are to be used
   336  // (if it is set to 0, a default number of workers is chosen)
   337  func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
   338  	var lKey uint16 = MaxUint16
   339  	var hKey uint16
   340  
   341  	bitmapsFiltered := bitmaps[:0]
   342  	for _, b := range bitmaps {
   343  		if !b.IsEmpty() {
   344  			bitmapsFiltered = append(bitmapsFiltered, b)
   345  		}
   346  	}
   347  	bitmaps = bitmapsFiltered
   348  
   349  	for _, b := range bitmaps {
   350  		lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
   351  		hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
   352  	}
   353  
   354  	if lKey == MaxUint16 && hKey == 0 {
   355  		return New()
   356  	} else if len(bitmaps) == 1 {
   357  		return bitmaps[0].Clone()
   358  	}
   359  
   360  	keyRange := int(hKey) - int(lKey) + 1
   361  	if keyRange == 1 {
   362  		// revert to FastOr. Since the key range is 0
   363  		// no container-level aggregation parallelism is achievable
   364  		return FastOr(bitmaps...)
   365  	}
   366  
   367  	if parallelism == 0 {
   368  		parallelism = defaultWorkerCount
   369  	}
   370  
   371  	var chunkSize int
   372  	var chunkCount int
   373  	if parallelism*4 > int(keyRange) {
   374  		chunkSize = 1
   375  		chunkCount = int(keyRange)
   376  	} else {
   377  		chunkCount = parallelism * 4
   378  		chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
   379  	}
   380  
   381  	if chunkCount*chunkSize < int(keyRange) {
   382  		// it's fine to panic to indicate an implementation error
   383  		panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
   384  	}
   385  
   386  	chunks := make([]*roaringArray, chunkCount)
   387  
   388  	chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
   389  	chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
   390  
   391  	orFunc := func() {
   392  		for spec := range chunkSpecChan {
   393  			ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
   394  			for _, b := range bitmaps[2:] {
   395  				ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
   396  			}
   397  
   398  			for i, c := range ra.containers {
   399  				ra.containers[i] = repairAfterLazy(c)
   400  			}
   401  
   402  			chunkChan <- parChunk{ra, spec.idx}
   403  		}
   404  	}
   405  
   406  	for i := 0; i < parallelism; i++ {
   407  		go orFunc()
   408  	}
   409  
   410  	go func() {
   411  		for i := 0; i < chunkCount; i++ {
   412  			spec := parChunkSpec{
   413  				start: uint16(int(lKey) + i*chunkSize),
   414  				end:   uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
   415  				idx:   int(i),
   416  			}
   417  			chunkSpecChan <- spec
   418  		}
   419  	}()
   420  
   421  	chunksRemaining := chunkCount
   422  	for chunk := range chunkChan {
   423  		chunks[chunk.idx] = chunk.ra
   424  		chunksRemaining--
   425  		if chunksRemaining == 0 {
   426  			break
   427  		}
   428  	}
   429  	close(chunkChan)
   430  	close(chunkSpecChan)
   431  
   432  	containerCount := 0
   433  	for _, chunk := range chunks {
   434  		containerCount += chunk.size()
   435  	}
   436  
   437  	result := Bitmap{
   438  		roaringArray{
   439  			containers:      make([]container, containerCount),
   440  			keys:            make([]uint16, containerCount),
   441  			needCopyOnWrite: make([]bool, containerCount),
   442  		},
   443  	}
   444  
   445  	resultOffset := 0
   446  	for _, chunk := range chunks {
   447  		copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
   448  		copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
   449  		copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
   450  		resultOffset += chunk.size()
   451  	}
   452  
   453  	return &result
   454  }
   455  
// parChunkSpec describes one unit of work for ParOr: the inclusive key
// range [start, end] to union, and the chunk's position in the output.
type parChunkSpec struct {
	start uint16
	end   uint16
	idx   int
}

// parChunk is the result of processing one parChunkSpec: the merged
// containers for the chunk's key range plus its output position.
type parChunk struct {
	ra  *roaringArray
	idx int
}

// size reports the number of containers held by the chunk.
func (c parChunk) size() int {
	return c.ra.size()
}
   470  
   471  func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
   472  	for idx, key := range ra.keys {
   473  		if key >= start && key <= last {
   474  			return idx
   475  		} else if key > last {
   476  			break
   477  		}
   478  	}
   479  	return ra.size()
   480  }
   481  
// lazyOrOnRange computes the union of the containers of ra1 and ra2 whose
// keys lie in [start, last], using lazy ORs (cardinalities may be left
// invalid; the caller is expected to run repairAfterLazy on the result's
// containers). The result is a freshly allocated roaringArray; containers
// present in only one input are appended via appendCopy.
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
	answer := newRoaringArray()
	length1 := ra1.size()
	length2 := ra2.size()

	// Advance each input to its first key within the requested range.
	idx1 := parNaiveStartAt(ra1, start, last)
	idx2 := parNaiveStartAt(ra2, start, last)

	var key1 uint16
	var key2 uint16
	if idx1 < length1 && idx2 < length2 {
		key1 = ra1.getKeyAtIndex(idx1)
		key2 = ra2.getKeyAtIndex(idx2)

		// Two-way merge over the sorted key sequences, stopping once
		// either side leaves the range or is exhausted.
		for key1 <= last && key2 <= last {

			if key1 < key2 {
				answer.appendCopy(*ra1, idx1)
				idx1++
				if idx1 == length1 {
					break
				}
				key1 = ra1.getKeyAtIndex(idx1)
			} else if key1 > key2 {
				answer.appendCopy(*ra2, idx2)
				idx2++
				if idx2 == length2 {
					break
				}
				key2 = ra2.getKeyAtIndex(idx2)
			} else {
				// Same key on both sides: lazily OR the two containers.
				c1 := ra1.getFastContainerAtIndex(idx1, false)

				answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
				idx1++
				idx2++
				if idx1 == length1 || idx2 == length2 {
					break
				}

				key1 = ra1.getKeyAtIndex(idx1)
				key2 = ra2.getKeyAtIndex(idx2)
			}
		}
	}

	// Drain whatever in-range keys remain in either input. At most one of
	// these loops does work after the merge above.
	if idx2 < length2 {
		key2 = ra2.getKeyAtIndex(idx2)
		for key2 <= last {
			answer.appendCopy(*ra2, idx2)
			idx2++
			if idx2 == length2 {
				break
			}
			key2 = ra2.getKeyAtIndex(idx2)
		}
	}

	if idx1 < length1 {
		key1 = ra1.getKeyAtIndex(idx1)
		for key1 <= last {
			answer.appendCopy(*ra1, idx1)
			idx1++
			if idx1 == length1 {
				break
			}
			key1 = ra1.getKeyAtIndex(idx1)
		}
	}
	return answer
}
   553  
// lazyIOrOnRange merges ra2's containers with keys in [start, last] into
// ra1 in place, using lazy ORs (the caller must repair the result's
// containers afterwards). ra1's scan starts at index 0 — it is presumably
// already restricted to the range by a prior lazyOrOnRange call (TODO
// confirm against ParOr) — while ra2 is skipped forward to the range.
// Containers taken from ra2 are shared and marked copy-on-write rather
// than deep-copied. Returns ra1.
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
	length1 := ra1.size()
	length2 := ra2.size()

	idx1 := 0
	idx2 := parNaiveStartAt(ra2, start, last)

	var key1 uint16
	var key2 uint16
	if idx1 < length1 && idx2 < length2 {
		key1 = ra1.getKeyAtIndex(idx1)
		key2 = ra2.getKeyAtIndex(idx2)

		// Two-way merge; ra1 grows in place when ra2 has keys ra1 lacks,
		// so length1 is bumped alongside each insertion.
		for key1 <= last && key2 <= last {
			if key1 < key2 {
				idx1++
				if idx1 >= length1 {
					break
				}
				key1 = ra1.getKeyAtIndex(idx1)
			} else if key1 > key2 {
				// Key only in ra2: splice its container into ra1, shared
				// via copy-on-write instead of copied.
				ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
				ra1.needCopyOnWrite[idx1] = true
				idx2++
				idx1++
				length1++
				if idx2 >= length2 {
					break
				}
				key2 = ra2.getKeyAtIndex(idx2)
			} else {
				// Key in both: lazily OR ra2's container into ra1's.
				// getFastContainerAtIndex(_, true) presumably yields a
				// container safe to mutate — confirm in roaringArray.
				c1 := ra1.getFastContainerAtIndex(idx1, true)

				ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
				ra1.needCopyOnWrite[idx1] = false
				idx1++
				idx2++
				if idx1 >= length1 || idx2 >= length2 {
					break
				}

				key1 = ra1.getKeyAtIndex(idx1)
				key2 = ra2.getKeyAtIndex(idx2)
			}
		}
	}
	// Append any remaining in-range ra2 keys past the end of ra1.
	if idx2 < length2 {
		key2 = ra2.getKeyAtIndex(idx2)
		for key2 <= last {
			ra1.appendCopy(*ra2, idx2)
			idx2++
			if idx2 >= length2 {
				break
			}
			key2 = ra2.getKeyAtIndex(idx2)
		}
	}
	return ra1
}