github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/helpers.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package roaringset
    13  
    14  import (
    15  	"runtime"
    16  	"sync"
    17  
    18  	"github.com/sirupsen/logrus"
    19  	enterrors "github.com/weaviate/weaviate/entities/errors"
    20  
    21  	"github.com/weaviate/sroar"
    22  )
    23  
    24  var (
    25  	prefillBufferSize  = 65_536
    26  	prefillMaxRoutines = 4
    27  	_NUMCPU            = runtime.NumCPU()
    28  )
    29  
    30  func NewBitmap(values ...uint64) *sroar.Bitmap {
    31  	bm := sroar.NewBitmap()
    32  	bm.SetMany(values)
    33  	return bm
    34  }
    35  
    36  // Operations on bitmaps may result in oversized instances in relation to
    37  // number of elements currently contained in bitmap
    38  // Examples of such operations:
    39  // - And-ing bitmaps may results in size being sum of both sizes
    40  // (especially and-ing bitmap with itself)
    41  // - Removing elements from bitmap results in size not being reduced
    42  // (even if there is only few or no elements left)
    43  //
    44  // Method should be used before saving bitmap to file, to ensure
    45  // minimal required size
    46  //
    47  // For most cases Or between empty bitmap and used bitmap
    48  // works pretty well for reducing its final size, except for use case,
    49  // where used bitmap uses internally bitmap - it will not be converted
    50  // to underlying array, even if there are single elements left
    51  func Condense(bm *sroar.Bitmap) *sroar.Bitmap {
    52  	condensed := sroar.NewBitmap()
    53  	condensed.Or(bm)
    54  	return condensed
    55  }
    56  
    57  // NewInvertedBitmap creates a bitmap that as all IDs filled from 0 to maxVal.
    58  // Then the source bitmap is subtracted (AndNot) from the all-ids bitmap,
    59  // resulting in a bitmap containing all ids from 0 to maxVal except the ones
    60  // that were set on the source.
    61  func NewInvertedBitmap(source *sroar.Bitmap, maxVal uint64, logger logrus.FieldLogger) *sroar.Bitmap {
    62  	bm := NewBitmapPrefill(maxVal, logger)
    63  	bm.AndNot(source)
    64  	return bm
    65  }
    66  
    67  // Creates prefilled bitmap with values from 0 to maxVal (included).
    68  //
    69  // It is designed to be more performant both
    70  // time-wise (compared to Set/SetMany)
    71  // and memory-wise (compared to FromSortedList accepting entire slice of elements)
    72  // Method creates multiple small bitmaps using FromSortedList (slice is reusable)
    73  // and ORs them together to get final bitmap.
    74  // For maxVal > prefillBufferSize (65_536) and multiple CPUs available task is performed
    75  // by up to prefillMaxRoutines (4) goroutines.
    76  func NewBitmapPrefill(maxVal uint64, logger logrus.FieldLogger) *sroar.Bitmap {
    77  	routinesLimit := prefillMaxRoutines
    78  	if _NUMCPU < routinesLimit {
    79  		routinesLimit = _NUMCPU
    80  	}
    81  	if routinesLimit == 1 || maxVal <= uint64(prefillBufferSize) {
    82  		return newBitmapPrefillSequential(maxVal)
    83  	}
    84  	return newBitmapPrefillParallel(maxVal, routinesLimit, logger)
    85  }
    86  
    87  func newBitmapPrefillSequential(maxVal uint64) *sroar.Bitmap {
    88  	inc := uint64(prefillBufferSize)
    89  	buf := make([]uint64, prefillBufferSize)
    90  	finalBM := sroar.NewBitmap()
    91  
    92  	for i := uint64(0); i <= maxVal; i += inc {
    93  		j := uint64(0)
    94  		for ; j < inc && i+j <= maxVal; j++ {
    95  			buf[j] = i + j
    96  		}
    97  		finalBM.Or(sroar.FromSortedList(buf[:j]))
    98  	}
    99  	return finalBM
   100  }
   101  
   102  func newBitmapPrefillParallel(maxVal uint64, routinesLimit int, logger logrus.FieldLogger) *sroar.Bitmap {
   103  	inc := uint64(prefillBufferSize / routinesLimit)
   104  	lock := new(sync.Mutex)
   105  	ch := make(chan uint64, routinesLimit)
   106  	wg := new(sync.WaitGroup)
   107  	wg.Add(routinesLimit)
   108  	finalBM := sroar.NewBitmap()
   109  
   110  	for r := 0; r < routinesLimit; r++ {
   111  		f := func() {
   112  			buf := make([]uint64, inc)
   113  
   114  			for i := range ch {
   115  				j := uint64(0)
   116  				for ; j < inc && i+j <= maxVal; j++ {
   117  					buf[j] = i + j
   118  				}
   119  				bm := sroar.FromSortedList(buf[:j])
   120  
   121  				lock.Lock()
   122  				finalBM.Or(bm)
   123  				lock.Unlock()
   124  			}
   125  			wg.Done()
   126  		}
   127  		enterrors.GoWrapper(f, logger)
   128  	}
   129  
   130  	for i := uint64(0); i <= maxVal; i += inc {
   131  		ch <- i
   132  	}
   133  	close(ch)
   134  	wg.Wait()
   135  	return finalBM
   136  }
   137  
   138  type MaxValGetterFunc func() uint64
   139  
   140  const (
   141  	// DefaultBufferIncrement  is the amount of bits greater than <maxVal>
   142  	// to reduce the amount of times BitmapFactory has to reallocate.
   143  	DefaultBufferIncrement = uint64(100)
   144  )
   145  
   146  // BitmapFactory exists to prevent an expensive call to
   147  // NewBitmapPrefill each time NewInvertedBitmap is invoked
   148  type BitmapFactory struct {
   149  	bitmap        *sroar.Bitmap
   150  	maxValGetter  MaxValGetterFunc
   151  	currentMaxVal uint64
   152  	lock          sync.RWMutex
   153  }
   154  
   155  func NewBitmapFactory(maxValGetter MaxValGetterFunc, logger logrus.FieldLogger) *BitmapFactory {
   156  	maxVal := maxValGetter() + DefaultBufferIncrement
   157  	return &BitmapFactory{
   158  		bitmap:        NewBitmapPrefill(maxVal, logger),
   159  		maxValGetter:  maxValGetter,
   160  		currentMaxVal: maxVal,
   161  	}
   162  }
   163  
   164  // GetBitmap returns a prefilled bitmap, which is cloned from a shared internal.
   165  // This method is safe to call concurrently. The purpose behind sharing an
   166  // internal bitmap, is that a Clone() operation is much cheaper than prefilling
   167  // a map up to <maxDocID> elements is an expensive operation, and this way we
   168  // only have to do it once.
   169  func (bmf *BitmapFactory) GetBitmap() *sroar.Bitmap {
   170  	bmf.lock.RLock()
   171  	maxVal := bmf.maxValGetter()
   172  
   173  	// We don't need to expand, maxVal is unchanged
   174  	{
   175  		if maxVal <= bmf.currentMaxVal {
   176  			cloned := bmf.bitmap.Clone()
   177  			bmf.lock.RUnlock()
   178  			return cloned
   179  		}
   180  	}
   181  
   182  	bmf.lock.RUnlock()
   183  	bmf.lock.Lock()
   184  	defer bmf.lock.Unlock()
   185  
   186  	// 2nd check to ensure bitmap wasn't expanded by
   187  	// concurrent request white waiting for write lock
   188  	{
   189  		maxVal = bmf.maxValGetter()
   190  		if maxVal <= bmf.currentMaxVal {
   191  			return bmf.bitmap.Clone()
   192  		}
   193  	}
   194  
   195  	// maxVal has grown to exceed even the buffer,
   196  	// time to expand
   197  	{
   198  		length := maxVal + DefaultBufferIncrement - bmf.currentMaxVal
   199  		list := make([]uint64, length)
   200  		for i := uint64(0); i < length; i++ {
   201  			list[i] = bmf.currentMaxVal + i + 1
   202  		}
   203  
   204  		bmf.bitmap.Or(sroar.FromSortedList(list))
   205  		bmf.currentMaxVal = maxVal + DefaultBufferIncrement
   206  	}
   207  
   208  	return bmf.bitmap.Clone()
   209  }
   210  
   211  // ActualMaxVal returns the highest value in the bitmap not including the buffer
   212  func (bmf *BitmapFactory) ActualMaxVal() uint64 {
   213  	bmf.lock.RLock()
   214  	defer bmf.lock.RUnlock()
   215  	return bmf.maxValGetter()
   216  }