github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/pqmr/pqmatchresults.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pqmr
    18  
import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"

	"github.com/bits-and-blooms/bitset"
	segutils "github.com/siglens/siglens/pkg/segment/utils"
	"github.com/siglens/siglens/pkg/utils"
	log "github.com/sirupsen/logrus"
)
    30  
    31  type SegmentPQMRResults struct {
    32  	allBlockResults map[uint16]*PQMatchResults
    33  	accessLock      *sync.RWMutex
    34  }
    35  
    36  type PQMatchResults struct {
    37  	b *bitset.BitSet
    38  }
    39  
    40  func CreatePQMatchResults(maxEntries uint) *PQMatchResults {
    41  	retval := &PQMatchResults{}
    42  	retval.b = bitset.New(maxEntries)
    43  	return retval
    44  }
    45  
    46  func CreatePQMatchResultsFromBs(b *bitset.BitSet) *PQMatchResults {
    47  	retval := &PQMatchResults{}
    48  	retval.b = b
    49  	return retval
    50  }
    51  
    52  func (pqmr *PQMatchResults) AddMatchedRecord(recNum uint) {
    53  	pqmr.b.Set(recNum)
    54  }
    55  
    56  func (pqmr *PQMatchResults) DoesRecordMatch(recNum uint) bool {
    57  	return pqmr.b.Test(recNum)
    58  }
    59  
    60  func (pqmr *PQMatchResults) ClearBit(recNum uint) {
    61  	pqmr.b.Clear(recNum)
    62  }
    63  
    64  func (pqmr *PQMatchResults) ResetAll() {
    65  	pqmr.b.ClearAll()
    66  }
    67  
    68  func (pqmr *PQMatchResults) InPlaceIntersection(compare *PQMatchResults) {
    69  	pqmr.b.InPlaceIntersection(compare.b)
    70  }
    71  
    72  func (pqmr *PQMatchResults) InPlaceUnion(compare *PQMatchResults) {
    73  	pqmr.b.InPlaceUnion(compare.b)
    74  }
    75  
    76  func (pqmr *PQMatchResults) Any() bool {
    77  	return pqmr.b.Any()
    78  }
    79  
    80  func Clone(srcPqmr *PQMatchResults) *PQMatchResults {
    81  	retval := &PQMatchResults{}
    82  	retval.b = srcPqmr.b.Clone()
    83  	return retval
    84  }
    85  
    86  func (pqmr *PQMatchResults) GetNumberOfBits() uint {
    87  	return pqmr.b.Len()
    88  }
    89  
    90  func (pqmr *PQMatchResults) GetNumberOfSetBits() uint {
    91  	return pqmr.b.Count()
    92  }
    93  
    94  func (pqmr *PQMatchResults) GetInMemSize() uint64 {
    95  	return uint64(pqmr.b.BinaryStorageSize())
    96  }
    97  
    98  func (pqmr *PQMatchResults) All() bool {
    99  	return pqmr.b.All()
   100  }
   101  
   102  func (pqmr *PQMatchResults) Copy() *PQMatchResults {
   103  	return &PQMatchResults{
   104  		b: pqmr.b.Clone(),
   105  	}
   106  }
   107  
   108  func InitSegmentPQMResults() *SegmentPQMRResults {
   109  	return &SegmentPQMRResults{
   110  		allBlockResults: make(map[uint16]*PQMatchResults),
   111  		accessLock:      &sync.RWMutex{},
   112  	}
   113  }
   114  
   115  // Returns the PQMatchResults, and a boolean indicating whether if blkNum was found
   116  // if bool is false, PQMatchResults is nil
   117  func (spqmr *SegmentPQMRResults) GetBlockResults(blkNum uint16) (*PQMatchResults, bool) {
   118  	spqmr.accessLock.RLock()
   119  	pqmr, ok := spqmr.allBlockResults[blkNum]
   120  	spqmr.accessLock.RUnlock()
   121  	return pqmr, ok
   122  }
   123  
   124  // Returns a boolean indicating whether blkNum exists for the spqmr
   125  func (spqmr *SegmentPQMRResults) DoesBlockExist(blkNum uint16) bool {
   126  	spqmr.accessLock.RLock()
   127  	_, ok := spqmr.allBlockResults[blkNum]
   128  	spqmr.accessLock.RUnlock()
   129  	return ok
   130  }
   131  
   132  func (spqmr *SegmentPQMRResults) GetNumBlocks() uint16 {
   133  	spqmr.accessLock.Lock()
   134  	len := uint16(len(spqmr.allBlockResults))
   135  	spqmr.accessLock.Unlock()
   136  	return len
   137  }
   138  
   139  // returns all the blocks found in the spqmr
   140  func (spqmr *SegmentPQMRResults) GetAllBlocks() []uint16 {
   141  	i := 0
   142  	spqmr.accessLock.Lock()
   143  	retVal := make([]uint16, len(spqmr.allBlockResults))
   144  	for blkNum := range spqmr.allBlockResults {
   145  		retVal[i] = blkNum
   146  		i++
   147  	}
   148  	spqmr.accessLock.Unlock()
   149  	return retVal
   150  }
   151  
   152  // returns the size of the copy
   153  func (spqmr *SegmentPQMRResults) CopyBlockResults(blkNum uint16, og *PQMatchResults) uint64 {
   154  
   155  	spqmr.accessLock.Lock()
   156  	new := bitset.New(og.b.Len())
   157  	_ = og.b.Copy(new)
   158  	spqmr.allBlockResults[blkNum] = &PQMatchResults{new}
   159  	spqmr.accessLock.Unlock()
   160  	return uint64(new.BinaryStorageSize())
   161  }
   162  
   163  // Sets the block results. This should only be used for testing
   164  func (spqmr *SegmentPQMRResults) SetBlockResults(blkNum uint16, og *PQMatchResults) {
   165  	spqmr.accessLock.Lock()
   166  	spqmr.allBlockResults[blkNum] = og
   167  	spqmr.accessLock.Unlock()
   168  }
   169  
   170  // [blkNum - uint16][bitSetLen - uint16][raw bitset….]
   171  func (pqmr *PQMatchResults) FlushPqmr(fname *string, blkNum uint16) error {
   172  
   173  	dirName := filepath.Dir(*fname)
   174  	if _, err := os.Stat(dirName); os.IsNotExist(err) {
   175  		err := os.MkdirAll(dirName, os.FileMode(0764))
   176  		if err != nil {
   177  			log.Errorf("Failed to create directory %s: %v", dirName, err)
   178  			return err
   179  		}
   180  	}
   181  	fd, err := os.OpenFile(*fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
   182  	if err != nil {
   183  		log.Errorf("FlushPqmr: open failed fname=%v, err=%v", *fname, err)
   184  		return err
   185  	}
   186  
   187  	defer fd.Close()
   188  
   189  	if _, err = fd.Write(utils.Uint16ToBytesLittleEndian(blkNum)); err != nil {
   190  		log.Errorf("FlushPqmr: blkNum size write failed fname=%v, err=%v", *fname, err)
   191  		return err
   192  	}
   193  
   194  	bytesWritten := uint16(pqmr.b.BinaryStorageSize())
   195  	// copy the blockLen
   196  	if _, err = fd.Write(utils.Uint16ToBytesLittleEndian(uint16(bytesWritten))); err != nil {
   197  		log.Errorf("FlushPqmr: blklen write failed fname=%v, err=%v", *fname, err)
   198  		return err
   199  	}
   200  
   201  	// copy the actual bitset
   202  	_, err = pqmr.b.WriteTo(fd)
   203  	if err != nil {
   204  		log.Errorf("FlushPqmr: bitset write failed fname=%v, err=%v", *fname, err)
   205  		return err
   206  	}
   207  
   208  	return nil
   209  }
   210  
   211  // read the pqmr file which has match results for each block
   212  // return each of those pqmr blocks
   213  func ReadPqmr(fname *string) (*SegmentPQMRResults, error) {
   214  
   215  	res := make(map[uint16]*PQMatchResults)
   216  	// todo pass the pre-alloced bsBlk so that we can reuse it, divide by 8 because one record takes 1 bit
   217  	bsBlk := make([]byte, segutils.WIP_NUM_RECS/8)
   218  
   219  	fd, err := os.OpenFile(*fname, os.O_RDONLY, 0644)
   220  	if err != nil {
   221  		log.Errorf("ReadPqmr: open failed fname=%v, err=[%v]", *fname, err)
   222  		return nil, err
   223  	}
   224  	defer fd.Close()
   225  
   226  	bbBlkNum := make([]byte, segutils.LEN_BLKNUM_CMI_SIZE) // blkNum (2)
   227  	bbBlkSize := make([]byte, segutils.LEN_PQMR_BLK_SIZE)
   228  	offset := int64(0)
   229  	var blkNum, bsSize uint16
   230  
   231  	for {
   232  		_, err = fd.ReadAt(bbBlkNum, offset)
   233  		if err != nil {
   234  			if err != io.EOF {
   235  				log.Errorf("ReadPqmr: failed to read blkNum len err=[%+v]", err)
   236  				return nil, err
   237  			}
   238  			break
   239  		}
   240  		offset += segutils.LEN_BLKNUM_CMI_SIZE
   241  		blkNum = utils.BytesToUint16LittleEndian(bbBlkNum[:])
   242  
   243  		_, err = fd.ReadAt(bbBlkSize, offset)
   244  		if err != nil {
   245  			if err != io.EOF {
   246  				log.Errorf("ReadPqmr: failed to read bitsetSize len err=[%+v]", err)
   247  				return nil, err
   248  			}
   249  			break
   250  		}
   251  		offset += segutils.LEN_PQMR_BLK_SIZE
   252  		bsSize = utils.BytesToUint16LittleEndian(bbBlkSize[:])
   253  
   254  		if bufflen := uint16(len(bsBlk)); bufflen < bsSize {
   255  			newSlice := make([]byte, bsSize-bufflen)
   256  			bsBlk = append(bsBlk, newSlice...)
   257  		}
   258  
   259  		_, err = fd.ReadAt(bsBlk[:bsSize], offset)
   260  		if err != nil {
   261  			if err != io.EOF {
   262  				log.Errorf("ReadPqmr: failed to read bitset err=[%+v]", err)
   263  				return nil, err
   264  			}
   265  			break
   266  		}
   267  		offset += int64(bsSize)
   268  
   269  		bs := bitset.New(0)
   270  		err = bs.UnmarshalBinary(bsBlk[:bsSize])
   271  		if err != nil {
   272  			if err != io.EOF {
   273  				log.Errorf("ReadPqmr: failed to unmarshall bitset err=[%+v] blkNum=%v", err, blkNum)
   274  				return nil, err
   275  			}
   276  			break
   277  		}
   278  
   279  		pqmr := &PQMatchResults{b: bs}
   280  
   281  		res[blkNum] = pqmr
   282  	}
   283  
   284  	return &SegmentPQMRResults{allBlockResults: res, accessLock: &sync.RWMutex{}}, nil
   285  }
   286  
   287  func (pqmr *PQMatchResults) Shrink(lastIdx uint) *PQMatchResults {
   288  	retval := &PQMatchResults{}
   289  	retval.b = pqmr.b.Shrink(lastIdx)
   290  	return retval
   291  }
   292  
   293  func (pqmr *PQMatchResults) WriteTo(fd *os.File) error {
   294  	_, err := pqmr.b.WriteTo(fd)
   295  	return err
   296  }
   297  
   298  func (pqmr *PQMatchResults) EncodePqmr(buf []byte, blkNum uint16) (uint16, error) {
   299  	var idx uint16
   300  	// write blkNum
   301  	copy(buf[idx:], utils.Uint16ToBytesLittleEndian(blkNum))
   302  	idx += 2
   303  	// write the size of bitset
   304  	bitsetSize := uint16(pqmr.b.BinaryStorageSize())
   305  	copy(buf[idx:], utils.Uint16ToBytesLittleEndian(bitsetSize))
   306  	idx += 2
   307  	// write actual bitset
   308  	actualBitset, err := pqmr.b.MarshalBinary()
   309  	if err != nil {
   310  		log.Errorf("EncodePqmr: Error in encoding a BitSet into a binary form, err=%v", err)
   311  		return idx, err
   312  	}
   313  	copy(buf[idx:], actualBitset)
   314  	idx += uint16(len(actualBitset))
   315  	return idx, nil
   316  
   317  }
   318  
   319  func WritePqmrToDisk(buf []byte, fileName string) error {
   320  	dirName := filepath.Dir(fileName)
   321  	if _, err := os.Stat(dirName); os.IsNotExist(err) {
   322  		err := os.MkdirAll(dirName, os.FileMode(0764))
   323  		if err != nil {
   324  			log.Errorf("Failed to create directory %s: %v", dirName, err)
   325  			return err
   326  		}
   327  	}
   328  	fd, err := os.OpenFile(fileName, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
   329  	if err != nil {
   330  		log.Errorf("WritePqmrToDisk: open failed fname=%v, err=%v", fileName, err)
   331  		return err
   332  	}
   333  
   334  	defer fd.Close()
   335  
   336  	_, err = fd.Write(buf)
   337  	if err != nil {
   338  		log.Errorf("WritePqmrToDisk: buf write failed fname=%v, err=%v", fileName, err)
   339  		return err
   340  	}
   341  
   342  	err = fd.Sync()
   343  	if err != nil {
   344  		log.Errorf("WritePqmrToDisk: sync failed filename=%v,err=%v", fileName, err)
   345  		return err
   346  	}
   347  	return nil
   348  }