github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/pqmr/pqmatchresults.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pqmr 18 19 import ( 20 "io" 21 "os" 22 "path/filepath" 23 "sync" 24 25 "github.com/bits-and-blooms/bitset" 26 segutils "github.com/siglens/siglens/pkg/segment/utils" 27 "github.com/siglens/siglens/pkg/utils" 28 log "github.com/sirupsen/logrus" 29 ) 30 31 type SegmentPQMRResults struct { 32 allBlockResults map[uint16]*PQMatchResults 33 accessLock *sync.RWMutex 34 } 35 36 type PQMatchResults struct { 37 b *bitset.BitSet 38 } 39 40 func CreatePQMatchResults(maxEntries uint) *PQMatchResults { 41 retval := &PQMatchResults{} 42 retval.b = bitset.New(maxEntries) 43 return retval 44 } 45 46 func CreatePQMatchResultsFromBs(b *bitset.BitSet) *PQMatchResults { 47 retval := &PQMatchResults{} 48 retval.b = b 49 return retval 50 } 51 52 func (pqmr *PQMatchResults) AddMatchedRecord(recNum uint) { 53 pqmr.b.Set(recNum) 54 } 55 56 func (pqmr *PQMatchResults) DoesRecordMatch(recNum uint) bool { 57 return pqmr.b.Test(recNum) 58 } 59 60 func (pqmr *PQMatchResults) ClearBit(recNum uint) { 61 pqmr.b.Clear(recNum) 62 } 63 64 func (pqmr *PQMatchResults) ResetAll() { 65 pqmr.b.ClearAll() 66 } 67 68 func (pqmr *PQMatchResults) InPlaceIntersection(compare *PQMatchResults) { 69 pqmr.b.InPlaceIntersection(compare.b) 70 } 71 72 func (pqmr *PQMatchResults) InPlaceUnion(compare *PQMatchResults) { 73 pqmr.b.InPlaceUnion(compare.b) 74 } 75 76 func (pqmr *PQMatchResults) Any() bool { 77 return pqmr.b.Any() 78 } 79 80 func Clone(srcPqmr *PQMatchResults) *PQMatchResults { 81 retval := &PQMatchResults{} 82 retval.b = srcPqmr.b.Clone() 83 return retval 84 } 85 86 func (pqmr *PQMatchResults) GetNumberOfBits() uint { 87 return pqmr.b.Len() 88 } 89 90 func (pqmr *PQMatchResults) GetNumberOfSetBits() uint { 91 return pqmr.b.Count() 92 } 93 94 func (pqmr *PQMatchResults) GetInMemSize() uint64 { 95 return uint64(pqmr.b.BinaryStorageSize()) 96 } 97 98 func (pqmr *PQMatchResults) All() bool { 99 return pqmr.b.All() 100 } 101 102 func (pqmr *PQMatchResults) Copy() *PQMatchResults { 103 return &PQMatchResults{ 104 b: pqmr.b.Clone(), 105 } 106 } 107 108 func InitSegmentPQMResults() *SegmentPQMRResults { 109 return &SegmentPQMRResults{ 110 allBlockResults: make(map[uint16]*PQMatchResults), 111 accessLock: &sync.RWMutex{}, 112 } 113 } 114 115 // Returns the PQMatchResults, and a boolean indicating whether if blkNum was found 116 // if bool is false, PQMatchResults is nil 117 func (spqmr *SegmentPQMRResults) GetBlockResults(blkNum uint16) (*PQMatchResults, bool) { 118 spqmr.accessLock.RLock() 119 pqmr, ok := spqmr.allBlockResults[blkNum] 120 spqmr.accessLock.RUnlock() 121 return pqmr, ok 122 } 123 124 // Returns a boolean indicating whether blkNum exists for the spqmr 125 func (spqmr *SegmentPQMRResults) DoesBlockExist(blkNum uint16) bool { 126 spqmr.accessLock.RLock() 127 _, ok := spqmr.allBlockResults[blkNum] 128 spqmr.accessLock.RUnlock() 129 return ok 130 } 131 132 func (spqmr *SegmentPQMRResults) GetNumBlocks() uint16 { 133 spqmr.accessLock.Lock() 134 len := uint16(len(spqmr.allBlockResults)) 135 spqmr.accessLock.Unlock() 136 return len 137 } 138 139 // returns all the blocks found in the spqmr 140 func (spqmr *SegmentPQMRResults) GetAllBlocks() []uint16 { 141 i := 0 142 spqmr.accessLock.Lock() 143 retVal := make([]uint16, len(spqmr.allBlockResults)) 144 for blkNum := range spqmr.allBlockResults { 145 retVal[i] = blkNum 146 i++ 147 } 148 spqmr.accessLock.Unlock() 149 return retVal 150 } 151 152 // returns the size of the copy 153 func (spqmr *SegmentPQMRResults) CopyBlockResults(blkNum uint16, og *PQMatchResults) uint64 { 154 155 spqmr.accessLock.Lock() 156 new := bitset.New(og.b.Len()) 157 _ = og.b.Copy(new) 158 spqmr.allBlockResults[blkNum] = &PQMatchResults{new} 159 spqmr.accessLock.Unlock() 160 return uint64(new.BinaryStorageSize()) 161 } 162 163 // Sets the block results. This should only be used for testing 164 func (spqmr *SegmentPQMRResults) SetBlockResults(blkNum uint16, og *PQMatchResults) { 165 spqmr.accessLock.Lock() 166 spqmr.allBlockResults[blkNum] = og 167 spqmr.accessLock.Unlock() 168 } 169 170 // [blkNum - uint16][bitSetLen - uint16][raw bitset….] 171 func (pqmr *PQMatchResults) FlushPqmr(fname *string, blkNum uint16) error { 172 173 dirName := filepath.Dir(*fname) 174 if _, err := os.Stat(dirName); os.IsNotExist(err) { 175 err := os.MkdirAll(dirName, os.FileMode(0764)) 176 if err != nil { 177 log.Errorf("Failed to create directory %s: %v", dirName, err) 178 return err 179 } 180 } 181 fd, err := os.OpenFile(*fname, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 182 if err != nil { 183 log.Errorf("FlushPqmr: open failed fname=%v, err=%v", *fname, err) 184 return err 185 } 186 187 defer fd.Close() 188 189 if _, err = fd.Write(utils.Uint16ToBytesLittleEndian(blkNum)); err != nil { 190 log.Errorf("FlushPqmr: blkNum size write failed fname=%v, err=%v", *fname, err) 191 return err 192 } 193 194 bytesWritten := uint16(pqmr.b.BinaryStorageSize()) 195 // copy the blockLen 196 if _, err = fd.Write(utils.Uint16ToBytesLittleEndian(uint16(bytesWritten))); err != nil { 197 log.Errorf("FlushPqmr: blklen write failed fname=%v, err=%v", *fname, err) 198 return err 199 } 200 201 // copy the actual bitset 202 _, err = pqmr.b.WriteTo(fd) 203 if err != nil { 204 log.Errorf("FlushPqmr: bitset write failed fname=%v, err=%v", *fname, err) 205 return err 206 } 207 208 return nil 209 } 210 211 // read the pqmr file which has match results for each block 212 // return each of those pqmr blocks 213 func ReadPqmr(fname *string) (*SegmentPQMRResults, error) { 214 215 res := make(map[uint16]*PQMatchResults) 216 // todo pass the pre-alloced bsBlk so that we can reuse it, divide by 8 because one record takes 1 bit 217 bsBlk := make([]byte, segutils.WIP_NUM_RECS/8) 218 219 fd, err := os.OpenFile(*fname, os.O_RDONLY, 0644) 220 if err != nil { 221 log.Errorf("ReadPqmr: open failed fname=%v, err=[%v]", *fname, err) 222 return nil, err 223 } 224 defer fd.Close() 225 226 bbBlkNum := make([]byte, segutils.LEN_BLKNUM_CMI_SIZE) // blkNum (2) 227 bbBlkSize := make([]byte, segutils.LEN_PQMR_BLK_SIZE) 228 offset := int64(0) 229 var blkNum, bsSize uint16 230 231 for { 232 _, err = fd.ReadAt(bbBlkNum, offset) 233 if err != nil { 234 if err != io.EOF { 235 log.Errorf("ReadPqmr: failed to read blkNum len err=[%+v]", err) 236 return nil, err 237 } 238 break 239 } 240 offset += segutils.LEN_BLKNUM_CMI_SIZE 241 blkNum = utils.BytesToUint16LittleEndian(bbBlkNum[:]) 242 243 _, err = fd.ReadAt(bbBlkSize, offset) 244 if err != nil { 245 if err != io.EOF { 246 log.Errorf("ReadPqmr: failed to read bitsetSize len err=[%+v]", err) 247 return nil, err 248 } 249 break 250 } 251 offset += segutils.LEN_PQMR_BLK_SIZE 252 bsSize = utils.BytesToUint16LittleEndian(bbBlkSize[:]) 253 254 if bufflen := uint16(len(bsBlk)); bufflen < bsSize { 255 newSlice := make([]byte, bsSize-bufflen) 256 bsBlk = append(bsBlk, newSlice...) 257 } 258 259 _, err = fd.ReadAt(bsBlk[:bsSize], offset) 260 if err != nil { 261 if err != io.EOF { 262 log.Errorf("ReadPqmr: failed to read bitset err=[%+v]", err) 263 return nil, err 264 } 265 break 266 } 267 offset += int64(bsSize) 268 269 bs := bitset.New(0) 270 err = bs.UnmarshalBinary(bsBlk[:bsSize]) 271 if err != nil { 272 if err != io.EOF { 273 log.Errorf("ReadPqmr: failed to unmarshall bitset err=[%+v] blkNum=%v", err, blkNum) 274 return nil, err 275 } 276 break 277 } 278 279 pqmr := &PQMatchResults{b: bs} 280 281 res[blkNum] = pqmr 282 } 283 284 return &SegmentPQMRResults{allBlockResults: res, accessLock: &sync.RWMutex{}}, nil 285 } 286 287 func (pqmr *PQMatchResults) Shrink(lastIdx uint) *PQMatchResults { 288 retval := &PQMatchResults{} 289 retval.b = pqmr.b.Shrink(lastIdx) 290 return retval 291 } 292 293 func (pqmr *PQMatchResults) WriteTo(fd *os.File) error { 294 _, err := pqmr.b.WriteTo(fd) 295 return err 296 } 297 298 func (pqmr *PQMatchResults) EncodePqmr(buf []byte, blkNum uint16) (uint16, error) { 299 var idx uint16 300 // write blkNum 301 copy(buf[idx:], utils.Uint16ToBytesLittleEndian(blkNum)) 302 idx += 2 303 // write the size of bitset 304 bitsetSize := uint16(pqmr.b.BinaryStorageSize()) 305 copy(buf[idx:], utils.Uint16ToBytesLittleEndian(bitsetSize)) 306 idx += 2 307 // write actual bitset 308 actualBitset, err := pqmr.b.MarshalBinary() 309 if err != nil { 310 log.Errorf("EncodePqmr: Error in encoding a BitSet into a binary form, err=%v", err) 311 return idx, err 312 } 313 copy(buf[idx:], actualBitset) 314 idx += uint16(len(actualBitset)) 315 return idx, nil 316 317 } 318 319 func WritePqmrToDisk(buf []byte, fileName string) error { 320 dirName := filepath.Dir(fileName) 321 if _, err := os.Stat(dirName); os.IsNotExist(err) { 322 err := os.MkdirAll(dirName, os.FileMode(0764)) 323 if err != nil { 324 log.Errorf("Failed to create directory %s: %v", dirName, err) 325 return err 326 } 327 } 328 fd, err := os.OpenFile(fileName, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 329 if err != nil { 330 log.Errorf("WritePqmrToDisk: open failed fname=%v, err=%v", fileName, err) 331 return err 332 } 333 334 defer fd.Close() 335 336 _, err = fd.Write(buf) 337 if err != nil { 338 log.Errorf("WritePqmrToDisk: buf write failed fname=%v, err=%v", fileName, err) 339 return err 340 } 341 342 err = fd.Sync() 343 if err != nil { 344 log.Errorf("WritePqmrToDisk: sync failed filename=%v,err=%v", fileName, err) 345 return err 346 } 347 return nil 348 }