github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/metrics/series/seriesreader.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package series 18 19 import ( 20 "bytes" 21 "fmt" 22 "os" 23 "sync" 24 "time" 25 26 "github.com/siglens/siglens/pkg/segment/structs" 27 segutils "github.com/siglens/siglens/pkg/segment/utils" 28 "github.com/siglens/siglens/pkg/segment/writer/metrics/compress" 29 "github.com/siglens/siglens/pkg/utils" 30 log "github.com/sirupsen/logrus" 31 ) 32 33 /* 34 Holder struct to read a single time series segment 35 36 Exposes function to access readers for each block 37 */ 38 type TimeSeriesSegmentReader struct { 39 mKey string // base metrics key directory 40 tsoBuf []byte // raw buffer used to decode the TSO 41 tsgBuf []byte // raw buffer used to decode the TSO 42 43 allBuffers [][]byte // list of all buffers used to read TSO/TSG files 44 } 45 46 /* 47 Struct to access data within a single block. 48 49 Exposes functions that will return a TimeSeriesIterator for the given tsids 50 */ 51 type TimeSeriesBlockReader struct { 52 rawTSO []byte // raw read TSO file 53 rawTSG []byte // raw read TSG file 54 numTSIDs uint16 55 56 lastTSID uint64 57 lastTSidx uint32 // index of the last tsid in the tso file 58 first bool 59 } 60 61 type SharedTimeSeriesSegmentReader struct { 62 TimeSeriesBlockReader []*TimeSeriesSegmentReader 63 numReaders int 64 rwLock *sync.Mutex 65 } 66 67 var seriesBufferPool = sync.Pool{ 68 New: func() interface{} { 69 // The Pool's New function should generally only return pointer 70 // types, since a pointer can be put into the return interface 71 // value without an allocation: 72 73 buff := float64(segutils.METRICS_SEARCH_ALLOCATE_BLOCK) 74 slice := make([]byte, 0, int(buff)) 75 return &slice 76 }, 77 } 78 79 /* 80 Exposes init functions for timeseries block readers. 81 82 # This allocates all required buffers for the readers 83 84 It is up to the caller to call .Close() to return all buffers 85 */ 86 func InitTimeSeriesReader(mKey string) (*TimeSeriesSegmentReader, error) { 87 // load tso/tsg file as needd 88 return &TimeSeriesSegmentReader{ 89 mKey: mKey, 90 tsoBuf: *seriesBufferPool.Get().(*[]byte), 91 tsgBuf: *seriesBufferPool.Get().(*[]byte), 92 allBuffers: make([][]byte, 0), 93 }, nil 94 } 95 96 /* 97 Closes the iterator by returning all buffers back to the pool 98 */ 99 func (tssr *TimeSeriesSegmentReader) Close() error { 100 // load tso/tsg file as needd 101 102 seriesBufferPool.Put(&tssr.tsoBuf) 103 seriesBufferPool.Put(&tssr.tsgBuf) 104 for i := range tssr.allBuffers { 105 seriesBufferPool.Put(&tssr.allBuffers[i]) 106 } 107 108 return nil 109 } 110 111 func InitSharedTimeSeriesSegmentReader(mKey string, numReaders int) (*SharedTimeSeriesSegmentReader, error) { 112 sharedTimeSeriesSegmentReader := &SharedTimeSeriesSegmentReader{ 113 TimeSeriesBlockReader: make([]*TimeSeriesSegmentReader, numReaders), 114 numReaders: numReaders, 115 rwLock: &sync.Mutex{}, 116 } 117 118 for i := 0; i < numReaders; i++ { 119 currReader, err := InitTimeSeriesReader(mKey) 120 if err != nil { 121 sharedTimeSeriesSegmentReader.Close() 122 return sharedTimeSeriesSegmentReader, err 123 } 124 sharedTimeSeriesSegmentReader.TimeSeriesBlockReader[i] = currReader 125 } 126 return sharedTimeSeriesSegmentReader, nil 127 } 128 129 func (stssr *SharedTimeSeriesSegmentReader) Close() error { 130 for _, reader := range stssr.TimeSeriesBlockReader { 131 reader.Close() 132 } 133 return nil 134 } 135 136 /* 137 Exposes init functions for timeseries block readers. 138 139 After calling this function, all previous blockreaders will become invalid. 140 141 It is up to the caller to ensure that all previous blockreaders are no longer being used 142 */ 143 func (tssr *TimeSeriesSegmentReader) InitReaderForBlock(blkNum uint16, queryMetrics *structs.MetricsQueryProcessingMetrics) (*TimeSeriesBlockReader, error) { 144 // load tso/tsg file as need 145 tsoFName := fmt.Sprintf("%s_%d.tso", tssr.mKey, blkNum) 146 sTime := time.Now() 147 readTSO, nTSIDs, err := tssr.loadTSOFile(tsoFName, tssr.tsoBuf) 148 if err != nil { 149 log.Errorf("InitReaderForBlock: failed to init reader for block %v! Err:%+v", blkNum, err) 150 return nil, err 151 } 152 153 queryMetrics.SetTimeLoadingTSOFiles(time.Since(sTime)) 154 queryMetrics.IncrementNumTSOFilesLoaded(1) 155 156 tsgFName := fmt.Sprintf("%s_%d.tsg", tssr.mKey, blkNum) 157 sTime = time.Now() 158 readTSG, err := tssr.loadTSGFile(tsgFName, tssr.tsgBuf) 159 160 if err != nil { 161 log.Errorf("InitReaderForBlock: failed to init reader for block %v! Err:%+v", blkNum, err) 162 return nil, err 163 } 164 165 queryMetrics.SetTimeLoadingTSGFiles(time.Since(sTime)) 166 queryMetrics.IncrementNumTSGFilesLoaded(1) 167 168 return &TimeSeriesBlockReader{ 169 rawTSO: readTSO, 170 rawTSG: readTSG, 171 numTSIDs: nTSIDs, 172 first: true, 173 lastTSidx: 0, 174 lastTSID: 0, 175 }, nil 176 } 177 178 /* 179 Exposes function that will return a TimeSeriesIterator for a given tsid 180 181 # Returns a Series Iterator, a bool, or an error 182 183 The bool indicates if the series was found. If the series is not found, the iterator will be nil 184 185 Internally, looks up the tsid in the .tso file and returns a TimeSeriesIterator after loading the csg at the read offset 186 This function will keep the encoded csg values as a []byte 187 */ 188 func (tsbr *TimeSeriesBlockReader) GetTimeSeriesIterator(tsid uint64) (*compress.DecompressIterator, bool, error) { 189 // load tso/tsg file as needd 190 191 var found bool 192 var offset uint32 193 var tsIDX uint32 194 if !tsbr.first { 195 if tsid < tsbr.lastTSID { 196 found, tsIDX, offset = getOffsetFromTsoFile(0, tsbr.lastTSidx, uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO) 197 } else if tsid > tsbr.lastTSID { 198 found, tsIDX, offset = getOffsetFromTsoFile(tsbr.lastTSidx, uint32(tsbr.numTSIDs-1), uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO) 199 } 200 } else { 201 found, tsIDX, offset = getOffsetFromTsoFile(0, uint32(tsbr.numTSIDs-1), uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO) 202 } 203 204 if !found { 205 return nil, false, nil 206 } 207 tsbr.first = false 208 tsbr.lastTSID = tsid 209 tsbr.lastTSidx = tsIDX 210 211 offset += 9 // 1 byte for version + 8 bytes is for tsid 212 tsgLen := utils.BytesToUint32LittleEndian(tsbr.rawTSG[offset : offset+4]) 213 offset += 4 214 rawSeries := bytes.NewReader(tsbr.rawTSG[offset : offset+tsgLen]) 215 it, err := compress.NewDecompressIterator(rawSeries) 216 if err != nil { 217 log.Errorf("GetTimeSeriesIterator: Error initialising a decompressor! err: %v", err) 218 return nil, true, err 219 } 220 return it, true, nil 221 } 222 223 // returns bool if found. If true, returns the tsidx and offset in the TSG file 224 func getOffsetFromTsoFile(low uint32, high uint32, nTsids uint32, tsid uint64, tsoBuf []byte) (bool, uint32, uint32) { 225 for low <= high { 226 mid := (high + low) / 2 227 // adding 3 because the first byte for version and the next two bytes are for number of entries 228 // multiplying 'mid' by 12 because every tsid info takes 8 bytes for tsid and 4 bytes for tsid offset 229 offsetMid := 3 + mid*12 230 // tsid takes 8 bytes in the tso buffer 231 tempBuffer := tsoBuf[offsetMid : offsetMid+8] 232 midTsid := utils.BytesToUint64LittleEndian(tempBuffer) 233 if midTsid < tsid { 234 low = mid + 1 235 } else if midTsid > tsid { 236 if mid == 0 { 237 return false, mid, 0 238 } 239 high = mid - 1 240 } else { 241 off := tsoBuf[offsetMid+8 : offsetMid+12] 242 return true, mid, utils.BytesToUint32LittleEndian(off) 243 } 244 } 245 return false, 0, 0 246 } 247 248 func (tssr *TimeSeriesSegmentReader) loadTSOFile(fileName string, rbuf []byte) ([]byte, uint16, error) { 249 250 fd, err := os.OpenFile(fileName, os.O_RDONLY, 0644) 251 if err != nil { 252 log.Infof("loadTSOFile: failed to open fileName: %v Error: %v", fileName, err) 253 return nil, 0, err 254 } 255 defer fd.Close() 256 257 finfo, err := fd.Stat() 258 if err != nil { 259 log.Errorf("loadTSOFile: error when trying to stat file=%+v. Error=%+v", fileName, err) 260 return nil, 0, err 261 } 262 263 fileSize := finfo.Size() 264 rbuf = rbuf[:cap(rbuf)] 265 sizeToAdd := fileSize - int64(len(rbuf)) 266 if sizeToAdd > 0 { 267 newArr := *seriesBufferPool.Get().(*[]byte) 268 if diff := sizeToAdd - int64(len(newArr)); diff <= 0 { 269 newArr = newArr[:sizeToAdd] 270 } else { 271 extend := make([]byte, diff) 272 newArr = append(newArr, extend...) 273 } 274 tssr.allBuffers = append(tssr.allBuffers, newArr) 275 rbuf = append(rbuf, newArr...) 276 } else { 277 rbuf = rbuf[:fileSize] 278 } 279 _, err = fd.ReadAt(rbuf, 0) 280 if err != nil { 281 log.Errorf("loadTSOFile: Error reading TSO file: %v, err: %v", fileName, err) 282 return nil, 0, err 283 } 284 // rbuf[0] gives the version byte 285 versionTsoFile := make([]byte, 1) 286 copy(versionTsoFile, rbuf[:1]) 287 if versionTsoFile[0] != segutils.VERSION_TSOFILE[0] { 288 return nil, 0, fmt.Errorf("loadTSOFile: the file version doesn't match") 289 } 290 nEntries := utils.BytesToUint16LittleEndian(rbuf[1:3]) 291 return rbuf, nEntries, nil 292 } 293 294 func (tssr *TimeSeriesSegmentReader) loadTSGFile(fileName string, rbuf []byte) ([]byte, error) { 295 fd, err := os.OpenFile(fileName, os.O_RDONLY, 0644) 296 if err != nil { 297 log.Errorf("loadTSGFile: error when trying to open file=%+v. Error=%+v", fileName, err) 298 return nil, err 299 } 300 defer fd.Close() 301 302 finfo, err := fd.Stat() 303 if err != nil { 304 log.Errorf("loadTSGFile: error when trying to stat file=%+v. Error=%+v", fileName, err) 305 return nil, err 306 } 307 fileSize := finfo.Size() 308 rbuf = rbuf[:cap(rbuf)] 309 sizeToAdd := fileSize - int64(len(rbuf)) 310 if sizeToAdd > 0 { 311 newArr := *seriesBufferPool.Get().(*[]byte) 312 if diff := sizeToAdd - int64(len(newArr)); diff <= 0 { 313 newArr = newArr[:sizeToAdd] 314 } else { 315 extend := make([]byte, diff) 316 newArr = append(newArr, extend...) 317 } 318 tssr.allBuffers = append(tssr.allBuffers, newArr) 319 rbuf = append(rbuf, newArr...) 320 } else { 321 rbuf = rbuf[:fileSize] 322 } 323 _, err = fd.ReadAt(rbuf, 0) 324 if err != nil { 325 log.Errorf("loadTSGFile: Error reading TSG file: %v, err: %v", fileName, err) 326 return nil, err 327 } 328 versionTsgFile := make([]byte, 1) 329 copy(versionTsgFile, rbuf[:1]) 330 if versionTsgFile[0] != segutils.VERSION_TSGFILE[0] { 331 return nil, fmt.Errorf("loadTSGFile: the file version doesn't match") 332 } 333 return rbuf, nil 334 }