github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/reader/metrics/series/seriesreader.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package series
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"os"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/siglens/siglens/pkg/segment/structs"
    27  	segutils "github.com/siglens/siglens/pkg/segment/utils"
    28  	"github.com/siglens/siglens/pkg/segment/writer/metrics/compress"
    29  	"github.com/siglens/siglens/pkg/utils"
    30  	log "github.com/sirupsen/logrus"
    31  )
    32  
    33  /*
    34  Holder struct to read a single time series segment
    35  
    36  Exposes function to access readers for each block
    37  */
    38  type TimeSeriesSegmentReader struct {
    39  	mKey   string // base metrics key directory
    40  	tsoBuf []byte // raw buffer used to decode the TSO
    41  	tsgBuf []byte // raw buffer used to decode the TSO
    42  
    43  	allBuffers [][]byte // list of all buffers used to read TSO/TSG files
    44  }
    45  
    46  /*
    47  Struct to access data within a single block.
    48  
    49  Exposes functions that will return a TimeSeriesIterator for the given tsids
    50  */
    51  type TimeSeriesBlockReader struct {
    52  	rawTSO   []byte // raw read TSO file
    53  	rawTSG   []byte // raw read TSG file
    54  	numTSIDs uint16
    55  
    56  	lastTSID  uint64
    57  	lastTSidx uint32 // index of the last tsid in the tso file
    58  	first     bool
    59  }
    60  
    61  type SharedTimeSeriesSegmentReader struct {
    62  	TimeSeriesBlockReader []*TimeSeriesSegmentReader
    63  	numReaders            int
    64  	rwLock                *sync.Mutex
    65  }
    66  
    67  var seriesBufferPool = sync.Pool{
    68  	New: func() interface{} {
    69  		// The Pool's New function should generally only return pointer
    70  		// types, since a pointer can be put into the return interface
    71  		// value without an allocation:
    72  
    73  		buff := float64(segutils.METRICS_SEARCH_ALLOCATE_BLOCK)
    74  		slice := make([]byte, 0, int(buff))
    75  		return &slice
    76  	},
    77  }
    78  
    79  /*
    80  Exposes init functions for timeseries block readers.
    81  
    82  # This allocates all required buffers for the readers
    83  
    84  It is up to the caller to call .Close() to return all buffers
    85  */
    86  func InitTimeSeriesReader(mKey string) (*TimeSeriesSegmentReader, error) {
    87  	// load tso/tsg file as needd
    88  	return &TimeSeriesSegmentReader{
    89  		mKey:       mKey,
    90  		tsoBuf:     *seriesBufferPool.Get().(*[]byte),
    91  		tsgBuf:     *seriesBufferPool.Get().(*[]byte),
    92  		allBuffers: make([][]byte, 0),
    93  	}, nil
    94  }
    95  
    96  /*
    97  Closes the iterator by returning all buffers back to the pool
    98  */
    99  func (tssr *TimeSeriesSegmentReader) Close() error {
   100  	// load tso/tsg file as needd
   101  
   102  	seriesBufferPool.Put(&tssr.tsoBuf)
   103  	seriesBufferPool.Put(&tssr.tsgBuf)
   104  	for i := range tssr.allBuffers {
   105  		seriesBufferPool.Put(&tssr.allBuffers[i])
   106  	}
   107  
   108  	return nil
   109  }
   110  
   111  func InitSharedTimeSeriesSegmentReader(mKey string, numReaders int) (*SharedTimeSeriesSegmentReader, error) {
   112  	sharedTimeSeriesSegmentReader := &SharedTimeSeriesSegmentReader{
   113  		TimeSeriesBlockReader: make([]*TimeSeriesSegmentReader, numReaders),
   114  		numReaders:            numReaders,
   115  		rwLock:                &sync.Mutex{},
   116  	}
   117  
   118  	for i := 0; i < numReaders; i++ {
   119  		currReader, err := InitTimeSeriesReader(mKey)
   120  		if err != nil {
   121  			sharedTimeSeriesSegmentReader.Close()
   122  			return sharedTimeSeriesSegmentReader, err
   123  		}
   124  		sharedTimeSeriesSegmentReader.TimeSeriesBlockReader[i] = currReader
   125  	}
   126  	return sharedTimeSeriesSegmentReader, nil
   127  }
   128  
   129  func (stssr *SharedTimeSeriesSegmentReader) Close() error {
   130  	for _, reader := range stssr.TimeSeriesBlockReader {
   131  		reader.Close()
   132  	}
   133  	return nil
   134  }
   135  
   136  /*
   137  Exposes init functions for timeseries block readers.
   138  
   139  After calling this function, all previous blockreaders will become invalid.
   140  
   141  It is up to the caller to ensure that all previous blockreaders are no longer being used
   142  */
   143  func (tssr *TimeSeriesSegmentReader) InitReaderForBlock(blkNum uint16, queryMetrics *structs.MetricsQueryProcessingMetrics) (*TimeSeriesBlockReader, error) {
   144  	// load tso/tsg file as need
   145  	tsoFName := fmt.Sprintf("%s_%d.tso", tssr.mKey, blkNum)
   146  	sTime := time.Now()
   147  	readTSO, nTSIDs, err := tssr.loadTSOFile(tsoFName, tssr.tsoBuf)
   148  	if err != nil {
   149  		log.Errorf("InitReaderForBlock: failed to init reader for block %v! Err:%+v", blkNum, err)
   150  		return nil, err
   151  	}
   152  
   153  	queryMetrics.SetTimeLoadingTSOFiles(time.Since(sTime))
   154  	queryMetrics.IncrementNumTSOFilesLoaded(1)
   155  
   156  	tsgFName := fmt.Sprintf("%s_%d.tsg", tssr.mKey, blkNum)
   157  	sTime = time.Now()
   158  	readTSG, err := tssr.loadTSGFile(tsgFName, tssr.tsgBuf)
   159  
   160  	if err != nil {
   161  		log.Errorf("InitReaderForBlock: failed to init reader for block %v! Err:%+v", blkNum, err)
   162  		return nil, err
   163  	}
   164  
   165  	queryMetrics.SetTimeLoadingTSGFiles(time.Since(sTime))
   166  	queryMetrics.IncrementNumTSGFilesLoaded(1)
   167  
   168  	return &TimeSeriesBlockReader{
   169  		rawTSO:    readTSO,
   170  		rawTSG:    readTSG,
   171  		numTSIDs:  nTSIDs,
   172  		first:     true,
   173  		lastTSidx: 0,
   174  		lastTSID:  0,
   175  	}, nil
   176  }
   177  
   178  /*
   179  Exposes function that will return a TimeSeriesIterator for a given tsid
   180  
   181  # Returns a Series Iterator, a bool, or an error
   182  
   183  The bool indicates if the series was found. If the series is not found, the iterator will be nil
   184  
   185  Internally, looks up the tsid in the .tso file and returns a TimeSeriesIterator after loading the csg at the read offset
   186  This function will keep the encoded csg values as a []byte
   187  */
   188  func (tsbr *TimeSeriesBlockReader) GetTimeSeriesIterator(tsid uint64) (*compress.DecompressIterator, bool, error) {
   189  	// load tso/tsg file as needd
   190  
   191  	var found bool
   192  	var offset uint32
   193  	var tsIDX uint32
   194  	if !tsbr.first {
   195  		if tsid < tsbr.lastTSID {
   196  			found, tsIDX, offset = getOffsetFromTsoFile(0, tsbr.lastTSidx, uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO)
   197  		} else if tsid > tsbr.lastTSID {
   198  			found, tsIDX, offset = getOffsetFromTsoFile(tsbr.lastTSidx, uint32(tsbr.numTSIDs-1), uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO)
   199  		}
   200  	} else {
   201  		found, tsIDX, offset = getOffsetFromTsoFile(0, uint32(tsbr.numTSIDs-1), uint32(tsbr.numTSIDs), tsid, tsbr.rawTSO)
   202  	}
   203  
   204  	if !found {
   205  		return nil, false, nil
   206  	}
   207  	tsbr.first = false
   208  	tsbr.lastTSID = tsid
   209  	tsbr.lastTSidx = tsIDX
   210  
   211  	offset += 9 // 1 byte for version + 8 bytes is for tsid
   212  	tsgLen := utils.BytesToUint32LittleEndian(tsbr.rawTSG[offset : offset+4])
   213  	offset += 4
   214  	rawSeries := bytes.NewReader(tsbr.rawTSG[offset : offset+tsgLen])
   215  	it, err := compress.NewDecompressIterator(rawSeries)
   216  	if err != nil {
   217  		log.Errorf("GetTimeSeriesIterator: Error initialising a decompressor! err: %v", err)
   218  		return nil, true, err
   219  	}
   220  	return it, true, nil
   221  }
   222  
   223  // returns bool if found. If true, returns the tsidx and offset in the TSG file
   224  func getOffsetFromTsoFile(low uint32, high uint32, nTsids uint32, tsid uint64, tsoBuf []byte) (bool, uint32, uint32) {
   225  	for low <= high {
   226  		mid := (high + low) / 2
   227  		// adding 3 because the first byte for version and the next two bytes are for number of entries
   228  		// multiplying 'mid' by 12 because every tsid info takes 8 bytes for tsid and 4 bytes for tsid offset
   229  		offsetMid := 3 + mid*12
   230  		// tsid takes 8 bytes in the tso buffer
   231  		tempBuffer := tsoBuf[offsetMid : offsetMid+8]
   232  		midTsid := utils.BytesToUint64LittleEndian(tempBuffer)
   233  		if midTsid < tsid {
   234  			low = mid + 1
   235  		} else if midTsid > tsid {
   236  			if mid == 0 {
   237  				return false, mid, 0
   238  			}
   239  			high = mid - 1
   240  		} else {
   241  			off := tsoBuf[offsetMid+8 : offsetMid+12]
   242  			return true, mid, utils.BytesToUint32LittleEndian(off)
   243  		}
   244  	}
   245  	return false, 0, 0
   246  }
   247  
   248  func (tssr *TimeSeriesSegmentReader) loadTSOFile(fileName string, rbuf []byte) ([]byte, uint16, error) {
   249  
   250  	fd, err := os.OpenFile(fileName, os.O_RDONLY, 0644)
   251  	if err != nil {
   252  		log.Infof("loadTSOFile: failed to open fileName: %v  Error: %v", fileName, err)
   253  		return nil, 0, err
   254  	}
   255  	defer fd.Close()
   256  
   257  	finfo, err := fd.Stat()
   258  	if err != nil {
   259  		log.Errorf("loadTSOFile: error when trying to stat file=%+v. Error=%+v", fileName, err)
   260  		return nil, 0, err
   261  	}
   262  
   263  	fileSize := finfo.Size()
   264  	rbuf = rbuf[:cap(rbuf)]
   265  	sizeToAdd := fileSize - int64(len(rbuf))
   266  	if sizeToAdd > 0 {
   267  		newArr := *seriesBufferPool.Get().(*[]byte)
   268  		if diff := sizeToAdd - int64(len(newArr)); diff <= 0 {
   269  			newArr = newArr[:sizeToAdd]
   270  		} else {
   271  			extend := make([]byte, diff)
   272  			newArr = append(newArr, extend...)
   273  		}
   274  		tssr.allBuffers = append(tssr.allBuffers, newArr)
   275  		rbuf = append(rbuf, newArr...)
   276  	} else {
   277  		rbuf = rbuf[:fileSize]
   278  	}
   279  	_, err = fd.ReadAt(rbuf, 0)
   280  	if err != nil {
   281  		log.Errorf("loadTSOFile: Error reading TSO file: %v, err: %v", fileName, err)
   282  		return nil, 0, err
   283  	}
   284  	// rbuf[0] gives the version byte
   285  	versionTsoFile := make([]byte, 1)
   286  	copy(versionTsoFile, rbuf[:1])
   287  	if versionTsoFile[0] != segutils.VERSION_TSOFILE[0] {
   288  		return nil, 0, fmt.Errorf("loadTSOFile: the file version doesn't match")
   289  	}
   290  	nEntries := utils.BytesToUint16LittleEndian(rbuf[1:3])
   291  	return rbuf, nEntries, nil
   292  }
   293  
   294  func (tssr *TimeSeriesSegmentReader) loadTSGFile(fileName string, rbuf []byte) ([]byte, error) {
   295  	fd, err := os.OpenFile(fileName, os.O_RDONLY, 0644)
   296  	if err != nil {
   297  		log.Errorf("loadTSGFile: error when trying to open file=%+v. Error=%+v", fileName, err)
   298  		return nil, err
   299  	}
   300  	defer fd.Close()
   301  
   302  	finfo, err := fd.Stat()
   303  	if err != nil {
   304  		log.Errorf("loadTSGFile: error when trying to stat file=%+v. Error=%+v", fileName, err)
   305  		return nil, err
   306  	}
   307  	fileSize := finfo.Size()
   308  	rbuf = rbuf[:cap(rbuf)]
   309  	sizeToAdd := fileSize - int64(len(rbuf))
   310  	if sizeToAdd > 0 {
   311  		newArr := *seriesBufferPool.Get().(*[]byte)
   312  		if diff := sizeToAdd - int64(len(newArr)); diff <= 0 {
   313  			newArr = newArr[:sizeToAdd]
   314  		} else {
   315  			extend := make([]byte, diff)
   316  			newArr = append(newArr, extend...)
   317  		}
   318  		tssr.allBuffers = append(tssr.allBuffers, newArr)
   319  		rbuf = append(rbuf, newArr...)
   320  	} else {
   321  		rbuf = rbuf[:fileSize]
   322  	}
   323  	_, err = fd.ReadAt(rbuf, 0)
   324  	if err != nil {
   325  		log.Errorf("loadTSGFile: Error reading TSG file: %v, err: %v", fileName, err)
   326  		return nil, err
   327  	}
   328  	versionTsgFile := make([]byte, 1)
   329  	copy(versionTsgFile, rbuf[:1])
   330  	if versionTsgFile[0] != segutils.VERSION_TSGFILE[0] {
   331  		return nil, fmt.Errorf("loadTSGFile: the file version doesn't match")
   332  	}
   333  	return rbuf, nil
   334  }