github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/results/mresults/seriesresult.go (about)

/*
Copyright 2023.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package mresults

import (
	"fmt"
	"sort"

	"github.com/siglens/siglens/pkg/segment/structs"
	"github.com/siglens/siglens/pkg/segment/utils"
	segutils "github.com/siglens/siglens/pkg/segment/utils"
	log "github.com/sirupsen/logrus"
	"github.com/valyala/bytebufferpool"
)

/*
	Defines functions used to store and merge series results with metrics results
*/
type Series struct {
	idx       int // entries[:idx] is guaranteed to have valid results
	len       int // the number of available elements. Once idx==len, entries needs to be resized
	entries   []Entry
	dsSeconds uint32
	sorted    bool
	grpID     *bytebufferpool.ByteBuffer

	// If the original Downsampler Aggregator is `Avg`, convertedDownsampleAggFn is
	// `Sum`; otherwise, it equals the original Downsampler Aggregator.
	convertedDownsampleAggFn utils.AggregateFunctions
	aggregationConstant      float64
}

type DownsampleSeries struct {
	idx    int // runningEntries[:idx] is guaranteed to have valid results
	len    int // denotes the number of available elements. When idx==len, the underlying slice needs to be resized
	sorted bool

	// the original Downsampler Aggregator that comes in with the metricsQuery
	downsampleAggFn     utils.AggregateFunctions
	aggregationConstant float64
	runningEntries      []RunningEntry
	grpID               *bytebufferpool.ByteBuffer
}

type RunningEntry struct {
	runningCount    uint64
	runningVal      float64
	downsampledTime uint32
}

type Entry struct {
	downsampledTime uint32
	dpVal           float64
}

const initial_len = 10
const extend_capacity = 50

/*
Allocates a series from the pool and returns it.

The allocated series should be returned to the pools via (mr *MetricsResults).DownsampleResults()
*/
func InitSeriesHolder(mQuery *structs.MetricsQuery, tsGroupId *bytebufferpool.ByteBuffer) *Series {
	// have some info about downsample
	ds := mQuery.Downsampler
	downsampleAggFn := mQuery.Downsampler.Aggregator.AggregatorFunction
	convertedDownsampleAggFn := mQuery.Downsampler.Aggregator.AggregatorFunction
	if downsampleAggFn == utils.Avg {
		convertedDownsampleAggFn = utils.Sum
	}
	aggregationConstant := mQuery.Aggregator.FuncConstant

	retVal := make([]Entry, initial_len, extend_capacity)
	return &Series{
		idx:                      0,
		len:                      initial_len,
		entries:                  retVal,
		dsSeconds:                ds.GetIntervalTimeInSeconds(),
		sorted:                   false,
		convertedDownsampleAggFn: convertedDownsampleAggFn,
		aggregationConstant:      aggregationConstant,
		grpID:                    tsGroupId,
	}
}

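// GetIdx returns the number of valid entries in the series.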
func (s *Series) GetIdx() int {
	return s.idx
}

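// AddEntry records a raw datapoint, aligning its timestamp down to the start
// of its downsample interval; e.g. with dsSeconds == 60, a timestamp of 125
// lands in the bucket starting at 120. The backing slice grows once it fills up.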
func (s *Series) AddEntry(ts uint32, dp float64) {
	s.entries[s.idx].downsampledTime = (ts / s.dsSeconds) * s.dsSeconds
	s.entries[s.idx].dpVal = dp
	s.idx++
	if s.idx >= s.len {
		if cap(s.entries)-len(s.entries) > 0 {
			s.entries = s.entries[:cap(s.entries)]
			s.len = cap(s.entries)
		} else {
			newBuf := make([]Entry, extend_capacity)
			s.entries = append(s.entries, newBuf...)
			s.len += extend_capacity
		}
	}
}

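// sortEntries truncates entries to the valid prefix and sorts it by
// downsampled time. It is a no-op if the series is already sorted.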
func (s *Series) sortEntries() {
	if s.sorted {
		return
	}

	s.entries = s.entries[:s.idx]
	sort.Slice(s.entries, func(i, j int) bool {
		return s.entries[i].downsampledTime < s.entries[j].downsampledTime
	})
	s.sorted = true
}

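// Merge appends the first toJoin.idx entries of the incoming series to s,
// leaving the merged series unsorted.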
func (s *Series) Merge(toJoin *Series) {
	toJoinEntries := toJoin.entries[:toJoin.idx]
	s.entries = s.entries[:s.idx]
	s.len = s.idx
	s.entries = append(s.entries, toJoinEntries...)
	s.idx += toJoin.idx
	s.sorted = false
	s.len += toJoin.idx
}

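// Downsample sorts the series, reduces each run of entries that share a
// downsampled time using the converted aggregator, and records the result
// along with the number of raw points per bucket so that Avg can be
// finalized later in Aggregate.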
func (s *Series) Downsample(downsampler structs.Downsampler) (*DownsampleSeries, error) {
	// get downsampled series
	s.sortEntries()
	ds := initDownsampleSeries(downsampler.Aggregator)
	for i := 0; i < s.idx; i++ {
		currDSTime := s.entries[i].downsampledTime
		maxJ := sort.Search(len(s.entries), func(j int) bool {
			return s.entries[j].downsampledTime > currDSTime
		})
		retVal, err := reduceEntries(s.entries[i:maxJ], s.convertedDownsampleAggFn, s.aggregationConstant)
		if err != nil {
			log.Errorf("Downsample: failed to reduce entries: %v", err)
			return nil, err
		}
		ds.Add(retVal, s.entries[i].downsampledTime, uint64(maxJ-i))
		i = maxJ - 1
	}
	ds.grpID = s.grpID
	return ds, nil
}

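// initDownsampleSeries allocates an empty DownsampleSeries that reduces with
// the given aggregator.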
func initDownsampleSeries(agg structs.Aggreation) *DownsampleSeries {
	runningEntries := make([]RunningEntry, initial_len, extend_capacity)
	return &DownsampleSeries{
		idx:                 0,
		len:                 initial_len,
		runningEntries:      runningEntries,
		downsampleAggFn:     agg.AggregatorFunction,
		aggregationConstant: agg.FuncConstant,
		sorted:              false,
	}
}

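// Add appends a reduced value, its downsampled time, and the number of raw
// points it covers, growing the backing slice when full.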
func (dss *DownsampleSeries) Add(retVal float64, ts uint32, count uint64) {
	dss.runningEntries[dss.idx].runningCount = count
	dss.runningEntries[dss.idx].runningVal = retVal
	dss.runningEntries[dss.idx].downsampledTime = ts
	dss.idx++
	if dss.idx >= dss.len {
		if cap(dss.runningEntries)-len(dss.runningEntries) > 0 {
			dss.runningEntries = dss.runningEntries[:cap(dss.runningEntries)]
			dss.len = cap(dss.runningEntries)
		} else {
			newBuf := make([]RunningEntry, extend_capacity)
			dss.runningEntries = append(dss.runningEntries, newBuf...)
			dss.len += extend_capacity
		}
	}
	dss.sorted = false
}

/*
Merge takes the first toJoin.idx elements of the incoming running entries and adds them to the current entries
*/
func (dss *DownsampleSeries) Merge(toJoin *DownsampleSeries) {
	toJoinEntries := toJoin.runningEntries[:toJoin.idx]
	dss.runningEntries = dss.runningEntries[:dss.idx]
	dss.len = dss.idx
	dss.runningEntries = append(dss.runningEntries, toJoinEntries...)
	dss.idx += toJoin.idx
	dss.len += toJoin.idx
	dss.sorted = false
}

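// Aggregate sorts the running entries, reduces each run that shares a
// downsampled time using the original downsample aggregator, and returns a
// map from downsampled time to the final value.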
func (dss *DownsampleSeries) Aggregate() (map[uint32]float64, error) {
	// dss has a list of RunningEntry that capture the downsampled time per tsid.
	// Many tsids may exist, but they all share the grpID.
	dss.sortEntries()
	retVal := make(map[uint32]float64)
	for i := 0; i < dss.idx; i++ {
		// find the first index where the downsampled time is greater than the current bucket's time
		currDSTime := dss.runningEntries[i].downsampledTime
		maxJ := sort.Search(len(dss.runningEntries), func(j int) bool {
			return dss.runningEntries[j].downsampledTime > currDSTime
		})
		currVal, err := reduceRunningEntries(dss.runningEntries[i:maxJ], dss.downsampleAggFn, dss.aggregationConstant)
		if err != nil {
			log.Errorf("Aggregate: failed to reduce running entries: %v", err)
			return nil, err
		}
		retVal[dss.runningEntries[i].downsampledTime] = currVal
		i = maxJ - 1
	}
	return retVal, nil
}

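// ApplyRangeFunction transforms the time series in place according to the
// given range function (currently Derivative or Rate) and returns it.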
func ApplyRangeFunction(ts map[uint32]float64, function segutils.RangeFunctions) (map[uint32]float64, error) {
	// Convert ts to a sorted list of Entry's
	sortedTimeSeries := make([]Entry, 0, len(ts))
	for time, value := range ts {
		entry := Entry{
			downsampledTime: time,
			dpVal:           value,
		}
		sortedTimeSeries = append(sortedTimeSeries, entry)
	}

	sort.Slice(sortedTimeSeries, func(i int, k int) bool {
		return sortedTimeSeries[i].downsampledTime < sortedTimeSeries[k].downsampledTime
	})

	// ts is a time series mapping timestamps to values
	switch function {
	case segutils.Derivative:
		// Calculate the derivative at each timestamp and store it in the resulting map.
		// Iterate over the sorted series so that each point's neighbors really are its
		// temporal neighbors; ranging over the map would visit keys in random order.
		if len(sortedTimeSeries) == 0 {
			return nil, nil
		}

		for i := 0; i < len(sortedTimeSeries); i++ {
			timestamp := sortedTimeSeries[i].downsampledTime

			// Collect neighboring data points for linear regression
			var x []float64
			var y []float64
			for j := i - 1; j <= i+1; j++ {
				if j >= 0 && j < len(sortedTimeSeries) {
					x = append(x, float64(sortedTimeSeries[j].downsampledTime))
					y = append(y, sortedTimeSeries[j].dpVal)
				}
			}

			if len(x) < 2 {
				log.Errorf("ApplyRangeFunction: %v does not have enough sample points", function)
				continue
			}

			// Least-squares slope over the neighborhood
			var sumX, sumY, sumXY, sumX2 float64
			for k := 0; k < len(x); k++ {
				sumX += x[k]
				sumY += y[k]
				sumXY += x[k] * y[k]
				sumX2 += x[k] * x[k]
			}
			n := float64(len(x))
			slope := (n*sumXY - sumX*sumY) / (n*sumX2 - sumX*sumX)
			ts[timestamp] = slope
		}
		// Derivatives at the edges do not exist.
		delete(ts, sortedTimeSeries[len(sortedTimeSeries)-1].downsampledTime)
		delete(ts, sortedTimeSeries[0].downsampledTime)
		return ts, nil
	case segutils.Rate:
		// Calculate the rate (per-second rate) for each timestamp and store it in the resulting map
		if len(sortedTimeSeries) == 0 {
			return nil, nil
		}

		var dx, dt float64
		prevVal := sortedTimeSeries[0].dpVal
		for i := 1; i < len(sortedTimeSeries); i++ {
			// Calculate the time difference between consecutive data points
			dt = float64(sortedTimeSeries[i].downsampledTime - sortedTimeSeries[i-1].downsampledTime)
			curVal := sortedTimeSeries[i].dpVal

			if curVal > prevVal {
				dx = curVal - prevVal
			} else {
				// This metric was reset.
				dx = curVal
			}

			ts[sortedTimeSeries[i].downsampledTime] = dx / dt
			prevVal = curVal
		}

		// Rate at the leading edge does not exist.
		delete(ts, sortedTimeSeries[0].downsampledTime)
		return ts, nil
	default:
		return ts, fmt.Errorf("ApplyRangeFunction: unknown function type")
	}
}

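// exampleRateUsage is an illustrative sketch (a hypothetical helper, not part
// of the original API): applying Rate to a counter that resets. With points
// (t=0, v=10), (t=60, v=70), and (t=120, v=5), the per-second rate at t=60 is
// (70-10)/60 = 1.0; the counter reset before t=120, so the rate there is
// 5/60 ≈ 0.083. The leading edge at t=0 is deleted from the result.
func exampleRateUsage() (map[uint32]float64, error) {
	ts := map[uint32]float64{0: 10, 60: 70, 120: 5}
	return ApplyRangeFunction(ts, segutils.Rate)
}

// sortEntries truncates runningEntries to the valid prefix and sorts it by
// downsampled time. It is a no-op if the entries are already sorted.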
func (dss *DownsampleSeries) sortEntries() {
	if dss.sorted {
		return
	}
	dss.runningEntries = dss.runningEntries[:dss.idx]
	sort.Slice(dss.runningEntries, func(i, j int) bool {
		return dss.runningEntries[i].downsampledTime < dss.runningEntries[j].downsampledTime
	})
	dss.sorted = true
}

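// reduceEntries reduces entries to a single value using the given aggregate
// function. fnConstant is only used by Quantile, where it selects the
// quantile in [0, 1].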
func reduceEntries(entries []Entry, fn utils.AggregateFunctions, fnConstant float64) (float64, error) {
	var ret float64
	switch fn {
	case utils.Sum:
		for i := range entries {
			ret += entries[i].dpVal
		}
	case utils.Min:
		for i := range entries {
			if i == 0 || entries[i].dpVal < ret {
				ret = entries[i].dpVal
			}
		}
	case utils.Max:
		for i := range entries {
			if i == 0 || entries[i].dpVal > ret {
				ret = entries[i].dpVal
			}
		}
	case utils.Count:
		ret += float64(len(entries))
	case utils.Quantile: // valid range for fnConstant is 0 <= fnConstant <= 1
		// TODO: calculate the quantile without needing to sort the elements.

		entriesCopy := make([]Entry, len(entries))
		copy(entriesCopy, entries)
		sort.Slice(entriesCopy, func(i, k int) bool {
			return entriesCopy[i].dpVal < entriesCopy[k].dpVal
		})

		index := fnConstant * float64(len(entriesCopy)-1)

		// Check for special cases when quantile position doesn't fall on an exact index
		if index != float64(int(index)) && int(index)+1 < len(entriesCopy) {
			// Calculate the weight for interpolation
			fraction := index - float64(int(index))

			dpVal1 := entriesCopy[int(index)].dpVal
			dpVal2 := entriesCopy[int(index)+1].dpVal

			ret = dpVal1 + fraction*(dpVal2-dpVal1)
		} else {
			ret = entriesCopy[int(index)].dpVal
		}
	default:
		err := fmt.Errorf("reduceEntries: unsupported AggregateFunction: %v", fn)
		log.Errorf("%v", err)
		return 0.0, err
	}

	return ret, nil
}

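// exampleQuantileUsage is an illustrative sketch (a hypothetical helper, not
// part of the original API): with values {1, 3, 10} and fnConstant = 0.5, the
// index is 0.5*(3-1) = 1, so the median 3 is returned; fnConstant = 0.25
// gives index 0.5, which interpolates to 1 + 0.5*(3-1) = 2.
func exampleQuantileUsage() (float64, error) {
	entries := []Entry{{dpVal: 1}, {dpVal: 3}, {dpVal: 10}}
	return reduceEntries(entries, utils.Quantile, 0.5)
}

// reduceRunningEntries reduces running (already downsampled) entries to a
// single value using the given aggregate function. fnConstant is only used
// by Quantile.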
func reduceRunningEntries(entries []RunningEntry, fn utils.AggregateFunctions, fnConstant float64) (float64, error) {
	var ret float64
	switch fn {
	case utils.Avg:
		// Each running entry holds a sum over runningCount raw points, so
		// weight the average by the number of points each entry covers.
		count := uint64(0)
		for i := range entries {
			ret += entries[i].runningVal
			count += entries[i].runningCount
		}
		if count > 0 {
			ret = ret / float64(count)
		}
	case utils.Sum:
		for i := range entries {
			ret += entries[i].runningVal
		}
	case utils.Min:
		for i := range entries {
			if i == 0 || entries[i].runningVal < ret {
				ret = entries[i].runningVal
			}
		}
	case utils.Max:
		for i := range entries {
			if i == 0 || entries[i].runningVal > ret {
				ret = entries[i].runningVal
			}
		}
	case utils.Count:
		ret += float64(len(entries))
	case utils.Quantile: // valid range for fnConstant is 0 <= fnConstant <= 1
		// TODO: calculate the quantile without needing to sort the elements.

		entriesCopy := make([]RunningEntry, len(entries))
		copy(entriesCopy, entries)
		sort.Slice(entriesCopy, func(i, k int) bool {
			return entriesCopy[i].runningVal < entriesCopy[k].runningVal
		})

		index := fnConstant * float64(len(entriesCopy)-1)
		// Check for special cases when quantile position doesn't fall on an exact index
		if index != float64(int(index)) && int(index)+1 < len(entriesCopy) {
			// Calculate the weight for interpolation
			fraction := index - float64(int(index))

			dpVal1 := entriesCopy[int(index)].runningVal
			dpVal2 := entriesCopy[int(index)+1].runningVal

			ret = dpVal1 + fraction*(dpVal2-dpVal1)
		} else {
			ret = entriesCopy[int(index)].runningVal
		}
	default:
		err := fmt.Errorf("reduceRunningEntries: unsupported AggregateFunction: %v", fn)
		log.Errorf("%v", err)
		return 0.0, err
	}

	return ret, nil
}
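
// exampleAvgUsage is an illustrative sketch (a hypothetical helper, not part
// of the original API), assuming the count-weighted Avg above: two running
// entries covering 4 and 1 raw points average to (10+2)/(4+1) = 2.4 rather
// than the unweighted (10+2)/2 = 6.
func exampleAvgUsage() (float64, error) {
	entries := []RunningEntry{
		{runningVal: 10, runningCount: 4},
		{runningVal: 2, runningCount: 1},
	}
	return reduceRunningEntries(entries, utils.Avg, 0)
}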