github.com/m3db/m3@v1.5.0/src/aggregator/aggregation/quantile/cm/stream.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package cm
    22  
    23  import (
    24  	"math"
    25  )
    26  
const (
	// minSamplesToCompress is the smallest sample count for which compress()
	// does any work. calcQuantiles also uses it as the cut-off at or below
	// which quantiles are read directly from the retained samples.
	minSamplesToCompress = 3
)

var (
	// nan is a NaN computed once at package initialization; Quantile returns
	// it for requests outside [0, 1].
	nan = math.NaN()
)
    34  
// threshold holds the per-quantile values calcQuantiles precomputes before
// walking the sample list: the target rank of the quantile and the rank
// slack tolerated around it.
type threshold struct {
	rank      int64 // ceil(quantile * numValues)
	threshold int64 // ceil(Stream.threshold(rank) / 2)
}
    39  
// Stream represents a data stream. Incoming values are buffered in bufLess
// and bufMore, periodically merged into the samples list by insert(), and
// thinned by compress(). Flush drains the buffers and fills
// computedQuantiles for the configured target quantiles.
type Stream struct {
	compressCursor           *Sample     // compression cursor
	streamPool               StreamPool  // pool the stream is handed back to by Close
	insertCursor             *Sample     // sample at which insert() starts placing buffered values
	samples                  sampleList  // linked list of retained samples
	computedQuantiles        []float64   // sorted computed target quantiles
	thresholdBuf             []threshold // temporary buffer for computed thresholds
	bufLess                  minHeap     // sample buffer whose value is less than that at the insertion cursor
	bufMore                  minHeap     // sample buffer whose value is more than that at the insertion cursor
	quantiles                []float64   // sorted target quantiles
	numValues                int64       // number of values inserted into the sorted stream
	insertAndCompressCounter int         // insertion and compression counter
	insertAndCompressEvery   int         // stream insertion and compression frequency
	capacity                 int         // initial stream sample buffer capacity
	eps                      float64     // desired epsilon for errors
	compressMinRank          int64       // compression min rank
	closed                   bool        // whether the stream is closed
	flushed                  bool        // whether the stream is flushed
}
    60  
    61  // NewStream creates a new sample stream.
    62  func NewStream(opts Options) *Stream {
    63  	if opts == nil {
    64  		opts = NewOptions()
    65  	}
    66  
    67  	s := &Stream{
    68  		streamPool:             opts.StreamPool(),
    69  		eps:                    opts.Eps(),
    70  		capacity:               opts.Capacity(),
    71  		insertAndCompressEvery: opts.InsertAndCompressEvery(),
    72  	}
    73  
    74  	return s
    75  }
    76  
    77  // AddBatch adds a batch of sample values.
    78  func (s *Stream) AddBatch(values []float64) {
    79  	s.flushed = false
    80  
    81  	if len(values) == 0 {
    82  		return
    83  	}
    84  
    85  	if s.samples.Len() == 0 {
    86  		sample := s.samples.Acquire()
    87  		sample.value = values[0]
    88  		sample.numRanks = 1
    89  		sample.delta = 0
    90  		s.samples.PushBack(sample)
    91  		s.insertCursor = s.samples.Front()
    92  		s.numValues++
    93  		values = values[1:]
    94  	}
    95  
    96  	var (
    97  		insertPointValue = s.insertCursor.value
    98  		insertCounter    = s.insertAndCompressCounter
    99  	)
   100  
   101  	for _, value := range values {
   102  		if value < insertPointValue {
   103  			s.bufLess.Push(value)
   104  		} else {
   105  			s.bufMore.Push(value)
   106  		}
   107  
   108  		if insertCounter == s.insertAndCompressEvery {
   109  			s.insert()
   110  			s.compress()
   111  			insertCounter = 0
   112  		}
   113  		insertCounter++
   114  	}
   115  	s.insertAndCompressCounter = insertCounter
   116  }
   117  
   118  // Add adds a sample value.
   119  func (s *Stream) Add(value float64) {
   120  	s.AddBatch([]float64{value})
   121  }
   122  
   123  // Flush flushes the internal buffer.
   124  func (s *Stream) Flush() {
   125  	if s.flushed {
   126  		return
   127  	}
   128  
   129  	for s.bufLess.Len() > 0 || s.bufMore.Len() > 0 {
   130  		if s.bufMore.Len() == 0 {
   131  			s.resetInsertCursor()
   132  		}
   133  		s.insert()
   134  		s.compress()
   135  	}
   136  	s.calcQuantiles()
   137  	s.flushed = true
   138  }
   139  
   140  // Min returns the minimum value.
   141  func (s *Stream) Min() float64 {
   142  	return s.Quantile(0.0)
   143  }
   144  
   145  // Max returns the maximum value.
   146  func (s *Stream) Max() float64 {
   147  	return s.Quantile(1.0)
   148  }
   149  
   150  // Quantile returns the quantile value.
   151  func (s *Stream) Quantile(q float64) float64 {
   152  	if q < 0.0 || q > 1.0 {
   153  		return nan
   154  	}
   155  
   156  	if s.samples.Empty() {
   157  		return 0.0
   158  	}
   159  
   160  	if q == 0.0 {
   161  		return s.samples.Front().value
   162  	}
   163  	if q == 1.0 {
   164  		return s.samples.Back().value
   165  	}
   166  
   167  	for i, qt := range s.quantiles {
   168  		if qt >= q {
   169  			return s.computedQuantiles[i]
   170  		}
   171  	}
   172  	return math.NaN()
   173  }
   174  
   175  // ResetSetData resets the stream and sets data.
   176  func (s *Stream) ResetSetData(quantiles []float64) {
   177  	s.quantiles = quantiles
   178  
   179  	if len(quantiles) > cap(s.computedQuantiles) {
   180  		s.computedQuantiles = make([]float64, len(quantiles))
   181  		s.thresholdBuf = make([]threshold, len(quantiles))
   182  	} else {
   183  		s.computedQuantiles = s.computedQuantiles[:len(quantiles)]
   184  		s.thresholdBuf = s.thresholdBuf[:len(quantiles)]
   185  	}
   186  
   187  	s.closed = false
   188  }
   189  
   190  // Close closes the stream.
   191  func (s *Stream) Close() {
   192  	if s.closed {
   193  		return
   194  	}
   195  	s.closed = true
   196  
   197  	s.bufMore.Reset()
   198  	s.bufLess.Reset()
   199  
   200  	s.samples.Reset()
   201  	s.insertCursor = nil
   202  	s.compressCursor = nil
   203  	s.insertAndCompressCounter = 0
   204  	s.numValues = 0
   205  	s.compressMinRank = 0
   206  	s.streamPool.Put(s)
   207  }
   208  
   209  // quantilesFromBuf calculates quantiles from buffer if there were too few samples to compress
   210  func (s *Stream) quantilesFromBuf() {
   211  	var (
   212  		curr = s.samples.Front()
   213  		buf  = make([]float64, 0, minSamplesToCompress)
   214  	)
   215  
   216  	for curr != nil {
   217  		buf = append(buf, curr.value)
   218  		curr = curr.next
   219  	}
   220  
   221  	n := len(buf)
   222  	for i, q := range s.quantiles {
   223  		idx := int(q * float64(n))
   224  		if idx >= n {
   225  			idx = n - 1
   226  		}
   227  		s.computedQuantiles[i] = buf[idx]
   228  	}
   229  }
   230  
   231  func (s *Stream) calcQuantiles() {
   232  	if len(s.quantiles) == 0 || s.numValues == 0 {
   233  		return
   234  	} else if s.numValues <= minSamplesToCompress {
   235  		// too few values for compress(), need to compute quantiles directly
   236  		s.quantilesFromBuf()
   237  		return
   238  	}
   239  
   240  	var (
   241  		minRank int64
   242  		maxRank int64
   243  		idx     int
   244  		curr    = s.samples.Front()
   245  		prev    = s.samples.Front()
   246  	)
   247  
   248  	for i, q := range s.quantiles {
   249  		rank := int64(math.Ceil(q * float64(s.numValues)))
   250  		s.thresholdBuf[i].rank = rank
   251  		s.thresholdBuf[i].threshold = int64(
   252  			math.Ceil(float64(s.threshold(rank)) / 2.0),
   253  		)
   254  	}
   255  
   256  	for curr != nil && idx < len(s.computedQuantiles) {
   257  		maxRank = minRank + curr.numRanks + curr.delta
   258  		rank, threshold := s.thresholdBuf[idx].rank, s.thresholdBuf[idx].threshold
   259  
   260  		if maxRank > rank+threshold || minRank > rank {
   261  			s.computedQuantiles[idx] = prev.value
   262  			idx++
   263  		}
   264  
   265  		minRank += curr.numRanks
   266  		prev = curr
   267  		curr = curr.next
   268  	}
   269  
   270  	// check if the last sample value should satisfy unprocessed quantiles
   271  	for i := idx; i < len(s.thresholdBuf); i++ {
   272  		rank, threshold := s.thresholdBuf[i].rank, s.thresholdBuf[i].threshold
   273  		if maxRank >= rank+threshold || minRank > rank {
   274  			s.computedQuantiles[i] = prev.value
   275  		}
   276  	}
   277  }
   278  
   279  // insert inserts a sample into the stream.
   280  func (s *Stream) insert() {
   281  	var (
   282  		compCur          = s.compressCursor
   283  		compValue        = math.NaN()
   284  		samples          = &s.samples
   285  		insertPointValue float64
   286  		sample           *Sample
   287  	)
   288  
   289  	if compCur != nil {
   290  		compValue = compCur.value
   291  	}
   292  
   293  	// break heap invariant and just sort all the times, as we'll consume all of them in one go
   294  	s.bufMore.SortDesc()
   295  
   296  	var (
   297  		vals = []float64(s.bufMore)
   298  		idx  = len(vals) - 1
   299  	)
   300  
   301  	for s.insertCursor != nil && idx < len(vals) {
   302  		curr := s.insertCursor
   303  		insertPointValue = curr.value
   304  
   305  		for idx >= 0 && vals[idx] <= insertPointValue {
   306  			val := vals[idx]
   307  			idx--
   308  			sample = s.samples.Acquire()
   309  			sample.value = val
   310  			sample.numRanks = 1
   311  			sample.delta = curr.numRanks + curr.delta - 1
   312  
   313  			samples.InsertBefore(sample, curr)
   314  
   315  			if compValue >= val {
   316  				s.compressMinRank++
   317  			}
   318  			s.numValues++
   319  		}
   320  
   321  		s.insertCursor = s.insertCursor.next
   322  	}
   323  
   324  	if s.insertCursor == nil && idx < len(vals) {
   325  		for idx >= 0 && vals[idx] >= samples.Back().value {
   326  			val := vals[idx]
   327  			idx--
   328  			sample = s.samples.Acquire()
   329  			sample.value = val
   330  			sample.numRanks = 1
   331  			sample.delta = 0
   332  			samples.PushBack(sample)
   333  			s.numValues++
   334  		}
   335  	}
   336  
   337  	s.bufMore = s.bufMore[:0]
   338  	s.resetInsertCursor()
   339  }
   340  
// compress compresses the samples in the stream.
//
// It walks the sample list backward toward the front. At each step the
// current sample is folded into its successor when the combined rank span
// (the current sample's ranks plus the successor's ranks and delta) does not
// exceed the smallest per-quantile threshold allowed at the current
// sample's rank. The first sample is never removed.
func (s *Stream) compress() {
	// Bail early if there is nothing to compress.
	if s.samples.Len() < minSamplesToCompress {
		return
	}

	if s.compressCursor == nil {
		// Start a new pass: derive the running min rank from the
		// second-to-last sample, then begin at the sample before it.
		s.compressCursor = s.samples.Back().prev
		s.compressMinRank = s.numValues - 1 - s.compressCursor.numRanks
		s.compressCursor = s.compressCursor.prev
	}

	var (
		numVals = s.numValues
		eps     = 2.0 * s.eps
	)

	for s.compressCursor != s.samples.Front() {
		var (
			curr = s.compressCursor
			next = curr.next
			prev = curr.prev

			maxRank = s.compressMinRank + curr.numRanks + curr.delta

			threshold   = int64(math.MaxInt64)
			quantileMin int64
		)

		// Smallest allowed span across all target quantiles at maxRank;
		// this is the same formula as the threshold method, written inline.
		for i := range s.quantiles {
			if maxRank >= int64(s.quantiles[i]*float64(numVals)) {
				quantileMin = int64(eps * float64(maxRank) / s.quantiles[i])
			} else {
				quantileMin = int64(eps * float64(numVals-maxRank) / (1.0 - s.quantiles[i]))
			}
			if quantileMin < threshold {
				threshold = quantileMin
			}
		}

		// Step the running min rank back past curr before moving on.
		s.compressMinRank -= curr.numRanks
		testVal := curr.numRanks + next.numRanks + next.delta

		if testVal <= threshold {
			// Keep the insert cursor valid if it points at the sample
			// about to be removed.
			if s.insertCursor == curr {
				s.insertCursor = next
			}

			// Fold curr's ranks into its successor.
			next.numRanks += curr.numRanks

			// no need to release sample here
			s.samples.Remove(curr)
		}
		s.compressCursor = prev
	}

	// The pass reached the front of the list: clear the cursor so the next
	// call starts a new pass from the back.
	if s.compressCursor == s.samples.Front() {
		s.compressCursor = nil
	}
}
   402  
   403  // threshold computes the minimum threshold value.
   404  func (s *Stream) threshold(rank int64) int64 {
   405  	var (
   406  		minVal      = int64(math.MaxInt64)
   407  		numVals     = s.numValues
   408  		eps         = 2.0 * s.eps
   409  		quantileMin int64
   410  	)
   411  	for _, quantile := range s.quantiles {
   412  		if rank >= int64(quantile*float64(numVals)) {
   413  			quantileMin = int64(eps * float64(rank) / quantile)
   414  		} else {
   415  			quantileMin = int64(eps * float64(numVals-rank) / (1.0 - quantile))
   416  		}
   417  		if quantileMin < minVal {
   418  			minVal = quantileMin
   419  		}
   420  	}
   421  
   422  	return minVal
   423  }
   424  
   425  // resetInsertCursor resets the insert cursor.
   426  func (s *Stream) resetInsertCursor() {
   427  	s.bufLess, s.bufMore = s.bufMore, s.bufLess
   428  	s.insertCursor = s.samples.Front()
   429  }