// Source listing: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/aggregation/quantile/cm/stream.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package cm

import (
	"math"
)

const (
	// minSamplesToCompress is the sample-count cutoff used in this file:
	// compress() returns immediately while the sample list is shorter than
	// this, and calcQuantiles() reads quantiles straight from the sample list
	// while no more than this many values have been inserted.
	minSamplesToCompress = 3
)

var (
	// nan is a NaN computed once; Quantile returns it for a q outside [0, 1].
	nan = math.NaN()
)

// threshold is one entry of Stream.thresholdBuf, filled in by calcQuantiles
// for each target quantile.
type threshold struct {
	// rank is ceil(q * numValues) for the target quantile q.
	rank int64
	// threshold is ceil(Stream.threshold(rank) / 2), the slack allowed around rank.
	threshold int64
}

// Stream represents a data stream.
41 type Stream struct { 42 compressCursor *Sample // compression cursor 43 streamPool StreamPool 44 insertCursor *Sample 45 samples sampleList 46 computedQuantiles []float64 // sorted computed target quantiles 47 thresholdBuf []threshold // temporary buffer for computed thresholds 48 bufLess minHeap // sample buffer whose value is less than that at the insertion cursor 49 bufMore minHeap // sample buffer whose value is more than that at the insertion cursor 50 quantiles []float64 // sorted target quantiles 51 numValues int64 // number of values inserted into the sorted stream 52 insertAndCompressCounter int // insertion and compression counter 53 insertAndCompressEvery int // stream insertion and compression frequency 54 capacity int // initial stream sample buffer capacity 55 eps float64 // desired epsilon for errors 56 compressMinRank int64 // compression min rank 57 closed bool // whether the stream is closed 58 flushed bool // whether the stream is flushed 59 } 60 61 // NewStream creates a new sample stream. 62 func NewStream(opts Options) *Stream { 63 if opts == nil { 64 opts = NewOptions() 65 } 66 67 s := &Stream{ 68 streamPool: opts.StreamPool(), 69 eps: opts.Eps(), 70 capacity: opts.Capacity(), 71 insertAndCompressEvery: opts.InsertAndCompressEvery(), 72 } 73 74 return s 75 } 76 77 // AddBatch adds a batch of sample values. 
78 func (s *Stream) AddBatch(values []float64) { 79 s.flushed = false 80 81 if len(values) == 0 { 82 return 83 } 84 85 if s.samples.Len() == 0 { 86 sample := s.samples.Acquire() 87 sample.value = values[0] 88 sample.numRanks = 1 89 sample.delta = 0 90 s.samples.PushBack(sample) 91 s.insertCursor = s.samples.Front() 92 s.numValues++ 93 values = values[1:] 94 } 95 96 var ( 97 insertPointValue = s.insertCursor.value 98 insertCounter = s.insertAndCompressCounter 99 ) 100 101 for _, value := range values { 102 if value < insertPointValue { 103 s.bufLess.Push(value) 104 } else { 105 s.bufMore.Push(value) 106 } 107 108 if insertCounter == s.insertAndCompressEvery { 109 s.insert() 110 s.compress() 111 insertCounter = 0 112 } 113 insertCounter++ 114 } 115 s.insertAndCompressCounter = insertCounter 116 } 117 118 // Add adds a sample value. 119 func (s *Stream) Add(value float64) { 120 s.AddBatch([]float64{value}) 121 } 122 123 // Flush flushes the internal buffer. 124 func (s *Stream) Flush() { 125 if s.flushed { 126 return 127 } 128 129 for s.bufLess.Len() > 0 || s.bufMore.Len() > 0 { 130 if s.bufMore.Len() == 0 { 131 s.resetInsertCursor() 132 } 133 s.insert() 134 s.compress() 135 } 136 s.calcQuantiles() 137 s.flushed = true 138 } 139 140 // Min returns the minimum value. 141 func (s *Stream) Min() float64 { 142 return s.Quantile(0.0) 143 } 144 145 // Max returns the maximum value. 146 func (s *Stream) Max() float64 { 147 return s.Quantile(1.0) 148 } 149 150 // Quantile returns the quantile value. 151 func (s *Stream) Quantile(q float64) float64 { 152 if q < 0.0 || q > 1.0 { 153 return nan 154 } 155 156 if s.samples.Empty() { 157 return 0.0 158 } 159 160 if q == 0.0 { 161 return s.samples.Front().value 162 } 163 if q == 1.0 { 164 return s.samples.Back().value 165 } 166 167 for i, qt := range s.quantiles { 168 if qt >= q { 169 return s.computedQuantiles[i] 170 } 171 } 172 return math.NaN() 173 } 174 175 // ResetSetData resets the stream and sets data. 
176 func (s *Stream) ResetSetData(quantiles []float64) { 177 s.quantiles = quantiles 178 179 if len(quantiles) > cap(s.computedQuantiles) { 180 s.computedQuantiles = make([]float64, len(quantiles)) 181 s.thresholdBuf = make([]threshold, len(quantiles)) 182 } else { 183 s.computedQuantiles = s.computedQuantiles[:len(quantiles)] 184 s.thresholdBuf = s.thresholdBuf[:len(quantiles)] 185 } 186 187 s.closed = false 188 } 189 190 // Close closes the stream. 191 func (s *Stream) Close() { 192 if s.closed { 193 return 194 } 195 s.closed = true 196 197 s.bufMore.Reset() 198 s.bufLess.Reset() 199 200 s.samples.Reset() 201 s.insertCursor = nil 202 s.compressCursor = nil 203 s.insertAndCompressCounter = 0 204 s.numValues = 0 205 s.compressMinRank = 0 206 s.streamPool.Put(s) 207 } 208 209 // quantilesFromBuf calculates quantiles from buffer if there were too few samples to compress 210 func (s *Stream) quantilesFromBuf() { 211 var ( 212 curr = s.samples.Front() 213 buf = make([]float64, 0, minSamplesToCompress) 214 ) 215 216 for curr != nil { 217 buf = append(buf, curr.value) 218 curr = curr.next 219 } 220 221 n := len(buf) 222 for i, q := range s.quantiles { 223 idx := int(q * float64(n)) 224 if idx >= n { 225 idx = n - 1 226 } 227 s.computedQuantiles[i] = buf[idx] 228 } 229 } 230 231 func (s *Stream) calcQuantiles() { 232 if len(s.quantiles) == 0 || s.numValues == 0 { 233 return 234 } else if s.numValues <= minSamplesToCompress { 235 // too few values for compress(), need to compute quantiles directly 236 s.quantilesFromBuf() 237 return 238 } 239 240 var ( 241 minRank int64 242 maxRank int64 243 idx int 244 curr = s.samples.Front() 245 prev = s.samples.Front() 246 ) 247 248 for i, q := range s.quantiles { 249 rank := int64(math.Ceil(q * float64(s.numValues))) 250 s.thresholdBuf[i].rank = rank 251 s.thresholdBuf[i].threshold = int64( 252 math.Ceil(float64(s.threshold(rank)) / 2.0), 253 ) 254 } 255 256 for curr != nil && idx < len(s.computedQuantiles) { 257 maxRank = minRank + 
curr.numRanks + curr.delta 258 rank, threshold := s.thresholdBuf[idx].rank, s.thresholdBuf[idx].threshold 259 260 if maxRank > rank+threshold || minRank > rank { 261 s.computedQuantiles[idx] = prev.value 262 idx++ 263 } 264 265 minRank += curr.numRanks 266 prev = curr 267 curr = curr.next 268 } 269 270 // check if the last sample value should satisfy unprocessed quantiles 271 for i := idx; i < len(s.thresholdBuf); i++ { 272 rank, threshold := s.thresholdBuf[i].rank, s.thresholdBuf[i].threshold 273 if maxRank >= rank+threshold || minRank > rank { 274 s.computedQuantiles[i] = prev.value 275 } 276 } 277 } 278 279 // insert inserts a sample into the stream. 280 func (s *Stream) insert() { 281 var ( 282 compCur = s.compressCursor 283 compValue = math.NaN() 284 samples = &s.samples 285 insertPointValue float64 286 sample *Sample 287 ) 288 289 if compCur != nil { 290 compValue = compCur.value 291 } 292 293 // break heap invariant and just sort all the times, as we'll consume all of them in one go 294 s.bufMore.SortDesc() 295 296 var ( 297 vals = []float64(s.bufMore) 298 idx = len(vals) - 1 299 ) 300 301 for s.insertCursor != nil && idx < len(vals) { 302 curr := s.insertCursor 303 insertPointValue = curr.value 304 305 for idx >= 0 && vals[idx] <= insertPointValue { 306 val := vals[idx] 307 idx-- 308 sample = s.samples.Acquire() 309 sample.value = val 310 sample.numRanks = 1 311 sample.delta = curr.numRanks + curr.delta - 1 312 313 samples.InsertBefore(sample, curr) 314 315 if compValue >= val { 316 s.compressMinRank++ 317 } 318 s.numValues++ 319 } 320 321 s.insertCursor = s.insertCursor.next 322 } 323 324 if s.insertCursor == nil && idx < len(vals) { 325 for idx >= 0 && vals[idx] >= samples.Back().value { 326 val := vals[idx] 327 idx-- 328 sample = s.samples.Acquire() 329 sample.value = val 330 sample.numRanks = 1 331 sample.delta = 0 332 samples.PushBack(sample) 333 s.numValues++ 334 } 335 } 336 337 s.bufMore = s.bufMore[:0] 338 s.resetInsertCursor() 339 } 340 341 // 
// compress compresses the samples in the stream.
//
// It walks the sample list backwards from the compression cursor towards the
// front. For each visited sample it computes the smallest per-quantile
// threshold at the sample's maximum rank, and if the combined rank span of the
// sample and its successor fits within that threshold, the sample is merged
// into its successor and removed from the list. The front sample is never
// visited by the loop, and the cursor is cleared once the front is reached so
// the next call starts again from the back.
//
// NOTE(review): a target quantile of exactly 0 or 1 makes one of the two
// divisions below divide by zero; converting the resulting non-finite float to
// int64 is implementation-defined in Go. Confirm callers never pass such targets.
func (s *Stream) compress() {
	// Bail early if there is nothing to compress.
	if s.samples.Len() < minSamplesToCompress {
		return
	}

	// No cursor: start a fresh pass from the back of the list. The min rank is
	// derived from the second-to-last sample, and the cursor then steps one
	// sample further towards the front.
	if s.compressCursor == nil {
		s.compressCursor = s.samples.Back().prev
		s.compressMinRank = s.numValues - 1 - s.compressCursor.numRanks
		s.compressCursor = s.compressCursor.prev
	}

	var (
		numVals = s.numValues
		eps     = 2.0 * s.eps
	)

	for s.compressCursor != s.samples.Front() {
		var (
			curr = s.compressCursor
			next = curr.next
			prev = curr.prev

			maxRank = s.compressMinRank + curr.numRanks + curr.delta

			threshold   = int64(math.MaxInt64)
			quantileMin int64
		)

		// Same per-quantile minimum as Stream.threshold(maxRank), computed inline.
		for i := range s.quantiles {
			if maxRank >= int64(s.quantiles[i]*float64(numVals)) {
				quantileMin = int64(eps * float64(maxRank) / s.quantiles[i])
			} else {
				quantileMin = int64(eps * float64(numVals-maxRank) / (1.0 - s.quantiles[i]))
			}
			if quantileMin < threshold {
				threshold = quantileMin
			}
		}

		// Step the running min rank back by this sample's ranks before moving
		// on to the previous sample.
		s.compressMinRank -= curr.numRanks
		testVal := curr.numRanks + next.numRanks + next.delta

		if testVal <= threshold {
			// Keep the insertion cursor valid if it points at the sample
			// that is about to be removed.
			if s.insertCursor == curr {
				s.insertCursor = next
			}

			// Fold the removed sample's ranks into its successor.
			next.numRanks += curr.numRanks

			// no need to release sample here
			s.samples.Remove(curr)
		}
		s.compressCursor = prev
	}

	// The pass is complete once the front is reached; clear the cursor so the
	// next call re-seeds it from the back.
	if s.compressCursor == s.samples.Front() {
		s.compressCursor = nil
	}
}

// threshold computes the minimum threshold value.
404 func (s *Stream) threshold(rank int64) int64 { 405 var ( 406 minVal = int64(math.MaxInt64) 407 numVals = s.numValues 408 eps = 2.0 * s.eps 409 quantileMin int64 410 ) 411 for _, quantile := range s.quantiles { 412 if rank >= int64(quantile*float64(numVals)) { 413 quantileMin = int64(eps * float64(rank) / quantile) 414 } else { 415 quantileMin = int64(eps * float64(numVals-rank) / (1.0 - quantile)) 416 } 417 if quantileMin < minVal { 418 minVal = quantileMin 419 } 420 } 421 422 return minVal 423 } 424 425 // resetInsertCursor resets the insert cursor. 426 func (s *Stream) resetInsertCursor() { 427 s.bufLess, s.bufMore = s.bufMore, s.bufLess 428 s.insertCursor = s.samples.Front() 429 }