github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/segment/segment.go (about) 1 package segment 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math/big" 8 "os" 9 "path/filepath" 10 "runtime/trace" 11 "sync" 12 "time" 13 14 "github.com/pyroscope-io/pyroscope/pkg/storage/metadata" 15 ) 16 17 type streeNode struct { 18 depth int 19 time time.Time 20 present bool 21 samples uint64 22 writes uint64 23 children []*streeNode 24 } 25 26 func (sn *streeNode) replace(child *streeNode) { 27 i := child.time.Sub(sn.time) / durations[child.depth] 28 sn.children[i] = child 29 } 30 31 func (sn *streeNode) relationship(st, et time.Time) rel { 32 t2 := sn.time.Add(durations[sn.depth]) 33 return relationship(sn.time, t2, st, et) 34 } 35 36 func (sn *streeNode) isBefore(rt time.Time) bool { 37 t2 := sn.time.Add(durations[sn.depth]) 38 return !t2.After(rt) 39 } 40 41 func (sn *streeNode) isAfter(rt time.Time) bool { 42 return sn.time.After(rt) 43 } 44 45 func (sn *streeNode) endTime() time.Time { 46 return sn.time.Add(durations[sn.depth]) 47 } 48 49 func (sn *streeNode) overlapRead(st, et time.Time) *big.Rat { 50 t2 := sn.time.Add(durations[sn.depth]) 51 return overlapRead(sn.time, t2, st, et, durations[0]) 52 } 53 54 func (sn *streeNode) overlapWrite(st, et time.Time) *big.Rat { 55 t2 := sn.time.Add(durations[sn.depth]) 56 return overlapWrite(sn.time, t2, st, et, durations[0]) 57 } 58 59 func (sn *streeNode) findAddons() []Addon { 60 res := []Addon{} 61 if sn.present { 62 res = append(res, Addon{ 63 Depth: sn.depth, 64 T: sn.time, 65 }) 66 } else { 67 for _, child := range sn.children { 68 if child != nil { 69 res = append(res, child.findAddons()...) 70 } 71 } 72 } 73 return res 74 } 75 76 func (sn *streeNode) put(st, et time.Time, samples uint64, cb func(n *streeNode, depth int, dt time.Time, r *big.Rat, addons []Addon)) { 77 nodes := []*streeNode{sn} 78 79 for len(nodes) > 0 { 80 sn = nodes[0] 81 nodes = nodes[1:] 82 83 rel := sn.relationship(st, et) 84 if rel != outside { 85 childrenCount := 0 86 createNewChildren := rel == inside || rel == overlap 87 for i, v := range sn.children { 88 if createNewChildren && v == nil { // maybe create a new child 89 childT := sn.time.Truncate(durations[sn.depth]).Add(time.Duration(i) * durations[sn.depth-1]) 90 91 rel2 := relationship(childT, childT.Add(durations[sn.depth-1]), st, et) 92 if rel2 != outside { 93 sn.children[i] = newNode(childT, sn.depth-1, 10) 94 } 95 } 96 97 if sn.children[i] != nil { 98 childrenCount++ 99 nodes = append(nodes, sn.children[i]) 100 } 101 } 102 var addons []Addon 103 104 r := sn.overlapWrite(st, et) 105 fv, _ := r.Float64() 106 sn.samples += uint64(float64(samples) * fv) 107 sn.writes += uint64(1) 108 109 // relationship overlap read overlap write 110 // inside rel = iota // | S E | <1 1/1 111 // match // matching ranges 1/1 1/1 112 // outside // | | S E 0/1 0/1 113 // overlap // | S | E <1 <1 114 // contain // S | | E 1/1 <1 115 116 if rel == match || rel == contain || childrenCount > 1 || sn.present { 117 if !sn.present { 118 addons = sn.findAddons() 119 } 120 121 cb(sn, sn.depth, sn.time, r, addons) 122 sn.present = true 123 } 124 } 125 } 126 } 127 128 func normalize(st, et time.Time) (time.Time, time.Time) { 129 st = st.Truncate(durations[0]) 130 et2 := et.Truncate(durations[0]) 131 if et2.Equal(et) && !st.Equal(et2) { 132 return st, et 133 } 134 return st, et2.Add(durations[0]) 135 } 136 137 func normalizeTime(t time.Time) time.Time { 138 return t.Truncate(durations[0]) 139 } 140 141 // get traverses through the tree searching for the nodes satisfying 142 // the given time range. If no nodes were found, the most precise 143 // down-sampling root node will be passed to the callback function, 144 // and relationship r will be proportional to the down-sampling factor. 145 // 146 // relationship overlap read overlap write 147 // inside rel = iota // | S E | <1 1/1 148 // match // matching ranges 1/1 1/1 149 // outside // | | S E 0/1 0/1 150 // overlap // | S | E <1 <1 151 // contain // S | | E 1/1 <1 152 func (sn *streeNode) get(ctx context.Context, s *Segment, st, et time.Time, cb func(*streeNode, *big.Rat)) { 153 r := sn.relationship(st, et) 154 trace.Logf(ctx, traceCatNodeGet, "D=%d T=%v P=%v R=%v", sn.depth, sn.time.Unix(), sn.present, r) 155 switch r { 156 case outside: 157 return 158 case inside, overlap: 159 // Defer to children. 160 case contain, match: 161 // Take the node as is. 162 if sn.present { 163 cb(sn, big.NewRat(1, 1)) 164 return 165 } 166 } 167 trace.Log(ctx, traceCatNodeGet, "drill down") 168 // Whether child nodes are outside the retention period. 169 if sn.time.Before(s.watermarks.levels[sn.depth-1]) && sn.present { 170 trace.Log(ctx, traceCatNodeGet, "sampled") 171 // Create a sampled tree from the current node. 172 cb(sn, sn.overlapRead(st, et)) 173 return 174 } 175 // Traverse nodes recursively. 176 for _, v := range sn.children { 177 if v != nil { 178 v.get(ctx, s, st, et, cb) 179 } 180 } 181 } 182 183 // deleteDataBefore returns true if the node should be deleted. 184 func (sn *streeNode) deleteNodesBefore(t *RetentionPolicy) (bool, error) { 185 if sn.isAfter(t.AbsoluteTime) && t.Levels == nil { 186 return false, nil 187 } 188 remove := t.isToBeDeleted(sn) 189 for i, v := range sn.children { 190 if v == nil { 191 continue 192 } 193 ok, err := v.deleteNodesBefore(t) 194 if err != nil { 195 return false, err 196 } 197 if ok { 198 sn.children[i] = nil 199 } 200 } 201 return remove, nil 202 } 203 204 func (sn *streeNode) walkNodesToDelete(t *RetentionPolicy, cb func(depth int, t time.Time) error) (bool, error) { 205 if sn.isAfter(t.AbsoluteTime) && t.Levels == nil { 206 return false, nil 207 } 208 var err error 209 remove := t.isToBeDeleted(sn) 210 if remove { 211 if err = cb(sn.depth, sn.time); err != nil { 212 return false, err 213 } 214 } 215 for _, v := range sn.children { 216 if v == nil { 217 continue 218 } 219 if _, err = v.walkNodesToDelete(t, cb); err != nil { 220 return false, err 221 } 222 } 223 return remove, nil 224 } 225 226 type Segment struct { 227 m sync.RWMutex 228 root *streeNode 229 230 spyName string 231 sampleRate uint32 232 units metadata.Units 233 aggregationType metadata.AggregationType 234 235 watermarks 236 } 237 238 type watermarks struct { 239 absoluteTime time.Time 240 levels map[int]time.Time 241 } 242 243 func newNode(t time.Time, depth, multiplier int) *streeNode { 244 sn := &streeNode{ 245 depth: depth, 246 time: t, 247 } 248 if depth > 0 { 249 sn.children = make([]*streeNode, multiplier) 250 } 251 return sn 252 } 253 254 func New() *Segment { 255 return &Segment{watermarks: watermarks{ 256 levels: make(map[int]time.Time), 257 }} 258 } 259 260 // TODO: DRY 261 func maxTime(a, b time.Time) time.Time { 262 if a.After(b) { 263 return a 264 } 265 return b 266 } 267 268 func minTime(a, b time.Time) time.Time { 269 if a.Before(b) { 270 return a 271 } 272 return b 273 } 274 275 func (s *Segment) growTree(st, et time.Time) bool { 276 var prevVal *streeNode 277 if s.root != nil { 278 st = minTime(st, s.root.time) 279 et = maxTime(et, s.root.endTime()) 280 } else { 281 st = st.Truncate(durations[0]) 282 s.root = newNode(st, 0, multiplier) 283 } 284 285 for { 286 rel := s.root.relationship(st, et) 287 288 if rel == inside || rel == match { 289 break 290 } 291 292 prevVal = s.root 293 newDepth := prevVal.depth + 1 294 if newDepth >= len(durations) { 295 return false 296 } 297 s.root = newNode(prevVal.time.Truncate(durations[newDepth]), newDepth, multiplier) 298 if prevVal != nil { 299 s.root.samples = prevVal.samples 300 s.root.writes = prevVal.writes 301 s.root.replace(prevVal) 302 } 303 } 304 return true 305 } 306 307 type Addon struct { 308 Depth int 309 T time.Time 310 } 311 312 var errStartTimeBeforeEndTime = errors.New("start time cannot be after end time") 313 var errTreeMaxSize = errors.New("segment tree reached max size, check start / end time parameters") 314 315 // TODO: simplify arguments 316 // TODO: validate st < et 317 func (s *Segment) Put(st, et time.Time, samples uint64, cb func(depth int, t time.Time, r *big.Rat, addons []Addon)) error { 318 s.m.Lock() 319 defer s.m.Unlock() 320 321 st, et = normalize(st, et) 322 if st.After(et) { 323 return errStartTimeBeforeEndTime 324 } 325 326 if !s.growTree(st, et) { 327 return errTreeMaxSize 328 } 329 v := newVis() 330 s.root.put(st, et, samples, func(sn *streeNode, depth int, tm time.Time, r *big.Rat, addons []Addon) { 331 v.add(sn, r, true) 332 cb(depth, tm, r, addons) 333 }) 334 v.print(filepath.Join(os.TempDir(), fmt.Sprintf("0-put-%s-%s.html", st.String(), et.String()))) 335 return nil 336 } 337 338 const ( 339 traceRegionGet = "segment.Get" 340 traceCatGet = traceRegionGet 341 traceCatNodeGet = "node.get" 342 ) 343 344 //revive:disable-next-line:get-return callback 345 func (s *Segment) Get(st, et time.Time, cb func(depth int, samples, writes uint64, t time.Time, r *big.Rat)) { 346 // TODO: simplify arguments 347 // TODO: validate st < et 348 s.GetContext(context.Background(), st, et, cb) 349 } 350 351 //revive:disable-next-line:get-return callback 352 func (s *Segment) GetContext(ctx context.Context, st, et time.Time, cb func(depth int, samples, writes uint64, t time.Time, r *big.Rat)) { 353 defer trace.StartRegion(ctx, traceRegionGet).End() 354 s.m.RLock() 355 defer s.m.RUnlock() 356 if st.Before(s.watermarks.absoluteTime) { 357 trace.Logf(ctx, traceCatGet, "start time %s is outside the retention period; set to %s", st, s.watermarks.absoluteTime) 358 st = s.watermarks.absoluteTime 359 } 360 st, et = normalize(st, et) 361 if s.root == nil { 362 trace.Log(ctx, traceCatGet, "empty") 363 return 364 } 365 // divider := int(et.Sub(st) / durations[0]) 366 v := newVis() 367 s.root.get(ctx, s, st, et, func(sn *streeNode, r *big.Rat) { 368 // TODO: pass m / d from .get() ? 369 v.add(sn, r, true) 370 cb(sn.depth, sn.samples, sn.writes, sn.time, r) 371 }) 372 v.print(filepath.Join(os.TempDir(), fmt.Sprintf("0-get-%s-%s.html", st.String(), et.String()))) 373 } 374 375 func (s *Segment) DeleteNodesBefore(t *RetentionPolicy) (bool, error) { 376 s.m.Lock() 377 defer s.m.Unlock() 378 if s.root == nil { 379 return true, nil 380 } 381 ok, err := s.root.deleteNodesBefore(t.normalize()) 382 if err != nil { 383 return false, err 384 } 385 if ok { 386 s.root = nil 387 } 388 s.updateWatermarks(t) 389 return ok, nil 390 } 391 392 func (s *Segment) updateWatermarks(t *RetentionPolicy) { 393 if t.AbsoluteTime.After(s.watermarks.absoluteTime) { 394 s.watermarks.absoluteTime = t.AbsoluteTime 395 } 396 for k, v := range t.Levels { 397 if level, ok := s.watermarks.levels[k]; ok && v.Before(level) { 398 continue 399 } 400 s.watermarks.levels[k] = v 401 } 402 } 403 404 func (s *Segment) WalkNodesToDelete(t *RetentionPolicy, cb func(depth int, t time.Time) error) (bool, error) { 405 s.m.RLock() 406 defer s.m.RUnlock() 407 if s.root == nil { 408 return true, nil 409 } 410 return s.root.walkNodesToDelete(t.normalize(), cb) 411 } 412 413 func (s *Segment) SetMetadata(md metadata.Metadata) { 414 s.m.Lock() 415 s.spyName = md.SpyName 416 s.sampleRate = md.SampleRate 417 s.units = md.Units 418 s.aggregationType = md.AggregationType 419 s.m.Unlock() 420 } 421 422 func (s *Segment) GetMetadata() metadata.Metadata { 423 s.m.Lock() 424 md := metadata.Metadata{ 425 SpyName: s.spyName, 426 SampleRate: s.sampleRate, 427 Units: s.units, 428 AggregationType: s.aggregationType, 429 } 430 s.m.Unlock() 431 return md 432 } 433 434 var zeroTime time.Time 435 436 func (s *Segment) StartTime() time.Time { 437 if s.root == nil { 438 return zeroTime 439 } 440 n := s.root 441 442 for { 443 if len(n.children) == 0 { 444 return n.time 445 } 446 447 oldN := n 448 449 for _, child := range n.children { 450 if child != nil { 451 n = child 452 break 453 } 454 } 455 456 if n == oldN { 457 return n.time 458 } 459 } 460 }