github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/segment/segment.go (about)

     1  package segment
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math/big"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime/trace"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/pyroscope-io/pyroscope/pkg/storage/metadata"
    15  )
    16  
    17  type streeNode struct {
    18  	depth    int
    19  	time     time.Time
    20  	present  bool
    21  	samples  uint64
    22  	writes   uint64
    23  	children []*streeNode
    24  }
    25  
    26  func (sn *streeNode) replace(child *streeNode) {
    27  	i := child.time.Sub(sn.time) / durations[child.depth]
    28  	sn.children[i] = child
    29  }
    30  
    31  func (sn *streeNode) relationship(st, et time.Time) rel {
    32  	t2 := sn.time.Add(durations[sn.depth])
    33  	return relationship(sn.time, t2, st, et)
    34  }
    35  
    36  func (sn *streeNode) isBefore(rt time.Time) bool {
    37  	t2 := sn.time.Add(durations[sn.depth])
    38  	return !t2.After(rt)
    39  }
    40  
    41  func (sn *streeNode) isAfter(rt time.Time) bool {
    42  	return sn.time.After(rt)
    43  }
    44  
    45  func (sn *streeNode) endTime() time.Time {
    46  	return sn.time.Add(durations[sn.depth])
    47  }
    48  
    49  func (sn *streeNode) overlapRead(st, et time.Time) *big.Rat {
    50  	t2 := sn.time.Add(durations[sn.depth])
    51  	return overlapRead(sn.time, t2, st, et, durations[0])
    52  }
    53  
    54  func (sn *streeNode) overlapWrite(st, et time.Time) *big.Rat {
    55  	t2 := sn.time.Add(durations[sn.depth])
    56  	return overlapWrite(sn.time, t2, st, et, durations[0])
    57  }
    58  
    59  func (sn *streeNode) findAddons() []Addon {
    60  	res := []Addon{}
    61  	if sn.present {
    62  		res = append(res, Addon{
    63  			Depth: sn.depth,
    64  			T:     sn.time,
    65  		})
    66  	} else {
    67  		for _, child := range sn.children {
    68  			if child != nil {
    69  				res = append(res, child.findAddons()...)
    70  			}
    71  		}
    72  	}
    73  	return res
    74  }
    75  
    76  func (sn *streeNode) put(st, et time.Time, samples uint64, cb func(n *streeNode, depth int, dt time.Time, r *big.Rat, addons []Addon)) {
    77  	nodes := []*streeNode{sn}
    78  
    79  	for len(nodes) > 0 {
    80  		sn = nodes[0]
    81  		nodes = nodes[1:]
    82  
    83  		rel := sn.relationship(st, et)
    84  		if rel != outside {
    85  			childrenCount := 0
    86  			createNewChildren := rel == inside || rel == overlap
    87  			for i, v := range sn.children {
    88  				if createNewChildren && v == nil { // maybe create a new child
    89  					childT := sn.time.Truncate(durations[sn.depth]).Add(time.Duration(i) * durations[sn.depth-1])
    90  
    91  					rel2 := relationship(childT, childT.Add(durations[sn.depth-1]), st, et)
    92  					if rel2 != outside {
    93  						sn.children[i] = newNode(childT, sn.depth-1, 10)
    94  					}
    95  				}
    96  
    97  				if sn.children[i] != nil {
    98  					childrenCount++
    99  					nodes = append(nodes, sn.children[i])
   100  				}
   101  			}
   102  			var addons []Addon
   103  
   104  			r := sn.overlapWrite(st, et)
   105  			fv, _ := r.Float64()
   106  			sn.samples += uint64(float64(samples) * fv)
   107  			sn.writes += uint64(1)
   108  
   109  			//  relationship                               overlap read             overlap write
   110  			// 	inside  rel = iota   // | S E |            <1                       1/1
   111  			// 	match                // matching ranges    1/1                      1/1
   112  			// 	outside              // | | S E            0/1                      0/1
   113  			// 	overlap              // | S | E            <1                       <1
   114  			// 	contain              // S | | E            1/1                      <1
   115  
   116  			if rel == match || rel == contain || childrenCount > 1 || sn.present {
   117  				if !sn.present {
   118  					addons = sn.findAddons()
   119  				}
   120  
   121  				cb(sn, sn.depth, sn.time, r, addons)
   122  				sn.present = true
   123  			}
   124  		}
   125  	}
   126  }
   127  
   128  func normalize(st, et time.Time) (time.Time, time.Time) {
   129  	st = st.Truncate(durations[0])
   130  	et2 := et.Truncate(durations[0])
   131  	if et2.Equal(et) && !st.Equal(et2) {
   132  		return st, et
   133  	}
   134  	return st, et2.Add(durations[0])
   135  }
   136  
   137  func normalizeTime(t time.Time) time.Time {
   138  	return t.Truncate(durations[0])
   139  }
   140  
   141  // get traverses through the tree searching for the nodes satisfying
   142  // the given time range. If no nodes were found, the most precise
   143  // down-sampling root node will be passed to the callback function,
   144  // and relationship r will be proportional to the down-sampling factor.
   145  //
   146  //  relationship                               overlap read             overlap write
   147  // 	inside  rel = iota   // | S E |            <1                       1/1
   148  // 	match                // matching ranges    1/1                      1/1
   149  // 	outside              // | | S E            0/1                      0/1
   150  // 	overlap              // | S | E            <1                       <1
   151  // 	contain              // S | | E            1/1                      <1
   152  func (sn *streeNode) get(ctx context.Context, s *Segment, st, et time.Time, cb func(*streeNode, *big.Rat)) {
   153  	r := sn.relationship(st, et)
   154  	trace.Logf(ctx, traceCatNodeGet, "D=%d T=%v P=%v R=%v", sn.depth, sn.time.Unix(), sn.present, r)
   155  	switch r {
   156  	case outside:
   157  		return
   158  	case inside, overlap:
   159  		// Defer to children.
   160  	case contain, match:
   161  		// Take the node as is.
   162  		if sn.present {
   163  			cb(sn, big.NewRat(1, 1))
   164  			return
   165  		}
   166  	}
   167  	trace.Log(ctx, traceCatNodeGet, "drill down")
   168  	// Whether child nodes are outside the retention period.
   169  	if sn.time.Before(s.watermarks.levels[sn.depth-1]) && sn.present {
   170  		trace.Log(ctx, traceCatNodeGet, "sampled")
   171  		// Create a sampled tree from the current node.
   172  		cb(sn, sn.overlapRead(st, et))
   173  		return
   174  	}
   175  	// Traverse nodes recursively.
   176  	for _, v := range sn.children {
   177  		if v != nil {
   178  			v.get(ctx, s, st, et, cb)
   179  		}
   180  	}
   181  }
   182  
   183  // deleteDataBefore returns true if the node should be deleted.
   184  func (sn *streeNode) deleteNodesBefore(t *RetentionPolicy) (bool, error) {
   185  	if sn.isAfter(t.AbsoluteTime) && t.Levels == nil {
   186  		return false, nil
   187  	}
   188  	remove := t.isToBeDeleted(sn)
   189  	for i, v := range sn.children {
   190  		if v == nil {
   191  			continue
   192  		}
   193  		ok, err := v.deleteNodesBefore(t)
   194  		if err != nil {
   195  			return false, err
   196  		}
   197  		if ok {
   198  			sn.children[i] = nil
   199  		}
   200  	}
   201  	return remove, nil
   202  }
   203  
   204  func (sn *streeNode) walkNodesToDelete(t *RetentionPolicy, cb func(depth int, t time.Time) error) (bool, error) {
   205  	if sn.isAfter(t.AbsoluteTime) && t.Levels == nil {
   206  		return false, nil
   207  	}
   208  	var err error
   209  	remove := t.isToBeDeleted(sn)
   210  	if remove {
   211  		if err = cb(sn.depth, sn.time); err != nil {
   212  			return false, err
   213  		}
   214  	}
   215  	for _, v := range sn.children {
   216  		if v == nil {
   217  			continue
   218  		}
   219  		if _, err = v.walkNodesToDelete(t, cb); err != nil {
   220  			return false, err
   221  		}
   222  	}
   223  	return remove, nil
   224  }
   225  
   226  type Segment struct {
   227  	m    sync.RWMutex
   228  	root *streeNode
   229  
   230  	spyName         string
   231  	sampleRate      uint32
   232  	units           metadata.Units
   233  	aggregationType metadata.AggregationType
   234  
   235  	watermarks
   236  }
   237  
   238  type watermarks struct {
   239  	absoluteTime time.Time
   240  	levels       map[int]time.Time
   241  }
   242  
   243  func newNode(t time.Time, depth, multiplier int) *streeNode {
   244  	sn := &streeNode{
   245  		depth: depth,
   246  		time:  t,
   247  	}
   248  	if depth > 0 {
   249  		sn.children = make([]*streeNode, multiplier)
   250  	}
   251  	return sn
   252  }
   253  
   254  func New() *Segment {
   255  	return &Segment{watermarks: watermarks{
   256  		levels: make(map[int]time.Time),
   257  	}}
   258  }
   259  
   260  // TODO: DRY
   261  func maxTime(a, b time.Time) time.Time {
   262  	if a.After(b) {
   263  		return a
   264  	}
   265  	return b
   266  }
   267  
   268  func minTime(a, b time.Time) time.Time {
   269  	if a.Before(b) {
   270  		return a
   271  	}
   272  	return b
   273  }
   274  
   275  func (s *Segment) growTree(st, et time.Time) bool {
   276  	var prevVal *streeNode
   277  	if s.root != nil {
   278  		st = minTime(st, s.root.time)
   279  		et = maxTime(et, s.root.endTime())
   280  	} else {
   281  		st = st.Truncate(durations[0])
   282  		s.root = newNode(st, 0, multiplier)
   283  	}
   284  
   285  	for {
   286  		rel := s.root.relationship(st, et)
   287  
   288  		if rel == inside || rel == match {
   289  			break
   290  		}
   291  
   292  		prevVal = s.root
   293  		newDepth := prevVal.depth + 1
   294  		if newDepth >= len(durations) {
   295  			return false
   296  		}
   297  		s.root = newNode(prevVal.time.Truncate(durations[newDepth]), newDepth, multiplier)
   298  		if prevVal != nil {
   299  			s.root.samples = prevVal.samples
   300  			s.root.writes = prevVal.writes
   301  			s.root.replace(prevVal)
   302  		}
   303  	}
   304  	return true
   305  }
   306  
   307  type Addon struct {
   308  	Depth int
   309  	T     time.Time
   310  }
   311  
   312  var errStartTimeBeforeEndTime = errors.New("start time cannot be after end time")
   313  var errTreeMaxSize = errors.New("segment tree reached max size, check start / end time parameters")
   314  
   315  // TODO: simplify arguments
   316  // TODO: validate st < et
   317  func (s *Segment) Put(st, et time.Time, samples uint64, cb func(depth int, t time.Time, r *big.Rat, addons []Addon)) error {
   318  	s.m.Lock()
   319  	defer s.m.Unlock()
   320  
   321  	st, et = normalize(st, et)
   322  	if st.After(et) {
   323  		return errStartTimeBeforeEndTime
   324  	}
   325  
   326  	if !s.growTree(st, et) {
   327  		return errTreeMaxSize
   328  	}
   329  	v := newVis()
   330  	s.root.put(st, et, samples, func(sn *streeNode, depth int, tm time.Time, r *big.Rat, addons []Addon) {
   331  		v.add(sn, r, true)
   332  		cb(depth, tm, r, addons)
   333  	})
   334  	v.print(filepath.Join(os.TempDir(), fmt.Sprintf("0-put-%s-%s.html", st.String(), et.String())))
   335  	return nil
   336  }
   337  
   338  const (
   339  	traceRegionGet  = "segment.Get"
   340  	traceCatGet     = traceRegionGet
   341  	traceCatNodeGet = "node.get"
   342  )
   343  
   344  //revive:disable-next-line:get-return callback
   345  func (s *Segment) Get(st, et time.Time, cb func(depth int, samples, writes uint64, t time.Time, r *big.Rat)) {
   346  	// TODO: simplify arguments
   347  	// TODO: validate st < et
   348  	s.GetContext(context.Background(), st, et, cb)
   349  }
   350  
   351  //revive:disable-next-line:get-return callback
   352  func (s *Segment) GetContext(ctx context.Context, st, et time.Time, cb func(depth int, samples, writes uint64, t time.Time, r *big.Rat)) {
   353  	defer trace.StartRegion(ctx, traceRegionGet).End()
   354  	s.m.RLock()
   355  	defer s.m.RUnlock()
   356  	if st.Before(s.watermarks.absoluteTime) {
   357  		trace.Logf(ctx, traceCatGet, "start time %s is outside the retention period; set to %s", st, s.watermarks.absoluteTime)
   358  		st = s.watermarks.absoluteTime
   359  	}
   360  	st, et = normalize(st, et)
   361  	if s.root == nil {
   362  		trace.Log(ctx, traceCatGet, "empty")
   363  		return
   364  	}
   365  	// divider := int(et.Sub(st) / durations[0])
   366  	v := newVis()
   367  	s.root.get(ctx, s, st, et, func(sn *streeNode, r *big.Rat) {
   368  		// TODO: pass m / d from .get() ?
   369  		v.add(sn, r, true)
   370  		cb(sn.depth, sn.samples, sn.writes, sn.time, r)
   371  	})
   372  	v.print(filepath.Join(os.TempDir(), fmt.Sprintf("0-get-%s-%s.html", st.String(), et.String())))
   373  }
   374  
   375  func (s *Segment) DeleteNodesBefore(t *RetentionPolicy) (bool, error) {
   376  	s.m.Lock()
   377  	defer s.m.Unlock()
   378  	if s.root == nil {
   379  		return true, nil
   380  	}
   381  	ok, err := s.root.deleteNodesBefore(t.normalize())
   382  	if err != nil {
   383  		return false, err
   384  	}
   385  	if ok {
   386  		s.root = nil
   387  	}
   388  	s.updateWatermarks(t)
   389  	return ok, nil
   390  }
   391  
   392  func (s *Segment) updateWatermarks(t *RetentionPolicy) {
   393  	if t.AbsoluteTime.After(s.watermarks.absoluteTime) {
   394  		s.watermarks.absoluteTime = t.AbsoluteTime
   395  	}
   396  	for k, v := range t.Levels {
   397  		if level, ok := s.watermarks.levels[k]; ok && v.Before(level) {
   398  			continue
   399  		}
   400  		s.watermarks.levels[k] = v
   401  	}
   402  }
   403  
   404  func (s *Segment) WalkNodesToDelete(t *RetentionPolicy, cb func(depth int, t time.Time) error) (bool, error) {
   405  	s.m.RLock()
   406  	defer s.m.RUnlock()
   407  	if s.root == nil {
   408  		return true, nil
   409  	}
   410  	return s.root.walkNodesToDelete(t.normalize(), cb)
   411  }
   412  
   413  func (s *Segment) SetMetadata(md metadata.Metadata) {
   414  	s.m.Lock()
   415  	s.spyName = md.SpyName
   416  	s.sampleRate = md.SampleRate
   417  	s.units = md.Units
   418  	s.aggregationType = md.AggregationType
   419  	s.m.Unlock()
   420  }
   421  
   422  func (s *Segment) GetMetadata() metadata.Metadata {
   423  	s.m.Lock()
   424  	md := metadata.Metadata{
   425  		SpyName:         s.spyName,
   426  		SampleRate:      s.sampleRate,
   427  		Units:           s.units,
   428  		AggregationType: s.aggregationType,
   429  	}
   430  	s.m.Unlock()
   431  	return md
   432  }
   433  
   434  var zeroTime time.Time
   435  
   436  func (s *Segment) StartTime() time.Time {
   437  	if s.root == nil {
   438  		return zeroTime
   439  	}
   440  	n := s.root
   441  
   442  	for {
   443  		if len(n.children) == 0 {
   444  			return n.time
   445  		}
   446  
   447  		oldN := n
   448  
   449  		for _, child := range n.children {
   450  			if child != nil {
   451  				n = child
   452  				break
   453  			}
   454  		}
   455  
   456  		if n == oldN {
   457  			return n.time
   458  		}
   459  	}
   460  }