github.com/grafana/pyroscope@v1.18.0/pkg/model/time_series_merger.go (about)

     1  package model
     2  
     3  import (
     4  	"cmp"
     5  	"slices"
     6  	"sort"
     7  	"strings"
     8  	"sync"
     9  
    10  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    11  )
    12  
    13  func MergeSeries(aggregation *typesv1.TimeSeriesAggregationType, series ...[]*typesv1.Series) []*typesv1.Series {
    14  	var m *TimeSeriesMerger
    15  	if aggregation == nil || *aggregation == typesv1.TimeSeriesAggregationType_TIME_SERIES_AGGREGATION_TYPE_SUM {
    16  		m = NewTimeSeriesMerger(true)
    17  	} else {
    18  		m = NewTimeSeriesMerger(false)
    19  	}
    20  	for _, s := range series {
    21  		m.MergeTimeSeries(s)
    22  	}
    23  	return m.TimeSeries()
    24  }
    25  
    26  // TopSeries returns the top k series by sum of values.
    27  // If k is zero, all series are returned.
    28  // Note that even if len(c) <= k or k == 0, the returned
    29  // series are sorted by value in descending order and then
    30  // lexicographically (in ascending order).
    31  func TopSeries(s []*typesv1.Series, k int) []*typesv1.Series {
    32  	type series struct {
    33  		*typesv1.Series
    34  		sum float64
    35  	}
    36  	aggregated := make([]series, len(s))
    37  	for i, x := range s {
    38  		var sum float64
    39  		for _, p := range x.Points {
    40  			sum += p.Value
    41  		}
    42  		aggregated[i] = series{Series: x, sum: sum}
    43  	}
    44  	slices.SortFunc(aggregated, func(a, b series) int {
    45  		c := cmp.Compare(a.sum, b.sum)
    46  		if c == 0 {
    47  			return CompareLabelPairs(a.Labels, b.Labels)
    48  		}
    49  		return -c // Invert to sort in descending order.
    50  	})
    51  	for i, a := range aggregated {
    52  		s[i] = a.Series
    53  	}
    54  	if k > 0 && len(s) > k {
    55  		return s[:k]
    56  	}
    57  	return s
    58  }
    59  
// TimeSeriesMerger accumulates series keyed by the hash of their labels
// and merges the points of series that share a key.
type TimeSeriesMerger struct {
	// mu is held by MergeTimeSeries while it updates series.
	mu     sync.Mutex
	// series holds the accumulated series, keyed by Labels(...).Hash().
	series map[uint64]*typesv1.Series
	// sum selects whether points with equal timestamps are summed
	// (true) or kept as separate points (false) in mergePoints.
	sum    bool
}
    65  
    66  // NewTimeSeriesMerger creates a new series merger. If sum is set, samples
    67  // with matching timestamps are summed, otherwise duplicates are retained.
    68  func NewTimeSeriesMerger(sum bool) *TimeSeriesMerger {
    69  	return &TimeSeriesMerger{
    70  		series: make(map[uint64]*typesv1.Series),
    71  		sum:    sum,
    72  	}
    73  }
    74  
    75  func (m *TimeSeriesMerger) MergeTimeSeries(s []*typesv1.Series) {
    76  	m.mu.Lock()
    77  	defer m.mu.Unlock()
    78  	for _, x := range s {
    79  		h := Labels(x.Labels).Hash()
    80  		d, ok := m.series[h]
    81  		if !ok {
    82  			m.series[h] = x
    83  			continue
    84  		}
    85  		d.Points = append(d.Points, x.Points...)
    86  	}
    87  }
    88  
    89  func (m *TimeSeriesMerger) IsEmpty() bool {
    90  	return len(m.series) == 0
    91  }
    92  
    93  func (m *TimeSeriesMerger) TimeSeries() []*typesv1.Series {
    94  	r := m.mergeTimeSeries()
    95  	sort.Slice(r, func(i, j int) bool {
    96  		return CompareLabelPairs(r[i].Labels, r[j].Labels) < 0
    97  	})
    98  	return r
    99  }
   100  
   101  func (m *TimeSeriesMerger) mergeTimeSeries() []*typesv1.Series {
   102  	if len(m.series) == 0 {
   103  		return nil
   104  	}
   105  	r := make([]*typesv1.Series, len(m.series))
   106  	var i int
   107  	for _, s := range m.series {
   108  		s.Points = s.Points[:m.mergePoints(s.Points)]
   109  		r[i] = s
   110  		i++
   111  	}
   112  	return r
   113  }
   114  
   115  func (m *TimeSeriesMerger) Top(n int) []*typesv1.Series {
   116  	return TopSeries(m.mergeTimeSeries(), n)
   117  }
   118  
   119  func (m *TimeSeriesMerger) mergePoints(points []*typesv1.Point) int {
   120  	l := len(points)
   121  	if l < 2 {
   122  		return l
   123  	}
   124  	sort.Slice(points, func(i, j int) bool {
   125  		return points[i].Timestamp < points[j].Timestamp
   126  	})
   127  	var j int
   128  	for i := 1; i < l; i++ {
   129  		if points[j].Timestamp != points[i].Timestamp || !m.sum {
   130  			j++
   131  			points[j] = points[i]
   132  			continue
   133  		}
   134  		if m.sum {
   135  			points[j].Value += points[i].Value
   136  			points[j].Annotations = mergeAnnotations(points[j].Annotations, points[i].Annotations)
   137  			points[j].Exemplars = mergeExemplars(points[j].Exemplars, points[i].Exemplars)
   138  		}
   139  	}
   140  	return j + 1
   141  }
   142  
   143  func compareAnnotations(a, b *typesv1.ProfileAnnotation) int {
   144  	if r := strings.Compare(a.Key, b.Key); r != 0 {
   145  		return r
   146  	}
   147  	return strings.Compare(a.Value, b.Value)
   148  }
   149  
   150  func mergeAnnotations(a, b []*typesv1.ProfileAnnotation) []*typesv1.ProfileAnnotation {
   151  	if len(a) == 0 {
   152  		return b
   153  	}
   154  	if len(b) == 0 {
   155  		return a
   156  	}
   157  
   158  	// Merge into a single slice
   159  	merged := append(a, b...)
   160  
   161  	// Sort by key and value
   162  	slices.SortFunc(merged, compareAnnotations)
   163  
   164  	// Remove duplicates in-place
   165  	j := 0
   166  	for i := 1; i < len(merged); i++ {
   167  		// Only keep if different from the current unique element
   168  		if merged[j].Key != merged[i].Key || merged[j].Value != merged[i].Value {
   169  			j++
   170  			merged[j] = merged[i]
   171  		}
   172  	}
   173  
   174  	// Return the slice with only unique elements
   175  	return merged[:j+1]
   176  }
   177  
   178  // mergeExemplars combines two exemplar lists.
   179  // For exemplars with the same profileID, it keeps the highest value and intersects labels.
   180  func mergeExemplars(a, b []*typesv1.Exemplar) []*typesv1.Exemplar {
   181  	if len(a) == 0 {
   182  		return b
   183  	}
   184  	if len(b) == 0 {
   185  		return a
   186  	}
   187  
   188  	type exemplarGroup struct {
   189  		exemplar  *typesv1.Exemplar
   190  		labelSets []Labels
   191  	}
   192  	byProfileID := make(map[string]*exemplarGroup)
   193  
   194  	for _, ex := range a {
   195  		byProfileID[ex.ProfileId] = &exemplarGroup{
   196  			exemplar:  ex,
   197  			labelSets: []Labels{ex.Labels},
   198  		}
   199  	}
   200  
   201  	for _, ex := range b {
   202  		existing, found := byProfileID[ex.ProfileId]
   203  		if !found {
   204  			byProfileID[ex.ProfileId] = &exemplarGroup{
   205  				exemplar:  ex,
   206  				labelSets: []Labels{Labels(ex.Labels)},
   207  			}
   208  		} else {
   209  			if ex.Value > existing.exemplar.Value {
   210  				existing.exemplar = ex
   211  			}
   212  			existing.labelSets = append(existing.labelSets, Labels(ex.Labels))
   213  		}
   214  	}
   215  
   216  	result := make([]*typesv1.Exemplar, 0, len(byProfileID))
   217  	for _, group := range byProfileID {
   218  		ex := group.exemplar
   219  		if len(group.labelSets) > 1 {
   220  			ex.Labels = IntersectAll(group.labelSets)
   221  		}
   222  		result = append(result, ex)
   223  	}
   224  
   225  	sort.Slice(result, func(i, j int) bool {
   226  		return result[i].ProfileId < result[j].ProfileId
   227  	})
   228  
   229  	return result
   230  }