github.com/freiheit-com/kuberpult@v1.24.2-0.20240328135542-315d5630abe6/services/rollout-service/pkg/metrics/metrics.go (about)

     1  /*This file is part of kuberpult.
     2  
     3  Kuberpult is free software: you can redistribute it and/or modify
     4  it under the terms of the Expat(MIT) License as published by
     5  the Free Software Foundation.
     6  
     7  Kuberpult is distributed in the hope that it will be useful,
     8  but WITHOUT ANY WARRANTY; without even the implied warranty of
     9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    10  MIT License for more details.
    11  
    12  You should have received a copy of the MIT License
    13  along with kuberpult. If not, see <https://directory.fsf.org/wiki/License:Expat>.
    14  
    15  Copyright 2023 freiheit.com*/
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	pkgmetrics "github.com/freiheit-com/kuberpult/pkg/metrics"
    23  	"math"
    24  	"sync"
    25  	"time"
    26  
    27  	api "github.com/freiheit-com/kuberpult/pkg/api/v1"
    28  	"github.com/freiheit-com/kuberpult/services/rollout-service/pkg/service"
    29  	"go.opentelemetry.io/otel/attribute"
    30  	"go.opentelemetry.io/otel/metric"
    31  )
    32  
    33  func Metrics(ctx context.Context, bc *service.Broadcast, meterProvider metric.MeterProvider, clock func() time.Time, done func()) error {
    34  	for {
    35  		err := metrics(ctx, bc, meterProvider, clock, done)
    36  		select {
    37  		case <-ctx.Done():
    38  			return err
    39  		default:
    40  		}
    41  	}
    42  }
    43  
    44  func metrics(ctx context.Context, bc *service.Broadcast, meterProvider metric.MeterProvider, clock func() time.Time, done func()) error {
    45  	if clock == nil {
    46  		clock = time.Now
    47  	}
    48  	var err error
    49  	meter := meterProvider.Meter("kuberpult")
    50  	argoLag, err := meter.Int64ObservableGauge("rollout_lag_seconds")
    51  	if err != nil {
    52  		return fmt.Errorf("registering meter: %w", err)
    53  	}
    54  	var stateMx sync.Mutex
    55  	state := map[service.Key]*appState{}
    56  
    57  	reg, err := meter.RegisterCallback(
    58  		func(_ context.Context, o metric.Observer) error {
    59  			stateMx.Lock()
    60  			defer stateMx.Unlock()
    61  			now := clock()
    62  			for _, st := range state {
    63  				if st != nil {
    64  					o.ObserveInt64(argoLag, st.value(now), metric.WithAttributeSet(st.Attributes))
    65  				}
    66  			}
    67  			return nil
    68  		},
    69  		argoLag,
    70  	)
    71  	if err != nil {
    72  		return fmt.Errorf("registering callback: %w", err)
    73  	}
    74  	defer func() {
    75  		err = reg.Unregister()
    76  	}()
    77  
    78  	st, ch, unsub := bc.Start()
    79  	defer unsub()
    80  
    81  	stateMx.Lock()
    82  	for _, ev := range st {
    83  		state[ev.Key] = state[ev.Key].update(ev)
    84  	}
    85  	done()
    86  	stateMx.Unlock()
    87  	for {
    88  		select {
    89  		case ev := <-ch:
    90  			if ev == nil {
    91  				return nil
    92  			}
    93  			stateMx.Lock()
    94  			state[ev.Key] = state[ev.Key].update(ev)
    95  			done()
    96  			stateMx.Unlock()
    97  		case <-ctx.Done():
    98  			return err
    99  		}
   100  	}
   101  }
   102  
// appState is the per-key state from which one observation of the rollout lag
// gauge is derived.
type appState struct {
	// Attributes is the attribute set attached to the observation.
	Attributes attribute.Set
	// DeployedAt is taken from the event's KuberpultVersion.DeployedAt; the
	// lag is measured from this instant.
	DeployedAt time.Time
	// Successful makes value report a lag of 0 when it is true.
	Successful bool
}
   108  
   109  func (a *appState) value(now time.Time) int64 {
   110  	if a.Successful {
   111  		return 0
   112  	} else {
   113  		return int64(math.Round(now.Sub(a.DeployedAt).Seconds()))
   114  	}
   115  }
   116  
   117  func (a *appState) update(ev *service.BroadcastEvent) *appState {
   118  	if ev.KuberpultVersion == nil {
   119  		// If we don't know the kuberpult version at all, then we can't write this metric
   120  		return nil
   121  	}
   122  	if ev.KuberpultVersion.DeployedAt.IsZero() {
   123  		// Absent deployed at means the date is just missing.
   124  		return nil
   125  	}
   126  	if ev.ArgocdVersion == nil {
   127  		// We also need to know that something is in argocd
   128  		return nil
   129  	}
   130  	sc := (ev.RolloutStatus == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL || ev.RolloutStatus == api.RolloutStatus_ROLLOUT_STATUS_UNHEALTHY)
   131  	// The environment group is the only thing that can change
   132  	as := a.attributes(ev)
   133  	return &appState{
   134  		Attributes: as,
   135  		Successful: sc,
   136  		DeployedAt: ev.KuberpultVersion.DeployedAt,
   137  	}
   138  }
   139  
   140  func (a *appState) attributes(ev *service.BroadcastEvent) attribute.Set {
   141  	if a == nil {
   142  		return buildAttributes(ev)
   143  	}
   144  	eg, _ := a.Attributes.Value("kuberpult_environment_group")
   145  	if eg.AsString() == ev.EnvironmentGroup {
   146  		return a.Attributes
   147  	}
   148  	return buildAttributes(ev)
   149  }
   150  
   151  func buildAttributes(ev *service.BroadcastEvent) attribute.Set {
   152  	return attribute.NewSet(
   153  		attribute.String(pkgmetrics.EventTagApplication, ev.Application),
   154  		attribute.String(pkgmetrics.EventTagEnvironment, ev.Environment),
   155  		attribute.String(pkgmetrics.EventTagEnvironmentGroup, ev.EnvironmentGroup),
   156  	)
   157  }