github.com/freiheit-com/kuberpult@v1.24.2-0.20240328135542-315d5630abe6/services/rollout-service/pkg/service/broadcast.go (about)

     1  /*This file is part of kuberpult.
     2  
     3  Kuberpult is free software: you can redistribute it and/or modify
     4  it under the terms of the Expat(MIT) License as published by
     5  the Free Software Foundation.
     6  
     7  Kuberpult is distributed in the hope that it will be useful,
     8  but WITHOUT ANY WARRANTY; without even the implied warranty of
     9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    10  MIT License for more details.
    11  
    12  You should have received a copy of the MIT License
    13  along with kuberpult. If not, see <https://directory.fsf.org/wiki/License:Expat>.
    14  
    15  Copyright 2023 freiheit.com*/
    16  
    17  package service
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"sync"
    23  	"time"
    24  
    25  	api "github.com/freiheit-com/kuberpult/pkg/api/v1"
    26  	"github.com/freiheit-com/kuberpult/pkg/ptr"
    27  	"github.com/freiheit-com/kuberpult/services/rollout-service/pkg/versions"
    28  
    29  	"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
    30  	"github.com/argoproj/gitops-engine/pkg/health"
    31  	"github.com/argoproj/gitops-engine/pkg/sync/common"
    32  )
    33  
// Key identifies one application deployed to one environment. It is the key
// under which Broadcast stores its per-application state.
type Key struct {
	Application string
	Environment string
}
    38  
// appState is the last known state of a single application/environment pair,
// merged from the Argo CD and kuberpult events applied so far.
//
// argocdVersion and rolloutStatus are written by applyArgoEvent;
// kuberpultVersion, environmentGroup, isProduction and team are written by
// applyKuberpultEvent. isProduction stays nil until the first kuberpult event
// has been applied.
type appState struct {
	argocdVersion    *versions.VersionInfo
	kuberpultVersion *versions.VersionInfo
	rolloutStatus    api.RolloutStatus
	environmentGroup string
	isProduction     *bool
	team             string
}
    47  
    48  func (a *appState) applyArgoEvent(ev *ArgoEvent) *BroadcastEvent {
    49  	status := rolloutStatus(ev)
    50  	if a.rolloutStatus != status || !a.argocdVersion.Equal(ev.Version) {
    51  		a.rolloutStatus = status
    52  		a.argocdVersion = ev.Version
    53  		return a.getEvent(ev.Application, ev.Environment)
    54  	}
    55  	return nil
    56  }
    57  
    58  func (a *appState) applyKuberpultEvent(ev *versions.KuberpultEvent) *BroadcastEvent {
    59  	if !a.argocdVersion.Equal(ev.Version) || a.isProduction == nil || *a.isProduction != ev.IsProduction {
    60  		a.kuberpultVersion = ev.Version
    61  		a.environmentGroup = ev.EnvironmentGroup
    62  		a.team = ev.Team
    63  		a.isProduction = ptr.Bool(ev.IsProduction)
    64  		return a.getEvent(ev.Application, ev.Environment)
    65  	}
    66  	return nil
    67  }
    68  
    69  func (a *appState) getEvent(application, environment string) *BroadcastEvent {
    70  	rs := a.rolloutStatus
    71  	if a.kuberpultVersion == nil || a.argocdVersion == nil {
    72  		if rs == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL {
    73  			rs = api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN
    74  		}
    75  	} else if a.kuberpultVersion.Version != a.argocdVersion.Version {
    76  		rs = api.RolloutStatus_ROLLOUT_STATUS_PENDING
    77  	}
    78  	return &BroadcastEvent{
    79  		Key: Key{
    80  			Environment: environment,
    81  			Application: application,
    82  		},
    83  		EnvironmentGroup: a.environmentGroup,
    84  		IsProduction:     a.isProduction,
    85  		ArgocdVersion:    a.argocdVersion,
    86  		RolloutStatus:    rs,
    87  		Team:             a.team,
    88  		KuberpultVersion: a.kuberpultVersion,
    89  	}
    90  }
    91  
// Broadcast keeps the latest state per application/environment and fans out
// every state change to all currently subscribed listeners.
//
// state holds the per-Key application state and listener is the set of
// subscriber channels; both are only accessed while mx is held.
type Broadcast struct {
	state    map[Key]*appState
	mx       sync.Mutex
	listener map[chan *BroadcastEvent]struct{}

	// The waiting function is used in tests to trigger events after the subscription is set up.
	waiting func()
}
   100  
   101  func New() *Broadcast {
   102  	return &Broadcast{
   103  		mx:       sync.Mutex{},
   104  		waiting:  nil,
   105  		state:    map[Key]*appState{},
   106  		listener: map[chan *BroadcastEvent]struct{}{},
   107  	}
   108  }
   109  
   110  // ProcessArgoEvent implements service.EventProcessor
   111  func (b *Broadcast) ProcessArgoEvent(ctx context.Context, ev ArgoEvent) {
   112  	b.mx.Lock()
   113  	defer b.mx.Unlock()
   114  	k := Key{
   115  		Application: ev.Application,
   116  		Environment: ev.Environment,
   117  	}
   118  	if b.state[k] == nil {
   119  		//exhaustruct:ignore
   120  		b.state[k] = &appState{}
   121  	}
   122  	msg := b.state[k].applyArgoEvent(&ev)
   123  	if msg == nil {
   124  		return
   125  	}
   126  	desub := []chan *BroadcastEvent{}
   127  	for l := range b.listener {
   128  		select {
   129  		case l <- msg:
   130  		default:
   131  			close(l)
   132  			desub = append(desub, l)
   133  		}
   134  	}
   135  	for _, l := range desub {
   136  		delete(b.listener, l)
   137  	}
   138  }
   139  
   140  func (b *Broadcast) ProcessKuberpultEvent(ctx context.Context, ev versions.KuberpultEvent) {
   141  	b.mx.Lock()
   142  	defer b.mx.Unlock()
   143  	k := Key{
   144  		Application: ev.Application,
   145  		Environment: ev.Environment,
   146  	}
   147  	if b.state[k] == nil {
   148  		//exhaustruct:ignore
   149  		b.state[k] = &appState{}
   150  	}
   151  	msg := b.state[k].applyKuberpultEvent(&ev)
   152  	if msg == nil {
   153  		return
   154  	}
   155  	desub := []chan *BroadcastEvent{}
   156  	for l := range b.listener {
   157  		select {
   158  		case l <- msg:
   159  		default:
   160  			close(l)
   161  			desub = append(desub, l)
   162  		}
   163  	}
   164  	for _, l := range desub {
   165  		delete(b.listener, l)
   166  	}
   167  }
   168  
   169  // Disconnects all listeners. This is used in tests to check wheter subscribers handle reconnects
   170  func (b *Broadcast) DisconnectAll() {
   171  	b.mx.Lock()
   172  	defer b.mx.Unlock()
   173  	for l := range b.listener {
   174  		close(l)
   175  	}
   176  	b.listener = make(map[chan *BroadcastEvent]struct{})
   177  }
   178  
   179  func (b *Broadcast) StreamStatus(req *api.StreamStatusRequest, svc api.RolloutService_StreamStatusServer) error {
   180  	resp, ch, unsubscribe := b.Start()
   181  	defer unsubscribe()
   182  	for _, r := range resp {
   183  		err := svc.Send(streamStatus(r))
   184  		if err != nil {
   185  			return err
   186  		}
   187  	}
   188  	for {
   189  		select {
   190  		case r := <-ch:
   191  			if r == nil {
   192  				// closed
   193  				return nil
   194  			}
   195  			err := svc.Send(streamStatus(r))
   196  			if err != nil {
   197  				return err
   198  			}
   199  		case <-svc.Context().Done():
   200  			err := svc.Context().Err()
   201  			if errors.Is(err, context.Canceled) {
   202  				return nil
   203  			}
   204  			return err
   205  		}
   206  	}
   207  }
   208  
   209  func (b *Broadcast) GetStatus(ctx context.Context, req *api.GetStatusRequest) (*api.GetStatusResponse, error) {
   210  	var wait <-chan time.Time
   211  	if req.WaitSeconds > 0 {
   212  		wait = time.After(time.Duration(req.WaitSeconds) * time.Second)
   213  	}
   214  	resp, ch, unsubscribe := b.Start()
   215  	defer unsubscribe()
   216  	apps := map[Key]*api.GetStatusResponse_ApplicationStatus{}
   217  	for _, r := range resp {
   218  		s := filterApplication(req, r)
   219  		if s != nil {
   220  			apps[r.Key] = s
   221  		}
   222  	}
   223  	status := aggregateStatus(apps)
   224  	if wait != nil {
   225  		// The waiting function is used in testing to make sure, we are really processing delayed events.
   226  		if b.waiting != nil {
   227  			b.waiting()
   228  		}
   229  	waiting:
   230  		for {
   231  			status = aggregateStatus(apps)
   232  			if status == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL || status == api.RolloutStatus_ROLLOUT_STATUS_ERROR {
   233  				break
   234  			}
   235  			select {
   236  			case r, ok := <-ch:
   237  				if !ok {
   238  					break waiting
   239  				}
   240  				s := filterApplication(req, r)
   241  				if s != nil {
   242  					apps[r.Key] = s
   243  				} else {
   244  					delete(apps, r.Key)
   245  				}
   246  			case <-ctx.Done():
   247  				break waiting
   248  			case <-wait:
   249  				break waiting
   250  			}
   251  		}
   252  	}
   253  
   254  	appList := make([]*api.GetStatusResponse_ApplicationStatus, 0, len(apps))
   255  	for _, app := range apps {
   256  		appList = append(appList, app)
   257  	}
   258  
   259  	return &api.GetStatusResponse{
   260  		Status:       status,
   261  		Applications: appList,
   262  	}, nil
   263  }
   264  
   265  // Removes irrelevant app states from the list.
   266  func filterApplication(req *api.GetStatusRequest, ev *BroadcastEvent) *api.GetStatusResponse_ApplicationStatus {
   267  	// Only apps that have the correct envgroup are considered
   268  	if ev.EnvironmentGroup != req.EnvironmentGroup {
   269  		return nil
   270  	}
   271  	// If it's filtered by team, then only apps with the correct team are considered.
   272  	if req.Team != "" && req.Team != ev.Team {
   273  		return nil
   274  	}
   275  	s := getStatus(ev)
   276  	// Successful apps are also irrelevant.
   277  	if s.RolloutStatus == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL {
   278  		return nil
   279  	}
   280  	return s
   281  }
   282  
   283  // Calculates an aggregatted rollout status
   284  func aggregateStatus(apps map[Key]*api.GetStatusResponse_ApplicationStatus) api.RolloutStatus {
   285  	status := api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL
   286  	for _, app := range apps {
   287  		status = mostRelevantStatus(app.RolloutStatus, status)
   288  	}
   289  	return status
   290  }
   291  
// unsubscribe is the function returned by Broadcast.Start that removes the
// subscription created by that call.
type unsubscribe func()
   293  
   294  func (b *Broadcast) Start() ([]*BroadcastEvent, <-chan *BroadcastEvent, unsubscribe) {
   295  	b.mx.Lock()
   296  	defer b.mx.Unlock()
   297  	result := make([]*BroadcastEvent, 0, len(b.state))
   298  	for key, app := range b.state {
   299  		result = append(result, app.getEvent(key.Application, key.Environment))
   300  	}
   301  	ch := make(chan *BroadcastEvent, 100)
   302  	b.listener[ch] = struct{}{}
   303  	return result, ch, func() {
   304  		b.mx.Lock()
   305  		defer b.mx.Unlock()
   306  		delete(b.listener, ch)
   307  	}
   308  }
   309  
// BroadcastEvent is the state of one application in one environment as it is
// delivered to listeners. It is built by appState.getEvent, so RolloutStatus
// is the status after taking the two versions into account. IsProduction and
// the version pointers are nil as long as the corresponding source has not
// reported them.
type BroadcastEvent struct {
	Key
	EnvironmentGroup string
	Team             string
	IsProduction     *bool
	ArgocdVersion    *versions.VersionInfo
	KuberpultVersion *versions.VersionInfo
	RolloutStatus    api.RolloutStatus
}
   319  
   320  func streamStatus(b *BroadcastEvent) *api.StreamStatusResponse {
   321  	version := uint64(0)
   322  	if b.ArgocdVersion != nil {
   323  		version = b.ArgocdVersion.Version
   324  	}
   325  	return &api.StreamStatusResponse{
   326  		Environment:   b.Environment,
   327  		Application:   b.Application,
   328  		Version:       version,
   329  		RolloutStatus: b.RolloutStatus,
   330  	}
   331  }
   332  
   333  func getStatus(b *BroadcastEvent) *api.GetStatusResponse_ApplicationStatus {
   334  	return &api.GetStatusResponse_ApplicationStatus{
   335  		Environment:   b.Environment,
   336  		Application:   b.Application,
   337  		RolloutStatus: b.RolloutStatus,
   338  	}
   339  }
   340  
   341  func rolloutStatus(ev *ArgoEvent) api.RolloutStatus {
   342  	if ev.OperationState != nil {
   343  		switch ev.OperationState.Phase {
   344  		case common.OperationError, common.OperationFailed:
   345  
   346  			return api.RolloutStatus_ROLLOUT_STATUS_ERROR
   347  		}
   348  	}
   349  	switch ev.SyncStatusCode {
   350  	case v1alpha1.SyncStatusCodeOutOfSync:
   351  		return api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING
   352  	}
   353  	switch ev.HealthStatusCode {
   354  	case health.HealthStatusDegraded, health.HealthStatusMissing:
   355  		return api.RolloutStatus_ROLLOUT_STATUS_UNHEALTHY
   356  	case health.HealthStatusProgressing, health.HealthStatusSuspended:
   357  		return api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING
   358  	case health.HealthStatusHealthy:
   359  		if ev.Version == nil {
   360  			return api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN
   361  		}
   362  		return api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL
   363  	}
   364  	return api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN
   365  }
   366  
   367  // Depending on the rollout state, there are different things a user should do.
   368  // 1. Nothing because everything is fine
   369  // 2. Wait longer
   370  // 3. Stop and call an operator
   371  // The sorting is the same as in the UI.
   372  var statusPriorities []api.RolloutStatus = []api.RolloutStatus{
   373  	// Error is not recoverable by waiting and requires manual intervention
   374  	api.RolloutStatus_ROLLOUT_STATUS_ERROR,
   375  
   376  	// These states may resolve by waiting longer
   377  	api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING,
   378  	api.RolloutStatus_ROLLOUT_STATUS_UNHEALTHY,
   379  	api.RolloutStatus_ROLLOUT_STATUS_PENDING,
   380  	api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN,
   381  
   382  	// This is the only successful state
   383  	api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL,
   384  }
   385  
   386  // 0 is the highest priority - (RolloutStatusSuccesful) is the lowest priority
   387  func statusPriority(a api.RolloutStatus) int {
   388  	for i, p := range statusPriorities {
   389  		if p == a {
   390  			return i
   391  		}
   392  	}
   393  	return len(statusPriorities) - 1
   394  }
   395  
   396  func mostRelevantStatus(a, b api.RolloutStatus) api.RolloutStatus {
   397  	ap := statusPriority(a)
   398  	bp := statusPriority(b)
   399  	if ap < bp {
   400  		return a
   401  	} else {
   402  		return b
   403  	}
   404  }
   405  
   406  var _ ArgoEventProcessor = (*Broadcast)(nil)
   407  var _ api.RolloutServiceServer = (*Broadcast)(nil)