github.com/freiheit-com/kuberpult@v1.24.2-0.20240328135542-315d5630abe6/services/rollout-service/pkg/service/broadcast.go (about) 1 /*This file is part of kuberpult. 2 3 Kuberpult is free software: you can redistribute it and/or modify 4 it under the terms of the Expat(MIT) License as published by 5 the Free Software Foundation. 6 7 Kuberpult is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 MIT License for more details. 11 12 You should have received a copy of the MIT License 13 along with kuberpult. If not, see <https://directory.fsf.org/wiki/License:Expat>. 14 15 Copyright 2023 freiheit.com*/ 16 17 package service 18 19 import ( 20 "context" 21 "errors" 22 "sync" 23 "time" 24 25 api "github.com/freiheit-com/kuberpult/pkg/api/v1" 26 "github.com/freiheit-com/kuberpult/pkg/ptr" 27 "github.com/freiheit-com/kuberpult/services/rollout-service/pkg/versions" 28 29 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" 30 "github.com/argoproj/gitops-engine/pkg/health" 31 "github.com/argoproj/gitops-engine/pkg/sync/common" 32 ) 33 34 type Key struct { 35 Application string 36 Environment string 37 } 38 39 type appState struct { 40 argocdVersion *versions.VersionInfo 41 kuberpultVersion *versions.VersionInfo 42 rolloutStatus api.RolloutStatus 43 environmentGroup string 44 isProduction *bool 45 team string 46 } 47 48 func (a *appState) applyArgoEvent(ev *ArgoEvent) *BroadcastEvent { 49 status := rolloutStatus(ev) 50 if a.rolloutStatus != status || !a.argocdVersion.Equal(ev.Version) { 51 a.rolloutStatus = status 52 a.argocdVersion = ev.Version 53 return a.getEvent(ev.Application, ev.Environment) 54 } 55 return nil 56 } 57 58 func (a *appState) applyKuberpultEvent(ev *versions.KuberpultEvent) *BroadcastEvent { 59 if !a.argocdVersion.Equal(ev.Version) || a.isProduction == nil || *a.isProduction != ev.IsProduction { 60 a.kuberpultVersion = ev.Version 61 a.environmentGroup = ev.EnvironmentGroup 62 a.team = ev.Team 63 a.isProduction = ptr.Bool(ev.IsProduction) 64 return a.getEvent(ev.Application, ev.Environment) 65 } 66 return nil 67 } 68 69 func (a *appState) getEvent(application, environment string) *BroadcastEvent { 70 rs := a.rolloutStatus 71 if a.kuberpultVersion == nil || a.argocdVersion == nil { 72 if rs == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL { 73 rs = api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN 74 } 75 } else if a.kuberpultVersion.Version != a.argocdVersion.Version { 76 rs = api.RolloutStatus_ROLLOUT_STATUS_PENDING 77 } 78 return &BroadcastEvent{ 79 Key: Key{ 80 Environment: environment, 81 Application: application, 82 }, 83 EnvironmentGroup: a.environmentGroup, 84 IsProduction: a.isProduction, 85 ArgocdVersion: a.argocdVersion, 86 RolloutStatus: rs, 87 Team: a.team, 88 KuberpultVersion: a.kuberpultVersion, 89 } 90 } 91 92 type Broadcast struct { 93 state map[Key]*appState 94 mx sync.Mutex 95 listener map[chan *BroadcastEvent]struct{} 96 97 // The waiting function is used in tests to trigger events after the subscription is set up. 98 waiting func() 99 } 100 101 func New() *Broadcast { 102 return &Broadcast{ 103 mx: sync.Mutex{}, 104 waiting: nil, 105 state: map[Key]*appState{}, 106 listener: map[chan *BroadcastEvent]struct{}{}, 107 } 108 } 109 110 // ProcessArgoEvent implements service.EventProcessor 111 func (b *Broadcast) ProcessArgoEvent(ctx context.Context, ev ArgoEvent) { 112 b.mx.Lock() 113 defer b.mx.Unlock() 114 k := Key{ 115 Application: ev.Application, 116 Environment: ev.Environment, 117 } 118 if b.state[k] == nil { 119 //exhaustruct:ignore 120 b.state[k] = &appState{} 121 } 122 msg := b.state[k].applyArgoEvent(&ev) 123 if msg == nil { 124 return 125 } 126 desub := []chan *BroadcastEvent{} 127 for l := range b.listener { 128 select { 129 case l <- msg: 130 default: 131 close(l) 132 desub = append(desub, l) 133 } 134 } 135 for _, l := range desub { 136 delete(b.listener, l) 137 } 138 } 139 140 func (b *Broadcast) ProcessKuberpultEvent(ctx context.Context, ev versions.KuberpultEvent) { 141 b.mx.Lock() 142 defer b.mx.Unlock() 143 k := Key{ 144 Application: ev.Application, 145 Environment: ev.Environment, 146 } 147 if b.state[k] == nil { 148 //exhaustruct:ignore 149 b.state[k] = &appState{} 150 } 151 msg := b.state[k].applyKuberpultEvent(&ev) 152 if msg == nil { 153 return 154 } 155 desub := []chan *BroadcastEvent{} 156 for l := range b.listener { 157 select { 158 case l <- msg: 159 default: 160 close(l) 161 desub = append(desub, l) 162 } 163 } 164 for _, l := range desub { 165 delete(b.listener, l) 166 } 167 } 168 169 // Disconnects all listeners. This is used in tests to check wheter subscribers handle reconnects 170 func (b *Broadcast) DisconnectAll() { 171 b.mx.Lock() 172 defer b.mx.Unlock() 173 for l := range b.listener { 174 close(l) 175 } 176 b.listener = make(map[chan *BroadcastEvent]struct{}) 177 } 178 179 func (b *Broadcast) StreamStatus(req *api.StreamStatusRequest, svc api.RolloutService_StreamStatusServer) error { 180 resp, ch, unsubscribe := b.Start() 181 defer unsubscribe() 182 for _, r := range resp { 183 err := svc.Send(streamStatus(r)) 184 if err != nil { 185 return err 186 } 187 } 188 for { 189 select { 190 case r := <-ch: 191 if r == nil { 192 // closed 193 return nil 194 } 195 err := svc.Send(streamStatus(r)) 196 if err != nil { 197 return err 198 } 199 case <-svc.Context().Done(): 200 err := svc.Context().Err() 201 if errors.Is(err, context.Canceled) { 202 return nil 203 } 204 return err 205 } 206 } 207 } 208 209 func (b *Broadcast) GetStatus(ctx context.Context, req *api.GetStatusRequest) (*api.GetStatusResponse, error) { 210 var wait <-chan time.Time 211 if req.WaitSeconds > 0 { 212 wait = time.After(time.Duration(req.WaitSeconds) * time.Second) 213 } 214 resp, ch, unsubscribe := b.Start() 215 defer unsubscribe() 216 apps := map[Key]*api.GetStatusResponse_ApplicationStatus{} 217 for _, r := range resp { 218 s := filterApplication(req, r) 219 if s != nil { 220 apps[r.Key] = s 221 } 222 } 223 status := aggregateStatus(apps) 224 if wait != nil { 225 // The waiting function is used in testing to make sure, we are really processing delayed events. 226 if b.waiting != nil { 227 b.waiting() 228 } 229 waiting: 230 for { 231 status = aggregateStatus(apps) 232 if status == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL || status == api.RolloutStatus_ROLLOUT_STATUS_ERROR { 233 break 234 } 235 select { 236 case r, ok := <-ch: 237 if !ok { 238 break waiting 239 } 240 s := filterApplication(req, r) 241 if s != nil { 242 apps[r.Key] = s 243 } else { 244 delete(apps, r.Key) 245 } 246 case <-ctx.Done(): 247 break waiting 248 case <-wait: 249 break waiting 250 } 251 } 252 } 253 254 appList := make([]*api.GetStatusResponse_ApplicationStatus, 0, len(apps)) 255 for _, app := range apps { 256 appList = append(appList, app) 257 } 258 259 return &api.GetStatusResponse{ 260 Status: status, 261 Applications: appList, 262 }, nil 263 } 264 265 // Removes irrelevant app states from the list. 266 func filterApplication(req *api.GetStatusRequest, ev *BroadcastEvent) *api.GetStatusResponse_ApplicationStatus { 267 // Only apps that have the correct envgroup are considered 268 if ev.EnvironmentGroup != req.EnvironmentGroup { 269 return nil 270 } 271 // If it's filtered by team, then only apps with the correct team are considered. 272 if req.Team != "" && req.Team != ev.Team { 273 return nil 274 } 275 s := getStatus(ev) 276 // Successful apps are also irrelevant. 277 if s.RolloutStatus == api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL { 278 return nil 279 } 280 return s 281 } 282 283 // Calculates an aggregatted rollout status 284 func aggregateStatus(apps map[Key]*api.GetStatusResponse_ApplicationStatus) api.RolloutStatus { 285 status := api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL 286 for _, app := range apps { 287 status = mostRelevantStatus(app.RolloutStatus, status) 288 } 289 return status 290 } 291 292 type unsubscribe func() 293 294 func (b *Broadcast) Start() ([]*BroadcastEvent, <-chan *BroadcastEvent, unsubscribe) { 295 b.mx.Lock() 296 defer b.mx.Unlock() 297 result := make([]*BroadcastEvent, 0, len(b.state)) 298 for key, app := range b.state { 299 result = append(result, app.getEvent(key.Application, key.Environment)) 300 } 301 ch := make(chan *BroadcastEvent, 100) 302 b.listener[ch] = struct{}{} 303 return result, ch, func() { 304 b.mx.Lock() 305 defer b.mx.Unlock() 306 delete(b.listener, ch) 307 } 308 } 309 310 type BroadcastEvent struct { 311 Key 312 EnvironmentGroup string 313 Team string 314 IsProduction *bool 315 ArgocdVersion *versions.VersionInfo 316 KuberpultVersion *versions.VersionInfo 317 RolloutStatus api.RolloutStatus 318 } 319 320 func streamStatus(b *BroadcastEvent) *api.StreamStatusResponse { 321 version := uint64(0) 322 if b.ArgocdVersion != nil { 323 version = b.ArgocdVersion.Version 324 } 325 return &api.StreamStatusResponse{ 326 Environment: b.Environment, 327 Application: b.Application, 328 Version: version, 329 RolloutStatus: b.RolloutStatus, 330 } 331 } 332 333 func getStatus(b *BroadcastEvent) *api.GetStatusResponse_ApplicationStatus { 334 return &api.GetStatusResponse_ApplicationStatus{ 335 Environment: b.Environment, 336 Application: b.Application, 337 RolloutStatus: b.RolloutStatus, 338 } 339 } 340 341 func rolloutStatus(ev *ArgoEvent) api.RolloutStatus { 342 if ev.OperationState != nil { 343 switch ev.OperationState.Phase { 344 case common.OperationError, common.OperationFailed: 345 346 return api.RolloutStatus_ROLLOUT_STATUS_ERROR 347 } 348 } 349 switch ev.SyncStatusCode { 350 case v1alpha1.SyncStatusCodeOutOfSync: 351 return api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING 352 } 353 switch ev.HealthStatusCode { 354 case health.HealthStatusDegraded, health.HealthStatusMissing: 355 return api.RolloutStatus_ROLLOUT_STATUS_UNHEALTHY 356 case health.HealthStatusProgressing, health.HealthStatusSuspended: 357 return api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING 358 case health.HealthStatusHealthy: 359 if ev.Version == nil { 360 return api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN 361 } 362 return api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL 363 } 364 return api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN 365 } 366 367 // Depending on the rollout state, there are different things a user should do. 368 // 1. Nothing because everything is fine 369 // 2. Wait longer 370 // 3. Stop and call an operator 371 // The sorting is the same as in the UI. 372 var statusPriorities []api.RolloutStatus = []api.RolloutStatus{ 373 // Error is not recoverable by waiting and requires manual intervention 374 api.RolloutStatus_ROLLOUT_STATUS_ERROR, 375 376 // These states may resolve by waiting longer 377 api.RolloutStatus_ROLLOUT_STATUS_PROGRESSING, 378 api.RolloutStatus_ROLLOUT_STATUS_UNHEALTHY, 379 api.RolloutStatus_ROLLOUT_STATUS_PENDING, 380 api.RolloutStatus_ROLLOUT_STATUS_UNKNOWN, 381 382 // This is the only successful state 383 api.RolloutStatus_ROLLOUT_STATUS_SUCCESFUL, 384 } 385 386 // 0 is the highest priority - (RolloutStatusSuccesful) is the lowest priority 387 func statusPriority(a api.RolloutStatus) int { 388 for i, p := range statusPriorities { 389 if p == a { 390 return i 391 } 392 } 393 return len(statusPriorities) - 1 394 } 395 396 func mostRelevantStatus(a, b api.RolloutStatus) api.RolloutStatus { 397 ap := statusPriority(a) 398 bp := statusPriority(b) 399 if ap < bp { 400 return a 401 } else { 402 return b 403 } 404 } 405 406 var _ ArgoEventProcessor = (*Broadcast)(nil) 407 var _ api.RolloutServiceServer = (*Broadcast)(nil)