github.com/helmwave/helmwave@v0.36.4-0.20240509190856-b35563eba4c6/pkg/plan/up.go

package plan

import (
	"context"
	"errors"
	"fmt"
	"os"
	"sync"
	"time"

	"github.com/gofrs/flock"
	"github.com/helmwave/helmwave/pkg/clictx"
	"github.com/helmwave/helmwave/pkg/helper"
	"github.com/helmwave/helmwave/pkg/kubedog"
	"github.com/helmwave/helmwave/pkg/monitor"
	"github.com/helmwave/helmwave/pkg/parallel"
	regi "github.com/helmwave/helmwave/pkg/registry"
	"github.com/helmwave/helmwave/pkg/release"
	"github.com/helmwave/helmwave/pkg/release/dependency"
	"github.com/helmwave/helmwave/pkg/release/uniqname"
	"github.com/helmwave/helmwave/pkg/repo"
	"github.com/olekukonko/tablewriter"
	log "github.com/sirupsen/logrus"
	"github.com/werf/kubedog/pkg/kube"
	"github.com/werf/kubedog/pkg/tracker"
	"github.com/werf/kubedog/pkg/trackers/rollout/multitrack"
	"golang.org/x/exp/maps"
	helmRepo "helm.sh/helm/v3/pkg/repo"
)

// Up syncs repositories and releases.
func (p *Plan) Up(ctx context.Context, dog *kubedog.Config) (err error) {
	// Run pre-up hooks.
	err = p.body.Lifecycle.RunPreUp(ctx)
	if err != nil {
		return
	}

	// Post-up hooks run in a deferred call so they execute even when Up fails.
	defer func() {
		lifecycleErr := p.body.Lifecycle.RunPostUp(ctx)
		if lifecycleErr != nil {
			log.Errorf("got an error from postup hooks: %v", lifecycleErr)
			if err == nil {
				err = lifecycleErr
			}
		}
	}()

	log.Info("🗄 sync repositories...")
	err = SyncRepositories(ctx, p.body.Repositories)
	if err != nil {
		return
	}

	log.Info("🗄 sync registries...")
	err = p.syncRegistries(ctx)
	if err != nil {
		return
	}

	if len(p.body.Releases) == 0 {
		return
	}

	log.Info("🛥 sync releases...")

	if dog.Enabled {
		log.Warn("🐶 kubedog is enabled")
		kubedog.FixLog(ctx, dog.LogWidth)
		err = p.syncReleasesKubedog(ctx, dog)
	} else {
		err = p.syncReleases(ctx)
	}

	return
}

func (p *Plan) syncRegistries(ctx context.Context) (err error) {
	wg := parallel.NewWaitGroup()
	wg.Add(len(p.body.Registries))

	for i := range p.body.Registries {
		go func(wg *parallel.WaitGroup, reg regi.Config) {
			defer wg.Done()
			err := reg.Install()
			if err != nil {
				wg.ErrChan() <- err
			}
		}(wg, p.body.Registries[i])
	}

	if err := wg.WaitWithContext(ctx); err != nil {
		return err
	}

	return err
}

// SyncRepositories initializes the helm repository.yaml file under a file lock (flock)
// and installs the provided repositories.
func SyncRepositories(ctx context.Context, repositories repo.Configs) error {
	log.Trace("🗄 helm repository.yaml: ", helper.Helm.RepositoryConfig)

	// Create the file if it does not exist.
	if !helper.IsExists(helper.Helm.RepositoryConfig) {
		f, err := helper.CreateFile(helper.Helm.RepositoryConfig)
		if err != nil {
			return err
		}
		if err := f.Close(); err != nil {
			return fmt.Errorf("failed to close fresh helm repository.yaml: %w", err)
		}
	}

	// We need to acquire the flock first.
	lockPath := helper.Helm.RepositoryConfig + ".lock"
	fileLock := flock.New(lockPath)
	lockCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
	defer cancel()

	// Unlock in a deferred call so the lock is released even if a later step returns an error.
	defer func(fileLock *flock.Flock) {
		err := fileLock.Unlock()
		if err != nil {
			log.Errorf("failed to release flock %s: %v", fileLock.Path(), err)
		}
	}(fileLock)

	locked, err := fileLock.TryLockContext(lockCtx, time.Second)
	if err != nil && !locked {
		return fmt.Errorf("failed to get lock %s: %w", fileLock.Path(), err)
	}

	f, err := helmRepo.LoadFile(helper.Helm.RepositoryConfig)
	if err != nil {
		return fmt.Errorf("failed to load helm repositories file: %w", err)
	}

	// We can't install repositories in parallel because helm manages a single repositories.yaml.
	// To prevent a data race we would either need helm to use a futex or skip parallelism entirely.
	for i := range repositories {
		err := repositories[i].Install(ctx, helper.Helm, f)
		if err != nil {
			return fmt.Errorf("failed to install %s repository: %w", repositories[i].Name(), err)
		}
	}

	err = f.WriteFile(helper.Helm.RepositoryConfig, os.FileMode(0o644))
	if err != nil {
		return fmt.Errorf("failed to write repositories file: %w", err)
	}

	// If we haven't hit any errors, unlock the repository file now; the deferred unlock will then return quickly.
	if err := fileLock.Unlock(); err != nil {
		return fmt.Errorf("failed to unlock %s: %w", fileLock.Path(), err)
	}

	return nil
}

// generateDependencyGraph builds the dependency graph of releases used to order deployments.
func (p *planBody) generateDependencyGraph() (*dependency.Graph[uniqname.UniqName, release.Config], error) {
	dependenciesGraph := dependency.NewGraph[uniqname.UniqName, release.Config]()

	for _, rel := range p.Releases {
		err := dependenciesGraph.NewNode(rel.Uniq(), rel)
		if err != nil {
			return nil, err
		}

		for _, dep := range rel.DependsOn() {
			dependenciesGraph.AddDependency(rel.Uniq(), dep.Uniq())
		}
	}

	err := dependenciesGraph.Build()
	if err != nil {
		return nil, err
	}

	return dependenciesGraph, nil
}

// getParallelLimit reads the parallel-limit flag from the context; a missing or zero value
// means one worker per release.
func getParallelLimit(ctx context.Context, releases release.Configs) int {
	parallelLimit, ok := clictx.GetFlagFromContext(ctx, "parallel-limit").(int)
	if !ok {
		parallelLimit = 0
	}
	if parallelLimit == 0 {
		parallelLimit = len(releases)
	}

	return parallelLimit
}

// generateMonitorsLockMap creates a wait group per monitor, counting the releases that
// reference it, so a monitor starts only after those releases have been deployed.
func (p *planBody) generateMonitorsLockMap() map[string]*parallel.WaitGroup {
	res := make(map[string]*parallel.WaitGroup)

	for _, rel := range p.Releases {
		allMons := rel.Monitors()
		for i := range allMons {
			mon := allMons[i]
			if _, ok := res[mon.Name]; !ok {
				res[mon.Name] = parallel.NewWaitGroup()
			}

			res[mon.Name].Add(1)
		}
	}

	return res
}

// syncReleases deploys releases in dependency order with bounded parallelism and runs their monitors.
func (p *Plan) syncReleases(ctx context.Context) (err error) {
	dependenciesGraph, err := p.body.generateDependencyGraph()
	if err != nil {
		return err
	}

	parallelLimit := getParallelLimit(ctx, p.body.Releases)

	const msg = "Deploying releases with limited parallelization"
	if parallelLimit == len(p.body.Releases) {
		log.WithField("limit", parallelLimit).Debug(msg)
	} else {
		log.WithField("limit", parallelLimit).Info(msg)
	}

	monitorsLockMap := p.body.generateMonitorsLockMap()
	monitorsCtx, monitorsCancel := context.WithCancel(ctx)
	defer monitorsCancel()

	releasesNodesChan := dependenciesGraph.Run()

	releasesWG := parallel.NewWaitGroup()
	releasesWG.Add(parallelLimit)

	monitorsWG := parallel.NewWaitGroup()
	monitorsWG.Add(len(p.body.Monitors))

	releasesFails := make(map[release.Config]error)
	monitorsFails := make(map[monitor.Config]error)

	releasesMutex := &sync.Mutex{}

	for range parallelLimit {
		go p.syncReleasesWorker(ctx, releasesWG, releasesNodesChan, releasesMutex, releasesFails, monitorsLockMap)
	}

	for _, mon := range p.body.Monitors {
		go p.monitorsWorker(monitorsCtx, monitorsWG, mon, monitorsFails, monitorsLockMap)
	}

	if err := releasesWG.WaitWithContext(ctx); err != nil {
		return err
	}

	if err := monitorsWG.WaitWithContext(monitorsCtx); err != nil {
		log.WithError(err).Error("monitors failed, need to take actions")
		p.runMonitorsActions(ctx, monitorsFails)
	}

	return p.ApplyReport(releasesFails, monitorsFails)
}

func (p *Plan) runMonitorsActions(
	ctx context.Context,
	fails map[monitor.Config]error,
) {
	mons := maps.Keys(fails)

	for _, rel := range p.body.Releases {
		rel.NotifyMonitorsFailed(ctx, mons...)
	}
}

// syncReleasesWorker consumes ready-to-deploy nodes from the dependency graph until the channel is closed.
func (p *Plan) syncReleasesWorker(
	ctx context.Context,
	wg *parallel.WaitGroup,
	nodesChan <-chan *dependency.Node[release.Config],
	mu *sync.Mutex,
	fails map[release.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	for n := range nodesChan {
		p.syncRelease(ctx, wg, n, mu, fails, monitorsLockMap)
	}
	wg.Done()
}

// syncRelease deploys a single release, records failures, and unblocks the monitors that wait on it.
func (p *Plan) syncRelease(
	ctx context.Context,
	wg *parallel.WaitGroup,
	node *dependency.Node[release.Config],
	mu *sync.Mutex,
	fails map[release.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	rel := node.Data

	l := rel.Logger()

	l.Info("🛥 deploying... ")

	if _, err := rel.Sync(ctx, true); err != nil {
		l.WithError(err).Error("❌ failed to deploy")

		if rel.AllowFailure() {
			l.Errorf("release is allowed to fail, marking it as succeeded for its dependents")
			node.SetSucceeded()
		} else {
			node.SetFailed()
		}

		mu.Lock()
		fails[rel] = err
		mu.Unlock()

		wg.ErrChan() <- err
	} else {
		node.SetSucceeded()
		l.Info("✅")

		allMons := rel.Monitors()
		for i := range allMons {
			mon := allMons[i]
			m := monitorsLockMap[mon.Name]
			if m != nil {
				m.Done()
			}
		}
	}
}

// monitorsWorker waits until every release the monitor depends on has been deployed, then runs the monitor.
func (p *Plan) monitorsWorker(
	ctx context.Context,
	wg *parallel.WaitGroup,
	mon monitor.Config,
	fails map[monitor.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	defer wg.Done()

	l := mon.Logger()

	lock := monitorsLockMap[mon.Name()]
	if lock == nil {
		l.Error("BUG: monitor lock is empty, skipping monitor")

		return
	}
	err := lock.WaitWithContext(ctx)
	if err != nil {
		l.WithError(err).Error("❌ monitor canceled")
		fails[mon] = err
		wg.ErrChan() <- err
	}

	err = mon.Run(ctx)
	if err != nil {
		l.WithError(err).Error("❌ monitor failed")
		fails[mon] = err
		wg.ErrChan() <- err
	} else {
		l.Info("✅")
	}
}

// ApplyReport renders a table report for failed releases and failed monitors.
func (p *Plan) ApplyReport(
	releasesFails map[release.Config]error,
	monitorsFails map[monitor.Config]error,
) error {
	nReleases := len(p.body.Releases)
	kReleases := len(releasesFails)
	nMonitors := len(p.body.Monitors)
	kMonitors := len(monitorsFails)

	log.Infof("Releases Success %d / %d", nReleases-kReleases, nReleases)
	log.Infof("Monitors Success %d / %d", nMonitors-kMonitors, nMonitors)

	if len(releasesFails) > 0 {
		table := tablewriter.NewWriter(os.Stdout)
		table.SetHeader([]string{"name", "namespace", "chart", "version", "error"})
		table.SetAutoFormatHeaders(true)
		table.SetBorder(false)

		for r, err := range releasesFails {
			row := []string{
				r.Name(),
				r.Namespace(),
				r.Chart().Name,
				r.Chart().Version,
				err.Error(),
			}

			table.Rich(row, []tablewriter.Colors{
				{},
				{},
				{},
				{},
				FailStatusColor,
			})
		}

		table.Render()

		return ErrDeploy
	}

	if len(monitorsFails) > 0 {
		table := tablewriter.NewWriter(os.Stdout)
		table.SetHeader([]string{"name", "error"})
		table.SetAutoFormatHeaders(true)
		table.SetBorder(false)

		for r, err := range monitorsFails {
			row := []string{
				r.Name(),
				err.Error(),
			}

			table.Rich(row, []tablewriter.Colors{
				{},
				FailStatusColor,
			})
		}

		table.Render()

		return ErrDeploy
	}

	return nil
}

// syncReleasesKubedog runs kubedog multitrack alongside the regular release sync
// so the rollout progress of deployed resources is tracked and logged.
func (p *Plan) syncReleasesKubedog(ctx context.Context, kubedogConfig *kubedog.Config) error {
	ctxCancel, cancel := context.WithCancel(ctx)
	defer cancel() // Don't forget!

	specs, kubecontext, err := p.kubedogSyncSpecs(kubedogConfig)
	if err != nil {
		return err
	}

	err = helper.KubeInit(kubecontext)
	if err != nil {
		return err
	}

	opts := multitrack.MultitrackOptions{
		DynamicClient:        kube.DynamicClient,
		DiscoveryClient:      kube.CachedDiscoveryClient,
		Mapper:               kube.Mapper,
		StatusProgressPeriod: kubedogConfig.StatusInterval,
		Options: tracker.Options{
			ParentContext: ctxCancel,
			Timeout:       kubedogConfig.Timeout,
			LogsFromTime:  time.Now(),
		},
	}

	// Run kubedog in the background.
	dogroup := parallel.NewWaitGroup()
	dogroup.Add(1)
	go func() {
		defer dogroup.Done()
		log.Trace("Multitrack is starting...")
		dogroup.ErrChan() <- multitrack.Multitrack(kube.Client, specs, opts)
	}()

	// Run helm.
	time.Sleep(kubedogConfig.StartDelay)
	err = p.syncReleases(ctx)
	if err != nil {
		cancel()

		return err
	}

	// Give kubedog a chance to catch the releases becoming installed, then stop it.
	time.Sleep(kubedogConfig.StatusInterval)
	cancel() // stop kubedog

	err = dogroup.WaitWithContext(ctx)
	if err != nil && !errors.Is(err, context.Canceled) {
		// Ignore kubedog errors, just warn about them.
		log.WithError(err).Warn("kubedog had an error while watching resources")
	}

	return nil
}

func (p *Plan) kubedogSyncSpecs(kubedogConfig *kubedog.Config) (multitrack.MultitrackSpecs, string, error) {
	return p.kubedogSpecs(kubedogConfig, p.kubedogSyncManifest)
}

func (p *Plan) kubedogSyncManifest(rel release.Config) (string, error) {
	return p.manifests[rel.Uniq()], nil
}
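
// Illustrative sketch (not part of the upstream file): how a caller might drive Plan.Up
// with kubedog tracking enabled. Building or reading the *Plan itself happens elsewhere
// and is assumed here; the kubedog.Config fields shown are exactly the ones this file
// references (Enabled, LogWidth, StartDelay, StatusInterval, Timeout), while their
// concrete values are made up for the example.
//
//	package main
//
//	import (
//		"context"
//		"time"
//
//		"github.com/helmwave/helmwave/pkg/kubedog"
//		"github.com/helmwave/helmwave/pkg/plan"
//	)
//
//	// up assumes p was built or read elsewhere (hypothetical helper, not shown).
//	func up(ctx context.Context, p *plan.Plan) error {
//		dog := &kubedog.Config{
//			Enabled:        true,            // track rollouts with kubedog (see Plan.Up)
//			LogWidth:       140,             // forwarded to kubedog.FixLog
//			StartDelay:     time.Second,     // sleep before helm starts so Multitrack can attach
//			StatusInterval: 5 * time.Second, // kubedog status period and post-sync grace period
//			Timeout:        5 * time.Minute, // kubedog tracker timeout
//		}
//
//		// Up runs pre-up hooks, syncs repositories and registries, deploys releases
//		// (tracked by kubedog when enabled), runs monitors, and finally post-up hooks.
//		return p.Up(ctx, dog)
//	}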