github.com/helmwave/helmwave@v0.36.4-0.20240509190856-b35563eba4c6/pkg/plan/up.go

package plan

import (
	"context"
	"errors"
	"fmt"
	"os"
	"sync"
	"time"

	"github.com/gofrs/flock"
	"github.com/helmwave/helmwave/pkg/clictx"
	"github.com/helmwave/helmwave/pkg/helper"
	"github.com/helmwave/helmwave/pkg/kubedog"
	"github.com/helmwave/helmwave/pkg/monitor"
	"github.com/helmwave/helmwave/pkg/parallel"
	regi "github.com/helmwave/helmwave/pkg/registry"
	"github.com/helmwave/helmwave/pkg/release"
	"github.com/helmwave/helmwave/pkg/release/dependency"
	"github.com/helmwave/helmwave/pkg/release/uniqname"
	"github.com/helmwave/helmwave/pkg/repo"
	"github.com/olekukonko/tablewriter"
	log "github.com/sirupsen/logrus"
	"github.com/werf/kubedog/pkg/kube"
	"github.com/werf/kubedog/pkg/tracker"
	"github.com/werf/kubedog/pkg/trackers/rollout/multitrack"
	"golang.org/x/exp/maps"
	helmRepo "helm.sh/helm/v3/pkg/repo"
)

// Up syncs repositories and releases.
func (p *Plan) Up(ctx context.Context, dog *kubedog.Config) (err error) {
	// Run hooks
	err = p.body.Lifecycle.RunPreUp(ctx)
	if err != nil {
		return
	}

	defer func() {
		lifecycleErr := p.body.Lifecycle.RunPostUp(ctx)
		if lifecycleErr != nil {
			log.Errorf("got an error from postup hooks: %v", lifecycleErr)
			if err == nil {
				err = lifecycleErr
			}
		}
	}()

	log.Info("🗄 sync repositories...")
	err = SyncRepositories(ctx, p.body.Repositories)
	if err != nil {
		return
	}

	log.Info("🗄 sync registries...")
	err = p.syncRegistries(ctx)
	if err != nil {
		return
	}

	if len(p.body.Releases) == 0 {
		return
	}

	log.Info("🛥 sync releases...")

	if dog.Enabled {
		log.Warn("🐶 kubedog is enabled")
		kubedog.FixLog(ctx, dog.LogWidth)
		err = p.syncReleasesKubedog(ctx, dog)
	} else {
		err = p.syncReleases(ctx)
	}

	return
}

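// syncRegistries installs all configured registries concurrently,
// reporting any errors through the shared parallel wait group.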
func (p *Plan) syncRegistries(ctx context.Context) error {
	wg := parallel.NewWaitGroup()
	wg.Add(len(p.body.Registries))

	for i := range p.body.Registries {
		go func(wg *parallel.WaitGroup, reg regi.Config) {
			defer wg.Done()
			err := reg.Install()
			if err != nil {
				wg.ErrChan() <- err
			}
		}(wg, p.body.Registries[i])
	}

	if err := wg.WaitWithContext(ctx); err != nil {
		return err
	}

	return nil
}

// SyncRepositories initializes the helm repository.yaml file under a file lock (flock) and installs the provided repositories.
func SyncRepositories(ctx context.Context, repositories repo.Configs) error {
	log.Trace("🗄 helm repository.yaml: ", helper.Helm.RepositoryConfig)

	// Create if not exists
	if !helper.IsExists(helper.Helm.RepositoryConfig) {
		f, err := helper.CreateFile(helper.Helm.RepositoryConfig)
		if err != nil {
			return err
		}
		if err := f.Close(); err != nil {
			return fmt.Errorf("failed to close fresh helm repository.yaml: %w", err)
		}
	}

	// We need to acquire a flock first
	lockPath := helper.Helm.RepositoryConfig + ".lock"
	fileLock := flock.New(lockPath)
	lockCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
	defer cancel()

	// Unlock in a deferred call so the lock is released even if a later step returns an error
	defer func(fileLock *flock.Flock) {
		err := fileLock.Unlock()
		if err != nil {
			log.Errorf("failed to release flock %s: %v", fileLock.Path(), err)
		}
	}(fileLock)

	locked, err := fileLock.TryLockContext(lockCtx, time.Second)
	if err != nil && !locked {
		return fmt.Errorf("failed to get lock %s: %w", fileLock.Path(), err)
	}

	f, err := helmRepo.LoadFile(helper.Helm.RepositoryConfig)
	if err != nil {
		return fmt.Errorf("failed to load helm repositories file: %w", err)
	}

	// We can't parallelize repository installation because helm manages a single repositories.yaml.
	// To prevent a data race, we would need helm to use a futex; until then, install sequentially.
	for i := range repositories {
		err := repositories[i].Install(ctx, helper.Helm, f)
		if err != nil {
			return fmt.Errorf("failed to install %s repository: %w", repositories[i].Name(), err)
		}
	}

	err = f.WriteFile(helper.Helm.RepositoryConfig, os.FileMode(0o644))
	if err != nil {
		return fmt.Errorf("failed to write repositories file: %w", err)
	}

	// If we haven't hit any errors yet, unlock the repository file here; the deferred unlock will then return quickly.
	if err := fileLock.Unlock(); err != nil {
		return fmt.Errorf("failed to unlock %s: %w", fileLock.Path(), err)
	}

	return nil
}

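// generateDependencyGraph builds a graph of releases keyed by their unique names,
// adding an edge for every DependsOn entry before finalizing it with Build.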
func (p *planBody) generateDependencyGraph() (*dependency.Graph[uniqname.UniqName, release.Config], error) {
	dependenciesGraph := dependency.NewGraph[uniqname.UniqName, release.Config]()

	for _, rel := range p.Releases {
		err := dependenciesGraph.NewNode(rel.Uniq(), rel)
		if err != nil {
			return nil, err
		}

		for _, dep := range rel.DependsOn() {
			dependenciesGraph.AddDependency(rel.Uniq(), dep.Uniq())
		}
	}

	err := dependenciesGraph.Build()
	if err != nil {
		return nil, err
	}

	return dependenciesGraph, nil
}

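// getParallelLimit reads the "parallel-limit" flag from the CLI context;
// a missing or zero value falls back to the number of releases (no limit).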
func getParallelLimit(ctx context.Context, releases release.Configs) int {
	parallelLimit, ok := clictx.GetFlagFromContext(ctx, "parallel-limit").(int)
	if !ok {
		parallelLimit = 0
	}
	if parallelLimit == 0 {
		parallelLimit = len(releases)
	}

	return parallelLimit
}

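// generateMonitorsLockMap creates a wait group per monitor name and adds one
// counter increment for every release that references that monitor.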
func (p *planBody) generateMonitorsLockMap() map[string]*parallel.WaitGroup {
	res := make(map[string]*parallel.WaitGroup)

	for _, rel := range p.Releases {
		allMons := rel.Monitors()
		for i := range allMons {
			mon := allMons[i]
			if _, ok := res[mon.Name]; !ok {
				res[mon.Name] = parallel.NewWaitGroup()
			}

			res[mon.Name].Add(1)
		}
	}

	return res
}

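// syncReleases deploys all releases according to the dependency graph with at
// most parallelLimit workers, runs monitor workers alongside them, and returns
// the aggregated report of release and monitor failures.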
func (p *Plan) syncReleases(ctx context.Context) (err error) {
	dependenciesGraph, err := p.body.generateDependencyGraph()
	if err != nil {
		return err
	}

	parallelLimit := getParallelLimit(ctx, p.body.Releases)

	const msg = "Deploying releases with limited parallelization"
	if parallelLimit == len(p.body.Releases) {
		log.WithField("limit", parallelLimit).Debug(msg)
	} else {
		log.WithField("limit", parallelLimit).Info(msg)
	}

	monitorsLockMap := p.body.generateMonitorsLockMap()
	monitorsCtx, monitorsCancel := context.WithCancel(ctx)
	defer monitorsCancel()

	releasesNodesChan := dependenciesGraph.Run()

	releasesWG := parallel.NewWaitGroup()
	releasesWG.Add(parallelLimit)

	monitorsWG := parallel.NewWaitGroup()
	monitorsWG.Add(len(p.body.Monitors))

	releasesFails := make(map[release.Config]error)
	monitorsFails := make(map[monitor.Config]error)

	releasesMutex := &sync.Mutex{}

	for range parallelLimit {
		go p.syncReleasesWorker(ctx, releasesWG, releasesNodesChan, releasesMutex, releasesFails, monitorsLockMap)
	}

	for _, mon := range p.body.Monitors {
		go p.monitorsWorker(monitorsCtx, monitorsWG, mon, monitorsFails, monitorsLockMap)
	}

	if err := releasesWG.WaitWithContext(ctx); err != nil {
		return err
	}

	if err := monitorsWG.WaitWithContext(monitorsCtx); err != nil {
		log.WithError(err).Error("monitors failed, running failure actions")
		p.runMonitorsActions(ctx, monitorsFails)
	}

	return p.ApplyReport(releasesFails, monitorsFails)
}

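// runMonitorsActions notifies every release about the monitors that failed.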
func (p *Plan) runMonitorsActions(
	ctx context.Context,
	fails map[monitor.Config]error,
) {
	mons := maps.Keys(fails)

	for _, rel := range p.body.Releases {
		rel.NotifyMonitorsFailed(ctx, mons...)
	}
}

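// syncReleasesWorker consumes ready release nodes from the dependency graph
// channel and deploys them one at a time until the channel is closed.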
func (p *Plan) syncReleasesWorker(
	ctx context.Context,
	wg *parallel.WaitGroup,
	nodesChan <-chan *dependency.Node[release.Config],
	mu *sync.Mutex,
	fails map[release.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	for n := range nodesChan {
		p.syncRelease(ctx, wg, n, mu, fails, monitorsLockMap)
	}
	wg.Done()
}

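// syncRelease deploys a single release, marks its dependency node as succeeded
// or failed, records any error in the shared fails map, and on success releases
// the wait-group locks of the monitors attached to the release.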
func (p *Plan) syncRelease(
	ctx context.Context,
	wg *parallel.WaitGroup,
	node *dependency.Node[release.Config],
	mu *sync.Mutex,
	fails map[release.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	rel := node.Data

	l := rel.Logger()

	l.Info("🛥 deploying... ")

	if _, err := rel.Sync(ctx, true); err != nil {
		l.WithError(err).Error("❌ failed to deploy")

		if rel.AllowFailure() {
			l.Errorf("release is allowed to fail, marking it as succeeded for its dependencies")
			node.SetSucceeded()
		} else {
			node.SetFailed()
		}

		mu.Lock()
		fails[rel] = err
		mu.Unlock()

		wg.ErrChan() <- err
	} else {
		node.SetSucceeded()
		l.Info("✅")

		allMons := rel.Monitors()
		for i := range allMons {
			mon := allMons[i]
			m := monitorsLockMap[mon.Name]
			if m != nil {
				m.Done()
			}
		}
	}
}

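// monitorsWorker waits until every release that references the monitor has
// released its lock (or the context is canceled) and then runs the monitor,
// recording any failure in the fails map.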
func (p *Plan) monitorsWorker(
	ctx context.Context,
	wg *parallel.WaitGroup,
	mon monitor.Config,
	fails map[monitor.Config]error,
	monitorsLockMap map[string]*parallel.WaitGroup,
) {
	defer wg.Done()

	l := mon.Logger()

	lock := monitorsLockMap[mon.Name()]
	if lock == nil {
		l.Error("BUG: monitor lock is empty, skipping monitor")

		return
	}
	err := lock.WaitWithContext(ctx)
	if err != nil {
		l.WithError(err).Error("❌ monitor canceled")
		fails[mon] = err
		wg.ErrChan() <- err
	}

	err = mon.Run(ctx)
	if err != nil {
		l.WithError(err).Error("❌ monitor failed")
		fails[mon] = err
		wg.ErrChan() <- err
	} else {
		l.Info("✅")
	}
}

// ApplyReport logs a success summary and renders table reports for failed releases and monitors.
func (p *Plan) ApplyReport(
	releasesFails map[release.Config]error,
	monitorsFails map[monitor.Config]error,
) error {
	nReleases := len(p.body.Releases)
	kReleases := len(releasesFails)
	nMonitors := len(p.body.Monitors)
	kMonitors := len(monitorsFails)

	log.Infof("Releases Success %d / %d", nReleases-kReleases, nReleases)
	log.Infof("Monitors Success %d / %d", nMonitors-kMonitors, nMonitors)

	if len(releasesFails) > 0 {
		table := tablewriter.NewWriter(os.Stdout)
		table.SetHeader([]string{"name", "namespace", "chart", "version", "error"})
		table.SetAutoFormatHeaders(true)
		table.SetBorder(false)

		for r, err := range releasesFails {
			row := []string{
				r.Name(),
				r.Namespace(),
				r.Chart().Name,
				r.Chart().Version,
				err.Error(),
			}

			table.Rich(row, []tablewriter.Colors{
				{},
				{},
				{},
				{},
				FailStatusColor,
			})
		}

		table.Render()

		return ErrDeploy
	}

	if len(monitorsFails) > 0 {
		table := tablewriter.NewWriter(os.Stdout)
		table.SetHeader([]string{"name", "error"})
		table.SetAutoFormatHeaders(true)
		table.SetBorder(false)

		for r, err := range monitorsFails {
			row := []string{
				r.Name(),
				err.Error(),
			}

			table.Rich(row, []tablewriter.Colors{
				{},
				FailStatusColor,
			})
		}

		table.Render()

		return ErrDeploy
	}

	return nil
}

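// syncReleasesKubedog deploys releases while a background goroutine tracks the
// rollout of their resources with kubedog multitrack.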
func (p *Plan) syncReleasesKubedog(ctx context.Context, kubedogConfig *kubedog.Config) error {
	ctxCancel, cancel := context.WithCancel(ctx)
	defer cancel() // Don't forget!

	specs, kubecontext, err := p.kubedogSyncSpecs(kubedogConfig)
	if err != nil {
		return err
	}

	err = helper.KubeInit(kubecontext)
	if err != nil {
		return err
	}

	opts := multitrack.MultitrackOptions{
		DynamicClient:        kube.DynamicClient,
		DiscoveryClient:      kube.CachedDiscoveryClient,
		Mapper:               kube.Mapper,
		StatusProgressPeriod: kubedogConfig.StatusInterval,
		Options: tracker.Options{
			ParentContext: ctxCancel,
			Timeout:       kubedogConfig.Timeout,
			LogsFromTime:  time.Now(),
		},
	}

	// Run kubedog
	dogroup := parallel.NewWaitGroup()
	dogroup.Add(1)
	go func() {
		defer dogroup.Done()
		log.Trace("Multitrack is starting...")
		dogroup.ErrChan() <- multitrack.Multitrack(kube.Client, specs, opts)
	}()

	// Run helm
	time.Sleep(kubedogConfig.StartDelay)
	err = p.syncReleases(ctx)
	if err != nil {
		cancel()

		return err
	}

	// Give kubedog time to notice that the releases were installed
	time.Sleep(kubedogConfig.StatusInterval)
	cancel() // stop kubedog

	err = dogroup.WaitWithContext(ctx)
	if err != nil && !errors.Is(err, context.Canceled) {
		// Ignore the kubedog error
		log.WithError(err).Warn("kubedog reported an error while watching resources.")
	}

	return nil
}

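// kubedogSyncSpecs builds kubedog multitrack specs for the planned releases
// using their manifests rendered during planning.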
func (p *Plan) kubedogSyncSpecs(kubedogConfig *kubedog.Config) (multitrack.MultitrackSpecs, string, error) {
	return p.kubedogSpecs(kubedogConfig, p.kubedogSyncManifest)
}

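// kubedogSyncManifest returns the manifest rendered during planning for the given release.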
func (p *Plan) kubedogSyncManifest(rel release.Config) (string, error) {
	return p.manifests[rel.Uniq()], nil
}