github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/replace.go (about)

     1  package calcium
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"sync"
     7  
     8  	enginetypes "github.com/projecteru2/core/engine/types"
     9  	"github.com/projecteru2/core/log"
    10  	"github.com/projecteru2/core/types"
    11  	"github.com/projecteru2/core/utils"
    12  
    13  	"github.com/cockroachdb/errors"
    14  )
    15  
    16  // ReplaceWorkload replace workloads with same resource
    17  func (c *Calcium) ReplaceWorkload(ctx context.Context, opts *types.ReplaceOptions) (chan *types.ReplaceWorkloadMessage, error) {
    18  	logger := log.WithFunc("calcium.ReplaceWorkload").WithField("opts", opts)
    19  	if err := opts.Validate(); err != nil {
    20  		logger.Error(ctx, err)
    21  		return nil, err
    22  	}
    23  	opts.Normalize()
    24  	if len(opts.IDs) == 0 {
    25  		if len(opts.NodeFilter.Includes) == 0 {
    26  			opts.NodeFilter.Includes = []string{""}
    27  		}
    28  		for _, nodename := range opts.NodeFilter.Includes {
    29  			workloads, err := c.ListWorkloads(ctx, &types.ListWorkloadsOptions{
    30  				Appname: opts.Name, Entrypoint: opts.Entrypoint.Name, Nodename: nodename,
    31  			})
    32  			if err != nil {
    33  				logger.Error(ctx, err)
    34  				return nil, err
    35  			}
    36  			for _, workload := range workloads {
    37  				opts.IDs = append(opts.IDs, workload.ID)
    38  			}
    39  		}
    40  	}
    41  	ch := make(chan *types.ReplaceWorkloadMessage)
    42  	_ = c.pool.Invoke(func() {
    43  		defer close(ch)
    44  		// 并发控制
    45  		wg := sync.WaitGroup{}
    46  		wg.Add(len(opts.IDs))
    47  		defer wg.Wait()
    48  		for index, ID := range opts.IDs {
    49  			_ = c.pool.Invoke(func(replaceOpts types.ReplaceOptions, index int, ID string) func() {
    50  				return func() {
    51  					_ = c.pool.Invoke(func() {
    52  						defer wg.Done()
    53  						var createMessage *types.CreateWorkloadMessage
    54  						removeMessage := &types.RemoveWorkloadMessage{WorkloadID: ID}
    55  						var err error
    56  						if err = c.withWorkloadLocked(ctx, ID, false, func(ctx context.Context, workload *types.Workload) error {
    57  							if opts.Podname != "" && workload.Podname != opts.Podname {
    58  								logger.Warnf(ctx, "Skip not in pod workload %s", workload.ID)
    59  								return errors.Wrapf(types.ErrWorkloadIgnored, "workload %s not in pod %s", workload.ID, opts.Podname)
    60  							}
    61  							// 使用复制之后的配置
    62  							// 停老的,起新的
    63  							// replaceOpts.ResourceOpts = workload.ResourceUsage
    64  							// 覆盖 podname 如果做全量更新的话
    65  							replaceOpts.Podname = workload.Podname
    66  							// 覆盖 Volumes
    67  							// 继承网络配置
    68  							if replaceOpts.NetworkInherit {
    69  								info, err := workload.Inspect(ctx)
    70  								if err != nil {
    71  									return err
    72  								} else if !info.Running {
    73  									return errors.Wrapf(types.ErrInvaildWorkloadOps, "workload %s is not running, can not inherit", workload.ID)
    74  								}
    75  								replaceOpts.Networks = info.Networks
    76  								logger.Infof(ctx, "Inherit old workload network configuration mode %+v", replaceOpts.Networks)
    77  							}
    78  							createMessage, removeMessage, err = c.doReplaceWorkload(ctx, workload, &replaceOpts, index)
    79  							return err
    80  						}); err != nil {
    81  							if errors.Is(err, types.ErrWorkloadIgnored) {
    82  								logger.Warnf(ctx, "ignore workload: %+v", err)
    83  								return
    84  							}
    85  							logger.Error(ctx, err, "Replace and remove failed, old workload restarted")
    86  						} else {
    87  							logger.Infof(ctx, "Replace and remove success %s", ID)
    88  							logger.Infof(ctx, "New workload %s", createMessage.WorkloadID)
    89  						}
    90  						ch <- &types.ReplaceWorkloadMessage{Create: createMessage, Remove: removeMessage, Error: err}
    91  					})
    92  				}
    93  			}(*opts, index, ID))
    94  		}
    95  	})
    96  	return ch, nil
    97  }
    98  
    99  func (c *Calcium) doReplaceWorkload(
   100  	ctx context.Context,
   101  	workload *types.Workload,
   102  	opts *types.ReplaceOptions,
   103  	index int,
   104  ) (*types.CreateWorkloadMessage, *types.RemoveWorkloadMessage, error) {
   105  	removeMessage := &types.RemoveWorkloadMessage{
   106  		WorkloadID: workload.ID,
   107  		Success:    false,
   108  		Hook:       []*bytes.Buffer{},
   109  	}
   110  	logger := log.WithFunc("calcium.doReplaceWorkload")
   111  	// label filter
   112  	if !utils.LabelsFilter(workload.Labels, opts.FilterLabels) {
   113  		return nil, removeMessage, types.ErrWorkloadIgnored
   114  	}
   115  	// prepare node
   116  	node, err := c.doGetAndPrepareNode(ctx, workload.Nodename, opts.Image, opts.IgnorePull)
   117  	if err != nil {
   118  		return nil, removeMessage, err
   119  	}
   120  	// 获得文件 io
   121  	for src, dst := range opts.Copy {
   122  		content, uid, gid, mode, err := workload.Engine.VirtualizationCopyFrom(ctx, workload.ID, src)
   123  		if err != nil {
   124  			return nil, removeMessage, err
   125  		}
   126  		opts.DeployOptions.Files = append(opts.DeployOptions.Files, types.LinuxFile{
   127  			Filename: dst,
   128  			Content:  content,
   129  			UID:      uid,
   130  			GID:      gid,
   131  			Mode:     mode,
   132  		})
   133  	}
   134  
   135  	// copy resource args
   136  	createMessage := &types.CreateWorkloadMessage{
   137  		Resources:    workload.Resources,
   138  		EngineParams: workload.EngineParams,
   139  	}
   140  
   141  	if err = utils.Txn(
   142  		ctx,
   143  		// if
   144  		func(ctx context.Context) (err error) {
   145  			removeMessage.Hook, err = c.doStopWorkload(ctx, workload, opts.IgnoreHook)
   146  			return err
   147  		},
   148  		// then
   149  		func(ctx context.Context) error {
   150  			return utils.Txn(
   151  				ctx,
   152  				// if
   153  				func(ctx context.Context) error {
   154  					vco := c.doMakeReplaceWorkloadOptions(ctx, index, createMessage, &opts.DeployOptions, node, workload.ID)
   155  					return c.doDeployOneWorkload(ctx, node, &opts.DeployOptions, createMessage, vco, false)
   156  				},
   157  				// then
   158  				func(ctx context.Context) (err error) {
   159  					if err = c.doRemoveWorkload(ctx, workload, true); err != nil {
   160  						logger.Error(ctx, err, "the new started but the old failed to stop")
   161  						return err
   162  					}
   163  					removeMessage.Success = true
   164  					return
   165  				},
   166  				nil,
   167  				c.config.GlobalTimeout,
   168  			)
   169  		},
   170  		// rollback
   171  		func(ctx context.Context, _ bool) (err error) {
   172  			messages, err := c.doStartWorkload(ctx, workload, opts.IgnoreHook)
   173  			if err != nil {
   174  				logger.Error(ctx, err, "Old workload %s restart failed", workload.ID)
   175  				removeMessage.Hook = append(removeMessage.Hook, bytes.NewBufferString(err.Error()))
   176  			} else {
   177  				removeMessage.Hook = append(removeMessage.Hook, messages...)
   178  			}
   179  			return err
   180  		},
   181  		c.config.GlobalTimeout,
   182  	); err != nil {
   183  		return createMessage, removeMessage, err
   184  	}
   185  
   186  	_ = c.pool.Invoke(func() { c.RemapResourceAndLog(ctx, logger, node) })
   187  
   188  	return createMessage, removeMessage, err
   189  }
   190  
   191  func (c *Calcium) doMakeReplaceWorkloadOptions(ctx context.Context, no int, msg *types.CreateWorkloadMessage, opts *types.DeployOptions, node *types.Node, ancestorWorkloadID string) *enginetypes.VirtualizationCreateOptions {
   192  	vco := c.doMakeWorkloadOptions(ctx, no, msg, opts, node)
   193  	vco.AncestorWorkloadID = ancestorWorkloadID
   194  	return vco
   195  }