github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/replace.go (about) 1 package calcium 2 3 import ( 4 "bytes" 5 "context" 6 "sync" 7 8 enginetypes "github.com/projecteru2/core/engine/types" 9 "github.com/projecteru2/core/log" 10 "github.com/projecteru2/core/types" 11 "github.com/projecteru2/core/utils" 12 13 "github.com/cockroachdb/errors" 14 ) 15 16 // ReplaceWorkload replace workloads with same resource 17 func (c *Calcium) ReplaceWorkload(ctx context.Context, opts *types.ReplaceOptions) (chan *types.ReplaceWorkloadMessage, error) { 18 logger := log.WithFunc("calcium.ReplaceWorkload").WithField("opts", opts) 19 if err := opts.Validate(); err != nil { 20 logger.Error(ctx, err) 21 return nil, err 22 } 23 opts.Normalize() 24 if len(opts.IDs) == 0 { 25 if len(opts.NodeFilter.Includes) == 0 { 26 opts.NodeFilter.Includes = []string{""} 27 } 28 for _, nodename := range opts.NodeFilter.Includes { 29 workloads, err := c.ListWorkloads(ctx, &types.ListWorkloadsOptions{ 30 Appname: opts.Name, Entrypoint: opts.Entrypoint.Name, Nodename: nodename, 31 }) 32 if err != nil { 33 logger.Error(ctx, err) 34 return nil, err 35 } 36 for _, workload := range workloads { 37 opts.IDs = append(opts.IDs, workload.ID) 38 } 39 } 40 } 41 ch := make(chan *types.ReplaceWorkloadMessage) 42 _ = c.pool.Invoke(func() { 43 defer close(ch) 44 // 并发控制 45 wg := sync.WaitGroup{} 46 wg.Add(len(opts.IDs)) 47 defer wg.Wait() 48 for index, ID := range opts.IDs { 49 _ = c.pool.Invoke(func(replaceOpts types.ReplaceOptions, index int, ID string) func() { 50 return func() { 51 _ = c.pool.Invoke(func() { 52 defer wg.Done() 53 var createMessage *types.CreateWorkloadMessage 54 removeMessage := &types.RemoveWorkloadMessage{WorkloadID: ID} 55 var err error 56 if err = c.withWorkloadLocked(ctx, ID, false, func(ctx context.Context, workload *types.Workload) error { 57 if opts.Podname != "" && workload.Podname != opts.Podname { 58 logger.Warnf(ctx, "Skip not in pod workload %s", workload.ID) 59 return errors.Wrapf(types.ErrWorkloadIgnored, "workload %s not in pod %s", workload.ID, opts.Podname) 60 } 61 // 使用复制之后的配置 62 // 停老的,起新的 63 // replaceOpts.ResourceOpts = workload.ResourceUsage 64 // 覆盖 podname 如果做全量更新的话 65 replaceOpts.Podname = workload.Podname 66 // 覆盖 Volumes 67 // 继承网络配置 68 if replaceOpts.NetworkInherit { 69 info, err := workload.Inspect(ctx) 70 if err != nil { 71 return err 72 } else if !info.Running { 73 return errors.Wrapf(types.ErrInvaildWorkloadOps, "workload %s is not running, can not inherit", workload.ID) 74 } 75 replaceOpts.Networks = info.Networks 76 logger.Infof(ctx, "Inherit old workload network configuration mode %+v", replaceOpts.Networks) 77 } 78 createMessage, removeMessage, err = c.doReplaceWorkload(ctx, workload, &replaceOpts, index) 79 return err 80 }); err != nil { 81 if errors.Is(err, types.ErrWorkloadIgnored) { 82 logger.Warnf(ctx, "ignore workload: %+v", err) 83 return 84 } 85 logger.Error(ctx, err, "Replace and remove failed, old workload restarted") 86 } else { 87 logger.Infof(ctx, "Replace and remove success %s", ID) 88 logger.Infof(ctx, "New workload %s", createMessage.WorkloadID) 89 } 90 ch <- &types.ReplaceWorkloadMessage{Create: createMessage, Remove: removeMessage, Error: err} 91 }) 92 } 93 }(*opts, index, ID)) 94 } 95 }) 96 return ch, nil 97 } 98 99 func (c *Calcium) doReplaceWorkload( 100 ctx context.Context, 101 workload *types.Workload, 102 opts *types.ReplaceOptions, 103 index int, 104 ) (*types.CreateWorkloadMessage, *types.RemoveWorkloadMessage, error) { 105 removeMessage := &types.RemoveWorkloadMessage{ 106 WorkloadID: workload.ID, 107 Success: false, 108 Hook: []*bytes.Buffer{}, 109 } 110 logger := log.WithFunc("calcium.doReplaceWorkload") 111 // label filter 112 if !utils.LabelsFilter(workload.Labels, opts.FilterLabels) { 113 return nil, removeMessage, types.ErrWorkloadIgnored 114 } 115 // prepare node 116 node, err := c.doGetAndPrepareNode(ctx, workload.Nodename, opts.Image, opts.IgnorePull) 117 if err != nil { 118 return nil, removeMessage, err 119 } 120 // 获得文件 io 121 for src, dst := range opts.Copy { 122 content, uid, gid, mode, err := workload.Engine.VirtualizationCopyFrom(ctx, workload.ID, src) 123 if err != nil { 124 return nil, removeMessage, err 125 } 126 opts.DeployOptions.Files = append(opts.DeployOptions.Files, types.LinuxFile{ 127 Filename: dst, 128 Content: content, 129 UID: uid, 130 GID: gid, 131 Mode: mode, 132 }) 133 } 134 135 // copy resource args 136 createMessage := &types.CreateWorkloadMessage{ 137 Resources: workload.Resources, 138 EngineParams: workload.EngineParams, 139 } 140 141 if err = utils.Txn( 142 ctx, 143 // if 144 func(ctx context.Context) (err error) { 145 removeMessage.Hook, err = c.doStopWorkload(ctx, workload, opts.IgnoreHook) 146 return err 147 }, 148 // then 149 func(ctx context.Context) error { 150 return utils.Txn( 151 ctx, 152 // if 153 func(ctx context.Context) error { 154 vco := c.doMakeReplaceWorkloadOptions(ctx, index, createMessage, &opts.DeployOptions, node, workload.ID) 155 return c.doDeployOneWorkload(ctx, node, &opts.DeployOptions, createMessage, vco, false) 156 }, 157 // then 158 func(ctx context.Context) (err error) { 159 if err = c.doRemoveWorkload(ctx, workload, true); err != nil { 160 logger.Error(ctx, err, "the new started but the old failed to stop") 161 return err 162 } 163 removeMessage.Success = true 164 return 165 }, 166 nil, 167 c.config.GlobalTimeout, 168 ) 169 }, 170 // rollback 171 func(ctx context.Context, _ bool) (err error) { 172 messages, err := c.doStartWorkload(ctx, workload, opts.IgnoreHook) 173 if err != nil { 174 logger.Error(ctx, err, "Old workload %s restart failed", workload.ID) 175 removeMessage.Hook = append(removeMessage.Hook, bytes.NewBufferString(err.Error())) 176 } else { 177 removeMessage.Hook = append(removeMessage.Hook, messages...) 178 } 179 return err 180 }, 181 c.config.GlobalTimeout, 182 ); err != nil { 183 return createMessage, removeMessage, err 184 } 185 186 _ = c.pool.Invoke(func() { c.RemapResourceAndLog(ctx, logger, node) }) 187 188 return createMessage, removeMessage, err 189 } 190 191 func (c *Calcium) doMakeReplaceWorkloadOptions(ctx context.Context, no int, msg *types.CreateWorkloadMessage, opts *types.DeployOptions, node *types.Node, ancestorWorkloadID string) *enginetypes.VirtualizationCreateOptions { 192 vco := c.doMakeWorkloadOptions(ctx, no, msg, opts, node) 193 vco.AncestorWorkloadID = ancestorWorkloadID 194 return vco 195 }