github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/create.go

package calcium

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/alphadose/haxmap"
	"github.com/cockroachdb/errors"

	"github.com/projecteru2/core/cluster"
	enginetypes "github.com/projecteru2/core/engine/types"
	"github.com/projecteru2/core/log"
	"github.com/projecteru2/core/metrics"
	resourcetypes "github.com/projecteru2/core/resource/types"
	"github.com/projecteru2/core/types"
	"github.com/projecteru2/core/utils"
	"github.com/projecteru2/core/wal"

	"github.com/sanity-io/litter"
)

// CreateWorkload uses options to create workloads
func (c *Calcium) CreateWorkload(ctx context.Context, opts *types.DeployOptions) (chan *types.CreateWorkloadMessage, error) {
	logger := log.WithFunc("calcium.CreateWorkload").WithField("opts", opts)
	if err := opts.Validate(); err != nil {
		logger.Error(ctx, err)
		return nil, err
	}
	opts.ProcessIdent = utils.RandomString(16)
	logger = logger.WithField("ident", opts.ProcessIdent)
	logger.Infof(ctx, "Creating workload ident %s with options:\n%s", opts.ProcessIdent, litter.Options{Compact: true}.Sdump(opts))
	// Count must be greater than 0
	if opts.Count <= 0 {
		err := errors.Wrapf(types.ErrInvaildDeployCount, "count: %d", opts.Count)
		logger.Error(ctx, err)
		return nil, err
	}

	return c.doCreateWorkloads(ctx, opts), nil
}

// transaction: resource metadata consistency
func (c *Calcium) doCreateWorkloads(ctx context.Context, opts *types.DeployOptions) chan *types.CreateWorkloadMessage {
	logger := log.WithFunc("calcium.doCreateWorkloads").WithField("ident", opts.ProcessIdent)
	ch := make(chan *types.CreateWorkloadMessage)
	// RFC: while calculating the current deployment status of an app, we must guarantee that
	// only this app's entrypoint is being deployed at any given moment, which would require
	// taking a global lock here and holding it until the deployment finishes.
	// Achieved instead by tracking the Processing state. 18 Oct, 2018

	var (
		deployMap   map[string]int
		rollbackMap map[string][]int
		// map[nodename][]Resources
		engineParamsMap = map[string][]resourcetypes.Resources{}
		// map[nodename][]Resources
		workloadResourcesMap = map[string][]resourcetypes.Resources{}
	)

	_ = c.pool.Invoke(func() {
		defer func() {
			cctx, cancel := context.WithTimeout(utils.NewInheritCtx(ctx), c.config.GlobalTimeout)
			for nodename := range deployMap {
				processing := opts.GetProcessing(nodename)
				if err := c.store.DeleteProcessing(cctx, processing); err != nil {
					logger.Errorf(ctx, err, "delete processing failed for %s", nodename)
				}
			}
			close(ch)
			cancel()
		}()

		var resourceCommit wal.Commit
		defer func() {
			if resourceCommit != nil {
				if err := resourceCommit(); err != nil {
					logger.Errorf(ctx, err, "commit wal failed: %s", eventWorkloadResourceAllocated)
				}
			}
		}()

		var processingCommits map[string]wal.Commit
		defer func() {
			for nodename := range processingCommits {
				if commit, ok := processingCommits[nodename]; ok {
					if err := commit(); err != nil {
						logger.Errorf(ctx, err, "commit wal failed: %s, %s", eventProcessingCreated, nodename)
					}
				}
			}
		}()
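		// The deployment below runs as a three-step transaction:
		//   if:       allocate resources on each node and persist Processing records
		//   then:     actually deploy the workloads
		//   rollback: give allocated resources back for the indices collected in rollbackMap;
		//             skipped when the failure already happened in the allocation step
		//             (failedOnCond), since nothing has been deployed yet.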
		_ = utils.Txn(
			ctx,

			// if: alloc resources
			func(ctx context.Context) (err error) {
				defer func() {
					if err != nil {
						logger.Error(ctx, err)
						ch <- &types.CreateWorkloadMessage{Error: err}
					}
				}()
				return c.withNodesPodLocked(ctx, opts.NodeFilter, func(ctx context.Context, nodeMap map[string]*types.Node) (err error) {
					if len(nodeMap) == 0 {
						return types.ErrEmptyNodeMap
					}
					nodenames := []string{}
					nodes := []*types.Node{}
					for nodename, node := range nodeMap {
						nodenames = append(nodenames, nodename)
						nodes = append(nodes, node)
					}

					if resourceCommit, err = c.wal.Log(eventWorkloadResourceAllocated, nodes); err != nil {
						return err
					}

					deployMap, err = c.doGetDeployStrategy(ctx, nodenames, opts)
					if err != nil {
						return err
					}

					// commit changes
					processingCommits = make(map[string]wal.Commit)
					for nodename, deploy := range deployMap {
						nodes = append(nodes, nodeMap[nodename])
						if workloadResourcesMap[nodename], engineParamsMap[nodename], err = c.rmgr.Alloc(ctx, nodename, deploy, opts.Resources); err != nil {
							return err
						}
						processing := opts.GetProcessing(nodename)
						if processingCommits[nodename], err = c.wal.Log(eventProcessingCreated, processing); err != nil {
							return err
						}
						if err = c.store.CreateProcessing(ctx, processing, deploy); err != nil {
							return err
						}
					}
					return nil
				})
			},

			// then: deploy workloads
			func(ctx context.Context) (err error) {
				rollbackMap, err = c.doDeployWorkloads(ctx, ch, opts, engineParamsMap, workloadResourcesMap, deployMap)
				return err
			},

			// rollback: give back resources
			func(ctx context.Context, failedOnCond bool) (err error) {
				if failedOnCond {
					return
				}
				for nodename, rollbackIndices := range rollbackMap {
					if e := c.withNodePodLocked(ctx, nodename, func(ctx context.Context, _ *types.Node) error {
						rollbackResources := utils.Map(rollbackIndices, func(idx int) resourcetypes.Resources {
							return workloadResourcesMap[nodename][idx]
						})
						return c.rmgr.RollbackAlloc(ctx, nodename, rollbackResources)
					}); e != nil {
						logger.Error(ctx, e)
						err = e
					}
				}
				return err
			},

			c.config.GlobalTimeout,
		)
	})

	return ch
}

// doDeployWorkloads fans the deployment out to every node in deployMap through the worker
// pool and collects the indices of failed workloads per node into the returned rollback map.
func (c *Calcium) doDeployWorkloads(ctx context.Context,
	ch chan *types.CreateWorkloadMessage,
	opts *types.DeployOptions,
	engineParamsMap map[string][]resourcetypes.Resources,
	workloadResourcesMap map[string][]resourcetypes.Resources,
	deployMap map[string]int) (_ map[string][]int, err error) {

	wg := sync.WaitGroup{}
	wg.Add(len(deployMap))
	syncRollbackMap := haxmap.New[string, []int]()
	logger := log.WithFunc("calcium.doDeployWorkloads").WithField("ident", opts.ProcessIdent)

	seq := 0
	rollbackMap := make(map[string][]int)
	for nodename, deploy := range deployMap {
		_ = c.pool.Invoke(func(deploy int) func() {
			return func() {
				metrics.Client.SendDeployCount(ctx, deploy)
			}
		}(deploy))
		_ = c.pool.Invoke(func(nodename string, deploy, seq int) func() {
			return func() {
				defer wg.Done()
				if indices, err := c.doDeployWorkloadsOnNode(ctx, ch, nodename, opts, deploy, engineParamsMap[nodename], workloadResourcesMap[nodename], seq); err != nil {
					syncRollbackMap.Set(nodename, indices)
				}
			}
		}(nodename, deploy, seq))

		seq += deploy
	}

	wg.Wait()
	syncRollbackMap.ForEach(func(nodename string, indices []int) bool {
		rollbackMap[nodename] = indices
		return true
	})
	logger.Debugf(ctx, "rollbackMap: %+v", rollbackMap)
	if len(rollbackMap) != 0 {
		err = types.ErrRollbackMapIsNotEmpty
	}
	return rollbackMap, err
}
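// Sequence numbering, worked example (illustrative): with deployMap = {"node-a": 2, "node-b": 3},
// one node's workloads get the seq offsets 0 and 1 and the other's get 2, 3 and 4; map iteration
// order is not deterministic, so which node receives the lower block varies between runs. The
// per-workload offset seq+idx is what doMakeWorkloadOptions exposes as ERU_WORKLOAD_SEQ.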

// deploy scheduled workloads on one node
func (c *Calcium) doDeployWorkloadsOnNode(ctx context.Context,
	ch chan *types.CreateWorkloadMessage,
	nodename string,
	opts *types.DeployOptions,
	deploy int,
	engineParams []resourcetypes.Resources,
	workloadResources []resourcetypes.Resources,
	seq int) (indices []int, err error) {

	logger := log.WithFunc("calcium.doDeployWorkloadsOnNode").WithField("node", nodename).WithField("ident", opts.ProcessIdent).WithField("deploy", deploy).WithField("seq", seq)
	node, err := c.doGetAndPrepareNode(ctx, nodename, opts.Image, opts.IgnorePull)
	if err != nil {
		for i := 0; i < deploy; i++ {
			logger.Error(ctx, err)
			ch <- &types.CreateWorkloadMessage{Error: err}
		}
		return utils.Range(deploy), err
	}

	appendLock := sync.Mutex{}
	wg := &sync.WaitGroup{}
	wg.Add(deploy)
	for idx := 0; idx < deploy; idx++ {
		idx := idx
		createMsg := &types.CreateWorkloadMessage{
			Podname:  opts.Podname,
			Nodename: nodename,
			Publish:  map[string][]string{},
		}

		_ = c.pool.Invoke(func() {
			defer wg.Done()
			var e error
			defer func() {
				if e != nil {
					err = e
					logger.Error(ctx, err)
					createMsg.Error = err
					appendLock.Lock()
					indices = append(indices, idx)
					appendLock.Unlock()
				}
				ch <- createMsg
			}()

			createMsg.EngineParams = engineParams[idx]
			createMsg.Resources = workloadResources[idx]

			createOpts := c.doMakeWorkloadOptions(ctx, seq+idx, createMsg, opts, node)
			e = c.doDeployOneWorkload(ctx, node, opts, createMsg, createOpts, true)
		})
	}
	wg.Wait()

	// Don't put remap into the transaction; the cost of rolling it back would be too high.
	// If remap fails, the consequence is that the share pool is not updated, which we
	// consider acceptable. Besides, remap is idempotent: even if this remap fails, the
	// next one will converge to the correct state.
	_ = c.pool.Invoke(func() { c.RemapResourceAndLog(ctx, logger, node) })

	return indices, err
}

func (c *Calcium) doGetAndPrepareNode(ctx context.Context, nodename, image string, ignorePull bool) (*types.Node, error) {
	node, err := c.store.GetNode(ctx, nodename)
	if err != nil {
		return nil, err
	}
	if !ignorePull {
		err = pullImage(ctx, node, image)
	}

	return node, err
}

// transaction: workload metadata consistency
func (c *Calcium) doDeployOneWorkload(
	ctx context.Context,
	node *types.Node,
	opts *types.DeployOptions,
	msg *types.CreateWorkloadMessage,
	createOpts *enginetypes.VirtualizationCreateOptions,
	decrProcessing bool,
) (err error) {
	logger := log.WithFunc("calcium.doDeployWorkload").WithField("node", node.Name).WithField("ident", opts.ProcessIdent).WithField("msg", msg)
	workload := &types.Workload{
		Resources:    msg.Resources,
		EngineParams: msg.EngineParams,
		Name:         createOpts.Name,
		Labels:       createOpts.Labels,
		Podname:      opts.Podname,
		Nodename:     node.Name,
		Hook:         opts.Entrypoint.Hook,
		Privileged:   opts.Entrypoint.Privileged,
		Engine:       node.Engine,
		Image:        opts.Image,
		Env:          opts.Env,
		User:         opts.User,
		CreateTime:   time.Now().Unix(),
	}

	var commit wal.Commit
	defer func() {
		if commit != nil {
			if err := commit(); err != nil {
				logger.Errorf(ctx, err, "Commit WAL %s failed", eventWorkloadCreated)
			}
		}
	}()
	return utils.Txn(
		ctx,
		// create workload
		func(ctx context.Context) error {
			created, err := node.Engine.VirtualizationCreate(ctx, createOpts)
			if err != nil {
				return err
			}
			workload.ID = created.ID

			for key, value := range created.Labels { // add Labels
				workload.Labels[key] = value
			}

			// We can't write the workload ID to the WAL before VirtualizationCreate, so
			// there is a time gap: if the core process crashes between VirtualizationCreate
			// and logCreateWorkload, the workload is leaked.
			commit, err = c.wal.Log(eventWorkloadCreated, &types.Workload{
				ID:       workload.ID,
				Nodename: workload.Nodename,
			})
			return err
		},

		func(ctx context.Context) (err error) {
			// avoid being interrupted by MakeDeployStatus
			processing := opts.GetProcessing(node.Name)
			if !decrProcessing {
				processing = nil
			}
			// add workload metadata first
			if err := c.store.AddWorkload(ctx, workload, processing); err != nil {
				return err
			}
			logger.Infof(ctx, "workload %s metadata created", workload.ID)

			// Copy data to workload
			if len(opts.Files) > 0 {
				for _, file := range opts.Files {
					if err = c.doSendFileToWorkload(ctx, node.Engine, workload.ID, file); err != nil {
						return err
					}
				}
			}

			// deal with hook
			if len(opts.AfterCreate) > 0 {
				if workload.Hook != nil {
					workload.Hook = &types.Hook{
						AfterStart: append(opts.AfterCreate, workload.Hook.AfterStart...),
						Force:      workload.Hook.Force,
					}
				} else {
					workload.Hook = &types.Hook{
						AfterStart: opts.AfterCreate,
						Force:      opts.IgnoreHook,
					}
				}
			}

			// start workload
			msg.Hook, err = c.doStartWorkload(ctx, workload, opts.IgnoreHook)
			if err != nil {
				return err
			}

			// reset workload.hook
			workload.Hook = opts.Entrypoint.Hook

			// inspect real meta
			var workloadInfo *enginetypes.VirtualizationInfo
			workloadInfo, err = workload.Inspect(ctx) // fill in static metadata
			if err != nil {
				return err
			}

			// update meta
			if workloadInfo.Networks != nil {
				msg.Publish = utils.MakePublishInfo(workloadInfo.Networks, opts.Entrypoint.Publish)
			}

			// if workload metadata changed, then update
			if workloadInfo.User != workload.User {
				// reset users
				workload.User = workloadInfo.User

				if err := c.store.UpdateWorkload(ctx, workload); err != nil {
					return err
				}
				logger.Infof(ctx, "workload %s metadata updated", workload.ID)
			}

			msg.WorkloadID = workload.ID
			msg.WorkloadName = workload.Name
			msg.Podname = workload.Podname
			msg.Nodename = workload.Nodename
			return nil
		},

		// remove workload
		func(ctx context.Context, _ bool) error {
			logger.Infof(ctx, "failed to deploy workload %s, rollback", workload.ID)
			if workload.ID == "" {
				return nil
			}

			if err := c.store.RemoveWorkload(ctx, workload); err != nil {
				logger.Errorf(ctx, err, "failed to remove workload %s", workload.ID)
			}

			return workload.Remove(ctx, true)
		},
		c.config.GlobalTimeout,
	)
}

func (c *Calcium) doMakeWorkloadOptions(ctx context.Context, no int, msg *types.CreateWorkloadMessage, opts *types.DeployOptions, node *types.Node) *enginetypes.VirtualizationCreateOptions {
	createOpts := &enginetypes.VirtualizationCreateOptions{}
	// general
	createOpts.EngineParams = msg.EngineParams
	createOpts.RawArgs = opts.RawArgs
	createOpts.Lambda = opts.Lambda
	createOpts.User = opts.User
	createOpts.DNS = opts.DNS
	createOpts.Image = opts.Image
	createOpts.Stdin = opts.OpenStdin
	createOpts.Hosts = opts.ExtraHosts
	createOpts.Debug = opts.Debug
	createOpts.Networks = opts.Networks

	// entry
	entry := opts.Entrypoint
	createOpts.WorkingDir = entry.Dir
	createOpts.Privileged = entry.Privileged
	createOpts.Sysctl = entry.Sysctls
	createOpts.Publish = entry.Publish
	createOpts.Restart = entry.Restart
	if entry.Log != nil {
		createOpts.LogType = entry.Log.Type
		createOpts.LogConfig = map[string]string{}
		for k, v := range entry.Log.Config {
			createOpts.LogConfig[k] = v
		}
	}
	// name
	suffix := utils.RandomString(6)
	createOpts.Name = utils.MakeWorkloadName(opts.Name, opts.Entrypoint.Name, suffix)
	msg.WorkloadName = createOpts.Name
	// command and user
	// extra args are dynamic
	createOpts.Cmd = opts.Entrypoint.Commands
	// env
	env := append(opts.Env, fmt.Sprintf("APP_NAME=%s", opts.Name)) //nolint
	env = append(env, fmt.Sprintf("ERU_POD=%s", opts.Podname))
	env = append(env, fmt.Sprintf("ERU_NODE_NAME=%s", node.Name))
	env = append(env, fmt.Sprintf("ERU_WORKLOAD_SEQ=%d", no))
	createOpts.Env = env
	// basic labels, bind to LabelMeta
	createOpts.Labels = map[string]string{
		cluster.ERUMark: "1",
		cluster.LabelMeta: utils.EncodeMetaInLabel(ctx, &types.LabelMeta{
			Publish:     opts.Entrypoint.Publish,
			HealthCheck: entry.HealthCheck,
		}),
		cluster.LabelNodeName: node.Name,
		cluster.LabelCoreID:   c.identifier,
	}
	for key, value := range opts.Labels {
		createOpts.Labels[key] = value
	}

	return createOpts
}
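// Usage sketch (illustrative, not part of the original file): a caller that already holds a
// *Calcium instance `cal` and a populated *types.DeployOptions `opts` would typically drain
// the returned channel and check each message's Error field, e.g.:
//
//	ch, err := cal.CreateWorkload(ctx, opts)
//	if err != nil {
//		return err
//	}
//	for msg := range ch {
//		if msg.Error != nil {
//			continue // or collect/report the per-workload error
//		}
//		fmt.Printf("created workload %s on %s\n", msg.WorkloadID, msg.Nodename)
//	}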