github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/create.go

     1  package calcium
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/alphadose/haxmap"
    10  	"github.com/cockroachdb/errors"
    11  
    12  	"github.com/projecteru2/core/cluster"
    13  	enginetypes "github.com/projecteru2/core/engine/types"
    14  	"github.com/projecteru2/core/log"
    15  	"github.com/projecteru2/core/metrics"
    16  	resourcetypes "github.com/projecteru2/core/resource/types"
    17  	"github.com/projecteru2/core/types"
    18  	"github.com/projecteru2/core/utils"
    19  	"github.com/projecteru2/core/wal"
    20  
    21  	"github.com/sanity-io/litter"
    22  )
    23  
    24  // CreateWorkload uses options to create workloads
    25  func (c *Calcium) CreateWorkload(ctx context.Context, opts *types.DeployOptions) (chan *types.CreateWorkloadMessage, error) {
    26  	logger := log.WithFunc("calcium.CreateWorkload").WithField("opts", opts)
    27  	if err := opts.Validate(); err != nil {
    28  		logger.Error(ctx, err)
    29  		return nil, err
    30  	}
    31  	opts.ProcessIdent = utils.RandomString(16)
    32  	logger = logger.WithField("ident", opts.ProcessIdent)
    33  	logger.Infof(ctx, "Creating workload ident %s with options:\n%s", opts.ProcessIdent, litter.Options{Compact: true}.Sdump(opts))
    34  	// Count must be greater than 0
    35  	if opts.Count <= 0 {
    36  		err := errors.Wrapf(types.ErrInvaildDeployCount, "count: %d", opts.Count)
    37  		logger.Error(ctx, err)
    38  		return nil, err
    39  	}
    40  
    41  	return c.doCreateWorkloads(ctx, opts), nil
    42  }
    43  
    44  // transaction: resource metadata consistency
    45  func (c *Calcium) doCreateWorkloads(ctx context.Context, opts *types.DeployOptions) chan *types.CreateWorkloadMessage {
    46  	logger := log.WithFunc("calcium.doCreateWorkloads").WithField("ident", opts.ProcessIdent)
    47  	ch := make(chan *types.CreateWorkloadMessage)
    48  	// RFC: when computing an app's current deployment state we must ensure that only this
    49  	// app's entrypoint is being deployed at any one time, which would require taking a global
    50  	// lock here until the deployment finishes; achieved via Processing state tracking, 18 Oct, 2018
    51  
    52  	var (
    53  		deployMap   map[string]int
    54  		rollbackMap map[string][]int
    55  		// map[nodename][]Resources
    56  		engineParamsMap = map[string][]resourcetypes.Resources{}
    57  		// map[nodename][]Resources
    58  		workloadResourcesMap = map[string][]resourcetypes.Resources{}
    59  	)
    60  
    61  	_ = c.pool.Invoke(func() {
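        		// on exit: delete the Processing records for every node we allocated on, then close the result channel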
    62  		defer func() {
    63  			cctx, cancel := context.WithTimeout(utils.NewInheritCtx(ctx), c.config.GlobalTimeout)
    64  			for nodename := range deployMap {
    65  				processing := opts.GetProcessing(nodename)
    66  				if err := c.store.DeleteProcessing(cctx, processing); err != nil {
    67  					logger.Errorf(ctx, err, "delete processing failed for %s", nodename)
    68  				}
    69  			}
    70  			close(ch)
    71  			cancel()
    72  		}()
    73  
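        		// committing resourceCommit acknowledges the eventWorkloadResourceAllocated WAL entry once this flow finishes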
    74  		var resourceCommit wal.Commit
    75  		defer func() {
    76  			if resourceCommit != nil {
    77  				if err := resourceCommit(); err != nil {
    78  					logger.Errorf(ctx, err, "commit wal failed: %s", eventWorkloadResourceAllocated)
    79  				}
    80  			}
    81  		}()
    82  
    83  		var processingCommits map[string]wal.Commit
    84  		defer func() {
    85  			for nodename := range processingCommits {
    86  				if commit, ok := processingCommits[nodename]; ok {
    87  					if err := commit(); err != nil {
    88  						logger.Errorf(ctx, err, "commit wal failed: %s, %s", eventProcessingCreated, nodename)
    89  					}
    90  				}
    91  			}
    92  		}()
    93  
    94  		_ = utils.Txn(
    95  			ctx,
    96  
    97  			// if: alloc resources
    98  			func(ctx context.Context) (err error) {
    99  				defer func() {
   100  					if err != nil {
   101  						logger.Error(ctx, err)
   102  						ch <- &types.CreateWorkloadMessage{Error: err}
   103  					}
   104  				}()
   105  				return c.withNodesPodLocked(ctx, opts.NodeFilter, func(ctx context.Context, nodeMap map[string]*types.Node) (err error) {
   106  					if len(nodeMap) == 0 {
   107  						return types.ErrEmptyNodeMap
   108  					}
   109  					nodenames := []string{}
   110  					nodes := []*types.Node{}
   111  					for nodename, node := range nodeMap {
   112  						nodenames = append(nodenames, nodename)
   113  						nodes = append(nodes, node)
   114  					}
   115  
   116  					if resourceCommit, err = c.wal.Log(eventWorkloadResourceAllocated, nodes); err != nil {
   117  						return err
   118  					}
   119  
   120  					deployMap, err = c.doGetDeployStrategy(ctx, nodenames, opts)
   121  					if err != nil {
   122  						return err
   123  					}
   124  
   125  					// allocate resources and record Processing state for each node to deploy on
   126  					processingCommits = make(map[string]wal.Commit)
   127  					for nodename, deploy := range deployMap {
   128  						nodes = append(nodes, nodeMap[nodename])
   129  						if workloadResourcesMap[nodename], engineParamsMap[nodename], err = c.rmgr.Alloc(ctx, nodename, deploy, opts.Resources); err != nil {
   130  							return err
   131  						}
   132  						processing := opts.GetProcessing(nodename)
   133  						if processingCommits[nodename], err = c.wal.Log(eventProcessingCreated, processing); err != nil {
   134  							return err
   135  						}
   136  						if err = c.store.CreateProcessing(ctx, processing, deploy); err != nil {
   137  							return err
   138  						}
   139  					}
   140  					return nil
   141  				})
   142  			},
   143  
   144  			// then: deploy workloads
   145  			func(ctx context.Context) (err error) {
   146  				rollbackMap, err = c.doDeployWorkloads(ctx, ch, opts, engineParamsMap, workloadResourcesMap, deployMap)
   147  				return err
   148  			},
   149  
   150  			// rollback: give back resources
   151  			func(ctx context.Context, failedOnCond bool) (err error) {
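        				// failedOnCond means the allocation phase itself failed, so the deploy phase never ran and rollbackMap is empty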
   152  				if failedOnCond {
   153  					return
   154  				}
   155  				for nodename, rollbackIndices := range rollbackMap {
   156  					if e := c.withNodePodLocked(ctx, nodename, func(ctx context.Context, _ *types.Node) error {
   157  						rollbackResources := utils.Map(rollbackIndices, func(idx int) resourcetypes.Resources {
   158  							return workloadResourcesMap[nodename][idx]
   159  						})
   160  						return c.rmgr.RollbackAlloc(ctx, nodename, rollbackResources)
   161  					}); e != nil {
   162  						logger.Error(ctx, e)
   163  						err = e
   164  					}
   165  				}
   166  				return err
   167  			},
   168  
   169  			c.config.GlobalTimeout,
   170  		)
   171  	})
   172  
   173  	return ch
   174  }
   175  
   176  func (c *Calcium) doDeployWorkloads(ctx context.Context,
   177  	ch chan *types.CreateWorkloadMessage,
   178  	opts *types.DeployOptions,
   179  	engineParamsMap map[string][]resourcetypes.Resources,
   180  	workloadResourcesMap map[string][]resourcetypes.Resources,
   181  	deployMap map[string]int) (_ map[string][]int, err error) {
   182  
   183  	wg := sync.WaitGroup{}
   184  	wg.Add(len(deployMap))
   185  	syncRollbackMap := haxmap.New[string, []int]()
   186  	logger := log.WithFunc("calcium.doDeployWorkloads").WithField("ident", opts.ProcessIdent)
   187  
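        	// seq is the starting workload sequence for each node: a node deploying n workloads gets indices [seq, seq+n)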
   188  	seq := 0
   189  	rollbackMap := make(map[string][]int)
   190  	for nodename, deploy := range deployMap {
   191  		_ = c.pool.Invoke(func(deploy int) func() {
   192  			return func() {
   193  				metrics.Client.SendDeployCount(ctx, deploy)
   194  			}
   195  		}(deploy))
   196  		_ = c.pool.Invoke(func(nodename string, deploy, seq int) func() {
   197  			return func() {
   198  				defer wg.Done()
   199  				if indices, err := c.doDeployWorkloadsOnNode(ctx, ch, nodename, opts, deploy, engineParamsMap[nodename], workloadResourcesMap[nodename], seq); err != nil {
   200  					syncRollbackMap.Set(nodename, indices)
   201  				}
   202  			}
   203  		}(nodename, deploy, seq))
   204  
   205  		seq += deploy
   206  	}
   207  
   208  	wg.Wait()
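        	// collect the failed indices reported by each node's deploy goroutine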
   209  	syncRollbackMap.ForEach(func(nodename string, indices []int) bool {
   210  		rollbackMap[nodename] = indices
   211  		return true
   212  	})
   213  	logger.Debugf(ctx, "rollbackMap: %+v", rollbackMap)
   214  	if len(rollbackMap) != 0 {
   215  		err = types.ErrRollbackMapIsNotEmpty
   216  	}
   217  	return rollbackMap, err
   218  }
   219  
   220  // deploy scheduled workloads on one node
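        // returns the indices of the workloads that failed so their resources can be rolled back by the caller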
   221  func (c *Calcium) doDeployWorkloadsOnNode(ctx context.Context,
   222  	ch chan *types.CreateWorkloadMessage,
   223  	nodename string,
   224  	opts *types.DeployOptions,
   225  	deploy int,
   226  	engineParams []resourcetypes.Resources,
   227  	workloadResources []resourcetypes.Resources,
   228  	seq int) (indices []int, err error) {
   229  
   230  	logger := log.WithFunc("calcium.doDeployWorkloadsOnNode").WithField("node", nodename).WithField("ident", opts.ProcessIdent).WithField("deploy", deploy).WithField("seq", seq)
   231  	node, err := c.doGetAndPrepareNode(ctx, nodename, opts.Image, opts.IgnorePull)
   232  	if err != nil {
   233  		for i := 0; i < deploy; i++ {
   234  			logger.Error(ctx, err)
   235  			ch <- &types.CreateWorkloadMessage{Error: err}
   236  		}
   237  		return utils.Range(deploy), err
   238  	}
   239  
   240  	appendLock := sync.Mutex{}
   241  	wg := &sync.WaitGroup{}
   242  	wg.Add(deploy)
   243  	for idx := 0; idx < deploy; idx++ {
   244  		idx := idx // capture the loop variable for the closure below
   245  		createMsg := &types.CreateWorkloadMessage{
   246  			Podname:  opts.Podname,
   247  			Nodename: nodename,
   248  			Publish:  map[string][]string{},
   249  		}
   250  
   251  		_ = c.pool.Invoke(func() {
   252  			defer wg.Done()
   253  			var e error
   254  			defer func() {
   255  				if e != nil {
   256  					err = e
   257  					logger.Error(ctx, err)
   258  					createMsg.Error = err
   259  					appendLock.Lock()
   260  					indices = append(indices, idx)
   261  					appendLock.Unlock()
   262  				}
   263  				ch <- createMsg
   264  			}()
   265  
   266  			createMsg.EngineParams = engineParams[idx]
   267  			createMsg.Resources = workloadResources[idx]
   268  
   269  			createOpts := c.doMakeWorkloadOptions(ctx, seq+idx, createMsg, opts, node)
   270  			e = c.doDeployOneWorkload(ctx, node, opts, createMsg, createOpts, true)
   271  		})
   272  	}
   273  	wg.Wait()
   274  
   275  	// Don't put remap into the transaction; rolling it back would be too costly.
   276  	// If remap fails, the consequence is that the share pool is not updated, which we consider acceptable.
   277  	// Besides, remap is idempotent: even if this remap fails, the next remap will converge to the correct state.
   278  	_ = c.pool.Invoke(func() { c.RemapResourceAndLog(ctx, logger, node) })
   279  
   280  	return indices, err
   281  }
   282  
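        // doGetAndPrepareNode fetches the node and pulls the workload image onto it unless ignorePull is set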
   283  func (c *Calcium) doGetAndPrepareNode(ctx context.Context, nodename, image string, ignorePull bool) (*types.Node, error) {
   284  	node, err := c.store.GetNode(ctx, nodename)
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	if !ignorePull {
   289  		err = pullImage(ctx, node, image)
   290  	}
   291  
   292  	return node, err
   293  }
   294  
   295  // transaction: workload metadata consistency
   296  func (c *Calcium) doDeployOneWorkload(
   297  	ctx context.Context,
   298  	node *types.Node,
   299  	opts *types.DeployOptions,
   300  	msg *types.CreateWorkloadMessage,
   301  	createOpts *enginetypes.VirtualizationCreateOptions,
   302  	decrProcessing bool,
   303  ) (err error) {
   304  	logger := log.WithFunc("calcium.doDeployOneWorkload").WithField("node", node.Name).WithField("ident", opts.ProcessIdent).WithField("msg", msg)
   305  	workload := &types.Workload{
   306  		Resources:    msg.Resources,
   307  		EngineParams: msg.EngineParams,
   308  		Name:         createOpts.Name,
   309  		Labels:       createOpts.Labels,
   310  		Podname:      opts.Podname,
   311  		Nodename:     node.Name,
   312  		Hook:         opts.Entrypoint.Hook,
   313  		Privileged:   opts.Entrypoint.Privileged,
   314  		Engine:       node.Engine,
   315  		Image:        opts.Image,
   316  		Env:          opts.Env,
   317  		User:         opts.User,
   318  		CreateTime:   time.Now().Unix(),
   319  	}
   320  
   321  	var commit wal.Commit
   322  	defer func() {
   323  		if commit != nil {
   324  			if err := commit(); err != nil {
   325  				logger.Errorf(ctx, err, "Commit WAL %s failed", eventWorkloadCreated)
   326  			}
   327  		}
   328  	}()
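        	// three-phase transaction: create the workload via the engine, then persist its metadata and start it; on failure the rollback removes the workload again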
   329  	return utils.Txn(
   330  		ctx,
   331  		// create workload
   332  		func(ctx context.Context) error {
   333  			created, err := node.Engine.VirtualizationCreate(ctx, createOpts)
   334  			if err != nil {
   335  				return err
   336  			}
   337  			workload.ID = created.ID
   338  
   339  			for key, value := range created.Labels { // add Labels
   340  				workload.Labels[key] = value
   341  			}
   342  
   343  			// For now we can't WAL the workload ID before VirtualizationCreate, since the ID
   344  			// only exists afterwards. That leaves a window: if the core process crashes between
   345  			// VirtualizationCreate and the eventWorkloadCreated WAL log, the workload is leaked.
   346  			commit, err = c.wal.Log(eventWorkloadCreated, &types.Workload{
   347  				ID:       workload.ID,
   348  				Nodename: workload.Nodename,
   349  			})
   350  			return err
   351  		},
   352  
   353  		func(ctx context.Context) (err error) {
   354  			// avoid being interrupted by MakeDeployStatus
   355  			processing := opts.GetProcessing(node.Name)
   356  			if !decrProcessing {
   357  				processing = nil
   358  			}
   359  			// add workload metadata first
   360  			if err := c.store.AddWorkload(ctx, workload, processing); err != nil {
   361  				return err
   362  			}
   363  			logger.Infof(ctx, "workload %s metadata created", workload.ID)
   364  
   365  			// Copy data to workload
   366  			if len(opts.Files) > 0 {
   367  				for _, file := range opts.Files {
   368  					if err = c.doSendFileToWorkload(ctx, node.Engine, workload.ID, file); err != nil {
   369  						return err
   370  					}
   371  				}
   372  			}
   373  
   374  			// deal with hook
   375  			if len(opts.AfterCreate) > 0 {
   376  				if workload.Hook != nil {
   377  					workload.Hook = &types.Hook{
   378  						AfterStart: append(opts.AfterCreate, workload.Hook.AfterStart...),
   379  						Force:      workload.Hook.Force,
   380  					}
   381  				} else {
   382  					workload.Hook = &types.Hook{
   383  						AfterStart: opts.AfterCreate,
   384  						Force:      opts.IgnoreHook,
   385  					}
   386  				}
   387  			}
   388  
   389  			// start workload
   390  			msg.Hook, err = c.doStartWorkload(ctx, workload, opts.IgnoreHook)
   391  			if err != nil {
   392  				return err
   393  			}
   394  
   395  			// reset workload.hook
   396  			workload.Hook = opts.Entrypoint.Hook
   397  
   398  			// inspect real meta
   399  			var workloadInfo *enginetypes.VirtualizationInfo
   400  			workloadInfo, err = workload.Inspect(ctx) // fill in static metadata
   401  			if err != nil {
   402  				return err
   403  			}
   404  
   405  			// update meta
   406  			if workloadInfo.Networks != nil {
   407  				msg.Publish = utils.MakePublishInfo(workloadInfo.Networks, opts.Entrypoint.Publish)
   408  			}
   409  
   410  			// if workload metadata changed, then update
   411  			if workloadInfo.User != workload.User {
   412  				// reset users
   413  				workload.User = workloadInfo.User
   414  
   415  				if err := c.store.UpdateWorkload(ctx, workload); err != nil {
   416  					return err
   417  				}
   418  				logger.Infof(ctx, "workload %s metadata updated", workload.ID)
   419  			}
   420  
   421  			msg.WorkloadID = workload.ID
   422  			msg.WorkloadName = workload.Name
   423  			msg.Podname = workload.Podname
   424  			msg.Nodename = workload.Nodename
   425  			return nil
   426  		},
   427  
   428  		// remove workload
   429  		func(ctx context.Context, _ bool) error {
   430  			logger.Infof(ctx, "failed to deploy workload %s, rollback", workload.ID)
   431  			if workload.ID == "" {
   432  				return nil
   433  			}
   434  
   435  			if err := c.store.RemoveWorkload(ctx, workload); err != nil {
   436  				logger.Errorf(ctx, err, "failed to remove workload %s", workload.ID)
   437  			}
   438  
   439  			return workload.Remove(ctx, true)
   440  		},
   441  		c.config.GlobalTimeout,
   442  	)
   443  }
   444  
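        // doMakeWorkloadOptions assembles the engine create options for the workload with sequence number no, from the deploy options, the allocated engine params in msg, and the target node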
   445  func (c *Calcium) doMakeWorkloadOptions(ctx context.Context, no int, msg *types.CreateWorkloadMessage, opts *types.DeployOptions, node *types.Node) *enginetypes.VirtualizationCreateOptions {
   446  	createOpts := &enginetypes.VirtualizationCreateOptions{}
   447  	// general
   448  	createOpts.EngineParams = msg.EngineParams
   449  	createOpts.RawArgs = opts.RawArgs
   450  	createOpts.Lambda = opts.Lambda
   451  	createOpts.User = opts.User
   452  	createOpts.DNS = opts.DNS
   453  	createOpts.Image = opts.Image
   454  	createOpts.Stdin = opts.OpenStdin
   455  	createOpts.Hosts = opts.ExtraHosts
   456  	createOpts.Debug = opts.Debug
   457  	createOpts.Networks = opts.Networks
   458  
   459  	// entry
   460  	entry := opts.Entrypoint
   461  	createOpts.WorkingDir = entry.Dir
   462  	createOpts.Privileged = entry.Privileged
   463  	createOpts.Sysctl = entry.Sysctls
   464  	createOpts.Publish = entry.Publish
   465  	createOpts.Restart = entry.Restart
   466  	if entry.Log != nil {
   467  		createOpts.LogType = entry.Log.Type
   468  		createOpts.LogConfig = map[string]string{}
   469  		for k, v := range entry.Log.Config {
   470  			createOpts.LogConfig[k] = v
   471  		}
   472  	}
   473  	// name
   474  	suffix := utils.RandomString(6)
   475  	createOpts.Name = utils.MakeWorkloadName(opts.Name, opts.Entrypoint.Name, suffix)
   476  	msg.WorkloadName = createOpts.Name
   477  	// command (user is set above in the general options)
   478  	// extra args are dynamic
   479  	createOpts.Cmd = opts.Entrypoint.Commands
   480  	// env
   481  	env := append(opts.Env, fmt.Sprintf("APP_NAME=%s", opts.Name)) //nolint
   482  	env = append(env, fmt.Sprintf("ERU_POD=%s", opts.Podname))
   483  	env = append(env, fmt.Sprintf("ERU_NODE_NAME=%s", node.Name))
   484  	env = append(env, fmt.Sprintf("ERU_WORKLOAD_SEQ=%d", no))
   485  	createOpts.Env = env
   486  	// basic labels, with meta encoded into LabelMeta
   487  	createOpts.Labels = map[string]string{
   488  		cluster.ERUMark: "1",
   489  		cluster.LabelMeta: utils.EncodeMetaInLabel(ctx, &types.LabelMeta{
   490  			Publish:     opts.Entrypoint.Publish,
   491  			HealthCheck: entry.HealthCheck,
   492  		}),
   493  		cluster.LabelNodeName: node.Name,
   494  		cluster.LabelCoreID:   c.identifier,
   495  	}
   496  	for key, value := range opts.Labels {
   497  		createOpts.Labels[key] = value
   498  	}
   499  
   500  	return createOpts
   501  }