github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/remove.go (about)

     1  package calcium
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"sync"
     7  
     8  	"github.com/projecteru2/core/log"
     9  	"github.com/projecteru2/core/resource/plugins"
    10  	resourcetypes "github.com/projecteru2/core/resource/types"
    11  	"github.com/projecteru2/core/types"
    12  	"github.com/projecteru2/core/utils"
    13  )
    14  
    15  // RemoveWorkload remove workloads
    16  // returns a channel that contains removing responses
    17  func (c *Calcium) RemoveWorkload(ctx context.Context, IDs []string, force bool) (chan *types.RemoveWorkloadMessage, error) {
    18  	logger := log.WithFunc("calcium.RemoveWorkload").WithField("IDs", IDs).WithField("force", force)
    19  
    20  	nodeWorkloadGroup, err := c.groupWorkloadsByNode(ctx, IDs)
    21  	if err != nil {
    22  		logger.Error(ctx, err, "failed to group workloads by node")
    23  		return nil, err
    24  	}
    25  
    26  	ch := make(chan *types.RemoveWorkloadMessage)
    27  	_ = c.pool.Invoke(func() {
    28  		defer close(ch)
    29  		wg := sync.WaitGroup{}
    30  		defer wg.Wait()
    31  		for nodename, workloadIDs := range nodeWorkloadGroup {
    32  			wg.Add(1)
    33  			_ = c.pool.Invoke(func(nodename string, workloadIDs []string) func() {
    34  				return func() {
    35  					defer wg.Done()
    36  					if err := c.withNodePodLocked(ctx, nodename, func(ctx context.Context, node *types.Node) error {
    37  						for _, workloadID := range workloadIDs {
    38  							ret := &types.RemoveWorkloadMessage{WorkloadID: workloadID, Success: true, Hook: []*bytes.Buffer{}}
    39  							if err := c.withWorkloadLocked(ctx, workloadID, false, func(ctx context.Context, workload *types.Workload) error {
    40  								return utils.Txn(
    41  									ctx,
    42  									// if
    43  									func(ctx context.Context) error {
    44  										_, _, err = c.rmgr.SetNodeResourceUsage(ctx, node.Name, nil, nil, []resourcetypes.Resources{workload.Resources}, true, plugins.Decr)
    45  										return err
    46  									},
    47  									// then
    48  									func(ctx context.Context) (err error) {
    49  										if err = c.doRemoveWorkload(ctx, workload, force); err == nil {
    50  											logger.Infof(ctx, "Workload %s removed", workload.ID)
    51  										}
    52  										return err
    53  									},
    54  									// rollback
    55  									func(ctx context.Context, failedByCond bool) error {
    56  										if failedByCond {
    57  											return nil
    58  										}
    59  										_, _, err = c.rmgr.SetNodeResourceUsage(ctx, node.Name, nil, nil, []resourcetypes.Resources{workload.Resources}, true, plugins.Incr)
    60  										return err
    61  									},
    62  									c.config.GlobalTimeout,
    63  								)
    64  							}); err != nil {
    65  								logger.WithField("id", workloadID).Error(ctx, err, "failed to lock workload")
    66  								ret.Hook = append(ret.Hook, bytes.NewBufferString(err.Error()))
    67  								ret.Success = false
    68  							}
    69  							ch <- ret
    70  						}
    71  						_ = c.pool.Invoke(func() { c.RemapResourceAndLog(ctx, logger, node) })
    72  						return nil
    73  					}); err != nil {
    74  						logger.WithField("node", nodename).Error(ctx, err, "failed to lock node")
    75  						ch <- &types.RemoveWorkloadMessage{Success: false}
    76  					}
    77  				}
    78  			}(nodename, workloadIDs))
    79  		}
    80  	})
    81  	return ch, nil
    82  }
    83  
    84  // RemoveWorkloadSync .
    85  func (c *Calcium) RemoveWorkloadSync(ctx context.Context, IDs []string) error {
    86  	return c.doRemoveWorkloadSync(ctx, IDs)
    87  }
    88  
    89  // semantic: instance removed on err == nil, instance remained on err != nil
    90  func (c *Calcium) doRemoveWorkload(ctx context.Context, workload *types.Workload, force bool) error {
    91  	return utils.Txn(
    92  		ctx,
    93  		// if
    94  		func(ctx context.Context) error {
    95  			return c.store.RemoveWorkload(ctx, workload)
    96  		},
    97  		// then
    98  		func(ctx context.Context) error {
    99  			return workload.Remove(ctx, force)
   100  		},
   101  		// rollback
   102  		func(ctx context.Context, failedByCond bool) error {
   103  			if failedByCond {
   104  				return nil
   105  			}
   106  			return c.store.AddWorkload(ctx, workload, nil)
   107  		},
   108  		c.config.GlobalTimeout,
   109  	)
   110  }
   111  
   112  // 同步地删除容器, 在某些需要等待的场合异常有用!
   113  func (c *Calcium) doRemoveWorkloadSync(ctx context.Context, IDs []string) error {
   114  	ch, err := c.RemoveWorkload(ctx, IDs, true)
   115  	if err != nil {
   116  		return err
   117  	}
   118  
   119  	for m := range ch {
   120  		// TODO deal with failed
   121  		log.WithFunc("calcium.doRemoveWorkloadSync").Debugf(ctx, "Removed %s", m.WorkloadID)
   122  	}
   123  	return nil
   124  }
   125  
   126  func (c *Calcium) groupWorkloadsByNode(ctx context.Context, IDs []string) (map[string][]string, error) {
   127  	workloads, err := c.store.GetWorkloads(ctx, IDs)
   128  	if err != nil {
   129  		return nil, err
   130  	}
   131  	nodeWorkloadGroup := map[string][]string{}
   132  	for _, workload := range workloads {
   133  		nodeWorkloadGroup[workload.Nodename] = append(nodeWorkloadGroup[workload.Nodename], workload.ID)
   134  	}
   135  	return nodeWorkloadGroup, nil
   136  }