github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/wal.go (about)

     1  package calcium
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/cockroachdb/errors"
    10  	"github.com/panjf2000/ants/v2"
    11  	"github.com/projecteru2/core/cluster"
    12  	"github.com/projecteru2/core/log"
    13  	"github.com/projecteru2/core/store"
    14  	"github.com/projecteru2/core/types"
    15  	"github.com/projecteru2/core/utils"
    16  	"github.com/projecteru2/core/wal"
    17  )
    18  
    19  const (
    20  	eventCreateLambda              = "create-lambda"
    21  	eventWorkloadCreated           = "create-workload"   // created but yet to start
    22  	eventWorkloadResourceAllocated = "allocate-workload" // resource updated in node meta but yet to create all workloads
    23  	eventProcessingCreated         = "create-processing" // processing created but yet to delete
    24  )
    25  
    26  func enableWAL(config types.Config, calcium cluster.Cluster, store store.Store) (wal.WAL, error) {
    27  	hydro, err := wal.NewHydro(config.WALFile, config.WALOpenTimeout)
    28  	if err != nil {
    29  		return nil, err
    30  	}
    31  
    32  	hydro.Register(newCreateLambdaHandler(config, calcium, store))
    33  	hydro.Register(newCreateWorkloadHandler(config, calcium, store))
    34  	hydro.Register(newWorkloadResourceAllocatedHandler(config, calcium, store))
    35  	hydro.Register(newProcessingCreatedHandler(config, calcium, store))
    36  	return hydro, nil
    37  }
    38  
// CreateLambdaHandler indicates event handler for creating lambda.
//
// Its event payload is the workload ID as a plain string (see Encode/Decode).
type CreateLambdaHandler struct {
	// typ is the event type returned by Typ.
	typ     string
	// config is stored by the constructor; no method visible in this file reads it.
	config  types.Config
	// calcium is used by Handle to look up and remove the workload.
	calcium cluster.Cluster
	// store is stored by the constructor; no method visible in this file reads it.
	store   store.Store
}
    46  
    47  func newCreateLambdaHandler(config types.Config, calcium cluster.Cluster, store store.Store) *CreateLambdaHandler {
    48  	return &CreateLambdaHandler{
    49  		typ:     eventCreateLambda,
    50  		config:  config,
    51  		calcium: calcium,
    52  		store:   store,
    53  	}
    54  }
    55  
// Typ returns the handler's event type, which newCreateLambdaHandler sets to
// eventCreateLambda.
func (h *CreateLambdaHandler) Typ() string {
	return h.typ
}
    60  
// Check always reports true with a nil error; both arguments are ignored.
func (h *CreateLambdaHandler) Check(context.Context, any) (bool, error) {
	return true, nil
}
    65  
    66  // Encode .
    67  func (h *CreateLambdaHandler) Encode(raw any) ([]byte, error) {
    68  	workloadID, ok := raw.(string)
    69  	if !ok {
    70  		return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
    71  	}
    72  	return []byte(workloadID), nil
    73  }
    74  
    75  // Decode .
    76  func (h *CreateLambdaHandler) Decode(bs []byte) (any, error) {
    77  	return string(bs), nil
    78  }
    79  
    80  // Handle .
    81  func (h *CreateLambdaHandler) Handle(ctx context.Context, raw any) error {
    82  	workloadID, ok := raw.(string)
    83  	if !ok {
    84  		return errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
    85  	}
    86  
    87  	logger := log.WithFunc("wal.CreateLambdaHandler.Handle").WithField("ID", workloadID)
    88  	go func() {
    89  		workload, err := h.calcium.GetWorkload(ctx, workloadID)
    90  		if err != nil {
    91  			logger.Error(ctx, err, "Get workload failed")
    92  			return
    93  		}
    94  
    95  		r, err := workload.Engine.VirtualizationWait(ctx, workloadID, "")
    96  		if err != nil {
    97  			logger.Error(ctx, err, "Wait failed")
    98  			return
    99  		}
   100  		if r.Code != 0 {
   101  			logger.Errorf(ctx, nil, "Run failed: %s", r.Message)
   102  		}
   103  
   104  		if err := h.calcium.RemoveWorkloadSync(ctx, []string{workloadID}); err != nil {
   105  			logger.Error(ctx, err, "Remove failed")
   106  		}
   107  		logger.Infof(ctx, "waited and removed")
   108  	}()
   109  
   110  	return nil
   111  }
   112  
// CreateWorkloadHandler indicates event handler for creating workload.
//
// Its event payload is a *types.Workload, stored as JSON (see Encode/Decode).
type CreateWorkloadHandler struct {
	// typ is the event type returned by Typ.
	typ     string
	// config is stored by the constructor; no method visible in this file reads it.
	config  types.Config
	// calcium is used by Handle to look up the workload and node and to remove the workload.
	calcium cluster.Cluster
	// store is stored by the constructor; no method visible in this file reads it.
	store   store.Store
}
   120  
   121  func newCreateWorkloadHandler(config types.Config, calcium cluster.Cluster, store store.Store) *CreateWorkloadHandler {
   122  	return &CreateWorkloadHandler{
   123  		typ:     eventWorkloadCreated,
   124  		config:  config,
   125  		calcium: calcium,
   126  		store:   store,
   127  	}
   128  }
   129  
// Typ returns the handler's event type, which newCreateWorkloadHandler sets to
// eventWorkloadCreated.
func (h *CreateWorkloadHandler) Typ() string {
	return h.typ
}
   134  
   135  // Check .
   136  func (h *CreateWorkloadHandler) Check(_ context.Context, raw any) (handle bool, err error) {
   137  	_, ok := raw.(*types.Workload)
   138  	if !ok {
   139  		return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   140  	}
   141  	return true, nil
   142  }
   143  
   144  // Encode .
   145  func (h *CreateWorkloadHandler) Encode(raw any) ([]byte, error) {
   146  	wrk, ok := raw.(*types.Workload)
   147  	if !ok {
   148  		return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   149  	}
   150  	return json.Marshal(wrk)
   151  }
   152  
   153  // Decode .
   154  func (h *CreateWorkloadHandler) Decode(bs []byte) (any, error) {
   155  	wrk := &types.Workload{}
   156  	err := json.Unmarshal(bs, wrk)
   157  	return wrk, err
   158  }
   159  
   160  // Handle will remove instance, remove meta, restore resource
   161  func (h *CreateWorkloadHandler) Handle(ctx context.Context, raw any) (err error) {
   162  	wrk, _ := raw.(*types.Workload)
   163  	logger := log.WithFunc("wal.CreateWorkloadHandler.Handle").WithField("ID", wrk.ID).WithField("node", wrk.Nodename)
   164  
   165  	ctx, cancel := getReplayContext(ctx)
   166  	defer cancel()
   167  
   168  	if _, err = h.calcium.GetWorkload(ctx, wrk.ID); err == nil {
   169  		return h.calcium.RemoveWorkloadSync(ctx, []string{wrk.ID})
   170  	}
   171  
   172  	// workload meta doesn't exist
   173  	node, err := h.calcium.GetNode(ctx, wrk.Nodename)
   174  	if err != nil {
   175  		logger.Error(ctx, err)
   176  		return err
   177  	}
   178  	if err = node.Engine.VirtualizationRemove(ctx, wrk.ID, true, true); err != nil && !errors.Is(err, types.ErrWorkloadNotExists) {
   179  		logger.Error(ctx, err)
   180  		return err
   181  	}
   182  
   183  	logger.Infof(ctx, "workload removed")
   184  	return nil
   185  }
   186  
// WorkloadResourceAllocatedHandler handles eventWorkloadResourceAllocated
// events, whose payload is a []*types.Node stored as JSON (see Encode/Decode).
type WorkloadResourceAllocatedHandler struct {
	// typ is the event type returned by Typ.
	typ     string
	// config is stored by the constructor; no method visible in this file reads it.
	config  types.Config
	// calcium is used by Handle to call NodeResource for each node.
	calcium cluster.Cluster
	// store is stored by the constructor; no method visible in this file reads it.
	store   store.Store
	// pool is the worker pool Handle submits one task per node to.
	pool    *ants.PoolWithFunc
}
   195  
   196  func newWorkloadResourceAllocatedHandler(config types.Config, calcium cluster.Cluster, store store.Store) *WorkloadResourceAllocatedHandler {
   197  	pool, _ := utils.NewPool(config.MaxConcurrency)
   198  	return &WorkloadResourceAllocatedHandler{
   199  		typ:     eventWorkloadResourceAllocated,
   200  		config:  config,
   201  		calcium: calcium,
   202  		store:   store,
   203  		pool:    pool,
   204  	}
   205  }
   206  
// Typ returns the handler's event type, which
// newWorkloadResourceAllocatedHandler sets to eventWorkloadResourceAllocated.
func (h *WorkloadResourceAllocatedHandler) Typ() string {
	return h.typ
}
   211  
   212  // Check .
   213  func (h *WorkloadResourceAllocatedHandler) Check(_ context.Context, raw any) (bool, error) {
   214  	if _, ok := raw.([]*types.Node); !ok {
   215  		return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   216  	}
   217  	return true, nil
   218  }
   219  
   220  // Encode .
   221  func (h *WorkloadResourceAllocatedHandler) Encode(raw any) ([]byte, error) {
   222  	nodes, ok := raw.([]*types.Node)
   223  	if !ok {
   224  		return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   225  	}
   226  	return json.Marshal(nodes)
   227  }
   228  
   229  // Decode .
   230  func (h *WorkloadResourceAllocatedHandler) Decode(bytes []byte) (any, error) {
   231  	nodes := []*types.Node{}
   232  	return nodes, json.Unmarshal(bytes, &nodes)
   233  }
   234  
   235  // Handle .
   236  func (h *WorkloadResourceAllocatedHandler) Handle(ctx context.Context, raw any) (err error) {
   237  	nodes, _ := raw.([]*types.Node)
   238  	logger := log.WithFunc("wal.WorkloadResourceAllocatedHandler.Handle").WithField("event", eventWorkloadResourceAllocated)
   239  
   240  	ctx, cancel := getReplayContext(ctx)
   241  	defer cancel()
   242  
   243  	wg := &sync.WaitGroup{}
   244  	wg.Add(len(nodes))
   245  	defer wg.Wait()
   246  	for _, node := range nodes {
   247  		node := node
   248  		_ = h.pool.Invoke(func() {
   249  			defer wg.Done()
   250  			if _, err = h.calcium.NodeResource(ctx, node.Name, true); err != nil {
   251  				logger.Errorf(ctx, err, "failed to fix node resource: %s", node.Name)
   252  				return
   253  			}
   254  			logger.Infof(ctx, "fixed node resource: %s", node.Name)
   255  		})
   256  	}
   257  
   258  	return nil
   259  }
   260  
// ProcessingCreatedHandler handles eventProcessingCreated events, whose
// payload is a *types.Processing stored as JSON (see Encode/Decode).
type ProcessingCreatedHandler struct {
	// typ is the event type returned by Typ.
	typ     string
	// config is stored by the constructor; no method visible in this file reads it.
	config  types.Config
	// calcium is stored by the constructor; no method visible in this file reads it.
	calcium cluster.Cluster
	// store is used by Handle to delete the processing record.
	store   store.Store
}
   268  
   269  func newProcessingCreatedHandler(config types.Config, calcium cluster.Cluster, store store.Store) *ProcessingCreatedHandler {
   270  	return &ProcessingCreatedHandler{
   271  		typ:     eventProcessingCreated,
   272  		config:  config,
   273  		calcium: calcium,
   274  		store:   store,
   275  	}
   276  }
   277  
// Typ returns the handler's event type, which newProcessingCreatedHandler sets
// to eventProcessingCreated.
func (h *ProcessingCreatedHandler) Typ() string {
	return h.typ
}
   282  
   283  // Check .
   284  func (h ProcessingCreatedHandler) Check(_ context.Context, raw any) (bool, error) {
   285  	if _, ok := raw.(*types.Processing); !ok {
   286  		return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   287  	}
   288  	return true, nil
   289  }
   290  
   291  // Encode .
   292  func (h *ProcessingCreatedHandler) Encode(raw any) ([]byte, error) {
   293  	processing, ok := raw.(*types.Processing)
   294  	if !ok {
   295  		return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw)
   296  	}
   297  	return json.Marshal(processing)
   298  }
   299  
   300  // Decode .
   301  func (h *ProcessingCreatedHandler) Decode(bs []byte) (any, error) {
   302  	processing := &types.Processing{}
   303  	return processing, json.Unmarshal(bs, processing)
   304  }
   305  
   306  // Handle .
   307  func (h *ProcessingCreatedHandler) Handle(ctx context.Context, raw any) (err error) {
   308  	processing, _ := raw.(*types.Processing)
   309  	logger := log.WithFunc("wal.ProcessingCreatedHandler.Handle").WithField("event", eventProcessingCreated).WithField("ident", processing.Ident)
   310  
   311  	ctx, cancel := getReplayContext(ctx)
   312  	defer cancel()
   313  
   314  	if err = h.store.DeleteProcessing(ctx, processing); err != nil {
   315  		logger.Error(ctx, err)
   316  		return err
   317  	}
   318  	logger.Infof(ctx, "obsolete processing deleted")
   319  	return
   320  }
   321  
   322  func getReplayContext(ctx context.Context) (context.Context, context.CancelFunc) {
   323  	return context.WithTimeout(ctx, time.Second*32) // TODO why 32?
   324  }