github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/cluster/calcium/wal.go (about) 1 package calcium 2 3 import ( 4 "context" 5 "encoding/json" 6 "sync" 7 "time" 8 9 "github.com/cockroachdb/errors" 10 "github.com/panjf2000/ants/v2" 11 "github.com/projecteru2/core/cluster" 12 "github.com/projecteru2/core/log" 13 "github.com/projecteru2/core/store" 14 "github.com/projecteru2/core/types" 15 "github.com/projecteru2/core/utils" 16 "github.com/projecteru2/core/wal" 17 ) 18 19 const ( 20 eventCreateLambda = "create-lambda" 21 eventWorkloadCreated = "create-workload" // created but yet to start 22 eventWorkloadResourceAllocated = "allocate-workload" // resource updated in node meta but yet to create all workloads 23 eventProcessingCreated = "create-processing" // processing created but yet to delete 24 ) 25 26 func enableWAL(config types.Config, calcium cluster.Cluster, store store.Store) (wal.WAL, error) { 27 hydro, err := wal.NewHydro(config.WALFile, config.WALOpenTimeout) 28 if err != nil { 29 return nil, err 30 } 31 32 hydro.Register(newCreateLambdaHandler(config, calcium, store)) 33 hydro.Register(newCreateWorkloadHandler(config, calcium, store)) 34 hydro.Register(newWorkloadResourceAllocatedHandler(config, calcium, store)) 35 hydro.Register(newProcessingCreatedHandler(config, calcium, store)) 36 return hydro, nil 37 } 38 39 // CreateLambdaHandler indicates event handler for creating lambda. 40 type CreateLambdaHandler struct { 41 typ string 42 config types.Config 43 calcium cluster.Cluster 44 store store.Store 45 } 46 47 func newCreateLambdaHandler(config types.Config, calcium cluster.Cluster, store store.Store) *CreateLambdaHandler { 48 return &CreateLambdaHandler{ 49 typ: eventCreateLambda, 50 config: config, 51 calcium: calcium, 52 store: store, 53 } 54 } 55 56 // Event . 57 func (h *CreateLambdaHandler) Typ() string { 58 return h.typ 59 } 60 61 // Check . 62 func (h *CreateLambdaHandler) Check(context.Context, any) (bool, error) { 63 return true, nil 64 } 65 66 // Encode . 67 func (h *CreateLambdaHandler) Encode(raw any) ([]byte, error) { 68 workloadID, ok := raw.(string) 69 if !ok { 70 return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 71 } 72 return []byte(workloadID), nil 73 } 74 75 // Decode . 76 func (h *CreateLambdaHandler) Decode(bs []byte) (any, error) { 77 return string(bs), nil 78 } 79 80 // Handle . 81 func (h *CreateLambdaHandler) Handle(ctx context.Context, raw any) error { 82 workloadID, ok := raw.(string) 83 if !ok { 84 return errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 85 } 86 87 logger := log.WithFunc("wal.CreateLambdaHandler.Handle").WithField("ID", workloadID) 88 go func() { 89 workload, err := h.calcium.GetWorkload(ctx, workloadID) 90 if err != nil { 91 logger.Error(ctx, err, "Get workload failed") 92 return 93 } 94 95 r, err := workload.Engine.VirtualizationWait(ctx, workloadID, "") 96 if err != nil { 97 logger.Error(ctx, err, "Wait failed") 98 return 99 } 100 if r.Code != 0 { 101 logger.Errorf(ctx, nil, "Run failed: %s", r.Message) 102 } 103 104 if err := h.calcium.RemoveWorkloadSync(ctx, []string{workloadID}); err != nil { 105 logger.Error(ctx, err, "Remove failed") 106 } 107 logger.Infof(ctx, "waited and removed") 108 }() 109 110 return nil 111 } 112 113 // CreateWorkloadHandler indicates event handler for creating workload. 114 type CreateWorkloadHandler struct { 115 typ string 116 config types.Config 117 calcium cluster.Cluster 118 store store.Store 119 } 120 121 func newCreateWorkloadHandler(config types.Config, calcium cluster.Cluster, store store.Store) *CreateWorkloadHandler { 122 return &CreateWorkloadHandler{ 123 typ: eventWorkloadCreated, 124 config: config, 125 calcium: calcium, 126 store: store, 127 } 128 } 129 130 // Event . 131 func (h *CreateWorkloadHandler) Typ() string { 132 return h.typ 133 } 134 135 // Check . 136 func (h *CreateWorkloadHandler) Check(_ context.Context, raw any) (handle bool, err error) { 137 _, ok := raw.(*types.Workload) 138 if !ok { 139 return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 140 } 141 return true, nil 142 } 143 144 // Encode . 145 func (h *CreateWorkloadHandler) Encode(raw any) ([]byte, error) { 146 wrk, ok := raw.(*types.Workload) 147 if !ok { 148 return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 149 } 150 return json.Marshal(wrk) 151 } 152 153 // Decode . 154 func (h *CreateWorkloadHandler) Decode(bs []byte) (any, error) { 155 wrk := &types.Workload{} 156 err := json.Unmarshal(bs, wrk) 157 return wrk, err 158 } 159 160 // Handle will remove instance, remove meta, restore resource 161 func (h *CreateWorkloadHandler) Handle(ctx context.Context, raw any) (err error) { 162 wrk, _ := raw.(*types.Workload) 163 logger := log.WithFunc("wal.CreateWorkloadHandler.Handle").WithField("ID", wrk.ID).WithField("node", wrk.Nodename) 164 165 ctx, cancel := getReplayContext(ctx) 166 defer cancel() 167 168 if _, err = h.calcium.GetWorkload(ctx, wrk.ID); err == nil { 169 return h.calcium.RemoveWorkloadSync(ctx, []string{wrk.ID}) 170 } 171 172 // workload meta doesn't exist 173 node, err := h.calcium.GetNode(ctx, wrk.Nodename) 174 if err != nil { 175 logger.Error(ctx, err) 176 return err 177 } 178 if err = node.Engine.VirtualizationRemove(ctx, wrk.ID, true, true); err != nil && !errors.Is(err, types.ErrWorkloadNotExists) { 179 logger.Error(ctx, err) 180 return err 181 } 182 183 logger.Infof(ctx, "workload removed") 184 return nil 185 } 186 187 // WorkloadResourceAllocatedHandler . 188 type WorkloadResourceAllocatedHandler struct { 189 typ string 190 config types.Config 191 calcium cluster.Cluster 192 store store.Store 193 pool *ants.PoolWithFunc 194 } 195 196 func newWorkloadResourceAllocatedHandler(config types.Config, calcium cluster.Cluster, store store.Store) *WorkloadResourceAllocatedHandler { 197 pool, _ := utils.NewPool(config.MaxConcurrency) 198 return &WorkloadResourceAllocatedHandler{ 199 typ: eventWorkloadResourceAllocated, 200 config: config, 201 calcium: calcium, 202 store: store, 203 pool: pool, 204 } 205 } 206 207 // Event . 208 func (h *WorkloadResourceAllocatedHandler) Typ() string { 209 return h.typ 210 } 211 212 // Check . 213 func (h *WorkloadResourceAllocatedHandler) Check(_ context.Context, raw any) (bool, error) { 214 if _, ok := raw.([]*types.Node); !ok { 215 return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 216 } 217 return true, nil 218 } 219 220 // Encode . 221 func (h *WorkloadResourceAllocatedHandler) Encode(raw any) ([]byte, error) { 222 nodes, ok := raw.([]*types.Node) 223 if !ok { 224 return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 225 } 226 return json.Marshal(nodes) 227 } 228 229 // Decode . 230 func (h *WorkloadResourceAllocatedHandler) Decode(bytes []byte) (any, error) { 231 nodes := []*types.Node{} 232 return nodes, json.Unmarshal(bytes, &nodes) 233 } 234 235 // Handle . 236 func (h *WorkloadResourceAllocatedHandler) Handle(ctx context.Context, raw any) (err error) { 237 nodes, _ := raw.([]*types.Node) 238 logger := log.WithFunc("wal.WorkloadResourceAllocatedHandler.Handle").WithField("event", eventWorkloadResourceAllocated) 239 240 ctx, cancel := getReplayContext(ctx) 241 defer cancel() 242 243 wg := &sync.WaitGroup{} 244 wg.Add(len(nodes)) 245 defer wg.Wait() 246 for _, node := range nodes { 247 node := node 248 _ = h.pool.Invoke(func() { 249 defer wg.Done() 250 if _, err = h.calcium.NodeResource(ctx, node.Name, true); err != nil { 251 logger.Errorf(ctx, err, "failed to fix node resource: %s", node.Name) 252 return 253 } 254 logger.Infof(ctx, "fixed node resource: %s", node.Name) 255 }) 256 } 257 258 return nil 259 } 260 261 // ProcessingCreatedHandler . 262 type ProcessingCreatedHandler struct { 263 typ string 264 config types.Config 265 calcium cluster.Cluster 266 store store.Store 267 } 268 269 func newProcessingCreatedHandler(config types.Config, calcium cluster.Cluster, store store.Store) *ProcessingCreatedHandler { 270 return &ProcessingCreatedHandler{ 271 typ: eventProcessingCreated, 272 config: config, 273 calcium: calcium, 274 store: store, 275 } 276 } 277 278 // Event . 279 func (h *ProcessingCreatedHandler) Typ() string { 280 return h.typ 281 } 282 283 // Check . 284 func (h ProcessingCreatedHandler) Check(_ context.Context, raw any) (bool, error) { 285 if _, ok := raw.(*types.Processing); !ok { 286 return false, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 287 } 288 return true, nil 289 } 290 291 // Encode . 292 func (h *ProcessingCreatedHandler) Encode(raw any) ([]byte, error) { 293 processing, ok := raw.(*types.Processing) 294 if !ok { 295 return nil, errors.Wrapf(types.ErrInvalidWALDataType, "%+v", raw) 296 } 297 return json.Marshal(processing) 298 } 299 300 // Decode . 301 func (h *ProcessingCreatedHandler) Decode(bs []byte) (any, error) { 302 processing := &types.Processing{} 303 return processing, json.Unmarshal(bs, processing) 304 } 305 306 // Handle . 307 func (h *ProcessingCreatedHandler) Handle(ctx context.Context, raw any) (err error) { 308 processing, _ := raw.(*types.Processing) 309 logger := log.WithFunc("wal.ProcessingCreatedHandler.Handle").WithField("event", eventProcessingCreated).WithField("ident", processing.Ident) 310 311 ctx, cancel := getReplayContext(ctx) 312 defer cancel() 313 314 if err = h.store.DeleteProcessing(ctx, processing); err != nil { 315 logger.Error(ctx, err) 316 return err 317 } 318 logger.Infof(ctx, "obsolete processing deleted") 319 return 320 } 321 322 func getReplayContext(ctx context.Context) (context.Context, context.CancelFunc) { 323 return context.WithTimeout(ctx, time.Second*32) // TODO why 32? 324 }