github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/resource/cobalt/node.go (about) 1 package cobalt 2 3 import ( 4 "context" 5 "math" 6 7 "github.com/cockroachdb/errors" 8 enginetypes "github.com/projecteru2/core/engine/types" 9 plugintypes "github.com/projecteru2/core/resource/plugins/types" 10 resourcetypes "github.com/projecteru2/core/resource/types" 11 "github.com/sanity-io/litter" 12 "golang.org/x/exp/slices" 13 14 "github.com/projecteru2/core/log" 15 "github.com/projecteru2/core/resource/plugins" 16 "github.com/projecteru2/core/types" 17 "github.com/projecteru2/core/utils" 18 ) 19 20 // AddNode . 21 func (m Manager) AddNode(ctx context.Context, nodename string, opts resourcetypes.Resources, nodeInfo *enginetypes.Info) (resourcetypes.Resources, error) { 22 logger := log.WithFunc("resource.cobalt.AddNode").WithField("node", nodename) 23 res := resourcetypes.Resources{} 24 rollbackPlugins := []plugins.Plugin{} 25 26 return res, utils.PCR(ctx, 27 // prepare: do nothing 28 func(_ context.Context) error { 29 return nil 30 }, 31 // commit: call plugins to add the node 32 func(ctx context.Context) error { 33 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.AddNodeResponse, error) { 34 r := opts[plugin.Name()] 35 // Even when r==nil, we still need to run plugin, 36 // The reasons are as follows 37 // 1. plugin can fetch config from engine info 38 // 2. plugin need a chance to create empty config on ETCD. 39 logger.WithField("plugin", plugin.Name()).WithField("node", nodename).Infof(ctx, "%v", litter.Sdump(r)) 40 resp, err := plugin.AddNode(ctx, nodename, r, nodeInfo) 41 if err != nil { 42 logger.Errorf(ctx, err, "node %+v plugin %+v failed to add node, req: %+v", nodename, plugin.Name(), litter.Sdump(r)) 43 } 44 return resp, err 45 }) 46 47 if err != nil { 48 for plugin := range resps { 49 rollbackPlugins = append(rollbackPlugins, plugin) 50 } 51 return err 52 } 53 54 for plugin, resp := range resps { 55 res[plugin.Name()] = resp.Capacity 56 } 57 return nil 58 }, 59 // rollback: remove node 60 func(ctx context.Context) error { 61 _, err := call(ctx, rollbackPlugins, func(plugin plugins.Plugin) (*plugintypes.RemoveNodeResponse, error) { 62 resp, err := plugin.RemoveNode(ctx, nodename) 63 if err != nil { 64 logger.Errorf(ctx, err, "node %+v plugin %+v failed to rollback", nodename, plugin.Name()) 65 } 66 return resp, err 67 }) 68 69 if err != nil { 70 logger.Error(ctx, err, "failed to rollback") 71 } 72 return err 73 }, 74 m.config.GlobalTimeout, 75 ) 76 } 77 78 // RemoveNode . 79 func (m Manager) RemoveNode(ctx context.Context, nodename string) error { 80 logger := log.WithFunc("resource.cobalt.RemoveNode").WithField("node", nodename) 81 var nodeCapacity resourcetypes.Resources 82 var nodeUsage resourcetypes.Resources 83 rollbackPlugins := []plugins.Plugin{} 84 85 return utils.PCR(ctx, 86 // prepare: get node resource 87 func(ctx context.Context) error { 88 var err error 89 nodeCapacity, nodeUsage, _, err = m.GetNodeResourceInfo(ctx, nodename, nil, false) 90 if err != nil { 91 logger.Error(ctx, err, "failed to get node resource") 92 return err 93 } 94 return nil 95 }, 96 // commit: remove node 97 func(ctx context.Context) error { 98 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.RemoveNodeResponse, error) { 99 resp, err := plugin.RemoveNode(ctx, nodename) 100 if err != nil { 101 logger.Errorf(ctx, err, "plugin %+v failed to remove node", plugin.Name()) 102 } 103 return resp, err 104 }) 105 106 if err != nil { 107 for plugin := range resps { 108 rollbackPlugins = append(rollbackPlugins, plugin) 109 } 110 111 logger.Error(ctx, err, "failed to remove node") 112 return err 113 } 114 return nil 115 }, 116 // rollback: add node 117 func(ctx context.Context) error { 118 _, err := call(ctx, rollbackPlugins, func(plugin plugins.Plugin) (*plugintypes.SetNodeResourceInfoResponse, error) { 119 capacity := nodeCapacity[plugin.Name()] 120 usage := nodeUsage[plugin.Name()] 121 122 resp, err := plugin.SetNodeResourceInfo(ctx, nodename, capacity, usage) 123 if err != nil { 124 logger.Errorf(ctx, err, "plugin %+v node %+v failed to rollback", plugin.Name(), nodename) 125 } 126 return resp, err 127 }) 128 129 if err != nil { 130 logger.Error(ctx, err, "failed to rollback") 131 } 132 return err 133 }, 134 m.config.GlobalTimeout, 135 ) 136 } 137 138 // GetMostIdleNode . 139 func (m Manager) GetMostIdleNode(ctx context.Context, nodenames []string) (string, error) { 140 logger := log.WithFunc("resource.cobalt.GetMostIdleNode") 141 if len(nodenames) == 0 { 142 return "", errors.Wrap(types.ErrGetMostIdleNodeFailed, "empty node names") 143 } 144 145 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.GetMostIdleNodeResponse, error) { 146 resp, err := plugin.GetMostIdleNode(ctx, nodenames) 147 if err != nil { 148 logger.Errorf(ctx, err, "plugin %+v failed to get the most idle node of %+v", plugin.Name(), nodenames) 149 } 150 return resp, err 151 }) 152 if err != nil { 153 logger.Errorf(ctx, err, "failed to get the most idle node of %+v", nodenames) 154 return "", err 155 } 156 157 var mostIdleNode *plugintypes.GetMostIdleNodeResponse 158 for _, resp := range resps { 159 if (mostIdleNode == nil || resp.Priority > mostIdleNode.Priority) && len(resp.Nodename) > 0 { 160 mostIdleNode = resp 161 } 162 } 163 164 if mostIdleNode == nil { 165 return "", types.ErrGetMostIdleNodeFailed 166 } 167 return mostIdleNode.Nodename, nil 168 } 169 170 // GetNodeResourceInfo . 171 func (m Manager) GetNodeResourceInfo(ctx context.Context, nodename string, workloads []*types.Workload, fix bool) (resourcetypes.Resources, resourcetypes.Resources, []string, error) { 172 nodeCapacity := resourcetypes.Resources{} 173 nodeUsage := resourcetypes.Resources{} 174 resourceDiffs := []string{} 175 176 ps := m.plugins 177 if m.config.ResourcePlugin.Whitelist != nil { 178 ps = utils.Filter(ps, func(plugin plugins.Plugin) bool { 179 return slices.Contains(m.config.ResourcePlugin.Whitelist, plugin.Name()) 180 }) 181 } 182 183 resps, err := call(ctx, ps, func(plugin plugins.Plugin) (*plugintypes.GetNodeResourceInfoResponse, error) { 184 var resp *plugintypes.GetNodeResourceInfoResponse 185 var err error 186 187 wrks := []plugintypes.WorkloadResource{} 188 189 for _, wrk := range workloads { 190 r := wrk.Resources[plugin.Name()] 191 wrks = append(wrks, r) 192 } 193 194 if fix { 195 resp, err = plugin.FixNodeResource(ctx, nodename, wrks) 196 } else { 197 resp, err = plugin.GetNodeResourceInfo(ctx, nodename, wrks) 198 } 199 if err != nil { 200 log.WithFunc("resource.cobalt.GetNodeResourceInfo").WithField("node", nodename).Errorf(ctx, err, "plugin %+v failed to get node resource", plugin.Name()) 201 } 202 return resp, err 203 }) 204 205 if err != nil { 206 return nil, nil, nil, err 207 } 208 209 for plugin, resp := range resps { 210 nodeCapacity[plugin.Name()] = resp.Capacity 211 nodeUsage[plugin.Name()] = resp.Usage 212 resourceDiffs = append(resourceDiffs, resp.Diffs...) 213 } 214 215 return nodeCapacity, nodeUsage, resourceDiffs, nil 216 } 217 218 // SetNodeResourceUsage . 219 func (m Manager) SetNodeResourceUsage(ctx context.Context, nodename string, nodeResource resourcetypes.Resources, nodeResourceRequest resourcetypes.Resources, workloadsResource []resourcetypes.Resources, delta bool, incr bool) (resourcetypes.Resources, resourcetypes.Resources, error) { 220 logger := log.WithFunc("resource.cobalt.SetNodeResourceUsage").WithField("node", nodename) 221 wrksResource := map[string][]resourcetypes.RawParams{} 222 rollbackPlugins := []plugins.Plugin{} 223 before := resourcetypes.Resources{} 224 after := resourcetypes.Resources{} 225 226 return before, after, utils.PCR(ctx, 227 func(_ context.Context) error { 228 // prepare: covert []resourcetypes.Resources to map[plugin]resourcetypes.Resources 229 // [{"cpu-plugin": {"cpu": 1}}, {"cpu-plugin": {"cpu": 1}}] -> {"cpu-plugin": [{"cpu": 1}, {"cpu": 1}]} 230 for _, workloadResource := range workloadsResource { 231 for plugin, params := range workloadResource { 232 if _, ok := wrksResource[plugin]; !ok { 233 wrksResource[plugin] = []resourcetypes.RawParams{} 234 } 235 wrksResource[plugin] = append(wrksResource[plugin], params) 236 } 237 } 238 if nodeResourceRequest == nil { 239 nodeResourceRequest = resourcetypes.Resources{} 240 } 241 return nil 242 }, 243 // commit: call plugins to set node resource 244 func(ctx context.Context) error { 245 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.SetNodeResourceUsageResponse, error) { 246 return plugin.SetNodeResourceUsage(ctx, nodename, nodeResource[plugin.Name()], nodeResourceRequest[plugin.Name()], wrksResource[plugin.Name()], delta, incr) 247 }) 248 249 if err != nil { 250 for plugin, resp := range resps { 251 rollbackPlugins = append(rollbackPlugins, plugin) 252 before[plugin.Name()] = resp.Before 253 after[plugin.Name()] = resp.After 254 } 255 logger.Error(ctx, err, "failed to set node resource") 256 } 257 return err 258 }, 259 // rollback: set the rollback resource args in reverse 260 func(ctx context.Context) error { 261 _, err := call(ctx, rollbackPlugins, func(plugin plugins.Plugin) (*plugintypes.SetNodeResourceUsageResponse, error) { 262 resp, err := plugin.SetNodeResourceUsage(ctx, nodename, before[plugin.Name()], nil, nil, false, false) 263 if err != nil { 264 logger.Errorf(ctx, err, "node %+v plugin %+v failed to rollback node resource", nodename, plugin.Name()) 265 } 266 return resp, err 267 }) 268 return err 269 }, 270 m.config.GlobalTimeout, 271 ) 272 } 273 274 // GetNodesDeployCapacity returns available nodes which meet all the requirements 275 // the caller should require locks 276 // pure calculation 277 func (m Manager) GetNodesDeployCapacity(ctx context.Context, nodenames []string, opts resourcetypes.Resources) (map[string]*plugintypes.NodeDeployCapacity, int, error) { 278 logger := log.WithFunc("resource.cobalt.GetNodesDeployCapacity") 279 var resp map[string]*plugintypes.NodeDeployCapacity 280 281 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.GetNodesDeployCapacityResponse, error) { 282 resp, err := plugin.GetNodesDeployCapacity(ctx, nodenames, opts[plugin.Name()]) 283 if err != nil { 284 logger.Errorf(ctx, err, "plugin %+v failed to get available nodenames, request %+v", plugin.Name(), opts[plugin.Name()]) 285 } 286 return resp, err 287 }) 288 if err != nil { 289 return nil, 0, err 290 } 291 292 // get nodenames with all resource capacities > 0 293 for _, info := range resps { 294 resp = m.mergeCapacity(resp, info.NodeDeployCapacityMap) 295 } 296 total := 0 297 298 // weighted average 299 for _, info := range resp { 300 info.Rate /= info.Weight 301 info.Usage /= info.Weight 302 if info.Capacity == math.MaxInt64 { 303 total = math.MaxInt64 304 } else { 305 total += info.Capacity 306 } 307 } 308 309 return resp, total, nil 310 } 311 312 // SetNodeResourceCapacity updates node resource capacity 313 // receives resource options instead of resource args 314 func (m Manager) SetNodeResourceCapacity(ctx context.Context, nodename string, nodeResource resourcetypes.Resources, nodeResourceRequest resourcetypes.Resources, delta bool, incr bool) (resourcetypes.Resources, resourcetypes.Resources, error) { 315 logger := log.WithFunc("resource.cobalt.SetNodeResourceCapacity").WithField("node", nodename) 316 317 rollbackPlugins := []plugins.Plugin{} 318 before := resourcetypes.Resources{} 319 after := resourcetypes.Resources{} 320 321 return before, after, utils.PCR(ctx, 322 func(_ context.Context) error { 323 if nodeResourceRequest == nil { 324 nodeResourceRequest = resourcetypes.Resources{} 325 } 326 return nil 327 }, 328 // commit: call plugins to set node resource 329 func(ctx context.Context) error { 330 resps, err := call(ctx, m.plugins, func(plugin plugins.Plugin) (*plugintypes.SetNodeResourceCapacityResponse, error) { 331 if nodeResource[plugin.Name()] == nil && nodeResourceRequest[plugin.Name()] == nil { 332 return nil, nil 333 } 334 resp, err := plugin.SetNodeResourceCapacity(ctx, nodename, nodeResource[plugin.Name()], nodeResourceRequest[plugin.Name()], delta, incr) 335 if err != nil { 336 logger.Errorf(ctx, err, "plugin %+v failed to set node resource capacity", plugin.Name()) 337 } 338 return resp, err 339 }) 340 341 if err != nil { 342 for plugin, resp := range resps { 343 if resp == nil { 344 continue 345 } 346 rollbackPlugins = append(rollbackPlugins, plugin) 347 before[plugin.Name()] = resp.Before 348 after[plugin.Name()] = resp.After 349 } 350 logger.Errorf(ctx, err, "failed to set node resource for node %+v", nodename) 351 return err 352 } 353 return nil 354 }, 355 // rollback: set the rollback resource args in reverse 356 func(ctx context.Context) error { 357 _, err := call(ctx, rollbackPlugins, func(plugin plugins.Plugin) (*plugintypes.SetNodeResourceCapacityResponse, error) { 358 resp, err := plugin.SetNodeResourceCapacity(ctx, nodename, nil, before[plugin.Name()], false, false) 359 if err != nil { 360 logger.Errorf(ctx, err, "node %+v plugin %+v failed to rollback node resource capacity", nodename, plugin.Name()) 361 } 362 return resp, err 363 }) 364 return err 365 }, 366 m.config.GlobalTimeout, 367 ) 368 } 369 370 func (m Manager) mergeCapacity(m1 map[string]*plugintypes.NodeDeployCapacity, m2 map[string]*plugintypes.NodeDeployCapacity) map[string]*plugintypes.NodeDeployCapacity { 371 if m1 == nil { 372 return m2 373 } 374 375 resp := map[string]*plugintypes.NodeDeployCapacity{} 376 for nodename, info1 := range m1 { 377 // all the capacities should > 0 378 if info2, ok := m2[nodename]; ok { 379 resp[nodename] = &plugintypes.NodeDeployCapacity{ 380 Capacity: utils.Min(info1.Capacity, info2.Capacity), 381 Rate: info1.Rate + info2.Rate*info2.Weight, 382 Usage: info1.Usage + info2.Usage*info2.Weight, 383 Weight: info1.Weight + info2.Weight, 384 } 385 } 386 } 387 return resp 388 }