github.com/hernad/nomad@v1.6.112/nomad/node_pool_endpoint.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package nomad 5 6 import ( 7 "errors" 8 "fmt" 9 "net/http" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 "github.com/hashicorp/go-memdb" 14 multierror "github.com/hashicorp/go-multierror" 15 16 "github.com/hernad/nomad/acl" 17 "github.com/hernad/nomad/helper" 18 "github.com/hernad/nomad/nomad/state" 19 "github.com/hernad/nomad/nomad/state/paginator" 20 "github.com/hernad/nomad/nomad/structs" 21 ) 22 23 // NodePool endpoint is used for node pool management and interaction. 24 type NodePool struct { 25 srv *Server 26 ctx *RPCContext 27 } 28 29 func NewNodePoolEndpoint(srv *Server, ctx *RPCContext) *NodePool { 30 return &NodePool{srv: srv, ctx: ctx} 31 } 32 33 // List is used to retrieve multiple node pools. It supports prefix listing, 34 // pagination, and filtering. 35 func (n *NodePool) List(args *structs.NodePoolListRequest, reply *structs.NodePoolListResponse) error { 36 authErr := n.srv.Authenticate(n.ctx, args) 37 if done, err := n.srv.forward("NodePool.List", args, args, reply); done { 38 return err 39 } 40 n.srv.MeasureRPCRate("node_pool", structs.RateMetricList, args) 41 if authErr != nil { 42 return structs.ErrPermissionDenied 43 } 44 defer metrics.MeasureSince([]string{"nomad", "node_pool", "list"}, time.Now()) 45 46 // Resolve ACL token to only return node pools it has access to. 47 aclObj, err := n.srv.ResolveACL(args) 48 if err != nil { 49 return err 50 } 51 52 // Only warn for expiration of a read request. 53 _ = n.validateLicense(nil) 54 55 // Setup blocking query. 56 sort := state.SortOption(args.Reverse) 57 opts := blockingOptions{ 58 queryOpts: &args.QueryOptions, 59 queryMeta: &reply.QueryMeta, 60 run: func(ws memdb.WatchSet, store *state.StateStore) error { 61 var err error 62 var iter memdb.ResultIterator 63 64 if prefix := args.QueryOptions.Prefix; prefix != "" { 65 iter, err = store.NodePoolsByNamePrefix(ws, prefix, sort) 66 } else { 67 iter, err = store.NodePools(ws, sort) 68 } 69 if err != nil { 70 return err 71 } 72 73 pageOpts := paginator.StructsTokenizerOptions{WithID: true} 74 tokenizer := paginator.NewStructsTokenizer(iter, pageOpts) 75 filters := []paginator.Filter{ 76 // Filter out node pools based on ACL token capabilities. 77 paginator.GenericFilter{ 78 Allow: func(raw interface{}) (bool, error) { 79 pool := raw.(*structs.NodePool) 80 return aclObj.AllowNodePoolOperation(pool.Name, acl.NodePoolCapabilityRead), nil 81 }, 82 }, 83 } 84 85 var pools []*structs.NodePool 86 pager, err := paginator.NewPaginator(iter, tokenizer, filters, args.QueryOptions, 87 func(raw interface{}) error { 88 pool := raw.(*structs.NodePool) 89 pools = append(pools, pool) 90 return nil 91 }) 92 if err != nil { 93 return structs.NewErrRPCCodedf(http.StatusBadRequest, "failed to create result paginator: %v", err) 94 } 95 96 nextToken, err := pager.Page() 97 if err != nil { 98 return structs.NewErrRPCCodedf(http.StatusBadRequest, "failed to read result page: %v", err) 99 } 100 101 reply.QueryMeta.NextToken = nextToken 102 reply.NodePools = pools 103 104 // Use the last index that affected the node pools table. 105 index, err := store.Index("node_pools") 106 if err != nil { 107 return err 108 } 109 reply.Index = helper.Max(1, index) 110 111 // Set the query response. 112 n.srv.setQueryMeta(&reply.QueryMeta) 113 return nil 114 }} 115 return n.srv.blockingRPC(&opts) 116 } 117 118 // GetNodePool returns the specific node pool requested or nil if the node pool 119 // doesn't exist. 120 func (n *NodePool) GetNodePool(args *structs.NodePoolSpecificRequest, reply *structs.SingleNodePoolResponse) error { 121 authErr := n.srv.Authenticate(n.ctx, args) 122 if done, err := n.srv.forward("NodePool.GetNodePool", args, args, reply); done { 123 return err 124 } 125 n.srv.MeasureRPCRate("node_pool", structs.RateMetricRead, args) 126 if authErr != nil { 127 return structs.ErrPermissionDenied 128 } 129 defer metrics.MeasureSince([]string{"nomad", "node_pool", "get_node_pool"}, time.Now()) 130 131 // Resolve ACL token and verify it has read capability for the pool. 132 aclObj, err := n.srv.ResolveACL(args) 133 if err != nil { 134 return err 135 } 136 if !aclObj.AllowNodePoolOperation(args.Name, acl.NodePoolCapabilityRead) { 137 return structs.ErrPermissionDenied 138 } 139 140 // Only warn for expiration of a read request. 141 _ = n.validateLicense(nil) 142 143 // Setup the blocking query. 144 opts := blockingOptions{ 145 queryOpts: &args.QueryOptions, 146 queryMeta: &reply.QueryMeta, 147 run: func(ws memdb.WatchSet, store *state.StateStore) error { 148 // Fetch node pool. 149 pool, err := store.NodePoolByName(ws, args.Name) 150 if err != nil { 151 return err 152 } 153 154 reply.NodePool = pool 155 if pool != nil { 156 reply.Index = pool.ModifyIndex 157 } else { 158 // Return the last index that affected the node pools table if 159 // the requested node pool doesn't exist. 160 index, err := store.Index(state.TableNodePools) 161 if err != nil { 162 return err 163 } 164 reply.Index = helper.Max(1, index) 165 } 166 return nil 167 }} 168 return n.srv.blockingRPC(&opts) 169 } 170 171 // UpsertNodePools creates or updates the given node pools. Built-in node pools 172 // cannot be updated. 173 func (n *NodePool) UpsertNodePools(args *structs.NodePoolUpsertRequest, reply *structs.GenericResponse) error { 174 authErr := n.srv.Authenticate(n.ctx, args) 175 args.Region = n.srv.config.AuthoritativeRegion 176 if done, err := n.srv.forward("NodePool.UpsertNodePools", args, args, reply); done { 177 return err 178 } 179 n.srv.MeasureRPCRate("node_pool", structs.RateMetricWrite, args) 180 if authErr != nil { 181 return structs.ErrPermissionDenied 182 } 183 defer metrics.MeasureSince([]string{"nomad", "node_pool", "upsert_node_pools"}, time.Now()) 184 185 // Resolve ACL token and verify it has write capability to all pools in the 186 // request. 187 aclObj, err := n.srv.ResolveACL(args) 188 if err != nil { 189 return err 190 } 191 for _, pool := range args.NodePools { 192 if !aclObj.AllowNodePoolOperation(pool.Name, acl.NodePoolCapabilityWrite) { 193 return structs.ErrPermissionDenied 194 } 195 196 // Strict enforcement for write requests. 197 // If not licensed then requests will be denied. 198 if err := n.validateLicense(pool); err != nil { 199 return err 200 } 201 } 202 203 if !ServersMeetMinimumVersion( 204 n.srv.serf.Members(), n.srv.Region(), minNodePoolsVersion, true) { 205 return fmt.Errorf("all servers must be running version %v or later to upsert node pools", minNodePoolsVersion) 206 } 207 208 // Validate request. 209 if len(args.NodePools) == 0 { 210 return structs.NewErrRPCCodedf(http.StatusBadRequest, "must specify at least one node pool") 211 } 212 for _, pool := range args.NodePools { 213 if err := pool.Validate(); err != nil { 214 return structs.NewErrRPCCodedf(http.StatusBadRequest, "invalid node pool %q: %v", pool.Name, err) 215 } 216 if pool.IsBuiltIn() { 217 return structs.NewErrRPCCodedf(http.StatusBadRequest, "modifying node pool %q is not allowed", pool.Name) 218 } 219 220 pool.SetHash() 221 } 222 223 // Update via Raft. 224 _, index, err := n.srv.raftApply(structs.NodePoolUpsertRequestType, args) 225 if err != nil { 226 return err 227 } 228 reply.Index = index 229 return nil 230 } 231 232 // DeleteNodePools deletes the given node pools. Built-in node pools cannot be 233 // deleted. 234 func (n *NodePool) DeleteNodePools(args *structs.NodePoolDeleteRequest, reply *structs.GenericResponse) error { 235 authErr := n.srv.Authenticate(n.ctx, args) 236 args.Region = n.srv.config.AuthoritativeRegion 237 if done, err := n.srv.forward("NodePool.DeleteNodePools", args, args, reply); done { 238 return err 239 } 240 n.srv.MeasureRPCRate("node_pool", structs.RateMetricWrite, args) 241 if authErr != nil { 242 return structs.ErrPermissionDenied 243 } 244 defer metrics.MeasureSince([]string{"nomad", "node_pool", "delete_node_pools"}, time.Now()) 245 246 // Resolve ACL token and verify it has delete capability to all pools in 247 // the request. 248 aclObj, err := n.srv.ResolveACL(args) 249 if err != nil { 250 return err 251 } 252 for _, name := range args.Names { 253 if !aclObj.AllowNodePoolOperation(name, acl.NodePoolCapabilityDelete) { 254 return structs.ErrPermissionDenied 255 } 256 } 257 258 // Only warn for expiration on delete because just parts of node pools are 259 // licensed, so they are allowed to be deleted. 260 _ = n.validateLicense(nil) 261 262 if !ServersMeetMinimumVersion( 263 n.srv.serf.Members(), n.srv.Region(), minNodePoolsVersion, true) { 264 return fmt.Errorf("all servers must be running version %v or later to delete node pools", minNodePoolsVersion) 265 } 266 267 // Validate request. 268 if len(args.Names) == 0 { 269 return structs.NewErrRPCCodedf(http.StatusBadRequest, "must specify at least one node pool to delete") 270 } 271 for _, name := range args.Names { 272 if name == "" { 273 return structs.NewErrRPCCodedf(http.StatusBadRequest, "node pool name is empty") 274 } 275 } 276 277 // Verify that the node pools we're deleting do not have nodes or 278 // non-terminal jobs in this region or in any federated region. 279 var mErr multierror.Error 280 for _, name := range args.Names { 281 regionsWithNonTerminal, regionsWithNodes, err := n.nodePoolRegionsInUse(args.AuthToken, name) 282 if err != nil { 283 _ = multierror.Append(&mErr, err) 284 } 285 if len(regionsWithNonTerminal) != 0 { 286 _ = multierror.Append(&mErr, fmt.Errorf( 287 "node pool %q has non-terminal jobs in regions: %v", name, regionsWithNonTerminal)) 288 } 289 if len(regionsWithNodes) != 0 { 290 _ = multierror.Append(&mErr, fmt.Errorf( 291 "node pool %q has nodes in regions: %v", name, regionsWithNodes)) 292 } 293 } 294 295 if err := mErr.ErrorOrNil(); err != nil { 296 return err 297 } 298 299 // Delete via Raft. 300 _, index, err := n.srv.raftApply(structs.NodePoolDeleteRequestType, args) 301 if err != nil { 302 return err 303 } 304 305 reply.Index = index 306 return nil 307 } 308 309 // nodePoolRegionsInUse returns a list of regions where the node pool is still 310 // in use for non-terminal jobs, and a list of regions where it is in use by 311 // nodes. 312 func (n *NodePool) nodePoolRegionsInUse(token, poolName string) ([]string, []string, error) { 313 regions := n.srv.Regions() 314 thisRegion := n.srv.Region() 315 hasNodes := make([]string, 0, len(regions)) 316 hasNonTerminal := make([]string, 0, len(regions)) 317 318 // Check if the pool in use in this region 319 snap, err := n.srv.State().Snapshot() 320 if err != nil { 321 return nil, nil, err 322 } 323 iter, err := snap.NodesByNodePool(nil, poolName) 324 if err != nil { 325 return nil, nil, err 326 } 327 found := iter.Next() 328 if found != nil { 329 hasNodes = append(hasNodes, thisRegion) 330 } 331 iter, err = snap.JobsByPool(nil, poolName) 332 for raw := iter.Next(); raw != nil; raw = iter.Next() { 333 job := raw.(*structs.Job) 334 if job.Status != structs.JobStatusDead { 335 hasNonTerminal = append(hasNonTerminal, thisRegion) 336 break 337 } 338 } 339 340 for _, region := range regions { 341 if region == thisRegion { 342 continue 343 } 344 345 nodesReq := &structs.NodePoolNodesRequest{ 346 Name: poolName, 347 QueryOptions: structs.QueryOptions{ 348 Region: region, 349 AuthToken: token, 350 PerPage: 1, // we only care if there are any 351 }, 352 } 353 var nodesResp structs.NodePoolNodesResponse 354 err := n.srv.RPC("NodePool.ListNodes", nodesReq, &nodesResp) 355 if err != nil { 356 return hasNodes, hasNonTerminal, err 357 } 358 if len(nodesResp.Nodes) != 0 { 359 hasNodes = append(hasNodes, region) 360 } 361 362 jobsReq := &structs.NodePoolJobsRequest{ 363 Name: poolName, 364 QueryOptions: structs.QueryOptions{ 365 Region: region, 366 AuthToken: token, 367 PerPage: 1, // we only care if there are any 368 Filter: `Status != "dead"`, 369 }, 370 } 371 var jobsResp structs.NodePoolJobsResponse 372 err = n.srv.RPC("NodePool.ListJobs", jobsReq, &jobsResp) 373 if err != nil { 374 return hasNodes, hasNonTerminal, err 375 } 376 377 if len(jobsResp.Jobs) != 0 { 378 hasNonTerminal = append(hasNonTerminal, region) 379 } 380 381 } 382 383 return hasNonTerminal, hasNodes, err 384 } 385 386 // ListJobs is used to retrieve a list of jobs for a given node pool. It supports 387 // pagination and filtering. 388 func (n *NodePool) ListJobs(args *structs.NodePoolJobsRequest, reply *structs.NodePoolJobsResponse) error { 389 authErr := n.srv.Authenticate(n.ctx, args) 390 if done, err := n.srv.forward("NodePool.ListJobs", args, args, reply); done { 391 return err 392 } 393 n.srv.MeasureRPCRate("node_pool", structs.RateMetricList, args) 394 if authErr != nil { 395 return structs.ErrPermissionDenied 396 } 397 defer metrics.MeasureSince([]string{"nomad", "node_pool", "list_jobs"}, time.Now()) 398 399 // Resolve ACL token and verify it has read capability for the pool. 400 aclObj, err := n.srv.ResolveACL(args) 401 if err != nil { 402 return err 403 } 404 if !aclObj.AllowNodePoolOperation(args.Name, acl.NodePoolCapabilityRead) { 405 return structs.ErrPermissionDenied 406 } 407 allowNsFunc := aclObj.AllowNsOpFunc(acl.NamespaceCapabilityListJobs) 408 namespace := args.RequestNamespace() 409 410 // Setup the blocking query. This largely mirrors the Jobs.List RPC but with 411 // an additional paginator filter for the node pool. 412 opts := blockingOptions{ 413 queryOpts: &args.QueryOptions, 414 queryMeta: &reply.QueryMeta, 415 run: func(ws memdb.WatchSet, store *state.StateStore) error { 416 // ensure the node pool exists 417 pool, err := store.NodePoolByName(ws, args.Name) 418 if err != nil { 419 return err 420 } 421 if pool == nil { 422 return nil 423 } 424 425 var iter memdb.ResultIterator 426 427 // Get the namespaces the user is allowed to access. 428 allowableNamespaces, err := allowedNSes(aclObj, store, allowNsFunc) 429 if errors.Is(err, structs.ErrPermissionDenied) { 430 // return empty jobs if token isn't authorized for any 431 // namespace, matching other endpoints 432 reply.Jobs = make([]*structs.JobListStub, 0) 433 } else if err != nil { 434 return err 435 } else { 436 437 filters := []paginator.Filter{ 438 paginator.NamespaceFilter{ 439 AllowableNamespaces: allowableNamespaces, 440 }, 441 } 442 443 if namespace == structs.AllNamespacesSentinel { 444 iter, err = store.JobsByPool(ws, args.Name) 445 } else { 446 iter, err = store.JobsByNamespace(ws, namespace) 447 filters = append(filters, 448 paginator.GenericFilter{ 449 Allow: func(raw interface{}) (bool, error) { 450 job := raw.(*structs.Job) 451 if job == nil || job.NodePool != args.Name { 452 return false, nil 453 } 454 return true, nil 455 }, 456 }) 457 } 458 if err != nil { 459 return err 460 } 461 462 tokenizer := paginator.NewStructsTokenizer( 463 iter, 464 paginator.StructsTokenizerOptions{ 465 WithNamespace: true, 466 WithID: true, 467 }, 468 ) 469 470 var jobs []*structs.JobListStub 471 472 paginator, err := paginator.NewPaginator(iter, tokenizer, filters, args.QueryOptions, 473 func(raw interface{}) error { 474 job := raw.(*structs.Job) 475 summary, err := store.JobSummaryByID(ws, job.Namespace, job.ID) 476 if err != nil || summary == nil { 477 return fmt.Errorf("unable to look up summary for job: %v", job.ID) 478 } 479 jobs = append(jobs, job.Stub(summary, args.Fields)) 480 return nil 481 }) 482 if err != nil { 483 return structs.NewErrRPCCodedf( 484 http.StatusBadRequest, "failed to create result paginator: %v", err) 485 } 486 487 nextToken, err := paginator.Page() 488 if err != nil { 489 return structs.NewErrRPCCodedf( 490 http.StatusBadRequest, "failed to read result page: %v", err) 491 } 492 493 reply.QueryMeta.NextToken = nextToken 494 reply.Jobs = jobs 495 } 496 497 // Use the last index that affected the jobs table or summary 498 jindex, err := store.Index("jobs") 499 if err != nil { 500 return err 501 } 502 sindex, err := store.Index("job_summary") 503 if err != nil { 504 return err 505 } 506 reply.Index = helper.Max(jindex, sindex) 507 508 // Set the query response 509 n.srv.setQueryMeta(&reply.QueryMeta) 510 return nil 511 }} 512 return n.srv.blockingRPC(&opts) 513 } 514 515 // ListNodes is used to retrieve a list of nodes for a give node pool. It 516 // supports pagination and filtering. 517 func (n *NodePool) ListNodes(args *structs.NodePoolNodesRequest, reply *structs.NodePoolNodesResponse) error { 518 authErr := n.srv.Authenticate(n.ctx, args) 519 if done, err := n.srv.forward("NodePool.ListNodes", args, args, reply); done { 520 return err 521 } 522 n.srv.MeasureRPCRate("node_pool", structs.RateMetricList, args) 523 if authErr != nil { 524 return structs.ErrPermissionDenied 525 } 526 defer metrics.MeasureSince([]string{"nomad", "node_pool", "list_nodes"}, time.Now()) 527 528 // Resolve ACL token and verify it has read capability for nodes and the 529 // node pool. 530 aclObj, err := n.srv.ResolveACL(args) 531 if err != nil { 532 return err 533 } 534 535 allowed := aclObj.AllowNodeRead() && 536 aclObj.AllowNodePoolOperation(args.Name, acl.NodePoolCapabilityRead) 537 if !allowed { 538 return structs.ErrPermissionDenied 539 } 540 541 // Setup blocking query. 542 opts := blockingOptions{ 543 queryOpts: &args.QueryOptions, 544 queryMeta: &reply.QueryMeta, 545 run: func(ws memdb.WatchSet, store *state.StateStore) error { 546 // Verify node pool exists. 547 pool, err := store.NodePoolByName(ws, args.Name) 548 if err != nil { 549 return err 550 } 551 if pool == nil { 552 return nil 553 } 554 555 // Fetch nodes in the pool. 556 var iter memdb.ResultIterator 557 if args.Name == structs.NodePoolAll { 558 iter, err = store.Nodes(ws) 559 } else { 560 iter, err = store.NodesByNodePool(ws, args.Name) 561 } 562 if err != nil { 563 return err 564 } 565 566 // Setup paginator by node ID. 567 pageOpts := paginator.StructsTokenizerOptions{ 568 WithID: true, 569 } 570 tokenizer := paginator.NewStructsTokenizer(iter, pageOpts) 571 572 var nodes []*structs.NodeListStub 573 pager, err := paginator.NewPaginator(iter, tokenizer, nil, args.QueryOptions, 574 func(raw interface{}) error { 575 node := raw.(*structs.Node) 576 nodes = append(nodes, node.Stub(args.Fields)) 577 return nil 578 }) 579 if err != nil { 580 return structs.NewErrRPCCodedf(http.StatusBadRequest, "failed to create result paginator: %v", err) 581 } 582 583 nextToken, err := pager.Page() 584 if err != nil { 585 return structs.NewErrRPCCodedf(http.StatusBadRequest, "failed to read result page: %v", err) 586 } 587 588 reply.QueryMeta.NextToken = nextToken 589 reply.Nodes = nodes 590 591 // Use the last index that affected the nodes table. 592 index, err := store.Index("nodes") 593 if err != nil { 594 return err 595 } 596 reply.Index = helper.Max(1, index) 597 598 // Set the query response. 599 n.srv.setQueryMeta(&reply.QueryMeta) 600 return nil 601 }} 602 return n.srv.blockingRPC(&opts) 603 }