// github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocwatcher/alloc_watcher.go (about)

package allocwatcher

import (
	"archive/tar"
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"syscall"
	"time"

	hclog "github.com/hashicorp/go-hclog"
	nomadapi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/config"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// getRemoteRetryIntv is the minimum interval to wait before retrying a
	// failed fetch of a remote object. Callers add a random stagger on top,
	// so the effective delay is a value between this and 2x this.
	getRemoteRetryIntv = 30 * time.Second
)

// RPCer is the interface needed by a prevAllocWatcher to make RPC calls.
type RPCer interface {
	// RPC invokes the named server method with args and decodes the result
	// into reply. In this file it is used to look up remote allocs
	// ("Alloc.GetAlloc") and the nodes they ran on ("Node.GetNode").
	RPC(method string, args interface{}, reply interface{}) error
}

// terminated is the interface needed by a prevAllocWatcher to check if an
// alloc is terminated.
type terminated interface {
	// Terminated reports whether the alloc has terminated.
	Terminated() bool
}

// AllocRunnerMeta provides metadata about an AllocRunner such as its alloc and
// alloc dir. A non-nil AllocRunnerMeta is what marks a previous alloc as
// "local" when a watcher or migrator is constructed.
type AllocRunnerMeta interface {
	// GetAllocDir returns the runner's alloc dir; local migration moves
	// data out of it.
	GetAllocDir() *allocdir.AllocDir

	// Listener returns a listener used to block for updates to the
	// runner's alloc.
	Listener() *cstructs.AllocListener

	// Alloc returns the runner's allocation; it is consulted to detect an
	// alloc that has already terminated.
	Alloc() *structs.Allocation
}

// PrevAllocWatcher allows AllocRunners to wait for a previous allocation to
// terminate whether or not the previous allocation is local or remote.
// See `PrevAllocMigrator` for migrating workloads.
type PrevAllocWatcher interface {
	// Wait blocks until the previous alloc has terminated or the context
	// is done.
	Wait(context.Context) error

	// IsWaiting returns true if a concurrent caller is blocked in Wait.
	IsWaiting() bool
}

// PrevAllocMigrator allows AllocRunners to migrate a previous allocation
// whether or not the previous allocation is local or remote.
62 type PrevAllocMigrator interface { 63 PrevAllocWatcher 64 65 // IsMigrating returns true if a concurrent caller is in Migrate 66 IsMigrating() bool 67 68 // Migrate data from previous alloc 69 Migrate(ctx context.Context, dest *allocdir.AllocDir) error 70 } 71 72 type Config struct { 73 // Alloc is the current allocation which may need to block on its 74 // previous allocation stopping. 75 Alloc *structs.Allocation 76 77 // PreviousRunner is non-nil if Alloc has a PreviousAllocation and it is 78 // running locally. 79 PreviousRunner AllocRunnerMeta 80 81 // PreemptedRunners is non-nil if Alloc has one or more PreemptedAllocations. 82 PreemptedRunners map[string]AllocRunnerMeta 83 84 // RPC allows the alloc watcher to monitor remote allocations. 85 RPC RPCer 86 87 // Config is necessary for using the RPC. 88 Config *config.Config 89 90 // MigrateToken is used to migrate remote alloc dirs when ACLs are 91 // enabled. 92 MigrateToken string 93 94 Logger hclog.Logger 95 } 96 97 func newMigratorForAlloc(c Config, tg *structs.TaskGroup, watchedAllocID string, m AllocRunnerMeta) PrevAllocMigrator { 98 logger := c.Logger.Named("alloc_migrator").With("alloc_id", c.Alloc.ID).With("previous_alloc", watchedAllocID) 99 100 tasks := tg.Tasks 101 sticky := tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky 102 migrate := tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate 103 104 if m != nil { 105 // Local Allocation because there's an alloc runner 106 return &localPrevAlloc{ 107 allocID: c.Alloc.ID, 108 prevAllocID: watchedAllocID, 109 tasks: tasks, 110 sticky: sticky, 111 prevAllocDir: m.GetAllocDir(), 112 prevListener: m.Listener(), 113 prevStatus: m.Alloc(), 114 logger: logger, 115 } 116 } 117 118 return &remotePrevAlloc{ 119 allocID: c.Alloc.ID, 120 prevAllocID: watchedAllocID, 121 tasks: tasks, 122 config: c.Config, 123 migrate: migrate, 124 rpc: c.RPC, 125 migrateToken: c.MigrateToken, 126 logger: logger, 127 } 128 } 129 130 // newWatcherForAlloc uses a local or 
rpc-based watcher depending on whether 131 // AllocRunnerMeta is nil or not. 132 // 133 // Note that c.Alloc.PreviousAllocation must NOT be used in this func as it 134 // used for preemption which has a distinct field. The caller is responsible 135 // for passing the allocation to be watched as watchedAllocID. 136 func newWatcherForAlloc(c Config, watchedAllocID string, m AllocRunnerMeta) PrevAllocWatcher { 137 logger := c.Logger.Named("alloc_watcher").With("alloc_id", c.Alloc.ID).With("previous_alloc", watchedAllocID) 138 139 if m != nil { 140 // Local Allocation because there's an alloc runner 141 return &localPrevAlloc{ 142 allocID: c.Alloc.ID, 143 prevAllocID: watchedAllocID, 144 prevAllocDir: m.GetAllocDir(), 145 prevListener: m.Listener(), 146 prevStatus: m.Alloc(), 147 logger: logger, 148 } 149 } 150 151 return &remotePrevAlloc{ 152 allocID: c.Alloc.ID, 153 prevAllocID: watchedAllocID, 154 config: c.Config, 155 rpc: c.RPC, 156 migrateToken: c.MigrateToken, 157 logger: logger, 158 } 159 } 160 161 // NewAllocWatcher creates a PrevAllocWatcher if either PreviousAllocation or 162 // PreemptedRunners are set. If any of the allocs to watch have local runners, 163 // wait for them to terminate directly. 164 // For allocs which are either running on another node or have already 165 // terminated their alloc runners, use a remote backend which watches the alloc 166 // status via rpc. 167 func NewAllocWatcher(c Config) (PrevAllocWatcher, PrevAllocMigrator) { 168 if c.Alloc.PreviousAllocation == "" && c.PreemptedRunners == nil { 169 return NoopPrevAlloc{}, NoopPrevAlloc{} 170 } 171 172 var prevAllocWatchers []PrevAllocWatcher 173 var prevAllocMigrator PrevAllocMigrator = NoopPrevAlloc{} 174 175 // We have a previous allocation, add its listener to the watchers, and 176 // use a migrator. 
177 if c.Alloc.PreviousAllocation != "" { 178 tg := c.Alloc.Job.LookupTaskGroup(c.Alloc.TaskGroup) 179 m := newMigratorForAlloc(c, tg, c.Alloc.PreviousAllocation, c.PreviousRunner) 180 prevAllocWatchers = append(prevAllocWatchers, m) 181 prevAllocMigrator = m 182 } 183 184 // We are preempting allocations, add their listeners to the watchers. 185 if c.PreemptedRunners != nil { 186 for aid, r := range c.PreemptedRunners { 187 w := newWatcherForAlloc(c, aid, r) 188 prevAllocWatchers = append(prevAllocWatchers, w) 189 } 190 } 191 192 groupWatcher := &groupPrevAllocWatcher{ 193 prevAllocs: prevAllocWatchers, 194 } 195 196 return groupWatcher, prevAllocMigrator 197 } 198 199 // localPrevAlloc is a prevAllocWatcher for previous allocations on the same 200 // node as an updated allocation. 201 type localPrevAlloc struct { 202 // allocID is the ID of the alloc being blocked 203 allocID string 204 205 // prevAllocID is the ID of the alloc being replaced 206 prevAllocID string 207 208 // tasks on the new alloc 209 tasks []*structs.Task 210 211 // sticky is true if data should be moved 212 sticky bool 213 214 // prevAllocDir is the alloc dir for the previous alloc 215 prevAllocDir *allocdir.AllocDir 216 217 // prevListener allows blocking for updates to the previous alloc 218 prevListener *cstructs.AllocListener 219 220 // prevStatus allows checking if the previous alloc has already 221 // terminated (and therefore won't send updates to the listener) 222 prevStatus terminated 223 224 // waiting and migrating are true when alloc runner is waiting on the 225 // prevAllocWatcher. Writers must acquire the waitingLock and readers 226 // should use the helper methods IsWaiting and IsMigrating. 
227 waiting bool 228 migrating bool 229 waitingLock sync.RWMutex 230 231 logger hclog.Logger 232 } 233 234 // IsWaiting returns true if there's a concurrent call inside Wait 235 func (p *localPrevAlloc) IsWaiting() bool { 236 p.waitingLock.RLock() 237 b := p.waiting 238 p.waitingLock.RUnlock() 239 return b 240 } 241 242 // IsMigrating returns true if there's a concurrent call inside Migrate 243 func (p *localPrevAlloc) IsMigrating() bool { 244 p.waitingLock.RLock() 245 b := p.migrating 246 p.waitingLock.RUnlock() 247 return b 248 } 249 250 // Wait on a local alloc to become terminal, exit, or the context to be done. 251 func (p *localPrevAlloc) Wait(ctx context.Context) error { 252 p.waitingLock.Lock() 253 p.waiting = true 254 p.waitingLock.Unlock() 255 defer func() { 256 p.waitingLock.Lock() 257 p.waiting = false 258 p.waitingLock.Unlock() 259 }() 260 261 defer p.prevListener.Close() 262 263 // Don't bother blocking for updates from the previous alloc if it has 264 // already terminated. 265 if p.prevStatus.Terminated() { 266 p.logger.Trace("previous allocation already terminated") 267 return nil 268 } 269 270 // Block until previous alloc exits 271 p.logger.Debug("waiting for previous alloc to terminate") 272 for { 273 select { 274 case prevAlloc, ok := <-p.prevListener.Ch(): 275 if !ok || prevAlloc.Terminated() { 276 return nil 277 } 278 case <-ctx.Done(): 279 return ctx.Err() 280 } 281 } 282 } 283 284 // Migrate from previous local alloc dir to destination alloc dir. 
285 func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error { 286 if !p.sticky { 287 // Not a sticky volume, nothing to migrate 288 return nil 289 } 290 291 p.waitingLock.Lock() 292 p.migrating = true 293 p.waitingLock.Unlock() 294 defer func() { 295 p.waitingLock.Lock() 296 p.migrating = false 297 p.waitingLock.Unlock() 298 }() 299 300 p.logger.Debug("copying previous alloc") 301 302 moveErr := dest.Move(p.prevAllocDir, p.tasks) 303 304 // Always cleanup previous alloc 305 if err := p.prevAllocDir.Destroy(); err != nil { 306 p.logger.Error("error destroying alloc dir", 307 "error", err, "previous_alloc_dir", p.prevAllocDir.AllocDir) 308 } 309 310 return moveErr 311 } 312 313 // remotePrevAlloc is a prevAllocWatcher for previous allocations on remote 314 // nodes as an updated allocation. 315 type remotePrevAlloc struct { 316 // allocID is the ID of the alloc being blocked 317 allocID string 318 319 // prevAllocID is the ID of the alloc being replaced 320 prevAllocID string 321 322 // tasks on the new alloc 323 tasks []*structs.Task 324 325 // config for the Client to get AllocDir, Region, and Node.SecretID 326 config *config.Config 327 328 // migrate is true if data should be moved between nodes 329 migrate bool 330 331 // rpc provides an RPC method for watching for updates to the previous 332 // alloc and determining what node it was on. 333 rpc RPCer 334 335 // nodeID is the node the previous alloc. Set by Wait() for use in 336 // Migrate() iff the previous alloc has not already been GC'd. 337 nodeID string 338 339 // waiting and migrating are true when alloc runner is waiting on the 340 // prevAllocWatcher. Writers must acquire the waitingLock and readers 341 // should use the helper methods IsWaiting and IsMigrating. 
342 waiting bool 343 migrating bool 344 waitingLock sync.RWMutex 345 346 logger hclog.Logger 347 348 // migrateToken allows a client to migrate data in an ACL-protected remote 349 // volume 350 migrateToken string 351 } 352 353 // IsWaiting returns true if there's a concurrent call inside Wait 354 func (p *remotePrevAlloc) IsWaiting() bool { 355 p.waitingLock.RLock() 356 b := p.waiting 357 p.waitingLock.RUnlock() 358 return b 359 } 360 361 // IsMigrating returns true if there's a concurrent call inside Migrate 362 func (p *remotePrevAlloc) IsMigrating() bool { 363 p.waitingLock.RLock() 364 b := p.migrating 365 p.waitingLock.RUnlock() 366 return b 367 } 368 369 // Wait until the remote previous allocation has terminated. 370 func (p *remotePrevAlloc) Wait(ctx context.Context) error { 371 p.waitingLock.Lock() 372 p.waiting = true 373 p.waitingLock.Unlock() 374 defer func() { 375 p.waitingLock.Lock() 376 p.waiting = false 377 p.waitingLock.Unlock() 378 }() 379 380 p.logger.Debug("waiting for remote previous alloc to terminate") 381 req := structs.AllocSpecificRequest{ 382 AllocID: p.prevAllocID, 383 QueryOptions: structs.QueryOptions{ 384 Region: p.config.Region, 385 AllowStale: true, 386 AuthToken: p.config.Node.SecretID, 387 }, 388 } 389 390 done := func() bool { 391 select { 392 case <-ctx.Done(): 393 return true 394 default: 395 return false 396 } 397 } 398 399 for !done() { 400 resp := structs.SingleAllocResponse{} 401 err := p.rpc.RPC("Alloc.GetAlloc", &req, &resp) 402 if err != nil { 403 p.logger.Error("error querying previous alloc", "error", err) 404 retry := getRemoteRetryIntv + helper.RandomStagger(getRemoteRetryIntv) 405 select { 406 case <-time.After(retry): 407 continue 408 case <-ctx.Done(): 409 return ctx.Err() 410 } 411 } 412 if resp.Alloc == nil { 413 p.logger.Debug("blocking alloc was GC'd") 414 return nil 415 } 416 if resp.Alloc.Terminated() || resp.Alloc.ClientStatus == structs.AllocClientStatusUnknown { 417 p.nodeID = resp.Alloc.NodeID 418 return 
nil 419 } 420 421 // Update the query index and requery. 422 if resp.Index > req.MinQueryIndex { 423 req.MinQueryIndex = resp.Index 424 } 425 } 426 427 return ctx.Err() 428 } 429 430 // Migrate alloc data from a remote node if the new alloc has migration enabled 431 // and the old alloc hasn't been GC'd. 432 func (p *remotePrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error { 433 if !p.migrate { 434 // Volume wasn't configured to be migrated, return early 435 return nil 436 } 437 438 p.waitingLock.Lock() 439 p.migrating = true 440 p.waitingLock.Unlock() 441 defer func() { 442 p.waitingLock.Lock() 443 p.migrating = false 444 p.waitingLock.Unlock() 445 }() 446 447 p.logger.Debug("copying from remote previous alloc") 448 449 if p.nodeID == "" { 450 // NodeID couldn't be found; likely alloc was GC'd 451 p.logger.Warn("unable to migrate data from previous alloc; previous alloc may have been GC'd") 452 return nil 453 } 454 455 addr, err := p.getNodeAddr(ctx, p.nodeID) 456 if err != nil { 457 return err 458 } 459 460 prevAllocDir, err := p.migrateAllocDir(ctx, addr) 461 if err != nil { 462 return err 463 } 464 465 if err := dest.Move(prevAllocDir, p.tasks); err != nil { 466 // cleanup on error 467 prevAllocDir.Destroy() 468 return err 469 } 470 471 if err := prevAllocDir.Destroy(); err != nil { 472 p.logger.Error("error destroying alloc dir", 473 "error", err, "previous_alloc_dir", prevAllocDir.AllocDir) 474 } 475 return nil 476 } 477 478 // getNodeAddr gets the node from the server with the given Node ID 479 func (p *remotePrevAlloc) getNodeAddr(ctx context.Context, nodeID string) (string, error) { 480 req := structs.NodeSpecificRequest{ 481 NodeID: nodeID, 482 QueryOptions: structs.QueryOptions{ 483 Region: p.config.Region, 484 AllowStale: true, 485 AuthToken: p.config.Node.SecretID, 486 }, 487 } 488 489 resp := structs.SingleNodeResponse{} 490 for { 491 err := p.rpc.RPC("Node.GetNode", &req, &resp) 492 if err != nil { 493 p.logger.Error("failed to 
query node", "error", err, "node", nodeID) 494 retry := getRemoteRetryIntv + helper.RandomStagger(getRemoteRetryIntv) 495 select { 496 case <-time.After(retry): 497 continue 498 case <-ctx.Done(): 499 return "", ctx.Err() 500 } 501 } 502 break 503 } 504 505 if resp.Node == nil { 506 return "", fmt.Errorf("node %q not found", nodeID) 507 } 508 509 scheme := "http://" 510 if resp.Node.TLSEnabled { 511 scheme = "https://" 512 } 513 return scheme + resp.Node.HTTPAddr, nil 514 } 515 516 // migrate a remote alloc dir to local node. Caller is responsible for calling 517 // Destroy on the returned allocdir if no error occurs. 518 func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string) (*allocdir.AllocDir, error) { 519 // Create the previous alloc dir 520 prevAllocDir := allocdir.NewAllocDir(p.logger, p.config.AllocDir, p.prevAllocID) 521 if err := prevAllocDir.Build(); err != nil { 522 return nil, fmt.Errorf("error building alloc dir for previous alloc %q: %v", p.prevAllocID, err) 523 } 524 525 // Create an API client 526 apiConfig := nomadapi.DefaultConfig() 527 apiConfig.Address = nodeAddr 528 apiConfig.TLSConfig = &nomadapi.TLSConfig{ 529 CACert: p.config.TLSConfig.CAFile, 530 ClientCert: p.config.TLSConfig.CertFile, 531 ClientKey: p.config.TLSConfig.KeyFile, 532 TLSServerName: fmt.Sprintf("client.%s.nomad", p.config.Region), 533 } 534 apiClient, err := nomadapi.NewClient(apiConfig) 535 if err != nil { 536 return nil, err 537 } 538 539 url := fmt.Sprintf("/v1/client/allocation/%v/snapshot", p.prevAllocID) 540 qo := &nomadapi.QueryOptions{AuthToken: p.migrateToken} 541 resp, err := apiClient.Raw().Response(url, qo) 542 if err != nil { 543 prevAllocDir.Destroy() 544 return nil, fmt.Errorf("error getting snapshot from previous alloc %q: %v", p.prevAllocID, err) 545 } 546 547 if err := p.streamAllocDir(ctx, resp, prevAllocDir.AllocDir); err != nil { 548 prevAllocDir.Destroy() 549 return nil, err 550 } 551 552 return prevAllocDir, nil 553 } 554 555 // 
stream remote alloc to dir to a local path. Caller should cleanup dest on 556 // error. 557 func (p *remotePrevAlloc) streamAllocDir(ctx context.Context, resp io.ReadCloser, dest string) error { 558 p.logger.Debug("streaming snapshot of previous alloc", "destination", dest) 559 tr := tar.NewReader(resp) 560 defer resp.Close() 561 562 // Cache effective uid as we only run Chown if we're root 563 euid := syscall.Geteuid() 564 565 canceled := func() bool { 566 select { 567 case <-ctx.Done(): 568 p.logger.Info("migration of previous alloc canceled") 569 return true 570 default: 571 return false 572 } 573 } 574 575 // if we see this file, there was an error on the remote side 576 errorFilename := allocdir.SnapshotErrorFilename(p.prevAllocID) 577 578 buf := make([]byte, 1024) 579 for !canceled() { 580 // Get the next header 581 hdr, err := tr.Next() 582 583 // Snapshot has ended 584 if err == io.EOF { 585 return nil 586 } 587 588 if err != nil { 589 return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %v", 590 p.prevAllocID, p.allocID, err) 591 } 592 593 if hdr.Name == errorFilename { 594 // Error snapshotting on the remote side, try to read 595 // the message out of the file and return it. 596 errBuf := make([]byte, int(hdr.Size)) 597 if _, err := tr.Read(errBuf); err != nil && err != io.EOF { 598 return fmt.Errorf("error streaming previous alloc %q for new alloc %q; failed reading error message: %v", 599 p.prevAllocID, p.allocID, err) 600 } 601 return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %s", 602 p.prevAllocID, p.allocID, string(errBuf)) 603 } 604 605 // If the header is for a directory we create the directory 606 if hdr.Typeflag == tar.TypeDir { 607 name := filepath.Join(dest, hdr.Name) 608 os.MkdirAll(name, os.FileMode(hdr.Mode)) 609 610 // Can't change owner if not root or on Windows. 
611 if euid == 0 { 612 if err := os.Chown(name, hdr.Uid, hdr.Gid); err != nil { 613 return fmt.Errorf("error chowning directory %v", err) 614 } 615 } 616 continue 617 } 618 // If the header is for a symlink we create the symlink 619 if hdr.Typeflag == tar.TypeSymlink { 620 if err = os.Symlink(hdr.Linkname, filepath.Join(dest, hdr.Name)); err != nil { 621 return fmt.Errorf("error creating symlink: %v", err) 622 } 623 continue 624 } 625 // If the header is a file, we write to a file 626 if hdr.Typeflag == tar.TypeReg { 627 f, err := os.Create(filepath.Join(dest, hdr.Name)) 628 if err != nil { 629 return fmt.Errorf("error creating file: %v", err) 630 } 631 632 // Setting the permissions of the file as the origin. 633 if err := f.Chmod(os.FileMode(hdr.Mode)); err != nil { 634 f.Close() 635 return fmt.Errorf("error chmoding file %v", err) 636 } 637 638 // Can't change owner if not root or on Windows. 639 if euid == 0 { 640 if err := f.Chown(hdr.Uid, hdr.Gid); err != nil { 641 f.Close() 642 return fmt.Errorf("error chowning file %v", err) 643 } 644 } 645 646 // We write in chunks so that we can test if the client 647 // is still alive 648 for !canceled() { 649 n, err := tr.Read(buf) 650 if n > 0 && (err == nil || err == io.EOF) { 651 if _, err := f.Write(buf[:n]); err != nil { 652 f.Close() 653 return fmt.Errorf("error writing to file %q: %v", f.Name(), err) 654 } 655 } 656 657 if err != nil { 658 f.Close() 659 if err != io.EOF { 660 return fmt.Errorf("error reading snapshot: %v", err) 661 } 662 break 663 } 664 } 665 666 } 667 } 668 669 if canceled() { 670 return ctx.Err() 671 } 672 673 return nil 674 } 675 676 // NoopPrevAlloc does not block or migrate on a previous allocation and never 677 // returns an error. 678 type NoopPrevAlloc struct{} 679 680 // Wait returns nil immediately. 681 func (NoopPrevAlloc) Wait(context.Context) error { return nil } 682 683 // Migrate returns nil immediately. 
684 func (NoopPrevAlloc) Migrate(context.Context, *allocdir.AllocDir) error { return nil } 685 686 func (NoopPrevAlloc) IsWaiting() bool { return false } 687 func (NoopPrevAlloc) IsMigrating() bool { return false }