github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/client/allocrunner/alloc_watcher.go

package allocrunner

import (
	"archive/tar"
	"context"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"sync"
	"syscall"
	"time"

	"github.com/hashicorp/consul/lib"
	nomadapi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/config"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// getRemoteRetryIntv is the minimum interval at which we retry
	// fetching remote objects. The actual wait is a random value between
	// this and 2x this.
	getRemoteRetryIntv = 30 * time.Second
)

// rpcer is the interface needed by a prevAllocWatcher to make RPC calls.
type rpcer interface {
	// RPC allows retrieving remote allocs.
	RPC(method string, args interface{}, reply interface{}) error
}

// terminated is the interface needed by a prevAllocWatcher to check if an
// alloc is terminated.
type terminated interface {
	Terminated() bool
}

// prevAllocWatcher allows AllocRunners to wait for a previous allocation to
// terminate and migrate its data, whether the previous allocation is local
// or remote.
type prevAllocWatcher interface {
	// Wait for previous alloc to terminate
	Wait(context.Context) error

	// Migrate data from previous alloc
	Migrate(ctx context.Context, dest *allocdir.AllocDir) error

	// IsWaiting returns true if a concurrent caller is blocked in Wait
	IsWaiting() bool

	// IsMigrating returns true if a concurrent caller is in Migrate
	IsMigrating() bool
}

// NewAllocWatcher creates a prevAllocWatcher appropriate for whether this
// alloc's previous allocation was local or remote. If this alloc has no
// previous alloc then a noop implementation is returned.
func NewAllocWatcher(alloc *structs.Allocation, prevAR *AllocRunner, rpc rpcer, config *config.Config, l *log.Logger, migrateToken string) prevAllocWatcher {
	if alloc.PreviousAllocation == "" {
		// No previous allocation, use noop transitioner
		return NoopPrevAlloc{}
	}

	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)

	if prevAR != nil {
		// Previous allocation is local, use local transitioner
		return &localPrevAlloc{
			allocID:      alloc.ID,
			prevAllocID:  alloc.PreviousAllocation,
			tasks:        tg.Tasks,
			sticky:       tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky,
			prevAllocDir: prevAR.GetAllocDir(),
			prevListener: prevAR.GetListener(),
			prevWaitCh:   prevAR.WaitCh(),
			prevStatus:   prevAR.Alloc(),
			logger:       l,
		}
	}

	return &remotePrevAlloc{
		allocID:      alloc.ID,
		prevAllocID:  alloc.PreviousAllocation,
		tasks:        tg.Tasks,
		config:       config,
		migrate:      tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate,
		rpc:          rpc,
		logger:       l,
		migrateToken: migrateToken,
	}
}
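// Illustrative usage sketch (not part of the original file): an alloc runner
// is expected to Wait for the previous allocation to terminate before
// Migrating its data into the new allocation's directory. The function and
// parameter names below are hypothetical.
func examplePrevAllocFlow(ctx context.Context, watcher prevAllocWatcher, dest *allocdir.AllocDir) error {
	// Block until the previous alloc is terminal, GC'd, or ctx is done.
	if err := watcher.Wait(ctx); err != nil {
		return err
	}
	// Copy (local) or stream (remote) the previous alloc's data into dest.
	return watcher.Migrate(ctx, dest)
}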
// localPrevAlloc is a prevAllocWatcher for previous allocations on the same
// node as an updated allocation.
type localPrevAlloc struct {
	// allocID is the ID of the alloc being blocked
	allocID string

	// prevAllocID is the ID of the alloc being replaced
	prevAllocID string

	// tasks on the new alloc
	tasks []*structs.Task

	// sticky is true if data should be moved
	sticky bool

	// prevAllocDir is the alloc dir for the previous alloc
	prevAllocDir *allocdir.AllocDir

	// prevListener allows blocking for updates to the previous alloc
	prevListener *cstructs.AllocListener

	// prevStatus allows checking if the previous alloc has already
	// terminated (and therefore won't send updates to the listener)
	prevStatus terminated

	// prevWaitCh is closed when the previous alloc is garbage collected
	// which is a failsafe against blocking the new alloc forever
	prevWaitCh <-chan struct{}

	// waiting and migrating are true when alloc runner is waiting on the
	// prevAllocWatcher. Writers must acquire the waitingLock and readers
	// should use the helper methods IsWaiting and IsMigrating.
	waiting     bool
	migrating   bool
	waitingLock sync.RWMutex

	logger *log.Logger
}

// IsWaiting returns true if there's a concurrent call inside Wait
func (p *localPrevAlloc) IsWaiting() bool {
	p.waitingLock.RLock()
	b := p.waiting
	p.waitingLock.RUnlock()
	return b
}

// IsMigrating returns true if there's a concurrent call inside Migrate
func (p *localPrevAlloc) IsMigrating() bool {
	p.waitingLock.RLock()
	b := p.migrating
	p.waitingLock.RUnlock()
	return b
}

// Wait on a local alloc to become terminal, exit, or the context to be done.
func (p *localPrevAlloc) Wait(ctx context.Context) error {
	p.waitingLock.Lock()
	p.waiting = true
	p.waitingLock.Unlock()
	defer func() {
		p.waitingLock.Lock()
		p.waiting = false
		p.waitingLock.Unlock()
	}()

	defer p.prevListener.Close()

	if p.prevStatus.Terminated() {
		// Fast path - previous alloc already terminated!
		return nil
	}

	// Block until previous alloc exits
	p.logger.Printf("[DEBUG] client: alloc %q waiting for previous alloc %q to terminate", p.allocID, p.prevAllocID)
	for {
		select {
		case prevAlloc, ok := <-p.prevListener.Ch:
			if !ok || prevAlloc.Terminated() {
				return nil
			}
		case <-p.prevWaitCh:
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}

// Migrate from previous local alloc dir to destination alloc dir.
func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
	if !p.sticky {
		// Not a sticky volume, nothing to migrate
		return nil
	}

	p.waitingLock.Lock()
	p.migrating = true
	p.waitingLock.Unlock()
	defer func() {
		p.waitingLock.Lock()
		p.migrating = false
		p.waitingLock.Unlock()
	}()

	p.logger.Printf("[DEBUG] client: alloc %q copying previous alloc %q", p.allocID, p.prevAllocID)

	moveErr := dest.Move(p.prevAllocDir, p.tasks)

	// Always cleanup previous alloc
	if err := p.prevAllocDir.Destroy(); err != nil {
		p.logger.Printf("[ERR] client: error destroying allocdir %v: %v", p.prevAllocDir.AllocDir, err)
	}

	return moveErr
}
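// Illustrative sketch (not part of the original file): local "sticky" data
// migration amounts to building a destination alloc dir and moving the
// previous allocation's task and shared data into it, mirroring what Migrate
// does above. The destination path below is hypothetical; the allocdir calls
// are the same ones already used in this file.
func exampleLocalMove(logger *log.Logger, prev *allocdir.AllocDir, tasks []*structs.Task) error {
	// Hypothetical destination directory for the replacement alloc.
	dest := allocdir.NewAllocDir(logger, "/var/nomad/alloc/new-alloc-id")
	if err := dest.Build(); err != nil {
		return err
	}

	// Move data from the previous alloc dir, then always clean it up.
	moveErr := dest.Move(prev, tasks)
	if err := prev.Destroy(); err != nil {
		logger.Printf("[ERR] example: error destroying allocdir %v: %v", prev.AllocDir, err)
	}
	return moveErr
}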
// remotePrevAlloc is a prevAllocWatcher for previous allocations on a remote
// node from an updated allocation.
type remotePrevAlloc struct {
	// allocID is the ID of the alloc being blocked
	allocID string

	// prevAllocID is the ID of the alloc being replaced
	prevAllocID string

	// tasks on the new alloc
	tasks []*structs.Task

	// config for the Client to get AllocDir, Region, and Node.SecretID
	config *config.Config

	// migrate is true if data should be moved between nodes
	migrate bool

	// rpc provides an RPC method for watching for updates to the previous
	// alloc and determining what node it was on.
	rpc rpcer

	// nodeID is the ID of the node the previous alloc ran on. Set by
	// Wait() for use in Migrate() iff the previous alloc has not already
	// been GC'd.
	nodeID string

	// waiting and migrating are true when alloc runner is waiting on the
	// prevAllocWatcher. Writers must acquire the waitingLock and readers
	// should use the helper methods IsWaiting and IsMigrating.
	waiting     bool
	migrating   bool
	waitingLock sync.RWMutex

	logger *log.Logger

	// migrateToken allows a client to migrate data in an ACL-protected remote
	// volume
	migrateToken string
}

// IsWaiting returns true if there's a concurrent call inside Wait
func (p *remotePrevAlloc) IsWaiting() bool {
	p.waitingLock.RLock()
	b := p.waiting
	p.waitingLock.RUnlock()
	return b
}

// IsMigrating returns true if there's a concurrent call inside Migrate
func (p *remotePrevAlloc) IsMigrating() bool {
	p.waitingLock.RLock()
	b := p.migrating
	p.waitingLock.RUnlock()
	return b
}

// Wait until the remote previous allocation has terminated.
func (p *remotePrevAlloc) Wait(ctx context.Context) error {
	p.waitingLock.Lock()
	p.waiting = true
	p.waitingLock.Unlock()
	defer func() {
		p.waitingLock.Lock()
		p.waiting = false
		p.waitingLock.Unlock()
	}()

	p.logger.Printf("[DEBUG] client: alloc %q waiting for remote previous alloc %q to terminate", p.allocID, p.prevAllocID)
	req := structs.AllocSpecificRequest{
		AllocID: p.prevAllocID,
		QueryOptions: structs.QueryOptions{
			Region:     p.config.Region,
			AllowStale: true,
			AuthToken:  p.config.Node.SecretID,
		},
	}

	done := func() bool {
		select {
		case <-ctx.Done():
			return true
		default:
			return false
		}
	}

	for !done() {
		resp := structs.SingleAllocResponse{}
		err := p.rpc.RPC("Alloc.GetAlloc", &req, &resp)
		if err != nil {
			p.logger.Printf("[ERR] client: failed to query previous alloc %q: %v", p.prevAllocID, err)
			retry := getRemoteRetryIntv + lib.RandomStagger(getRemoteRetryIntv)
			select {
			case <-time.After(retry):
				continue
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		if resp.Alloc == nil {
			p.logger.Printf("[DEBUG] client: blocking alloc %q has been GC'd", p.prevAllocID)
			return nil
		}
		if resp.Alloc.Terminated() {
			// Terminated!
			p.nodeID = resp.Alloc.NodeID
			return nil
		}

		// Update the query index and requery.
		if resp.Index > req.MinQueryIndex {
			req.MinQueryIndex = resp.Index
		}
	}

	return ctx.Err()
}
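// Illustrative sketch (not part of the original file): when a query fails,
// Wait above retries after the base getRemoteRetryIntv plus a random stagger
// of up to the same amount, i.e. a duration in the [30s, 60s) range described
// by the constant's comment. The helper name is hypothetical.
func exampleRetryInterval() time.Duration {
	// lib.RandomStagger(d) returns a random duration in [0, d).
	return getRemoteRetryIntv + lib.RandomStagger(getRemoteRetryIntv)
}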
// Migrate alloc data from a remote node if the new alloc has migration enabled
// and the old alloc hasn't been GC'd.
func (p *remotePrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
	if !p.migrate {
		// Volume wasn't configured to be migrated, return early
		return nil
	}

	p.waitingLock.Lock()
	p.migrating = true
	p.waitingLock.Unlock()
	defer func() {
		p.waitingLock.Lock()
		p.migrating = false
		p.waitingLock.Unlock()
	}()

	p.logger.Printf("[DEBUG] client: alloc %q copying from remote previous alloc %q", p.allocID, p.prevAllocID)

	if p.nodeID == "" {
		// NodeID couldn't be found; likely alloc was GC'd
		p.logger.Printf("[WARN] client: alloc %q couldn't migrate data from previous alloc %q; previous alloc may have been GC'd",
			p.allocID, p.prevAllocID)
		return nil
	}

	addr, err := p.getNodeAddr(ctx, p.nodeID)
	if err != nil {
		return err
	}

	prevAllocDir, err := p.migrateAllocDir(ctx, addr)
	if err != nil {
		return err
	}

	if err := dest.Move(prevAllocDir, p.tasks); err != nil {
		// cleanup on error
		prevAllocDir.Destroy()
		return err
	}

	if err := prevAllocDir.Destroy(); err != nil {
		p.logger.Printf("[ERR] client: error destroying allocdir %q: %v", prevAllocDir.AllocDir, err)
	}
	return nil
}

// getNodeAddr gets the node from the server with the given Node ID
func (p *remotePrevAlloc) getNodeAddr(ctx context.Context, nodeID string) (string, error) {
	req := structs.NodeSpecificRequest{
		NodeID: nodeID,
		QueryOptions: structs.QueryOptions{
			Region:     p.config.Region,
			AllowStale: true,
			AuthToken:  p.config.Node.SecretID,
		},
	}

	resp := structs.SingleNodeResponse{}
	for {
		err := p.rpc.RPC("Node.GetNode", &req, &resp)
		if err != nil {
			p.logger.Printf("[ERR] client: failed to query node info %q: %v", nodeID, err)
			retry := getRemoteRetryIntv + lib.RandomStagger(getRemoteRetryIntv)
			select {
			case <-time.After(retry):
				continue
			case <-ctx.Done():
				return "", ctx.Err()
			}
		}
		break
	}

	if resp.Node == nil {
		return "", fmt.Errorf("node %q not found", nodeID)
	}

	scheme := "http://"
	if resp.Node.TLSEnabled {
		scheme = "https://"
	}
	return scheme + resp.Node.HTTPAddr, nil
}
// migrateAllocDir streams a remote alloc dir to the local node. The caller is
// responsible for calling Destroy on the returned allocdir if no error occurs.
func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string) (*allocdir.AllocDir, error) {
	// Create the previous alloc dir
	prevAllocDir := allocdir.NewAllocDir(p.logger, filepath.Join(p.config.AllocDir, p.prevAllocID))
	if err := prevAllocDir.Build(); err != nil {
		return nil, fmt.Errorf("error building alloc dir for previous alloc %q: %v", p.prevAllocID, err)
	}

	// Create an API client
	apiConfig := nomadapi.DefaultConfig()
	apiConfig.Address = nodeAddr
	apiConfig.TLSConfig = &nomadapi.TLSConfig{
		CACert:        p.config.TLSConfig.CAFile,
		ClientCert:    p.config.TLSConfig.CertFile,
		ClientKey:     p.config.TLSConfig.KeyFile,
		TLSServerName: fmt.Sprintf("client.%s.nomad", p.config.Region),
	}
	apiClient, err := nomadapi.NewClient(apiConfig)
	if err != nil {
		return nil, err
	}

	url := fmt.Sprintf("/v1/client/allocation/%v/snapshot", p.prevAllocID)
	qo := &nomadapi.QueryOptions{AuthToken: p.migrateToken}
	resp, err := apiClient.Raw().Response(url, qo)
	if err != nil {
		prevAllocDir.Destroy()
		return nil, fmt.Errorf("error getting snapshot from previous alloc %q: %v", p.prevAllocID, err)
	}

	if err := p.streamAllocDir(ctx, resp, prevAllocDir.AllocDir); err != nil {
		prevAllocDir.Destroy()
		return nil, err
	}

	return prevAllocDir, nil
}

// streamAllocDir streams a remote alloc dir tarball to a local path. The
// caller should clean up dest on error.
func (p *remotePrevAlloc) streamAllocDir(ctx context.Context, resp io.ReadCloser, dest string) error {
	p.logger.Printf("[DEBUG] client: alloc %q streaming snapshot of previous alloc %q to %q", p.allocID, p.prevAllocID, dest)
	tr := tar.NewReader(resp)
	defer resp.Close()

	// Cache effective uid as we only run Chown if we're root
	euid := syscall.Geteuid()

	canceled := func() bool {
		select {
		case <-ctx.Done():
			p.logger.Printf("[INFO] client: stopping migration of previous alloc %q for new alloc: %v",
				p.prevAllocID, p.allocID)
			return true
		default:
			return false
		}
	}

	// if we see this file, there was an error on the remote side
	errorFilename := allocdir.SnapshotErrorFilename(p.prevAllocID)

	buf := make([]byte, 1024)
	for !canceled() {
		// Get the next header
		hdr, err := tr.Next()

		// Snapshot has ended
		if err == io.EOF {
			return nil
		}

		if err != nil {
			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %v",
				p.prevAllocID, p.allocID, err)
		}

		if hdr.Name == errorFilename {
			// Error snapshotting on the remote side, try to read
			// the message out of the file and return it.
			errBuf := make([]byte, int(hdr.Size))
			if _, err := tr.Read(errBuf); err != nil && err != io.EOF {
				return fmt.Errorf("error streaming previous alloc %q for new alloc %q; failed reading error message: %v",
					p.prevAllocID, p.allocID, err)
			}
			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %s",
				p.prevAllocID, p.allocID, string(errBuf))
		}

		// If the header is for a directory we create the directory
		if hdr.Typeflag == tar.TypeDir {
			name := filepath.Join(dest, hdr.Name)
			os.MkdirAll(name, os.FileMode(hdr.Mode))

			// Can't change owner if not root or on Windows.
			if euid == 0 {
				if err := os.Chown(name, hdr.Uid, hdr.Gid); err != nil {
					return fmt.Errorf("error chowning directory %v", err)
				}
			}
			continue
		}
		// If the header is for a symlink we create the symlink
		if hdr.Typeflag == tar.TypeSymlink {
			if err = os.Symlink(hdr.Linkname, filepath.Join(dest, hdr.Name)); err != nil {
				return fmt.Errorf("error creating symlink: %v", err)
			}
			continue
		}
		// If the header is a file, we write to a file
		if hdr.Typeflag == tar.TypeReg {
			f, err := os.Create(filepath.Join(dest, hdr.Name))
			if err != nil {
				return fmt.Errorf("error creating file: %v", err)
			}

			// Set the file's permissions to match the original.
			if err := f.Chmod(os.FileMode(hdr.Mode)); err != nil {
				f.Close()
				return fmt.Errorf("error chmoding file %v", err)
			}

			// Can't change owner if not root or on Windows.
			if euid == 0 {
				if err := f.Chown(hdr.Uid, hdr.Gid); err != nil {
					f.Close()
					return fmt.Errorf("error chowning file %v", err)
				}
			}

			// We write in chunks so that we can test if the client
			// is still alive
			for !canceled() {
				n, err := tr.Read(buf)
				if n > 0 && (err == nil || err == io.EOF) {
					if _, err := f.Write(buf[:n]); err != nil {
						f.Close()
						return fmt.Errorf("error writing to file %q: %v", f.Name(), err)
					}
				}

				if err != nil {
					f.Close()
					if err != io.EOF {
						return fmt.Errorf("error reading snapshot: %v", err)
					}
					break
				}
			}

		}
	}

	if canceled() {
		return ctx.Err()
	}

	return nil
}

// NoopPrevAlloc does not block or migrate on a previous allocation and never
// returns an error.
type NoopPrevAlloc struct{}

// Wait returns nil immediately.
func (NoopPrevAlloc) Wait(context.Context) error { return nil }

// Migrate returns nil immediately.
func (NoopPrevAlloc) Migrate(context.Context, *allocdir.AllocDir) error { return nil }

func (NoopPrevAlloc) IsWaiting() bool   { return false }
func (NoopPrevAlloc) IsMigrating() bool { return false }
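// Illustrative addition (not part of the original file): compile-time
// assertions that each implementation above satisfies prevAllocWatcher.
var (
	_ prevAllocWatcher = (*localPrevAlloc)(nil)
	_ prevAllocWatcher = (*remotePrevAlloc)(nil)
	_ prevAllocWatcher = NoopPrevAlloc{}
)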