github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/client/alloc_watcher.go (about) 1 package client 2 3 import ( 4 "archive/tar" 5 "context" 6 "fmt" 7 "io" 8 "log" 9 "os" 10 "path/filepath" 11 "sync" 12 "syscall" 13 "time" 14 15 "github.com/hashicorp/consul/lib" 16 nomadapi "github.com/hashicorp/nomad/api" 17 "github.com/hashicorp/nomad/client/allocdir" 18 "github.com/hashicorp/nomad/client/config" 19 cstructs "github.com/hashicorp/nomad/client/structs" 20 "github.com/hashicorp/nomad/nomad/structs" 21 ) 22 23 // rpcer is the interface needed by a prevAllocWatcher to make RPC calls. 24 type rpcer interface { 25 // RPC allows retrieving remote allocs. 26 RPC(method string, args interface{}, reply interface{}) error 27 } 28 29 // terminated is the interface needed by a prevAllocWatcher to check if an 30 // alloc is terminated. 31 type terminated interface { 32 Terminated() bool 33 } 34 35 // prevAllocWatcher allows AllocRunners to wait for a previous allocation to 36 // terminate and migrate its data whether or not the previous allocation is 37 // local or remote. 38 type prevAllocWatcher interface { 39 // Wait for previous alloc to terminate 40 Wait(context.Context) error 41 42 // Migrate data from previous alloc 43 Migrate(ctx context.Context, dest *allocdir.AllocDir) error 44 45 // IsWaiting returns true if a concurrent caller is blocked in Wait 46 IsWaiting() bool 47 48 // IsMigrating returns true if a concurrent caller is in Migrate 49 IsMigrating() bool 50 } 51 52 // newAllocWatcher creates a prevAllocWatcher appropriate for whether this 53 // alloc's previous allocation was local or remote. If this alloc has no 54 // previous alloc then a noop implementation is returned. 55 func newAllocWatcher(alloc *structs.Allocation, prevAR *AllocRunner, rpc rpcer, config *config.Config, l *log.Logger, migrateToken string) prevAllocWatcher { 56 if alloc.PreviousAllocation == "" { 57 // No previous allocation, use noop transitioner 58 return noopPrevAlloc{} 59 } 60 61 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 62 63 if prevAR != nil { 64 // Previous allocation is local, use local transitioner 65 return &localPrevAlloc{ 66 allocID: alloc.ID, 67 prevAllocID: alloc.PreviousAllocation, 68 tasks: tg.Tasks, 69 sticky: tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky, 70 prevAllocDir: prevAR.GetAllocDir(), 71 prevListener: prevAR.GetListener(), 72 prevWaitCh: prevAR.WaitCh(), 73 prevStatus: prevAR.Alloc(), 74 logger: l, 75 } 76 } 77 78 return &remotePrevAlloc{ 79 allocID: alloc.ID, 80 prevAllocID: alloc.PreviousAllocation, 81 tasks: tg.Tasks, 82 config: config, 83 migrate: tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate, 84 rpc: rpc, 85 logger: l, 86 migrateToken: migrateToken, 87 } 88 } 89 90 // localPrevAlloc is a prevAllocWatcher for previous allocations on the same 91 // node as an updated allocation. 92 type localPrevAlloc struct { 93 // allocID is the ID of the alloc being blocked 94 allocID string 95 96 // prevAllocID is the ID of the alloc being replaced 97 prevAllocID string 98 99 // tasks on the new alloc 100 tasks []*structs.Task 101 102 // sticky is true if data should be moved 103 sticky bool 104 105 // prevAllocDir is the alloc dir for the previous alloc 106 prevAllocDir *allocdir.AllocDir 107 108 // prevListener allows blocking for updates to the previous alloc 109 prevListener *cstructs.AllocListener 110 111 // prevStatus allows checking if the previous alloc has already 112 // terminated (and therefore won't send updates to the listener) 113 prevStatus terminated 114 115 // prevWaitCh is closed when the previous alloc is garbage collected 116 // which is a failsafe against blocking the new alloc forever 117 prevWaitCh <-chan struct{} 118 119 // waiting and migrating are true when alloc runner is waiting on the 120 // prevAllocWatcher. Writers must acquire the waitingLock and readers 121 // should use the helper methods IsWaiting and IsMigrating. 122 waiting bool 123 migrating bool 124 waitingLock sync.RWMutex 125 126 logger *log.Logger 127 } 128 129 // IsWaiting returns true if there's a concurrent call inside Wait 130 func (p *localPrevAlloc) IsWaiting() bool { 131 p.waitingLock.RLock() 132 b := p.waiting 133 p.waitingLock.RUnlock() 134 return b 135 } 136 137 // IsMigrating returns true if there's a concurrent call inside Migrate 138 func (p *localPrevAlloc) IsMigrating() bool { 139 p.waitingLock.RLock() 140 b := p.migrating 141 p.waitingLock.RUnlock() 142 return b 143 } 144 145 // Wait on a local alloc to become terminal, exit, or the context to be done. 146 func (p *localPrevAlloc) Wait(ctx context.Context) error { 147 p.waitingLock.Lock() 148 p.waiting = true 149 p.waitingLock.Unlock() 150 defer func() { 151 p.waitingLock.Lock() 152 p.waiting = false 153 p.waitingLock.Unlock() 154 }() 155 156 defer p.prevListener.Close() 157 158 if p.prevStatus.Terminated() { 159 // Fast path - previous alloc already terminated! 160 return nil 161 } 162 163 // Block until previous alloc exits 164 p.logger.Printf("[DEBUG] client: alloc %q waiting for previous alloc %q to terminate", p.allocID, p.prevAllocID) 165 for { 166 select { 167 case prevAlloc, ok := <-p.prevListener.Ch: 168 if !ok || prevAlloc.Terminated() { 169 return nil 170 } 171 case <-p.prevWaitCh: 172 return nil 173 case <-ctx.Done(): 174 return ctx.Err() 175 } 176 } 177 } 178 179 // Migrate from previous local alloc dir to destination alloc dir. 180 func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error { 181 if !p.sticky { 182 // Not a sticky volume, nothing to migrate 183 return nil 184 } 185 186 p.waitingLock.Lock() 187 p.migrating = true 188 p.waitingLock.Unlock() 189 defer func() { 190 p.waitingLock.Lock() 191 p.migrating = false 192 p.waitingLock.Unlock() 193 }() 194 195 p.logger.Printf("[DEBUG] client: alloc %q copying previous alloc %q", p.allocID, p.prevAllocID) 196 197 moveErr := dest.Move(p.prevAllocDir, p.tasks) 198 199 // Always cleanup previous alloc 200 if err := p.prevAllocDir.Destroy(); err != nil { 201 p.logger.Printf("[ERR] client: error destroying allocdir %v: %v", p.prevAllocDir.AllocDir, err) 202 } 203 204 return moveErr 205 } 206 207 // remotePrevAlloc is a prevAllcWatcher for previous allocations on remote 208 // nodes as an updated allocation. 209 type remotePrevAlloc struct { 210 // allocID is the ID of the alloc being blocked 211 allocID string 212 213 // prevAllocID is the ID of the alloc being replaced 214 prevAllocID string 215 216 // tasks on the new alloc 217 tasks []*structs.Task 218 219 // config for the Client to get AllocDir, Region, and Node.SecretID 220 config *config.Config 221 222 // migrate is true if data should be moved between nodes 223 migrate bool 224 225 // rpc provides an RPC method for watching for updates to the previous 226 // alloc and determining what node it was on. 227 rpc rpcer 228 229 // nodeID is the node the previous alloc. Set by Wait() for use in 230 // Migrate() iff the previous alloc has not already been GC'd. 231 nodeID string 232 233 // waiting and migrating are true when alloc runner is waiting on the 234 // prevAllocWatcher. Writers must acquire the waitingLock and readers 235 // should use the helper methods IsWaiting and IsMigrating. 236 waiting bool 237 migrating bool 238 waitingLock sync.RWMutex 239 240 logger *log.Logger 241 242 // migrateToken allows a client to migrate data in an ACL-protected remote 243 // volume 244 migrateToken string 245 } 246 247 // IsWaiting returns true if there's a concurrent call inside Wait 248 func (p *remotePrevAlloc) IsWaiting() bool { 249 p.waitingLock.RLock() 250 b := p.waiting 251 p.waitingLock.RUnlock() 252 return b 253 } 254 255 // IsMigrating returns true if there's a concurrent call inside Migrate 256 func (p *remotePrevAlloc) IsMigrating() bool { 257 p.waitingLock.RLock() 258 b := p.migrating 259 p.waitingLock.RUnlock() 260 return b 261 } 262 263 // Wait until the remote previousl allocation has terminated. 264 func (p *remotePrevAlloc) Wait(ctx context.Context) error { 265 p.waitingLock.Lock() 266 p.waiting = true 267 p.waitingLock.Unlock() 268 defer func() { 269 p.waitingLock.Lock() 270 p.waiting = false 271 p.waitingLock.Unlock() 272 }() 273 274 p.logger.Printf("[DEBUG] client: alloc %q waiting for remote previous alloc %q to terminate", p.allocID, p.prevAllocID) 275 req := structs.AllocSpecificRequest{ 276 AllocID: p.prevAllocID, 277 QueryOptions: structs.QueryOptions{ 278 Region: p.config.Region, 279 AllowStale: true, 280 AuthToken: p.config.Node.SecretID, 281 }, 282 } 283 284 done := func() bool { 285 select { 286 case <-ctx.Done(): 287 return true 288 default: 289 return false 290 } 291 } 292 293 for !done() { 294 resp := structs.SingleAllocResponse{} 295 err := p.rpc.RPC("Alloc.GetAlloc", &req, &resp) 296 if err != nil { 297 p.logger.Printf("[ERR] client: failed to query previous alloc %q: %v", p.prevAllocID, err) 298 retry := getAllocRetryIntv + lib.RandomStagger(getAllocRetryIntv) 299 select { 300 case <-time.After(retry): 301 continue 302 case <-ctx.Done(): 303 return ctx.Err() 304 } 305 } 306 if resp.Alloc == nil { 307 p.logger.Printf("[DEBUG] client: blocking alloc %q has been GC'd", p.prevAllocID) 308 return nil 309 } 310 if resp.Alloc.Terminated() { 311 // Terminated! 312 p.nodeID = resp.Alloc.NodeID 313 return nil 314 } 315 316 // Update the query index and requery. 317 if resp.Index > req.MinQueryIndex { 318 req.MinQueryIndex = resp.Index 319 } 320 } 321 322 return ctx.Err() 323 } 324 325 // Migrate alloc data from a remote node if the new alloc has migration enabled 326 // and the old alloc hasn't been GC'd. 327 func (p *remotePrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error { 328 if !p.migrate { 329 // Volume wasn't configured to be migrated, return early 330 return nil 331 } 332 333 p.waitingLock.Lock() 334 p.migrating = true 335 p.waitingLock.Unlock() 336 defer func() { 337 p.waitingLock.Lock() 338 p.migrating = false 339 p.waitingLock.Unlock() 340 }() 341 342 p.logger.Printf("[DEBUG] client: alloc %q copying from remote previous alloc %q", p.allocID, p.prevAllocID) 343 344 if p.nodeID == "" { 345 // NodeID couldn't be found; likely alloc was GC'd 346 p.logger.Printf("[WARN] client: alloc %q couldn't migrate data from previous alloc %q; previous alloc may have been GC'd", 347 p.allocID, p.prevAllocID) 348 return nil 349 } 350 351 addr, err := p.getNodeAddr(ctx, p.nodeID) 352 if err != nil { 353 return err 354 } 355 356 prevAllocDir, err := p.migrateAllocDir(ctx, addr) 357 if err != nil { 358 return err 359 } 360 361 if err := dest.Move(prevAllocDir, p.tasks); err != nil { 362 // cleanup on error 363 prevAllocDir.Destroy() 364 return err 365 } 366 367 if err := prevAllocDir.Destroy(); err != nil { 368 p.logger.Printf("[ERR] client: error destroying allocdir %q: %v", prevAllocDir.AllocDir, err) 369 } 370 return nil 371 } 372 373 // getNodeAddr gets the node from the server with the given Node ID 374 func (p *remotePrevAlloc) getNodeAddr(ctx context.Context, nodeID string) (string, error) { 375 req := structs.NodeSpecificRequest{ 376 NodeID: nodeID, 377 QueryOptions: structs.QueryOptions{ 378 Region: p.config.Region, 379 AllowStale: true, 380 AuthToken: p.config.Node.SecretID, 381 }, 382 } 383 384 resp := structs.SingleNodeResponse{} 385 for { 386 err := p.rpc.RPC("Node.GetNode", &req, &resp) 387 if err != nil { 388 p.logger.Printf("[ERR] client: failed to query node info %q: %v", nodeID, err) 389 retry := getAllocRetryIntv + lib.RandomStagger(getAllocRetryIntv) 390 select { 391 case <-time.After(retry): 392 continue 393 case <-ctx.Done(): 394 return "", ctx.Err() 395 } 396 } 397 break 398 } 399 400 if resp.Node == nil { 401 return "", fmt.Errorf("node %q not found", nodeID) 402 } 403 404 scheme := "http://" 405 if resp.Node.TLSEnabled { 406 scheme = "https://" 407 } 408 return scheme + resp.Node.HTTPAddr, nil 409 } 410 411 // migrate a remote alloc dir to local node. Caller is responsible for calling 412 // Destroy on the returned allocdir if no error occurs. 413 func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string) (*allocdir.AllocDir, error) { 414 // Create the previous alloc dir 415 prevAllocDir := allocdir.NewAllocDir(p.logger, filepath.Join(p.config.AllocDir, p.prevAllocID)) 416 if err := prevAllocDir.Build(); err != nil { 417 return nil, fmt.Errorf("error building alloc dir for previous alloc %q: %v", p.prevAllocID, err) 418 } 419 420 // Create an API client 421 apiConfig := nomadapi.DefaultConfig() 422 apiConfig.Address = nodeAddr 423 apiConfig.TLSConfig = &nomadapi.TLSConfig{ 424 CACert: p.config.TLSConfig.CAFile, 425 ClientCert: p.config.TLSConfig.CertFile, 426 ClientKey: p.config.TLSConfig.KeyFile, 427 } 428 apiClient, err := nomadapi.NewClient(apiConfig) 429 if err != nil { 430 return nil, err 431 } 432 433 url := fmt.Sprintf("/v1/client/allocation/%v/snapshot", p.prevAllocID) 434 qo := &nomadapi.QueryOptions{AuthToken: p.migrateToken} 435 resp, err := apiClient.Raw().Response(url, qo) 436 if err != nil { 437 prevAllocDir.Destroy() 438 return nil, fmt.Errorf("error getting snapshot from previous alloc %q: %v", p.prevAllocID, err) 439 } 440 441 if err := p.streamAllocDir(ctx, resp, prevAllocDir.AllocDir); err != nil { 442 prevAllocDir.Destroy() 443 return nil, err 444 } 445 446 return prevAllocDir, nil 447 } 448 449 // stream remote alloc to dir to a local path. Caller should cleanup dest on 450 // error. 451 func (p *remotePrevAlloc) streamAllocDir(ctx context.Context, resp io.ReadCloser, dest string) error { 452 p.logger.Printf("[DEBUG] client: alloc %q streaming snapshot of previous alloc %q to %q", p.allocID, p.prevAllocID, dest) 453 tr := tar.NewReader(resp) 454 defer resp.Close() 455 456 // Cache effective uid as we only run Chown if we're root 457 euid := syscall.Geteuid() 458 459 canceled := func() bool { 460 select { 461 case <-ctx.Done(): 462 p.logger.Printf("[INFO] client: stopping migration of previous alloc %q for new alloc: %v", 463 p.prevAllocID, p.allocID) 464 return true 465 default: 466 return false 467 } 468 } 469 470 // if we see this file, there was an error on the remote side 471 errorFilename := allocdir.SnapshotErrorFilename(p.prevAllocID) 472 473 buf := make([]byte, 1024) 474 for !canceled() { 475 // Get the next header 476 hdr, err := tr.Next() 477 478 // Snapshot has ended 479 if err == io.EOF { 480 return nil 481 } 482 483 if err != nil { 484 return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %v", 485 p.prevAllocID, p.allocID, err) 486 } 487 488 if hdr.Name == errorFilename { 489 // Error snapshotting on the remote side, try to read 490 // the message out of the file and return it. 491 errBuf := make([]byte, int(hdr.Size)) 492 if _, err := tr.Read(errBuf); err != nil { 493 return fmt.Errorf("error streaming previous alloc %q for new alloc %q; failed reading error message: %v", 494 p.prevAllocID, p.allocID, err) 495 } 496 return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %s", 497 p.prevAllocID, p.allocID, string(errBuf)) 498 } 499 500 // If the header is for a directory we create the directory 501 if hdr.Typeflag == tar.TypeDir { 502 os.MkdirAll(filepath.Join(dest, hdr.Name), os.FileMode(hdr.Mode)) 503 continue 504 } 505 // If the header is for a symlink we create the symlink 506 if hdr.Typeflag == tar.TypeSymlink { 507 if err = os.Symlink(hdr.Linkname, filepath.Join(dest, hdr.Name)); err != nil { 508 return fmt.Errorf("error creating symlink: %v", err) 509 } 510 continue 511 } 512 // If the header is a file, we write to a file 513 if hdr.Typeflag == tar.TypeReg { 514 f, err := os.Create(filepath.Join(dest, hdr.Name)) 515 if err != nil { 516 return fmt.Errorf("error creating file: %v", err) 517 } 518 519 // Setting the permissions of the file as the origin. 520 if err := f.Chmod(os.FileMode(hdr.Mode)); err != nil { 521 f.Close() 522 return fmt.Errorf("error chmoding file %v", err) 523 } 524 525 // Can't change owner if not root. Returns false on 526 // Windows as Chown always errors there. 527 if euid == 0 { 528 if err := f.Chown(hdr.Uid, hdr.Gid); err != nil { 529 f.Close() 530 return fmt.Errorf("error chowning file %v", err) 531 } 532 } 533 534 // We write in chunks so that we can test if the client 535 // is still alive 536 for !canceled() { 537 n, err := tr.Read(buf) 538 if err != nil { 539 f.Close() 540 if err != io.EOF { 541 return fmt.Errorf("error reading snapshot: %v", err) 542 } 543 break 544 } 545 if _, err := f.Write(buf[:n]); err != nil { 546 f.Close() 547 return fmt.Errorf("error writing to file %q: %v", f.Name(), err) 548 } 549 } 550 551 } 552 } 553 554 if canceled() { 555 return ctx.Err() 556 } 557 558 return nil 559 } 560 561 // noopPrevAlloc does not block or migrate on a previous allocation and never 562 // returns an error. 563 type noopPrevAlloc struct{} 564 565 // Wait returns nil immediately. 566 func (noopPrevAlloc) Wait(context.Context) error { return nil } 567 568 // Migrate returns nil immediately. 569 func (noopPrevAlloc) Migrate(context.Context, *allocdir.AllocDir) error { return nil } 570 571 func (noopPrevAlloc) IsWaiting() bool { return false } 572 func (noopPrevAlloc) IsMigrating() bool { return false }