github.com/djenriquez/nomad-1@v0.8.1/client/alloc_watcher.go (about)

     1  package client
     2  
     3  import (
     4  	"archive/tar"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"os"
    10  	"path/filepath"
    11  	"sync"
    12  	"syscall"
    13  	"time"
    14  
    15  	"github.com/hashicorp/consul/lib"
    16  	nomadapi "github.com/hashicorp/nomad/api"
    17  	"github.com/hashicorp/nomad/client/allocdir"
    18  	"github.com/hashicorp/nomad/client/config"
    19  	cstructs "github.com/hashicorp/nomad/client/structs"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  )
    22  
    23  // rpcer is the interface needed by a prevAllocWatcher to make RPC calls.
    24  type rpcer interface {
    25  	// RPC allows retrieving remote allocs.
    26  	RPC(method string, args interface{}, reply interface{}) error
    27  }
    28  
    29  // terminated is the interface needed by a prevAllocWatcher to check if an
    30  // alloc is terminated.
    31  type terminated interface {
    32  	Terminated() bool
    33  }
    34  
    35  // prevAllocWatcher allows AllocRunners to wait for a previous allocation to
    36  // terminate and migrate its data whether or not the previous allocation is
    37  // local or remote.
    38  type prevAllocWatcher interface {
    39  	// Wait for previous alloc to terminate
    40  	Wait(context.Context) error
    41  
    42  	// Migrate data from previous alloc
    43  	Migrate(ctx context.Context, dest *allocdir.AllocDir) error
    44  
    45  	// IsWaiting returns true if a concurrent caller is blocked in Wait
    46  	IsWaiting() bool
    47  
    48  	// IsMigrating returns true if a concurrent caller is in Migrate
    49  	IsMigrating() bool
    50  }
    51  
    52  // newAllocWatcher creates a prevAllocWatcher appropriate for whether this
    53  // alloc's previous allocation was local or remote. If this alloc has no
    54  // previous alloc then a noop implementation is returned.
    55  func newAllocWatcher(alloc *structs.Allocation, prevAR *AllocRunner, rpc rpcer, config *config.Config, l *log.Logger, migrateToken string) prevAllocWatcher {
    56  	if alloc.PreviousAllocation == "" {
    57  		// No previous allocation, use noop transitioner
    58  		return noopPrevAlloc{}
    59  	}
    60  
    61  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
    62  
    63  	if prevAR != nil {
    64  		// Previous allocation is local, use local transitioner
    65  		return &localPrevAlloc{
    66  			allocID:      alloc.ID,
    67  			prevAllocID:  alloc.PreviousAllocation,
    68  			tasks:        tg.Tasks,
    69  			sticky:       tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky,
    70  			prevAllocDir: prevAR.GetAllocDir(),
    71  			prevListener: prevAR.GetListener(),
    72  			prevWaitCh:   prevAR.WaitCh(),
    73  			prevStatus:   prevAR.Alloc(),
    74  			logger:       l,
    75  		}
    76  	}
    77  
    78  	return &remotePrevAlloc{
    79  		allocID:      alloc.ID,
    80  		prevAllocID:  alloc.PreviousAllocation,
    81  		tasks:        tg.Tasks,
    82  		config:       config,
    83  		migrate:      tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate,
    84  		rpc:          rpc,
    85  		logger:       l,
    86  		migrateToken: migrateToken,
    87  	}
    88  }
    89  
    90  // localPrevAlloc is a prevAllocWatcher for previous allocations on the same
    91  // node as an updated allocation.
    92  type localPrevAlloc struct {
    93  	// allocID is the ID of the alloc being blocked
    94  	allocID string
    95  
    96  	// prevAllocID is the ID of the alloc being replaced
    97  	prevAllocID string
    98  
    99  	// tasks on the new alloc
   100  	tasks []*structs.Task
   101  
   102  	// sticky is true if data should be moved
   103  	sticky bool
   104  
   105  	// prevAllocDir is the alloc dir for the previous alloc
   106  	prevAllocDir *allocdir.AllocDir
   107  
   108  	// prevListener allows blocking for updates to the previous alloc
   109  	prevListener *cstructs.AllocListener
   110  
   111  	// prevStatus allows checking if the previous alloc has already
   112  	// terminated (and therefore won't send updates to the listener)
   113  	prevStatus terminated
   114  
   115  	// prevWaitCh is closed when the previous alloc is garbage collected
   116  	// which is a failsafe against blocking the new alloc forever
   117  	prevWaitCh <-chan struct{}
   118  
   119  	// waiting and migrating are true when alloc runner is waiting on the
   120  	// prevAllocWatcher. Writers must acquire the waitingLock and readers
   121  	// should use the helper methods IsWaiting and IsMigrating.
   122  	waiting     bool
   123  	migrating   bool
   124  	waitingLock sync.RWMutex
   125  
   126  	logger *log.Logger
   127  }
   128  
   129  // IsWaiting returns true if there's a concurrent call inside Wait
   130  func (p *localPrevAlloc) IsWaiting() bool {
   131  	p.waitingLock.RLock()
   132  	b := p.waiting
   133  	p.waitingLock.RUnlock()
   134  	return b
   135  }
   136  
   137  // IsMigrating returns true if there's a concurrent call inside Migrate
   138  func (p *localPrevAlloc) IsMigrating() bool {
   139  	p.waitingLock.RLock()
   140  	b := p.migrating
   141  	p.waitingLock.RUnlock()
   142  	return b
   143  }
   144  
   145  // Wait on a local alloc to become terminal, exit, or the context to be done.
   146  func (p *localPrevAlloc) Wait(ctx context.Context) error {
   147  	p.waitingLock.Lock()
   148  	p.waiting = true
   149  	p.waitingLock.Unlock()
   150  	defer func() {
   151  		p.waitingLock.Lock()
   152  		p.waiting = false
   153  		p.waitingLock.Unlock()
   154  	}()
   155  
   156  	defer p.prevListener.Close()
   157  
   158  	if p.prevStatus.Terminated() {
   159  		// Fast path - previous alloc already terminated!
   160  		return nil
   161  	}
   162  
   163  	// Block until previous alloc exits
   164  	p.logger.Printf("[DEBUG] client: alloc %q waiting for previous alloc %q to terminate", p.allocID, p.prevAllocID)
   165  	for {
   166  		select {
   167  		case prevAlloc, ok := <-p.prevListener.Ch:
   168  			if !ok || prevAlloc.Terminated() {
   169  				return nil
   170  			}
   171  		case <-p.prevWaitCh:
   172  			return nil
   173  		case <-ctx.Done():
   174  			return ctx.Err()
   175  		}
   176  	}
   177  }
   178  
   179  // Migrate from previous local alloc dir to destination alloc dir.
   180  func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
   181  	if !p.sticky {
   182  		// Not a sticky volume, nothing to migrate
   183  		return nil
   184  	}
   185  
   186  	p.waitingLock.Lock()
   187  	p.migrating = true
   188  	p.waitingLock.Unlock()
   189  	defer func() {
   190  		p.waitingLock.Lock()
   191  		p.migrating = false
   192  		p.waitingLock.Unlock()
   193  	}()
   194  
   195  	p.logger.Printf("[DEBUG] client: alloc %q copying previous alloc %q", p.allocID, p.prevAllocID)
   196  
   197  	moveErr := dest.Move(p.prevAllocDir, p.tasks)
   198  
   199  	// Always cleanup previous alloc
   200  	if err := p.prevAllocDir.Destroy(); err != nil {
   201  		p.logger.Printf("[ERR] client: error destroying allocdir %v: %v", p.prevAllocDir.AllocDir, err)
   202  	}
   203  
   204  	return moveErr
   205  }
   206  
   207  // remotePrevAlloc is a prevAllocWatcher for previous allocations on remote
   208  // nodes as an updated allocation.
   209  type remotePrevAlloc struct {
   210  	// allocID is the ID of the alloc being blocked
   211  	allocID string
   212  
   213  	// prevAllocID is the ID of the alloc being replaced
   214  	prevAllocID string
   215  
   216  	// tasks on the new alloc
   217  	tasks []*structs.Task
   218  
   219  	// config for the Client to get AllocDir, Region, and Node.SecretID
   220  	config *config.Config
   221  
   222  	// migrate is true if data should be moved between nodes
   223  	migrate bool
   224  
   225  	// rpc provides an RPC method for watching for updates to the previous
   226  	// alloc and determining what node it was on.
   227  	rpc rpcer
   228  
   229  	// nodeID is the node the previous alloc. Set by Wait() for use in
   230  	// Migrate() iff the previous alloc has not already been GC'd.
   231  	nodeID string
   232  
   233  	// waiting and migrating are true when alloc runner is waiting on the
   234  	// prevAllocWatcher. Writers must acquire the waitingLock and readers
   235  	// should use the helper methods IsWaiting and IsMigrating.
   236  	waiting     bool
   237  	migrating   bool
   238  	waitingLock sync.RWMutex
   239  
   240  	logger *log.Logger
   241  
   242  	// migrateToken allows a client to migrate data in an ACL-protected remote
   243  	// volume
   244  	migrateToken string
   245  }
   246  
   247  // IsWaiting returns true if there's a concurrent call inside Wait
   248  func (p *remotePrevAlloc) IsWaiting() bool {
   249  	p.waitingLock.RLock()
   250  	b := p.waiting
   251  	p.waitingLock.RUnlock()
   252  	return b
   253  }
   254  
   255  // IsMigrating returns true if there's a concurrent call inside Migrate
   256  func (p *remotePrevAlloc) IsMigrating() bool {
   257  	p.waitingLock.RLock()
   258  	b := p.migrating
   259  	p.waitingLock.RUnlock()
   260  	return b
   261  }
   262  
   263  // Wait until the remote previous allocation has terminated.
   264  func (p *remotePrevAlloc) Wait(ctx context.Context) error {
   265  	p.waitingLock.Lock()
   266  	p.waiting = true
   267  	p.waitingLock.Unlock()
   268  	defer func() {
   269  		p.waitingLock.Lock()
   270  		p.waiting = false
   271  		p.waitingLock.Unlock()
   272  	}()
   273  
   274  	p.logger.Printf("[DEBUG] client: alloc %q waiting for remote previous alloc %q to terminate", p.allocID, p.prevAllocID)
   275  	req := structs.AllocSpecificRequest{
   276  		AllocID: p.prevAllocID,
   277  		QueryOptions: structs.QueryOptions{
   278  			Region:     p.config.Region,
   279  			AllowStale: true,
   280  			AuthToken:  p.config.Node.SecretID,
   281  		},
   282  	}
   283  
   284  	done := func() bool {
   285  		select {
   286  		case <-ctx.Done():
   287  			return true
   288  		default:
   289  			return false
   290  		}
   291  	}
   292  
   293  	for !done() {
   294  		resp := structs.SingleAllocResponse{}
   295  		err := p.rpc.RPC("Alloc.GetAlloc", &req, &resp)
   296  		if err != nil {
   297  			p.logger.Printf("[ERR] client: failed to query previous alloc %q: %v", p.prevAllocID, err)
   298  			retry := getAllocRetryIntv + lib.RandomStagger(getAllocRetryIntv)
   299  			select {
   300  			case <-time.After(retry):
   301  				continue
   302  			case <-ctx.Done():
   303  				return ctx.Err()
   304  			}
   305  		}
   306  		if resp.Alloc == nil {
   307  			p.logger.Printf("[DEBUG] client: blocking alloc %q has been GC'd", p.prevAllocID)
   308  			return nil
   309  		}
   310  		if resp.Alloc.Terminated() {
   311  			// Terminated!
   312  			p.nodeID = resp.Alloc.NodeID
   313  			return nil
   314  		}
   315  
   316  		// Update the query index and requery.
   317  		if resp.Index > req.MinQueryIndex {
   318  			req.MinQueryIndex = resp.Index
   319  		}
   320  	}
   321  
   322  	return ctx.Err()
   323  }
   324  
   325  // Migrate alloc data from a remote node if the new alloc has migration enabled
   326  // and the old alloc hasn't been GC'd.
   327  func (p *remotePrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
   328  	if !p.migrate {
   329  		// Volume wasn't configured to be migrated, return early
   330  		return nil
   331  	}
   332  
   333  	p.waitingLock.Lock()
   334  	p.migrating = true
   335  	p.waitingLock.Unlock()
   336  	defer func() {
   337  		p.waitingLock.Lock()
   338  		p.migrating = false
   339  		p.waitingLock.Unlock()
   340  	}()
   341  
   342  	p.logger.Printf("[DEBUG] client: alloc %q copying from remote previous alloc %q", p.allocID, p.prevAllocID)
   343  
   344  	if p.nodeID == "" {
   345  		// NodeID couldn't be found; likely alloc was GC'd
   346  		p.logger.Printf("[WARN] client: alloc %q couldn't migrate data from previous alloc %q; previous alloc may have been GC'd",
   347  			p.allocID, p.prevAllocID)
   348  		return nil
   349  	}
   350  
   351  	addr, err := p.getNodeAddr(ctx, p.nodeID)
   352  	if err != nil {
   353  		return err
   354  	}
   355  
   356  	prevAllocDir, err := p.migrateAllocDir(ctx, addr)
   357  	if err != nil {
   358  		return err
   359  	}
   360  
   361  	if err := dest.Move(prevAllocDir, p.tasks); err != nil {
   362  		// cleanup on error
   363  		prevAllocDir.Destroy()
   364  		return err
   365  	}
   366  
   367  	if err := prevAllocDir.Destroy(); err != nil {
   368  		p.logger.Printf("[ERR] client: error destroying allocdir %q: %v", prevAllocDir.AllocDir, err)
   369  	}
   370  	return nil
   371  }
   372  
   373  // getNodeAddr gets the node from the server with the given Node ID
   374  func (p *remotePrevAlloc) getNodeAddr(ctx context.Context, nodeID string) (string, error) {
   375  	req := structs.NodeSpecificRequest{
   376  		NodeID: nodeID,
   377  		QueryOptions: structs.QueryOptions{
   378  			Region:     p.config.Region,
   379  			AllowStale: true,
   380  			AuthToken:  p.config.Node.SecretID,
   381  		},
   382  	}
   383  
   384  	resp := structs.SingleNodeResponse{}
   385  	for {
   386  		err := p.rpc.RPC("Node.GetNode", &req, &resp)
   387  		if err != nil {
   388  			p.logger.Printf("[ERR] client: failed to query node info %q: %v", nodeID, err)
   389  			retry := getAllocRetryIntv + lib.RandomStagger(getAllocRetryIntv)
   390  			select {
   391  			case <-time.After(retry):
   392  				continue
   393  			case <-ctx.Done():
   394  				return "", ctx.Err()
   395  			}
   396  		}
   397  		break
   398  	}
   399  
   400  	if resp.Node == nil {
   401  		return "", fmt.Errorf("node %q not found", nodeID)
   402  	}
   403  
   404  	scheme := "http://"
   405  	if resp.Node.TLSEnabled {
   406  		scheme = "https://"
   407  	}
   408  	return scheme + resp.Node.HTTPAddr, nil
   409  }
   410  
   411  // migrate a remote alloc dir to local node. Caller is responsible for calling
   412  // Destroy on the returned allocdir if no error occurs.
   413  func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string) (*allocdir.AllocDir, error) {
   414  	// Create the previous alloc dir
   415  	prevAllocDir := allocdir.NewAllocDir(p.logger, filepath.Join(p.config.AllocDir, p.prevAllocID))
   416  	if err := prevAllocDir.Build(); err != nil {
   417  		return nil, fmt.Errorf("error building alloc dir for previous alloc %q: %v", p.prevAllocID, err)
   418  	}
   419  
   420  	// Create an API client
   421  	apiConfig := nomadapi.DefaultConfig()
   422  	apiConfig.Address = nodeAddr
   423  	apiConfig.TLSConfig = &nomadapi.TLSConfig{
   424  		CACert:     p.config.TLSConfig.CAFile,
   425  		ClientCert: p.config.TLSConfig.CertFile,
   426  		ClientKey:  p.config.TLSConfig.KeyFile,
   427  	}
   428  	apiClient, err := nomadapi.NewClient(apiConfig)
   429  	if err != nil {
   430  		return nil, err
   431  	}
   432  
   433  	url := fmt.Sprintf("/v1/client/allocation/%v/snapshot", p.prevAllocID)
   434  	qo := &nomadapi.QueryOptions{AuthToken: p.migrateToken}
   435  	resp, err := apiClient.Raw().Response(url, qo)
   436  	if err != nil {
   437  		prevAllocDir.Destroy()
   438  		return nil, fmt.Errorf("error getting snapshot from previous alloc %q: %v", p.prevAllocID, err)
   439  	}
   440  
   441  	if err := p.streamAllocDir(ctx, resp, prevAllocDir.AllocDir); err != nil {
   442  		prevAllocDir.Destroy()
   443  		return nil, err
   444  	}
   445  
   446  	return prevAllocDir, nil
   447  }
   448  
   449  // stream remote alloc to dir to a local path. Caller should cleanup dest on
   450  // error.
   451  func (p *remotePrevAlloc) streamAllocDir(ctx context.Context, resp io.ReadCloser, dest string) error {
   452  	p.logger.Printf("[DEBUG] client: alloc %q streaming snapshot of previous alloc %q to %q", p.allocID, p.prevAllocID, dest)
   453  	tr := tar.NewReader(resp)
   454  	defer resp.Close()
   455  
   456  	// Cache effective uid as we only run Chown if we're root
   457  	euid := syscall.Geteuid()
   458  
   459  	canceled := func() bool {
   460  		select {
   461  		case <-ctx.Done():
   462  			p.logger.Printf("[INFO] client: stopping migration of previous alloc %q for new alloc: %v",
   463  				p.prevAllocID, p.allocID)
   464  			return true
   465  		default:
   466  			return false
   467  		}
   468  	}
   469  
   470  	// if we see this file, there was an error on the remote side
   471  	errorFilename := allocdir.SnapshotErrorFilename(p.prevAllocID)
   472  
   473  	buf := make([]byte, 1024)
   474  	for !canceled() {
   475  		// Get the next header
   476  		hdr, err := tr.Next()
   477  
   478  		// Snapshot has ended
   479  		if err == io.EOF {
   480  			return nil
   481  		}
   482  
   483  		if err != nil {
   484  			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %v",
   485  				p.prevAllocID, p.allocID, err)
   486  		}
   487  
   488  		if hdr.Name == errorFilename {
   489  			// Error snapshotting on the remote side, try to read
   490  			// the message out of the file and return it.
   491  			errBuf := make([]byte, int(hdr.Size))
   492  			if _, err := tr.Read(errBuf); err != nil && err != io.EOF {
   493  				return fmt.Errorf("error streaming previous alloc %q for new alloc %q; failed reading error message: %v",
   494  					p.prevAllocID, p.allocID, err)
   495  			}
   496  			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %s",
   497  				p.prevAllocID, p.allocID, string(errBuf))
   498  		}
   499  
   500  		// If the header is for a directory we create the directory
   501  		if hdr.Typeflag == tar.TypeDir {
   502  			name := filepath.Join(dest, hdr.Name)
   503  			os.MkdirAll(name, os.FileMode(hdr.Mode))
   504  
   505  			// Can't change owner if not root or on Windows.
   506  			if euid == 0 {
   507  				if err := os.Chown(name, hdr.Uid, hdr.Gid); err != nil {
   508  					return fmt.Errorf("error chowning directory %v", err)
   509  				}
   510  			}
   511  			continue
   512  		}
   513  		// If the header is for a symlink we create the symlink
   514  		if hdr.Typeflag == tar.TypeSymlink {
   515  			if err = os.Symlink(hdr.Linkname, filepath.Join(dest, hdr.Name)); err != nil {
   516  				return fmt.Errorf("error creating symlink: %v", err)
   517  			}
   518  			continue
   519  		}
   520  		// If the header is a file, we write to a file
   521  		if hdr.Typeflag == tar.TypeReg {
   522  			f, err := os.Create(filepath.Join(dest, hdr.Name))
   523  			if err != nil {
   524  				return fmt.Errorf("error creating file: %v", err)
   525  			}
   526  
   527  			// Setting the permissions of the file as the origin.
   528  			if err := f.Chmod(os.FileMode(hdr.Mode)); err != nil {
   529  				f.Close()
   530  				return fmt.Errorf("error chmoding file %v", err)
   531  			}
   532  
   533  			// Can't change owner if not root or on Windows.
   534  			if euid == 0 {
   535  				if err := f.Chown(hdr.Uid, hdr.Gid); err != nil {
   536  					f.Close()
   537  					return fmt.Errorf("error chowning file %v", err)
   538  				}
   539  			}
   540  
   541  			// We write in chunks so that we can test if the client
   542  			// is still alive
   543  			for !canceled() {
   544  				n, err := tr.Read(buf)
   545  				if n > 0 && (err == nil || err == io.EOF) {
   546  					if _, err := f.Write(buf[:n]); err != nil {
   547  						f.Close()
   548  						return fmt.Errorf("error writing to file %q: %v", f.Name(), err)
   549  					}
   550  				}
   551  
   552  				if err != nil {
   553  					f.Close()
   554  					if err != io.EOF {
   555  						return fmt.Errorf("error reading snapshot: %v", err)
   556  					}
   557  					break
   558  				}
   559  			}
   560  
   561  		}
   562  	}
   563  
   564  	if canceled() {
   565  		return ctx.Err()
   566  	}
   567  
   568  	return nil
   569  }
   570  
   571  // noopPrevAlloc does not block or migrate on a previous allocation and never
   572  // returns an error.
   573  type noopPrevAlloc struct{}
   574  
   575  // Wait returns nil immediately.
   576  func (noopPrevAlloc) Wait(context.Context) error { return nil }
   577  
   578  // Migrate returns nil immediately.
   579  func (noopPrevAlloc) Migrate(context.Context, *allocdir.AllocDir) error { return nil }
   580  
   581  func (noopPrevAlloc) IsWaiting() bool   { return false }
   582  func (noopPrevAlloc) IsMigrating() bool { return false }