github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/client/allocrunner/alloc_watcher.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"archive/tar"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"os"
    10  	"path/filepath"
    11  	"sync"
    12  	"syscall"
    13  	"time"
    14  
    15  	"github.com/hashicorp/consul/lib"
    16  	nomadapi "github.com/hashicorp/nomad/api"
    17  	"github.com/hashicorp/nomad/client/allocdir"
    18  	"github.com/hashicorp/nomad/client/config"
    19  	cstructs "github.com/hashicorp/nomad/client/structs"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  )
    22  
    23  const (
    24  	// getRemoteRetryIntv is minimum interval on which we retry
    25  	// to fetch remote objects. We pick a value between this and 2x this.
    26  	getRemoteRetryIntv = 30 * time.Second
    27  )
    28  
    29  // rpcer is the interface needed by a prevAllocWatcher to make RPC calls.
    30  type rpcer interface {
    31  	// RPC allows retrieving remote allocs.
    32  	RPC(method string, args interface{}, reply interface{}) error
    33  }
    34  
    35  // terminated is the interface needed by a prevAllocWatcher to check if an
    36  // alloc is terminated.
    37  type terminated interface {
    38  	Terminated() bool
    39  }
    40  
    41  // prevAllocWatcher allows AllocRunners to wait for a previous allocation to
    42  // terminate and migrate its data whether or not the previous allocation is
    43  // local or remote.
    44  type prevAllocWatcher interface {
    45  	// Wait for previous alloc to terminate
    46  	Wait(context.Context) error
    47  
    48  	// Migrate data from previous alloc
    49  	Migrate(ctx context.Context, dest *allocdir.AllocDir) error
    50  
    51  	// IsWaiting returns true if a concurrent caller is blocked in Wait
    52  	IsWaiting() bool
    53  
    54  	// IsMigrating returns true if a concurrent caller is in Migrate
    55  	IsMigrating() bool
    56  }
    57  
    58  // NewAllocWatcher creates a prevAllocWatcher appropriate for whether this
    59  // alloc's previous allocation was local or remote. If this alloc has no
    60  // previous alloc then a noop implementation is returned.
    61  func NewAllocWatcher(alloc *structs.Allocation, prevAR *AllocRunner, rpc rpcer, config *config.Config, l *log.Logger, migrateToken string) prevAllocWatcher {
    62  	if alloc.PreviousAllocation == "" {
    63  		// No previous allocation, use noop transitioner
    64  		return NoopPrevAlloc{}
    65  	}
    66  
    67  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
    68  
    69  	if prevAR != nil {
    70  		// Previous allocation is local, use local transitioner
    71  		return &localPrevAlloc{
    72  			allocID:      alloc.ID,
    73  			prevAllocID:  alloc.PreviousAllocation,
    74  			tasks:        tg.Tasks,
    75  			sticky:       tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky,
    76  			prevAllocDir: prevAR.GetAllocDir(),
    77  			prevListener: prevAR.GetListener(),
    78  			prevWaitCh:   prevAR.WaitCh(),
    79  			prevStatus:   prevAR.Alloc(),
    80  			logger:       l,
    81  		}
    82  	}
    83  
    84  	return &remotePrevAlloc{
    85  		allocID:      alloc.ID,
    86  		prevAllocID:  alloc.PreviousAllocation,
    87  		tasks:        tg.Tasks,
    88  		config:       config,
    89  		migrate:      tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate,
    90  		rpc:          rpc,
    91  		logger:       l,
    92  		migrateToken: migrateToken,
    93  	}
    94  }
    95  
    96  // localPrevAlloc is a prevAllocWatcher for previous allocations on the same
    97  // node as an updated allocation.
    98  type localPrevAlloc struct {
    99  	// allocID is the ID of the alloc being blocked
   100  	allocID string
   101  
   102  	// prevAllocID is the ID of the alloc being replaced
   103  	prevAllocID string
   104  
   105  	// tasks on the new alloc
   106  	tasks []*structs.Task
   107  
   108  	// sticky is true if data should be moved
   109  	sticky bool
   110  
   111  	// prevAllocDir is the alloc dir for the previous alloc
   112  	prevAllocDir *allocdir.AllocDir
   113  
   114  	// prevListener allows blocking for updates to the previous alloc
   115  	prevListener *cstructs.AllocListener
   116  
   117  	// prevStatus allows checking if the previous alloc has already
   118  	// terminated (and therefore won't send updates to the listener)
   119  	prevStatus terminated
   120  
   121  	// prevWaitCh is closed when the previous alloc is garbage collected
   122  	// which is a failsafe against blocking the new alloc forever
   123  	prevWaitCh <-chan struct{}
   124  
   125  	// waiting and migrating are true when alloc runner is waiting on the
   126  	// prevAllocWatcher. Writers must acquire the waitingLock and readers
   127  	// should use the helper methods IsWaiting and IsMigrating.
   128  	waiting     bool
   129  	migrating   bool
   130  	waitingLock sync.RWMutex
   131  
   132  	logger *log.Logger
   133  }
   134  
   135  // IsWaiting returns true if there's a concurrent call inside Wait
   136  func (p *localPrevAlloc) IsWaiting() bool {
   137  	p.waitingLock.RLock()
   138  	b := p.waiting
   139  	p.waitingLock.RUnlock()
   140  	return b
   141  }
   142  
   143  // IsMigrating returns true if there's a concurrent call inside Migrate
   144  func (p *localPrevAlloc) IsMigrating() bool {
   145  	p.waitingLock.RLock()
   146  	b := p.migrating
   147  	p.waitingLock.RUnlock()
   148  	return b
   149  }
   150  
   151  // Wait on a local alloc to become terminal, exit, or the context to be done.
   152  func (p *localPrevAlloc) Wait(ctx context.Context) error {
   153  	p.waitingLock.Lock()
   154  	p.waiting = true
   155  	p.waitingLock.Unlock()
   156  	defer func() {
   157  		p.waitingLock.Lock()
   158  		p.waiting = false
   159  		p.waitingLock.Unlock()
   160  	}()
   161  
   162  	defer p.prevListener.Close()
   163  
   164  	if p.prevStatus.Terminated() {
   165  		// Fast path - previous alloc already terminated!
   166  		return nil
   167  	}
   168  
   169  	// Block until previous alloc exits
   170  	p.logger.Printf("[DEBUG] client: alloc %q waiting for previous alloc %q to terminate", p.allocID, p.prevAllocID)
   171  	for {
   172  		select {
   173  		case prevAlloc, ok := <-p.prevListener.Ch:
   174  			if !ok || prevAlloc.Terminated() {
   175  				return nil
   176  			}
   177  		case <-p.prevWaitCh:
   178  			return nil
   179  		case <-ctx.Done():
   180  			return ctx.Err()
   181  		}
   182  	}
   183  }
   184  
   185  // Migrate from previous local alloc dir to destination alloc dir.
   186  func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
   187  	if !p.sticky {
   188  		// Not a sticky volume, nothing to migrate
   189  		return nil
   190  	}
   191  
   192  	p.waitingLock.Lock()
   193  	p.migrating = true
   194  	p.waitingLock.Unlock()
   195  	defer func() {
   196  		p.waitingLock.Lock()
   197  		p.migrating = false
   198  		p.waitingLock.Unlock()
   199  	}()
   200  
   201  	p.logger.Printf("[DEBUG] client: alloc %q copying previous alloc %q", p.allocID, p.prevAllocID)
   202  
   203  	moveErr := dest.Move(p.prevAllocDir, p.tasks)
   204  
   205  	// Always cleanup previous alloc
   206  	if err := p.prevAllocDir.Destroy(); err != nil {
   207  		p.logger.Printf("[ERR] client: error destroying allocdir %v: %v", p.prevAllocDir.AllocDir, err)
   208  	}
   209  
   210  	return moveErr
   211  }
   212  
   213  // remotePrevAlloc is a prevAllocWatcher for previous allocations on remote
   214  // nodes as an updated allocation.
   215  type remotePrevAlloc struct {
   216  	// allocID is the ID of the alloc being blocked
   217  	allocID string
   218  
   219  	// prevAllocID is the ID of the alloc being replaced
   220  	prevAllocID string
   221  
   222  	// tasks on the new alloc
   223  	tasks []*structs.Task
   224  
   225  	// config for the Client to get AllocDir, Region, and Node.SecretID
   226  	config *config.Config
   227  
   228  	// migrate is true if data should be moved between nodes
   229  	migrate bool
   230  
   231  	// rpc provides an RPC method for watching for updates to the previous
   232  	// alloc and determining what node it was on.
   233  	rpc rpcer
   234  
   235  	// nodeID is the node the previous alloc. Set by Wait() for use in
   236  	// Migrate() iff the previous alloc has not already been GC'd.
   237  	nodeID string
   238  
   239  	// waiting and migrating are true when alloc runner is waiting on the
   240  	// prevAllocWatcher. Writers must acquire the waitingLock and readers
   241  	// should use the helper methods IsWaiting and IsMigrating.
   242  	waiting     bool
   243  	migrating   bool
   244  	waitingLock sync.RWMutex
   245  
   246  	logger *log.Logger
   247  
   248  	// migrateToken allows a client to migrate data in an ACL-protected remote
   249  	// volume
   250  	migrateToken string
   251  }
   252  
   253  // IsWaiting returns true if there's a concurrent call inside Wait
   254  func (p *remotePrevAlloc) IsWaiting() bool {
   255  	p.waitingLock.RLock()
   256  	b := p.waiting
   257  	p.waitingLock.RUnlock()
   258  	return b
   259  }
   260  
   261  // IsMigrating returns true if there's a concurrent call inside Migrate
   262  func (p *remotePrevAlloc) IsMigrating() bool {
   263  	p.waitingLock.RLock()
   264  	b := p.migrating
   265  	p.waitingLock.RUnlock()
   266  	return b
   267  }
   268  
   269  // Wait until the remote previous allocation has terminated.
   270  func (p *remotePrevAlloc) Wait(ctx context.Context) error {
   271  	p.waitingLock.Lock()
   272  	p.waiting = true
   273  	p.waitingLock.Unlock()
   274  	defer func() {
   275  		p.waitingLock.Lock()
   276  		p.waiting = false
   277  		p.waitingLock.Unlock()
   278  	}()
   279  
   280  	p.logger.Printf("[DEBUG] client: alloc %q waiting for remote previous alloc %q to terminate", p.allocID, p.prevAllocID)
   281  	req := structs.AllocSpecificRequest{
   282  		AllocID: p.prevAllocID,
   283  		QueryOptions: structs.QueryOptions{
   284  			Region:     p.config.Region,
   285  			AllowStale: true,
   286  			AuthToken:  p.config.Node.SecretID,
   287  		},
   288  	}
   289  
   290  	done := func() bool {
   291  		select {
   292  		case <-ctx.Done():
   293  			return true
   294  		default:
   295  			return false
   296  		}
   297  	}
   298  
   299  	for !done() {
   300  		resp := structs.SingleAllocResponse{}
   301  		err := p.rpc.RPC("Alloc.GetAlloc", &req, &resp)
   302  		if err != nil {
   303  			p.logger.Printf("[ERR] client: failed to query previous alloc %q: %v", p.prevAllocID, err)
   304  			retry := getRemoteRetryIntv + lib.RandomStagger(getRemoteRetryIntv)
   305  			select {
   306  			case <-time.After(retry):
   307  				continue
   308  			case <-ctx.Done():
   309  				return ctx.Err()
   310  			}
   311  		}
   312  		if resp.Alloc == nil {
   313  			p.logger.Printf("[DEBUG] client: blocking alloc %q has been GC'd", p.prevAllocID)
   314  			return nil
   315  		}
   316  		if resp.Alloc.Terminated() {
   317  			// Terminated!
   318  			p.nodeID = resp.Alloc.NodeID
   319  			return nil
   320  		}
   321  
   322  		// Update the query index and requery.
   323  		if resp.Index > req.MinQueryIndex {
   324  			req.MinQueryIndex = resp.Index
   325  		}
   326  	}
   327  
   328  	return ctx.Err()
   329  }
   330  
   331  // Migrate alloc data from a remote node if the new alloc has migration enabled
   332  // and the old alloc hasn't been GC'd.
   333  func (p *remotePrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) error {
   334  	if !p.migrate {
   335  		// Volume wasn't configured to be migrated, return early
   336  		return nil
   337  	}
   338  
   339  	p.waitingLock.Lock()
   340  	p.migrating = true
   341  	p.waitingLock.Unlock()
   342  	defer func() {
   343  		p.waitingLock.Lock()
   344  		p.migrating = false
   345  		p.waitingLock.Unlock()
   346  	}()
   347  
   348  	p.logger.Printf("[DEBUG] client: alloc %q copying from remote previous alloc %q", p.allocID, p.prevAllocID)
   349  
   350  	if p.nodeID == "" {
   351  		// NodeID couldn't be found; likely alloc was GC'd
   352  		p.logger.Printf("[WARN] client: alloc %q couldn't migrate data from previous alloc %q; previous alloc may have been GC'd",
   353  			p.allocID, p.prevAllocID)
   354  		return nil
   355  	}
   356  
   357  	addr, err := p.getNodeAddr(ctx, p.nodeID)
   358  	if err != nil {
   359  		return err
   360  	}
   361  
   362  	prevAllocDir, err := p.migrateAllocDir(ctx, addr)
   363  	if err != nil {
   364  		return err
   365  	}
   366  
   367  	if err := dest.Move(prevAllocDir, p.tasks); err != nil {
   368  		// cleanup on error
   369  		prevAllocDir.Destroy()
   370  		return err
   371  	}
   372  
   373  	if err := prevAllocDir.Destroy(); err != nil {
   374  		p.logger.Printf("[ERR] client: error destroying allocdir %q: %v", prevAllocDir.AllocDir, err)
   375  	}
   376  	return nil
   377  }
   378  
   379  // getNodeAddr gets the node from the server with the given Node ID
   380  func (p *remotePrevAlloc) getNodeAddr(ctx context.Context, nodeID string) (string, error) {
   381  	req := structs.NodeSpecificRequest{
   382  		NodeID: nodeID,
   383  		QueryOptions: structs.QueryOptions{
   384  			Region:     p.config.Region,
   385  			AllowStale: true,
   386  			AuthToken:  p.config.Node.SecretID,
   387  		},
   388  	}
   389  
   390  	resp := structs.SingleNodeResponse{}
   391  	for {
   392  		err := p.rpc.RPC("Node.GetNode", &req, &resp)
   393  		if err != nil {
   394  			p.logger.Printf("[ERR] client: failed to query node info %q: %v", nodeID, err)
   395  			retry := getRemoteRetryIntv + lib.RandomStagger(getRemoteRetryIntv)
   396  			select {
   397  			case <-time.After(retry):
   398  				continue
   399  			case <-ctx.Done():
   400  				return "", ctx.Err()
   401  			}
   402  		}
   403  		break
   404  	}
   405  
   406  	if resp.Node == nil {
   407  		return "", fmt.Errorf("node %q not found", nodeID)
   408  	}
   409  
   410  	scheme := "http://"
   411  	if resp.Node.TLSEnabled {
   412  		scheme = "https://"
   413  	}
   414  	return scheme + resp.Node.HTTPAddr, nil
   415  }
   416  
   417  // migrate a remote alloc dir to local node. Caller is responsible for calling
   418  // Destroy on the returned allocdir if no error occurs.
   419  func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string) (*allocdir.AllocDir, error) {
   420  	// Create the previous alloc dir
   421  	prevAllocDir := allocdir.NewAllocDir(p.logger, filepath.Join(p.config.AllocDir, p.prevAllocID))
   422  	if err := prevAllocDir.Build(); err != nil {
   423  		return nil, fmt.Errorf("error building alloc dir for previous alloc %q: %v", p.prevAllocID, err)
   424  	}
   425  
   426  	// Create an API client
   427  	apiConfig := nomadapi.DefaultConfig()
   428  	apiConfig.Address = nodeAddr
   429  	apiConfig.TLSConfig = &nomadapi.TLSConfig{
   430  		CACert:        p.config.TLSConfig.CAFile,
   431  		ClientCert:    p.config.TLSConfig.CertFile,
   432  		ClientKey:     p.config.TLSConfig.KeyFile,
   433  		TLSServerName: fmt.Sprintf("client.%s.nomad", p.config.Region),
   434  	}
   435  	apiClient, err := nomadapi.NewClient(apiConfig)
   436  	if err != nil {
   437  		return nil, err
   438  	}
   439  
   440  	url := fmt.Sprintf("/v1/client/allocation/%v/snapshot", p.prevAllocID)
   441  	qo := &nomadapi.QueryOptions{AuthToken: p.migrateToken}
   442  	resp, err := apiClient.Raw().Response(url, qo)
   443  	if err != nil {
   444  		prevAllocDir.Destroy()
   445  		return nil, fmt.Errorf("error getting snapshot from previous alloc %q: %v", p.prevAllocID, err)
   446  	}
   447  
   448  	if err := p.streamAllocDir(ctx, resp, prevAllocDir.AllocDir); err != nil {
   449  		prevAllocDir.Destroy()
   450  		return nil, err
   451  	}
   452  
   453  	return prevAllocDir, nil
   454  }
   455  
   456  // stream remote alloc to dir to a local path. Caller should cleanup dest on
   457  // error.
   458  func (p *remotePrevAlloc) streamAllocDir(ctx context.Context, resp io.ReadCloser, dest string) error {
   459  	p.logger.Printf("[DEBUG] client: alloc %q streaming snapshot of previous alloc %q to %q", p.allocID, p.prevAllocID, dest)
   460  	tr := tar.NewReader(resp)
   461  	defer resp.Close()
   462  
   463  	// Cache effective uid as we only run Chown if we're root
   464  	euid := syscall.Geteuid()
   465  
   466  	canceled := func() bool {
   467  		select {
   468  		case <-ctx.Done():
   469  			p.logger.Printf("[INFO] client: stopping migration of previous alloc %q for new alloc: %v",
   470  				p.prevAllocID, p.allocID)
   471  			return true
   472  		default:
   473  			return false
   474  		}
   475  	}
   476  
   477  	// if we see this file, there was an error on the remote side
   478  	errorFilename := allocdir.SnapshotErrorFilename(p.prevAllocID)
   479  
   480  	buf := make([]byte, 1024)
   481  	for !canceled() {
   482  		// Get the next header
   483  		hdr, err := tr.Next()
   484  
   485  		// Snapshot has ended
   486  		if err == io.EOF {
   487  			return nil
   488  		}
   489  
   490  		if err != nil {
   491  			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %v",
   492  				p.prevAllocID, p.allocID, err)
   493  		}
   494  
   495  		if hdr.Name == errorFilename {
   496  			// Error snapshotting on the remote side, try to read
   497  			// the message out of the file and return it.
   498  			errBuf := make([]byte, int(hdr.Size))
   499  			if _, err := tr.Read(errBuf); err != nil && err != io.EOF {
   500  				return fmt.Errorf("error streaming previous alloc %q for new alloc %q; failed reading error message: %v",
   501  					p.prevAllocID, p.allocID, err)
   502  			}
   503  			return fmt.Errorf("error streaming previous alloc %q for new alloc %q: %s",
   504  				p.prevAllocID, p.allocID, string(errBuf))
   505  		}
   506  
   507  		// If the header is for a directory we create the directory
   508  		if hdr.Typeflag == tar.TypeDir {
   509  			name := filepath.Join(dest, hdr.Name)
   510  			os.MkdirAll(name, os.FileMode(hdr.Mode))
   511  
   512  			// Can't change owner if not root or on Windows.
   513  			if euid == 0 {
   514  				if err := os.Chown(name, hdr.Uid, hdr.Gid); err != nil {
   515  					return fmt.Errorf("error chowning directory %v", err)
   516  				}
   517  			}
   518  			continue
   519  		}
   520  		// If the header is for a symlink we create the symlink
   521  		if hdr.Typeflag == tar.TypeSymlink {
   522  			if err = os.Symlink(hdr.Linkname, filepath.Join(dest, hdr.Name)); err != nil {
   523  				return fmt.Errorf("error creating symlink: %v", err)
   524  			}
   525  			continue
   526  		}
   527  		// If the header is a file, we write to a file
   528  		if hdr.Typeflag == tar.TypeReg {
   529  			f, err := os.Create(filepath.Join(dest, hdr.Name))
   530  			if err != nil {
   531  				return fmt.Errorf("error creating file: %v", err)
   532  			}
   533  
   534  			// Setting the permissions of the file as the origin.
   535  			if err := f.Chmod(os.FileMode(hdr.Mode)); err != nil {
   536  				f.Close()
   537  				return fmt.Errorf("error chmoding file %v", err)
   538  			}
   539  
   540  			// Can't change owner if not root or on Windows.
   541  			if euid == 0 {
   542  				if err := f.Chown(hdr.Uid, hdr.Gid); err != nil {
   543  					f.Close()
   544  					return fmt.Errorf("error chowning file %v", err)
   545  				}
   546  			}
   547  
   548  			// We write in chunks so that we can test if the client
   549  			// is still alive
   550  			for !canceled() {
   551  				n, err := tr.Read(buf)
   552  				if n > 0 && (err == nil || err == io.EOF) {
   553  					if _, err := f.Write(buf[:n]); err != nil {
   554  						f.Close()
   555  						return fmt.Errorf("error writing to file %q: %v", f.Name(), err)
   556  					}
   557  				}
   558  
   559  				if err != nil {
   560  					f.Close()
   561  					if err != io.EOF {
   562  						return fmt.Errorf("error reading snapshot: %v", err)
   563  					}
   564  					break
   565  				}
   566  			}
   567  
   568  		}
   569  	}
   570  
   571  	if canceled() {
   572  		return ctx.Err()
   573  	}
   574  
   575  	return nil
   576  }
   577  
   578  // NoopPrevAlloc does not block or migrate on a previous allocation and never
   579  // returns an error.
   580  type NoopPrevAlloc struct{}
   581  
   582  // Wait returns nil immediately.
   583  func (NoopPrevAlloc) Wait(context.Context) error { return nil }
   584  
   585  // Migrate returns nil immediately.
   586  func (NoopPrevAlloc) Migrate(context.Context, *allocdir.AllocDir) error { return nil }
   587  
   588  func (NoopPrevAlloc) IsWaiting() bool   { return false }
   589  func (NoopPrevAlloc) IsMigrating() bool { return false }