github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/client/task_runner.go (about)

     1  package client
     2  
     3  import (
     4  	"crypto/md5"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"log"
     8  	"os"
     9  	"path/filepath"
    10  	"sync"
    11  
    12  	"github.com/hashicorp/nomad/client/config"
    13  	"github.com/hashicorp/nomad/client/driver"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  )
    16  
// TaskRunner is used to wrap a task within an allocation and provide the
// execution context. A runner is created with NewTaskRunner (optionally
// followed by RestoreState) and driven by a single call to Run.
type TaskRunner struct {
	config  *config.Config      // client configuration; supplies StateDir and Node
	updater TaskStateUpdater    // callback invoked by setStatus on every status change
	logger  *log.Logger         // destination for the runner's log output
	ctx     *driver.ExecContext // passed through to the driver on Start and Open
	allocID string              // ID of the allocation this task belongs to

	task     *structs.Task       // current task definition; replaced on restore and on update
	updateCh chan *structs.Task  // buffered queue of pending updates, drained by Run
	handle   driver.DriverHandle // handle to the task; nil until started or restored

	destroy     bool          // set to true by the first call to Destroy
	destroyCh   chan struct{} // closed by Destroy to ask Run to kill the task
	destroyLock sync.Mutex    // held by Destroy while checking destroy and closing destroyCh
	waitCh      chan struct{} // closed when Run returns
}
    34  
// taskRunnerState is used to snapshot the state of the task runner. It is the
// value SaveState hands to persistState and RestoreState reads back through
// restoreState.
type taskRunnerState struct {
	Task     *structs.Task // task definition held by the runner at snapshot time
	HandleID string        // ID reported by the driver handle; empty if there was no handle
}
    40  
// TaskStateUpdater is used to update the status of a task. It is called with
// the task's name, the new status, and a description of why the status
// changed.
type TaskStateUpdater func(taskName, status, desc string)
    43  
    44  // NewTaskRunner is used to create a new task context
    45  func NewTaskRunner(logger *log.Logger, config *config.Config,
    46  	updater TaskStateUpdater, ctx *driver.ExecContext,
    47  	allocID string, task *structs.Task) *TaskRunner {
    48  	tc := &TaskRunner{
    49  		config:    config,
    50  		updater:   updater,
    51  		logger:    logger,
    52  		ctx:       ctx,
    53  		allocID:   allocID,
    54  		task:      task,
    55  		updateCh:  make(chan *structs.Task, 8),
    56  		destroyCh: make(chan struct{}),
    57  		waitCh:    make(chan struct{}),
    58  	}
    59  	return tc
    60  }
    61  
// WaitCh returns a channel to wait for termination. The channel is never
// sent on; it is closed when Run returns, so a receive from it unblocks once
// the runner has finished.
func (r *TaskRunner) WaitCh() <-chan struct{} {
	return r.waitCh
}
    66  
    67  // stateFilePath returns the path to our state file
    68  func (r *TaskRunner) stateFilePath() string {
    69  	// Get the MD5 of the task name
    70  	hashVal := md5.Sum([]byte(r.task.Name))
    71  	hashHex := hex.EncodeToString(hashVal[:])
    72  	dirName := fmt.Sprintf("task-%s", hashHex)
    73  
    74  	// Generate the path
    75  	path := filepath.Join(r.config.StateDir, "alloc", r.allocID,
    76  		dirName, "state.json")
    77  	return path
    78  }
    79  
    80  // RestoreState is used to restore our state
    81  func (r *TaskRunner) RestoreState() error {
    82  	// Load the snapshot
    83  	var snap taskRunnerState
    84  	if err := restoreState(r.stateFilePath(), &snap); err != nil {
    85  		return err
    86  	}
    87  
    88  	// Restore fields
    89  	r.task = snap.Task
    90  
    91  	// Restore the driver
    92  	if snap.HandleID != "" {
    93  		driver, err := r.createDriver()
    94  		if err != nil {
    95  			return err
    96  		}
    97  
    98  		handle, err := driver.Open(r.ctx, snap.HandleID)
    99  		if err != nil {
   100  			r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v",
   101  				r.task.Name, r.allocID, err)
   102  			return err
   103  		}
   104  		r.handle = handle
   105  	}
   106  	return nil
   107  }
   108  
   109  // SaveState is used to snapshot our state
   110  func (r *TaskRunner) SaveState() error {
   111  	snap := taskRunnerState{
   112  		Task: r.task,
   113  	}
   114  	if r.handle != nil {
   115  		snap.HandleID = r.handle.ID()
   116  	}
   117  	return persistState(r.stateFilePath(), &snap)
   118  }
   119  
   120  // DestroyState is used to cleanup after ourselves
   121  func (r *TaskRunner) DestroyState() error {
   122  	return os.RemoveAll(r.stateFilePath())
   123  }
   124  
   125  // setStatus is used to update the status of the task runner
   126  func (r *TaskRunner) setStatus(status, desc string) {
   127  	r.updater(r.task.Name, status, desc)
   128  }
   129  
   130  // createDriver makes a driver for the task
   131  func (r *TaskRunner) createDriver() (driver.Driver, error) {
   132  	driverCtx := driver.NewDriverContext(r.task.Name, r.config, r.config.Node, r.logger)
   133  	driver, err := driver.NewDriver(r.task.Driver, driverCtx)
   134  	if err != nil {
   135  		err = fmt.Errorf("failed to create driver '%s' for alloc %s: %v",
   136  			r.task.Driver, r.allocID, err)
   137  		r.logger.Printf("[ERR] client: %s", err)
   138  	}
   139  	return driver, err
   140  }
   141  
   142  // startTask is used to start the task if there is no handle
   143  func (r *TaskRunner) startTask() error {
   144  	// Create a driver
   145  	driver, err := r.createDriver()
   146  	if err != nil {
   147  		r.setStatus(structs.AllocClientStatusFailed, err.Error())
   148  		return err
   149  	}
   150  
   151  	// Start the job
   152  	handle, err := driver.Start(r.ctx, r.task)
   153  	if err != nil {
   154  		r.logger.Printf("[ERR] client: failed to start task '%s' for alloc '%s': %v",
   155  			r.task.Name, r.allocID, err)
   156  		r.setStatus(structs.AllocClientStatusFailed,
   157  			fmt.Sprintf("failed to start: %v", err))
   158  		return err
   159  	}
   160  	r.handle = handle
   161  	r.setStatus(structs.AllocClientStatusRunning, "task started")
   162  	return nil
   163  }
   164  
   165  // Run is a long running routine used to manage the task
   166  func (r *TaskRunner) Run() {
   167  	defer close(r.waitCh)
   168  	r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')",
   169  		r.task.Name, r.allocID)
   170  
   171  	// Start the task if not yet started
   172  	if r.handle == nil {
   173  		if err := r.startTask(); err != nil {
   174  			return
   175  		}
   176  	}
   177  
   178  OUTER:
   179  	// Wait for updates
   180  	for {
   181  		select {
   182  		case err := <-r.handle.WaitCh():
   183  			if err != nil {
   184  				r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v",
   185  					r.task.Name, r.allocID, err)
   186  				r.setStatus(structs.AllocClientStatusDead,
   187  					fmt.Sprintf("task failed with: %v", err))
   188  			} else {
   189  				r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'",
   190  					r.task.Name, r.allocID)
   191  				r.setStatus(structs.AllocClientStatusDead,
   192  					"task completed")
   193  			}
   194  			break OUTER
   195  
   196  		case update := <-r.updateCh:
   197  			// Update
   198  			r.task = update
   199  			if err := r.handle.Update(update); err != nil {
   200  				r.logger.Printf("[ERR] client: failed to update task '%s' for alloc '%s': %v",
   201  					r.task.Name, r.allocID, err)
   202  			}
   203  
   204  		case <-r.destroyCh:
   205  			// Send the kill signal, and use the WaitCh to block until complete
   206  			if err := r.handle.Kill(); err != nil {
   207  				r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v",
   208  					r.task.Name, r.allocID, err)
   209  			}
   210  		}
   211  	}
   212  
   213  	// Cleanup after ourselves
   214  	r.DestroyState()
   215  }
   216  
// Update is used to update the task of the context. The new task definition
// is queued for Run to apply. The send never blocks: if the update queue is
// already full the update is dropped and an error is logged instead.
func (r *TaskRunner) Update(update *structs.Task) {
	select {
	case r.updateCh <- update:
	default:
		// Queue is full; drop rather than stall the caller.
		r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')",
			update.Name, r.allocID)
	}
}
   226  
   227  // Destroy is used to indicate that the task context should be destroyed
   228  func (r *TaskRunner) Destroy() {
   229  	r.destroyLock.Lock()
   230  	defer r.destroyLock.Unlock()
   231  
   232  	if r.destroy {
   233  		return
   234  	}
   235  	r.destroy = true
   236  	close(r.destroyCh)
   237  }