github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/driver/lxc.go (about)

     1  //+build linux,lxc
     2  
     3  package driver
     4  
     5  import (
     6  	"encoding/json"
     7  	"fmt"
     8  	"log"
     9  	"os"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/hashicorp/nomad/client/allocdir"
    17  	"github.com/hashicorp/nomad/client/config"
    18  	"github.com/hashicorp/nomad/client/fingerprint"
    19  	"github.com/hashicorp/nomad/client/stats"
    20  	"github.com/hashicorp/nomad/helper/fields"
    21  	"github.com/hashicorp/nomad/nomad/structs"
    22  	"github.com/mitchellh/mapstructure"
    23  
    24  	dstructs "github.com/hashicorp/nomad/client/driver/structs"
    25  	cstructs "github.com/hashicorp/nomad/client/structs"
    26  	lxc "gopkg.in/lxc/go-lxc.v2"
    27  )
    28  
const (
	// lxcConfigOption is the key for enabling the LXC driver in the
	// Config.Options map. Fingerprint reads it with a default of true.
	lxcConfigOption = "driver.lxc.enable"

	// containerMonitorIntv is the interval at which the driver checks if the
	// container's init process is still alive.
	containerMonitorIntv = 2 * time.Second
)
    38  
var (
	// LXCMeasuredCpuStats lists the names of the CPU metrics that Stats
	// reports; it is attached to every CpuStats value as its Measured field.
	LXCMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}

	// LXCMeasuredMemStats lists the names of the memory metrics that Stats
	// reports; it is attached to every MemoryStats value as its Measured field.
	LXCMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
)
    44  
// init adds the lxc driver to the list of builtin drivers by registering
// NewLxcDriver under the name "lxc".
func init() {
	BuiltinDrivers["lxc"] = NewLxcDriver
}
    49  
// LxcDriver allows users to run LXC Containers. It embeds the DriverContext it
// was constructed with and a fingerprint.StaticFingerprinter.
type LxcDriver struct {
	DriverContext
	fingerprint.StaticFingerprinter
}
    55  
    56  // LxcDriverConfig is the configuration of the LXC Container
    57  type LxcDriverConfig struct {
    58  	Template             string
    59  	Distro               string
    60  	Release              string
    61  	Arch                 string
    62  	ImageVariant         string   "mapstructure:`image_variant`"
    63  	ImageServer          string   "mapstructure:`image_server`"
    64  	GPGKeyID             string   "mapstructure:`gpg_key_id`"
    65  	GPGKeyServer         string   "mapstructure:`gpg_key_server`"
    66  	DisableGPGValidation bool     "mapstructure:`disable_gpg`"
    67  	FlushCache           bool     "mapstructure:`flush_cache`"
    68  	ForceCache           bool     "mapstructure:`force_cache`"
    69  	TemplateArgs         []string "mapstructure:`template_args`"
    70  	LogLevel             string   `mapstructure:"log_level"`
    71  	Verbosity            string
    72  }
    73  
    74  // NewLxcDriver returns a new instance of the LXC driver
    75  func NewLxcDriver(ctx *DriverContext) Driver {
    76  	return &LxcDriver{DriverContext: *ctx}
    77  }
    78  
    79  // Validate validates the lxc driver configuration
    80  func (d *LxcDriver) Validate(config map[string]interface{}) error {
    81  	fd := &fields.FieldData{
    82  		Raw: config,
    83  		Schema: map[string]*fields.FieldSchema{
    84  			"template": &fields.FieldSchema{
    85  				Type:     fields.TypeString,
    86  				Required: true,
    87  			},
    88  			"distro": &fields.FieldSchema{
    89  				Type:     fields.TypeString,
    90  				Required: false,
    91  			},
    92  			"release": &fields.FieldSchema{
    93  				Type:     fields.TypeString,
    94  				Required: false,
    95  			},
    96  			"arch": &fields.FieldSchema{
    97  				Type:     fields.TypeString,
    98  				Required: false,
    99  			},
   100  			"image_variant": &fields.FieldSchema{
   101  				Type:     fields.TypeString,
   102  				Required: false,
   103  			},
   104  			"image_server": &fields.FieldSchema{
   105  				Type:     fields.TypeString,
   106  				Required: false,
   107  			},
   108  			"gpg_key_id": &fields.FieldSchema{
   109  				Type:     fields.TypeString,
   110  				Required: false,
   111  			},
   112  			"gpg_key_server": &fields.FieldSchema{
   113  				Type:     fields.TypeString,
   114  				Required: false,
   115  			},
   116  			"disable_gpg": &fields.FieldSchema{
   117  				Type:     fields.TypeString,
   118  				Required: false,
   119  			},
   120  			"flush_cache": &fields.FieldSchema{
   121  				Type:     fields.TypeString,
   122  				Required: false,
   123  			},
   124  			"force_cache": &fields.FieldSchema{
   125  				Type:     fields.TypeString,
   126  				Required: false,
   127  			},
   128  			"template_args": &fields.FieldSchema{
   129  				Type:     fields.TypeArray,
   130  				Required: false,
   131  			},
   132  			"log_level": &fields.FieldSchema{
   133  				Type:     fields.TypeString,
   134  				Required: false,
   135  			},
   136  			"verbosity": &fields.FieldSchema{
   137  				Type:     fields.TypeString,
   138  				Required: false,
   139  			},
   140  		},
   141  	}
   142  
   143  	if err := fd.Validate(); err != nil {
   144  		return err
   145  	}
   146  
   147  	return nil
   148  }
   149  
   150  func (d *LxcDriver) Abilities() DriverAbilities {
   151  	return DriverAbilities{
   152  		SendSignals: false,
   153  	}
   154  }
   155  
   156  // Fingerprint fingerprints the lxc driver configuration
   157  func (d *LxcDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
   158  	enabled := cfg.ReadBoolDefault(lxcConfigOption, true)
   159  	if !enabled && !cfg.DevMode {
   160  		return false, nil
   161  	}
   162  	version := lxc.Version()
   163  	if version == "" {
   164  		return false, nil
   165  	}
   166  	node.Attributes["driver.lxc.version"] = version
   167  	node.Attributes["driver.lxc"] = "1"
   168  	return true, nil
   169  }
   170  
   171  // Start starts the LXC Driver
   172  func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
   173  	var driverConfig LxcDriverConfig
   174  	if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
   175  		return nil, err
   176  	}
   177  	lxcPath := lxc.DefaultConfigPath()
   178  	if path := d.config.Read("driver.lxc.path"); path != "" {
   179  		lxcPath = path
   180  	}
   181  
   182  	containerName := fmt.Sprintf("%s-%s", task.Name, ctx.AllocID)
   183  	c, err := lxc.NewContainer(containerName, lxcPath)
   184  	if err != nil {
   185  		return nil, fmt.Errorf("unable to create container: %v", err)
   186  	}
   187  
   188  	var verbosity lxc.Verbosity
   189  	switch driverConfig.Verbosity {
   190  	case "verbose":
   191  		verbosity = lxc.Verbose
   192  	case "", "quiet":
   193  		verbosity = lxc.Quiet
   194  	default:
   195  		return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose")
   196  	}
   197  	c.SetVerbosity(verbosity)
   198  
   199  	var logLevel lxc.LogLevel
   200  	switch driverConfig.LogLevel {
   201  	case "trace":
   202  		logLevel = lxc.TRACE
   203  	case "debug":
   204  		logLevel = lxc.DEBUG
   205  	case "info":
   206  		logLevel = lxc.INFO
   207  	case "warn":
   208  		logLevel = lxc.WARN
   209  	case "", "error":
   210  		logLevel = lxc.ERROR
   211  	default:
   212  		return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error")
   213  	}
   214  	c.SetLogLevel(logLevel)
   215  
   216  	logFile := filepath.Join(ctx.AllocDir.LogDir(), fmt.Sprintf("%v-lxc.log", task.Name))
   217  	c.SetLogFile(logFile)
   218  
   219  	options := lxc.TemplateOptions{
   220  		Template:             driverConfig.Template,
   221  		Distro:               driverConfig.Distro,
   222  		Release:              driverConfig.Release,
   223  		Arch:                 driverConfig.Arch,
   224  		FlushCache:           driverConfig.FlushCache,
   225  		DisableGPGValidation: driverConfig.DisableGPGValidation,
   226  	}
   227  
   228  	if err := c.Create(options); err != nil {
   229  		return nil, fmt.Errorf("unable to create container: %v", err)
   230  	}
   231  
   232  	// Set the network type to none
   233  	if err := c.SetConfigItem("lxc.network.type", "none"); err != nil {
   234  		return nil, fmt.Errorf("error setting network type configuration: %v", err)
   235  	}
   236  
   237  	// Bind mount the shared alloc dir and task local dir in the container
   238  	taskDir, ok := ctx.AllocDir.TaskDirs[task.Name]
   239  	if !ok {
   240  		return nil, fmt.Errorf("failed to find task local directory: %v", task.Name)
   241  	}
   242  	secretdir, err := ctx.AllocDir.GetSecretDir(task.Name)
   243  	if err != nil {
   244  		return nil, fmt.Errorf("faild getting secret path for task: %v", err)
   245  	}
   246  	taskLocalDir := filepath.Join(taskDir, allocdir.TaskLocal)
   247  	mounts := []string{
   248  		fmt.Sprintf("%s local none rw,bind,create=dir", taskLocalDir),
   249  		fmt.Sprintf("%s alloc none rw,bind,create=dir", ctx.AllocDir.SharedDir),
   250  		fmt.Sprintf("%s secret none rw,bind,create=dir", secretdir),
   251  	}
   252  	for _, mnt := range mounts {
   253  		if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
   254  			return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err)
   255  		}
   256  	}
   257  
   258  	// Start the container
   259  	if err := c.Start(); err != nil {
   260  		return nil, fmt.Errorf("unable to start container: %v", err)
   261  	}
   262  
   263  	// Set the resource limits
   264  	if err := c.SetMemoryLimit(lxc.ByteSize(task.Resources.MemoryMB) * lxc.MB); err != nil {
   265  		return nil, fmt.Errorf("unable to set memory limits: %v", err)
   266  	}
   267  	if err := c.SetCgroupItem("cpu.shares", strconv.Itoa(task.Resources.CPU)); err != nil {
   268  		return nil, fmt.Errorf("unable to set cpu shares: %v", err)
   269  	}
   270  
   271  	handle := lxcDriverHandle{
   272  		container:      c,
   273  		initPid:        c.InitPid(),
   274  		lxcPath:        lxcPath,
   275  		logger:         d.logger,
   276  		killTimeout:    GetKillTimeout(task.KillTimeout, d.DriverContext.config.MaxKillTimeout),
   277  		maxKillTimeout: d.DriverContext.config.MaxKillTimeout,
   278  		totalCpuStats:  stats.NewCpuStats(),
   279  		userCpuStats:   stats.NewCpuStats(),
   280  		systemCpuStats: stats.NewCpuStats(),
   281  		waitCh:         make(chan *dstructs.WaitResult, 1),
   282  		doneCh:         make(chan bool, 1),
   283  	}
   284  
   285  	go handle.run()
   286  
   287  	return &handle, nil
   288  }
   289  
   290  // Open creates the driver to monitor an existing LXC container
   291  func (d *LxcDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
   292  	pid := &lxcPID{}
   293  	if err := json.Unmarshal([]byte(handleID), pid); err != nil {
   294  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
   295  	}
   296  
   297  	var container *lxc.Container
   298  	containers := lxc.Containers(pid.LxcPath)
   299  	for _, c := range containers {
   300  		if c.Name() == pid.ContainerName {
   301  			container = &c
   302  			break
   303  		}
   304  	}
   305  
   306  	if container == nil {
   307  		return nil, fmt.Errorf("container %v not found", pid.ContainerName)
   308  	}
   309  
   310  	handle := lxcDriverHandle{
   311  		container:      container,
   312  		initPid:        container.InitPid(),
   313  		lxcPath:        pid.LxcPath,
   314  		logger:         d.logger,
   315  		killTimeout:    pid.KillTimeout,
   316  		maxKillTimeout: d.DriverContext.config.MaxKillTimeout,
   317  		totalCpuStats:  stats.NewCpuStats(),
   318  		userCpuStats:   stats.NewCpuStats(),
   319  		systemCpuStats: stats.NewCpuStats(),
   320  		waitCh:         make(chan *dstructs.WaitResult, 1),
   321  		doneCh:         make(chan bool, 1),
   322  	}
   323  	go handle.run()
   324  
   325  	return &handle, nil
   326  }
   327  
// lxcDriverHandle allows controlling the lifecycle of an lxc container
type lxcDriverHandle struct {
	// container is the LXC container this handle controls.
	container *lxc.Container
	// initPid is the pid of the container's init process as reported when the
	// handle was created; run probes it to detect that the container exited.
	initPid int
	// lxcPath is the LXC path the container was created under or found in.
	lxcPath string

	logger *log.Logger

	// killTimeout is how long Kill waits for a clean shutdown before the
	// container is stopped.
	killTimeout time.Duration
	// maxKillTimeout is the client's configured MaxKillTimeout.
	maxKillTimeout time.Duration

	// Trackers used by Stats to turn raw CPU counters into percentages.
	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats

	// waitCh receives the task's wait result from run and is closed afterwards.
	waitCh chan *dstructs.WaitResult
	// doneCh is closed by Kill to tell run to stop monitoring.
	doneCh chan bool
}
   346  
// lxcPID is the persisted identity of a handle. ID serializes it to JSON and
// Open parses it again to re-attach to the container.
type lxcPID struct {
	ContainerName string
	InitPid       int
	LxcPath       string
	KillTimeout   time.Duration
}
   353  
   354  func (h *lxcDriverHandle) ID() string {
   355  	pid := lxcPID{
   356  		ContainerName: h.container.Name(),
   357  		InitPid:       h.initPid,
   358  		LxcPath:       h.lxcPath,
   359  		KillTimeout:   h.killTimeout,
   360  	}
   361  	data, err := json.Marshal(pid)
   362  	if err != nil {
   363  		h.logger.Printf("[ERR] driver.lxc: failed to marshal lxc PID to JSON: %v", err)
   364  	}
   365  	return string(data)
   366  }
   367  
// WaitCh returns the channel on which run delivers the task's wait result; the
// channel is closed once run returns.
func (h *lxcDriverHandle) WaitCh() chan *dstructs.WaitResult {
	return h.waitCh
}
   371  
   372  func (h *lxcDriverHandle) Update(task *structs.Task) error {
   373  	h.killTimeout = GetKillTimeout(task.KillTimeout, h.killTimeout)
   374  	return nil
   375  }
   376  
   377  func (h *lxcDriverHandle) Kill() error {
   378  	h.logger.Printf("[INFO] driver.lxc: shutting down container %q", h.container.Name())
   379  	if err := h.container.Shutdown(h.killTimeout); err != nil {
   380  		h.logger.Printf("[INFO] driver.lxc: shutting down container %q failed: %v", h.container.Name(), err)
   381  		if err := h.container.Stop(); err != nil {
   382  			h.logger.Printf("[ERR] driver.lxc: error stopping container %q: %v", h.container.Name(), err)
   383  		}
   384  	}
   385  	close(h.doneCh)
   386  	return nil
   387  }
   388  
// Signal always fails: the lxc driver does not support sending signals to a
// task. The signal argument is ignored.
func (h *lxcDriverHandle) Signal(s os.Signal) error {
	return fmt.Errorf("LXC does not support signals")
}
   392  
   393  func (h *lxcDriverHandle) Stats() (*cstructs.TaskResourceUsage, error) {
   394  	cpuStats, err := h.container.CPUStats()
   395  	if err != nil {
   396  		return nil, nil
   397  	}
   398  	total, err := h.container.CPUTime()
   399  	if err != nil {
   400  		return nil, nil
   401  	}
   402  
   403  	t := time.Now()
   404  
   405  	// Get the cpu stats
   406  	system := cpuStats["system"]
   407  	user := cpuStats["user"]
   408  	cs := &cstructs.CpuStats{
   409  		SystemMode: h.systemCpuStats.Percent(float64(system)),
   410  		UserMode:   h.systemCpuStats.Percent(float64(user)),
   411  		Percent:    h.totalCpuStats.Percent(float64(total)),
   412  		TotalTicks: float64(user + system),
   413  		Measured:   LXCMeasuredCpuStats,
   414  	}
   415  
   416  	// Get the Memory Stats
   417  	memData := map[string]uint64{
   418  		"rss":   0,
   419  		"cache": 0,
   420  		"swap":  0,
   421  	}
   422  	rawMemStats := h.container.CgroupItem("memory.stat")
   423  	for _, rawMemStat := range rawMemStats {
   424  		key, val, err := keysToVal(rawMemStat)
   425  		if err != nil {
   426  			h.logger.Printf("[ERR] driver.lxc: error getting stat for line %q", rawMemStat)
   427  			continue
   428  		}
   429  		if _, ok := memData[key]; ok {
   430  			memData[key] = val
   431  
   432  		}
   433  	}
   434  	ms := &cstructs.MemoryStats{
   435  		RSS:      memData["rss"],
   436  		Cache:    memData["cache"],
   437  		Swap:     memData["swap"],
   438  		Measured: LXCMeasuredMemStats,
   439  	}
   440  
   441  	mu := h.container.CgroupItem("memory.max_usage_in_bytes")
   442  	for _, rawMemMaxUsage := range mu {
   443  		val, err := strconv.ParseUint(rawMemMaxUsage, 10, 64)
   444  		if err != nil {
   445  			h.logger.Printf("[ERR] driver.lxc: unable to get max memory usage: %v", err)
   446  			continue
   447  		}
   448  		ms.MaxUsage = val
   449  	}
   450  	ku := h.container.CgroupItem("memory.kmem.usage_in_bytes")
   451  	for _, rawKernelUsage := range ku {
   452  		val, err := strconv.ParseUint(rawKernelUsage, 10, 64)
   453  		if err != nil {
   454  			h.logger.Printf("[ERR] driver.lxc: unable to get kernel memory usage: %v", err)
   455  			continue
   456  		}
   457  		ms.KernelUsage = val
   458  	}
   459  
   460  	mku := h.container.CgroupItem("memory.kmem.max_usage_in_bytes")
   461  	for _, rawMaxKernelUsage := range mku {
   462  		val, err := strconv.ParseUint(rawMaxKernelUsage, 10, 64)
   463  		if err != nil {
   464  			h.logger.Printf("[ERR] driver.lxc: unable to get max kernel memory usage: %v", err)
   465  			continue
   466  		}
   467  		ms.KernelMaxUsage = val
   468  	}
   469  
   470  	taskResUsage := cstructs.TaskResourceUsage{
   471  		ResourceUsage: &cstructs.ResourceUsage{
   472  			CpuStats:    cs,
   473  			MemoryStats: ms,
   474  		},
   475  		Timestamp: t.UTC().UnixNano(),
   476  	}
   477  
   478  	return &taskResUsage, nil
   479  }
   480  
   481  func (h *lxcDriverHandle) run() {
   482  	defer close(h.waitCh)
   483  	timer := time.NewTimer(containerMonitorIntv)
   484  	for {
   485  		select {
   486  		case <-timer.C:
   487  			process, err := os.FindProcess(h.initPid)
   488  			if err != nil {
   489  				h.waitCh <- &dstructs.WaitResult{Err: err}
   490  				return
   491  			}
   492  			if err := process.Signal(syscall.Signal(0)); err != nil {
   493  				h.waitCh <- &dstructs.WaitResult{}
   494  				return
   495  			}
   496  			timer.Reset(containerMonitorIntv)
   497  		case <-h.doneCh:
   498  			h.waitCh <- &dstructs.WaitResult{}
   499  			return
   500  		}
   501  	}
   502  }
   503  
   504  func keysToVal(line string) (string, uint64, error) {
   505  	tokens := strings.Split(line, " ")
   506  	if len(tokens) != 2 {
   507  		return "", 0, fmt.Errorf("line isn't a k/v pair")
   508  	}
   509  	key := tokens[0]
   510  	val, err := strconv.ParseUint(tokens[1], 10, 64)
   511  	return key, val, err
   512  }