github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/driver/lxc.go (about)

     1  //+build linux,lxc
     2  
     3  package driver
     4  
     5  import (
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"log"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/hashicorp/nomad/client/config"
    18  	"github.com/hashicorp/nomad/client/fingerprint"
    19  	"github.com/hashicorp/nomad/client/stats"
    20  	"github.com/hashicorp/nomad/helper/fields"
    21  	"github.com/hashicorp/nomad/nomad/structs"
    22  	"github.com/mitchellh/mapstructure"
    23  
    24  	dstructs "github.com/hashicorp/nomad/client/driver/structs"
    25  	cstructs "github.com/hashicorp/nomad/client/structs"
    26  	lxc "gopkg.in/lxc/go-lxc.v2"
    27  )
    28  
const (
	// lxcConfigOption is the key for enabling the LXC driver in the
	// Config.Options map. Fingerprint reads it with a default of true, so the
	// driver is considered enabled when the key is absent.
	lxcConfigOption = "driver.lxc.enable"

	// containerMonitorIntv is the interval at which the driver checks if the
	// container is still alive. The handle's run loop uses it as the period
	// between liveness probes of the container's init process.
	containerMonitorIntv = 2 * time.Second
)
    38  
var (
	// LXCMeasuredCpuStats names the CPU metrics the driver reports; Stats
	// sets it as the Measured field of the returned CPU stats.
	LXCMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}

	// LXCMeasuredMemStats names the memory metrics the driver reports; Stats
	// sets it as the Measured field of the returned memory stats.
	LXCMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
)
    44  
// init registers the LXC driver factory under the name "lxc" in the
// BuiltinDrivers map.
func init() {
	BuiltinDrivers["lxc"] = NewLxcDriver
}
    49  
// LxcDriver allows users to run LXC Containers. It embeds the DriverContext
// it was constructed with (see NewLxcDriver) and a StaticFingerprinter from
// the fingerprint package.
type LxcDriver struct {
	DriverContext
	fingerprint.StaticFingerprinter
}
    55  
// LxcDriverConfig is the configuration of the LXC Container. Start fills it
// from the task's Config map with mapstructure.WeakDecode; fields carrying a
// mapstructure tag are read from that key (for example ImageVariant from
// "image_variant"), the remaining fields from their own name.
//
// Verbosity must be empty, "quiet" or "verbose", and LogLevel must be empty,
// "trace", "debug", "info", "warn" or "error"; Start rejects any other value.
type LxcDriverConfig struct {
	Template             string
	Distro               string
	Release              string
	Arch                 string
	ImageVariant         string   `mapstructure:"image_variant"`
	ImageServer          string   `mapstructure:"image_server"`
	GPGKeyID             string   `mapstructure:"gpg_key_id"`
	GPGKeyServer         string   `mapstructure:"gpg_key_server"`
	DisableGPGValidation bool     `mapstructure:"disable_gpg"`
	FlushCache           bool     `mapstructure:"flush_cache"`
	ForceCache           bool     `mapstructure:"force_cache"`
	TemplateArgs         []string `mapstructure:"template_args"`
	LogLevel             string   `mapstructure:"log_level"`
	Verbosity            string
}
    73  
    74  // NewLxcDriver returns a new instance of the LXC driver
    75  func NewLxcDriver(ctx *DriverContext) Driver {
    76  	return &LxcDriver{DriverContext: *ctx}
    77  }
    78  
    79  // Validate validates the lxc driver configuration
    80  func (d *LxcDriver) Validate(config map[string]interface{}) error {
    81  	fd := &fields.FieldData{
    82  		Raw: config,
    83  		Schema: map[string]*fields.FieldSchema{
    84  			"template": &fields.FieldSchema{
    85  				Type:     fields.TypeString,
    86  				Required: true,
    87  			},
    88  			"distro": &fields.FieldSchema{
    89  				Type:     fields.TypeString,
    90  				Required: false,
    91  			},
    92  			"release": &fields.FieldSchema{
    93  				Type:     fields.TypeString,
    94  				Required: false,
    95  			},
    96  			"arch": &fields.FieldSchema{
    97  				Type:     fields.TypeString,
    98  				Required: false,
    99  			},
   100  			"image_variant": &fields.FieldSchema{
   101  				Type:     fields.TypeString,
   102  				Required: false,
   103  			},
   104  			"image_server": &fields.FieldSchema{
   105  				Type:     fields.TypeString,
   106  				Required: false,
   107  			},
   108  			"gpg_key_id": &fields.FieldSchema{
   109  				Type:     fields.TypeString,
   110  				Required: false,
   111  			},
   112  			"gpg_key_server": &fields.FieldSchema{
   113  				Type:     fields.TypeString,
   114  				Required: false,
   115  			},
   116  			"disable_gpg": &fields.FieldSchema{
   117  				Type:     fields.TypeString,
   118  				Required: false,
   119  			},
   120  			"flush_cache": &fields.FieldSchema{
   121  				Type:     fields.TypeString,
   122  				Required: false,
   123  			},
   124  			"force_cache": &fields.FieldSchema{
   125  				Type:     fields.TypeString,
   126  				Required: false,
   127  			},
   128  			"template_args": &fields.FieldSchema{
   129  				Type:     fields.TypeArray,
   130  				Required: false,
   131  			},
   132  			"log_level": &fields.FieldSchema{
   133  				Type:     fields.TypeString,
   134  				Required: false,
   135  			},
   136  			"verbosity": &fields.FieldSchema{
   137  				Type:     fields.TypeString,
   138  				Required: false,
   139  			},
   140  		},
   141  	}
   142  
   143  	if err := fd.Validate(); err != nil {
   144  		return err
   145  	}
   146  
   147  	return nil
   148  }
   149  
   150  func (d *LxcDriver) Abilities() DriverAbilities {
   151  	return DriverAbilities{
   152  		SendSignals: false,
   153  		Exec:        false,
   154  	}
   155  }
   156  
// FSIsolation reports the filesystem isolation mode of the LXC driver, which
// is always cstructs.FSIsolationImage.
func (d *LxcDriver) FSIsolation() cstructs.FSIsolation {
	return cstructs.FSIsolationImage
}
   160  
   161  // Fingerprint fingerprints the lxc driver configuration
   162  func (d *LxcDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
   163  	enabled := cfg.ReadBoolDefault(lxcConfigOption, true)
   164  	if !enabled && !cfg.DevMode {
   165  		return false, nil
   166  	}
   167  	version := lxc.Version()
   168  	if version == "" {
   169  		return false, nil
   170  	}
   171  	node.Attributes["driver.lxc.version"] = version
   172  	node.Attributes["driver.lxc"] = "1"
   173  	return true, nil
   174  }
   175  
// Prestart is a no-op for the LXC driver: it ignores its arguments and
// returns a nil response and a nil error.
func (d *LxcDriver) Prestart(*ExecContext, *structs.Task) (*PrestartResponse, error) {
	return nil, nil
}
   179  
   180  // Start starts the LXC Driver
   181  func (d *LxcDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) {
   182  	var driverConfig LxcDriverConfig
   183  	if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
   184  		return nil, err
   185  	}
   186  	lxcPath := lxc.DefaultConfigPath()
   187  	if path := d.config.Read("driver.lxc.path"); path != "" {
   188  		lxcPath = path
   189  	}
   190  
   191  	containerName := fmt.Sprintf("%s-%s", task.Name, d.DriverContext.allocID)
   192  	c, err := lxc.NewContainer(containerName, lxcPath)
   193  	if err != nil {
   194  		return nil, fmt.Errorf("unable to initialize container: %v", err)
   195  	}
   196  
   197  	var verbosity lxc.Verbosity
   198  	switch driverConfig.Verbosity {
   199  	case "verbose":
   200  		verbosity = lxc.Verbose
   201  	case "", "quiet":
   202  		verbosity = lxc.Quiet
   203  	default:
   204  		return nil, fmt.Errorf("lxc driver config 'verbosity' can only be either quiet or verbose")
   205  	}
   206  	c.SetVerbosity(verbosity)
   207  
   208  	var logLevel lxc.LogLevel
   209  	switch driverConfig.LogLevel {
   210  	case "trace":
   211  		logLevel = lxc.TRACE
   212  	case "debug":
   213  		logLevel = lxc.DEBUG
   214  	case "info":
   215  		logLevel = lxc.INFO
   216  	case "warn":
   217  		logLevel = lxc.WARN
   218  	case "", "error":
   219  		logLevel = lxc.ERROR
   220  	default:
   221  		return nil, fmt.Errorf("lxc driver config 'log_level' can only be trace, debug, info, warn or error")
   222  	}
   223  	c.SetLogLevel(logLevel)
   224  
   225  	logFile := filepath.Join(ctx.TaskDir.LogDir, fmt.Sprintf("%v-lxc.log", task.Name))
   226  	c.SetLogFile(logFile)
   227  
   228  	options := lxc.TemplateOptions{
   229  		Template:             driverConfig.Template,
   230  		Distro:               driverConfig.Distro,
   231  		Release:              driverConfig.Release,
   232  		Arch:                 driverConfig.Arch,
   233  		FlushCache:           driverConfig.FlushCache,
   234  		DisableGPGValidation: driverConfig.DisableGPGValidation,
   235  		ExtraArgs:            driverConfig.TemplateArgs,
   236  	}
   237  
   238  	if err := c.Create(options); err != nil {
   239  		return nil, fmt.Errorf("unable to create container: %v", err)
   240  	}
   241  
   242  	// Set the network type to none
   243  	if err := c.SetConfigItem("lxc.network.type", "none"); err != nil {
   244  		return nil, fmt.Errorf("error setting network type configuration: %v", err)
   245  	}
   246  
   247  	// Bind mount the shared alloc dir and task local dir in the container
   248  	mounts := []string{
   249  		fmt.Sprintf("%s local none rw,bind,create=dir", ctx.TaskDir.LocalDir),
   250  		fmt.Sprintf("%s alloc none rw,bind,create=dir", ctx.TaskDir.SharedAllocDir),
   251  		fmt.Sprintf("%s secrets none rw,bind,create=dir", ctx.TaskDir.SecretsDir),
   252  	}
   253  	for _, mnt := range mounts {
   254  		if err := c.SetConfigItem("lxc.mount.entry", mnt); err != nil {
   255  			return nil, fmt.Errorf("error setting bind mount %q error: %v", mnt, err)
   256  		}
   257  	}
   258  
   259  	// Start the container
   260  	if err := c.Start(); err != nil {
   261  		return nil, fmt.Errorf("unable to start container: %v", err)
   262  	}
   263  
   264  	// Set the resource limits
   265  	if err := c.SetMemoryLimit(lxc.ByteSize(task.Resources.MemoryMB) * lxc.MB); err != nil {
   266  		return nil, fmt.Errorf("unable to set memory limits: %v", err)
   267  	}
   268  	if err := c.SetCgroupItem("cpu.shares", strconv.Itoa(task.Resources.CPU)); err != nil {
   269  		return nil, fmt.Errorf("unable to set cpu shares: %v", err)
   270  	}
   271  
   272  	h := lxcDriverHandle{
   273  		container:      c,
   274  		initPid:        c.InitPid(),
   275  		lxcPath:        lxcPath,
   276  		logger:         d.logger,
   277  		killTimeout:    GetKillTimeout(task.KillTimeout, d.DriverContext.config.MaxKillTimeout),
   278  		maxKillTimeout: d.DriverContext.config.MaxKillTimeout,
   279  		totalCpuStats:  stats.NewCpuStats(),
   280  		userCpuStats:   stats.NewCpuStats(),
   281  		systemCpuStats: stats.NewCpuStats(),
   282  		waitCh:         make(chan *dstructs.WaitResult, 1),
   283  		doneCh:         make(chan bool, 1),
   284  	}
   285  
   286  	go h.run()
   287  
   288  	return &StartResponse{Handle: &h}, nil
   289  }
   290  
   291  func (d *LxcDriver) Cleanup(*ExecContext, *CreatedResources) error { return nil }
   292  
   293  // Open creates the driver to monitor an existing LXC container
   294  func (d *LxcDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
   295  	pid := &lxcPID{}
   296  	if err := json.Unmarshal([]byte(handleID), pid); err != nil {
   297  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
   298  	}
   299  
   300  	var container *lxc.Container
   301  	containers := lxc.Containers(pid.LxcPath)
   302  	for _, c := range containers {
   303  		if c.Name() == pid.ContainerName {
   304  			container = &c
   305  			break
   306  		}
   307  	}
   308  
   309  	if container == nil {
   310  		return nil, fmt.Errorf("container %v not found", pid.ContainerName)
   311  	}
   312  
   313  	handle := lxcDriverHandle{
   314  		container:      container,
   315  		initPid:        container.InitPid(),
   316  		lxcPath:        pid.LxcPath,
   317  		logger:         d.logger,
   318  		killTimeout:    pid.KillTimeout,
   319  		maxKillTimeout: d.DriverContext.config.MaxKillTimeout,
   320  		totalCpuStats:  stats.NewCpuStats(),
   321  		userCpuStats:   stats.NewCpuStats(),
   322  		systemCpuStats: stats.NewCpuStats(),
   323  		waitCh:         make(chan *dstructs.WaitResult, 1),
   324  		doneCh:         make(chan bool, 1),
   325  	}
   326  	go handle.run()
   327  
   328  	return &handle, nil
   329  }
   330  
// lxcDriverHandle allows controlling the lifecycle of an lxc container. It is
// created by the driver's Start and Open methods, each of which also launches
// the handle's run goroutine.
type lxcDriverHandle struct {
	// container is the LXC container being managed, initPid the pid reported
	// by the container's InitPid method when the handle was built, and
	// lxcPath the LXC path the container lives under.
	container *lxc.Container
	initPid   int
	lxcPath   string

	// logger receives the handle's diagnostic messages.
	logger *log.Logger

	// killTimeout is passed to the container's Shutdown call in Kill;
	// maxKillTimeout is taken from the client's MaxKillTimeout setting.
	killTimeout    time.Duration
	maxKillTimeout time.Duration

	// CPU usage trackers the handle holds for computing CPU percentages in
	// Stats.
	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats

	// waitCh delivers the task's wait result from run and is closed when run
	// returns; doneCh is closed by Kill to make run stop.
	waitCh chan *dstructs.WaitResult
	doneCh chan bool
}
   349  
// lxcPID is the state of a handle that is serialized to JSON by the handle's
// ID method and parsed again by the driver's Open method to re-attach to a
// container.
type lxcPID struct {
	ContainerName string
	InitPid       int
	LxcPath       string
	KillTimeout   time.Duration
}
   356  
   357  func (h *lxcDriverHandle) ID() string {
   358  	pid := lxcPID{
   359  		ContainerName: h.container.Name(),
   360  		InitPid:       h.initPid,
   361  		LxcPath:       h.lxcPath,
   362  		KillTimeout:   h.killTimeout,
   363  	}
   364  	data, err := json.Marshal(pid)
   365  	if err != nil {
   366  		h.logger.Printf("[ERR] driver.lxc: failed to marshal lxc PID to JSON: %v", err)
   367  	}
   368  	return string(data)
   369  }
   370  
// WaitCh returns the channel on which the handle's run loop delivers the
// task's wait result; run closes the channel when it returns.
func (h *lxcDriverHandle) WaitCh() chan *dstructs.WaitResult {
	return h.waitCh
}
   374  
   375  func (h *lxcDriverHandle) Update(task *structs.Task) error {
   376  	h.killTimeout = GetKillTimeout(task.KillTimeout, h.killTimeout)
   377  	return nil
   378  }
   379  
// Exec always fails: the LXC driver cannot execute commands. It ignores its
// arguments and returns nil output, exit code 0 and an error.
func (h *lxcDriverHandle) Exec(ctx context.Context, cmd string, args []string) ([]byte, int, error) {
	return nil, 0, fmt.Errorf("lxc driver cannot execute commands")
}
   383  
   384  func (h *lxcDriverHandle) Kill() error {
   385  	name := h.container.Name()
   386  
   387  	h.logger.Printf("[INFO] driver.lxc: shutting down container %q", name)
   388  	if err := h.container.Shutdown(h.killTimeout); err != nil {
   389  		h.logger.Printf("[INFO] driver.lxc: shutting down container %q failed: %v", name, err)
   390  		if err := h.container.Stop(); err != nil {
   391  			h.logger.Printf("[ERR] driver.lxc: error stopping container %q: %v", name, err)
   392  		}
   393  	}
   394  
   395  	close(h.doneCh)
   396  	return nil
   397  }
   398  
// Signal always fails: the LXC driver does not support sending signals. The
// signal argument is ignored.
func (h *lxcDriverHandle) Signal(s os.Signal) error {
	return fmt.Errorf("LXC does not support signals")
}
   402  
   403  func (h *lxcDriverHandle) Stats() (*cstructs.TaskResourceUsage, error) {
   404  	cpuStats, err := h.container.CPUStats()
   405  	if err != nil {
   406  		return nil, nil
   407  	}
   408  	total, err := h.container.CPUTime()
   409  	if err != nil {
   410  		return nil, nil
   411  	}
   412  
   413  	t := time.Now()
   414  
   415  	// Get the cpu stats
   416  	system := cpuStats["system"]
   417  	user := cpuStats["user"]
   418  	cs := &cstructs.CpuStats{
   419  		SystemMode: h.systemCpuStats.Percent(float64(system)),
   420  		UserMode:   h.systemCpuStats.Percent(float64(user)),
   421  		Percent:    h.totalCpuStats.Percent(float64(total)),
   422  		TotalTicks: float64(user + system),
   423  		Measured:   LXCMeasuredCpuStats,
   424  	}
   425  
   426  	// Get the Memory Stats
   427  	memData := map[string]uint64{
   428  		"rss":   0,
   429  		"cache": 0,
   430  		"swap":  0,
   431  	}
   432  	rawMemStats := h.container.CgroupItem("memory.stat")
   433  	for _, rawMemStat := range rawMemStats {
   434  		key, val, err := keysToVal(rawMemStat)
   435  		if err != nil {
   436  			h.logger.Printf("[ERR] driver.lxc: error getting stat for line %q", rawMemStat)
   437  			continue
   438  		}
   439  		if _, ok := memData[key]; ok {
   440  			memData[key] = val
   441  
   442  		}
   443  	}
   444  	ms := &cstructs.MemoryStats{
   445  		RSS:      memData["rss"],
   446  		Cache:    memData["cache"],
   447  		Swap:     memData["swap"],
   448  		Measured: LXCMeasuredMemStats,
   449  	}
   450  
   451  	mu := h.container.CgroupItem("memory.max_usage_in_bytes")
   452  	for _, rawMemMaxUsage := range mu {
   453  		val, err := strconv.ParseUint(rawMemMaxUsage, 10, 64)
   454  		if err != nil {
   455  			h.logger.Printf("[ERR] driver.lxc: unable to get max memory usage: %v", err)
   456  			continue
   457  		}
   458  		ms.MaxUsage = val
   459  	}
   460  	ku := h.container.CgroupItem("memory.kmem.usage_in_bytes")
   461  	for _, rawKernelUsage := range ku {
   462  		val, err := strconv.ParseUint(rawKernelUsage, 10, 64)
   463  		if err != nil {
   464  			h.logger.Printf("[ERR] driver.lxc: unable to get kernel memory usage: %v", err)
   465  			continue
   466  		}
   467  		ms.KernelUsage = val
   468  	}
   469  
   470  	mku := h.container.CgroupItem("memory.kmem.max_usage_in_bytes")
   471  	for _, rawMaxKernelUsage := range mku {
   472  		val, err := strconv.ParseUint(rawMaxKernelUsage, 10, 64)
   473  		if err != nil {
   474  			h.logger.Printf("[ERR] driver.lxc: unable to get max kernel memory usage: %v", err)
   475  			continue
   476  		}
   477  		ms.KernelMaxUsage = val
   478  	}
   479  
   480  	taskResUsage := cstructs.TaskResourceUsage{
   481  		ResourceUsage: &cstructs.ResourceUsage{
   482  			CpuStats:    cs,
   483  			MemoryStats: ms,
   484  		},
   485  		Timestamp: t.UTC().UnixNano(),
   486  	}
   487  
   488  	return &taskResUsage, nil
   489  }
   490  
   491  func (h *lxcDriverHandle) run() {
   492  	defer close(h.waitCh)
   493  	timer := time.NewTimer(containerMonitorIntv)
   494  	for {
   495  		select {
   496  		case <-timer.C:
   497  			process, err := os.FindProcess(h.initPid)
   498  			if err != nil {
   499  				h.waitCh <- &dstructs.WaitResult{Err: err}
   500  				return
   501  			}
   502  			if err := process.Signal(syscall.Signal(0)); err != nil {
   503  				h.waitCh <- &dstructs.WaitResult{}
   504  				return
   505  			}
   506  			timer.Reset(containerMonitorIntv)
   507  		case <-h.doneCh:
   508  			h.waitCh <- &dstructs.WaitResult{}
   509  			return
   510  		}
   511  	}
   512  }
   513  
   514  func keysToVal(line string) (string, uint64, error) {
   515  	tokens := strings.Split(line, " ")
   516  	if len(tokens) != 2 {
   517  		return "", 0, fmt.Errorf("line isn't a k/v pair")
   518  	}
   519  	key := tokens[0]
   520  	val, err := strconv.ParseUint(tokens[1], 10, 64)
   521  	return key, val, err
   522  }