github.com/alexandrev/docker@v1.9.0/daemon/execdriver/native/create.go (about)

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"strings"
     8  	"syscall"
     9  
    10  	"github.com/docker/docker/daemon/execdriver"
    11  
    12  	"github.com/opencontainers/runc/libcontainer/apparmor"
    13  	"github.com/opencontainers/runc/libcontainer/configs"
    14  	"github.com/opencontainers/runc/libcontainer/devices"
    15  )
    16  
    17  // createContainer populates and configures the container type with the
    18  // data provided by the execdriver.Command
    19  func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (*configs.Config, error) {
    20  	container := execdriver.InitContainer(c)
    21  
    22  	if err := d.createIpc(container, c); err != nil {
    23  		return nil, err
    24  	}
    25  
    26  	if err := d.createPid(container, c); err != nil {
    27  		return nil, err
    28  	}
    29  
    30  	if err := d.createUTS(container, c); err != nil {
    31  		return nil, err
    32  	}
    33  
    34  	if err := d.setupRemappedRoot(container, c); err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	if err := d.createNetwork(container, c, hooks); err != nil {
    39  		return nil, err
    40  	}
    41  
    42  	if c.ProcessConfig.Privileged {
    43  		if !container.Readonlyfs {
    44  			// clear readonly for /sys
    45  			for i := range container.Mounts {
    46  				if container.Mounts[i].Destination == "/sys" {
    47  					container.Mounts[i].Flags &= ^syscall.MS_RDONLY
    48  				}
    49  			}
    50  			container.ReadonlyPaths = nil
    51  		}
    52  
    53  		// clear readonly for cgroup
    54  		for i := range container.Mounts {
    55  			if container.Mounts[i].Device == "cgroup" {
    56  				container.Mounts[i].Flags &= ^syscall.MS_RDONLY
    57  			}
    58  		}
    59  
    60  		container.MaskPaths = nil
    61  		if err := d.setPrivileged(container); err != nil {
    62  			return nil, err
    63  		}
    64  	} else {
    65  		if err := d.setCapabilities(container, c); err != nil {
    66  			return nil, err
    67  		}
    68  	}
    69  	// add CAP_ prefix to all caps for new libcontainer update to match
    70  	// the spec format.
    71  	for i, s := range container.Capabilities {
    72  		if !strings.HasPrefix(s, "CAP_") {
    73  			container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
    74  		}
    75  	}
    76  	container.AdditionalGroups = c.GroupAdd
    77  
    78  	if c.AppArmorProfile != "" {
    79  		container.AppArmorProfile = c.AppArmorProfile
    80  	}
    81  
    82  	if err := execdriver.SetupCgroups(container, c); err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	if container.Readonlyfs {
    87  		for i := range container.Mounts {
    88  			switch container.Mounts[i].Destination {
    89  			case "/proc", "/dev", "/dev/pts":
    90  				continue
    91  			}
    92  			container.Mounts[i].Flags |= syscall.MS_RDONLY
    93  		}
    94  
    95  		/* These paths must be remounted as r/o */
    96  		container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
    97  	}
    98  
    99  	if err := d.setupMounts(container, c); err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	d.setupLabels(container, c)
   104  	d.setupRlimits(container, c)
   105  	return container, nil
   106  }
   107  
   108  func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
   109  	if c.Network == nil {
   110  		return nil
   111  	}
   112  	if c.Network.ContainerID != "" {
   113  		d.Lock()
   114  		active := d.activeContainers[c.Network.ContainerID]
   115  		d.Unlock()
   116  
   117  		if active == nil {
   118  			return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
   119  		}
   120  
   121  		state, err := active.State()
   122  		if err != nil {
   123  			return err
   124  		}
   125  
   126  		container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
   127  		return nil
   128  	}
   129  
   130  	if c.Network.NamespacePath != "" {
   131  		container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
   132  		return nil
   133  	}
   134  	// only set up prestart hook if the namespace path is not set (this should be
   135  	// all cases *except* for --net=host shared networking)
   136  	container.Hooks = &configs.Hooks{
   137  		Prestart: []configs.Hook{
   138  			configs.NewFunctionHook(func(s configs.HookState) error {
   139  				if len(hooks.PreStart) > 0 {
   140  					for _, fnHook := range hooks.PreStart {
   141  						// A closed channel for OOM is returned here as it will be
   142  						// non-blocking and return the correct result when read.
   143  						chOOM := make(chan struct{})
   144  						close(chOOM)
   145  						if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
   146  							return err
   147  						}
   148  					}
   149  				}
   150  				return nil
   151  			}),
   152  		},
   153  	}
   154  	return nil
   155  }
   156  
   157  func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
   158  	if c.Ipc.HostIpc {
   159  		container.Namespaces.Remove(configs.NEWIPC)
   160  		return nil
   161  	}
   162  
   163  	if c.Ipc.ContainerID != "" {
   164  		d.Lock()
   165  		active := d.activeContainers[c.Ipc.ContainerID]
   166  		d.Unlock()
   167  
   168  		if active == nil {
   169  			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
   170  		}
   171  
   172  		state, err := active.State()
   173  		if err != nil {
   174  			return err
   175  		}
   176  		container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
   177  	}
   178  
   179  	return nil
   180  }
   181  
   182  func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
   183  	if c.Pid.HostPid {
   184  		container.Namespaces.Remove(configs.NEWPID)
   185  		return nil
   186  	}
   187  
   188  	return nil
   189  }
   190  
   191  func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
   192  	if c.UTS.HostUTS {
   193  		container.Namespaces.Remove(configs.NEWUTS)
   194  		container.Hostname = ""
   195  		return nil
   196  	}
   197  
   198  	return nil
   199  }
   200  
   201  func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
   202  	if c.RemappedRoot.UID == 0 {
   203  		container.Namespaces.Remove(configs.NEWUSER)
   204  		return nil
   205  	}
   206  
   207  	// convert the Docker daemon id map to the libcontainer variant of the same struct
   208  	// this keeps us from having to import libcontainer code across Docker client + daemon packages
   209  	cuidMaps := []configs.IDMap{}
   210  	cgidMaps := []configs.IDMap{}
   211  	for _, idMap := range c.UIDMapping {
   212  		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
   213  	}
   214  	for _, idMap := range c.GIDMapping {
   215  		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
   216  	}
   217  	container.UidMappings = cuidMaps
   218  	container.GidMappings = cgidMaps
   219  
   220  	for _, node := range container.Devices {
   221  		node.Uid = uint32(c.RemappedRoot.UID)
   222  		node.Gid = uint32(c.RemappedRoot.GID)
   223  	}
   224  	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
   225  	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
   226  	for i := range container.Mounts {
   227  		if container.Mounts[i].Device == "cgroup" {
   228  			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
   229  		}
   230  	}
   231  
   232  	return nil
   233  }
   234  
   235  func (d *Driver) setPrivileged(container *configs.Config) (err error) {
   236  	container.Capabilities = execdriver.GetAllCapabilities()
   237  	container.Cgroups.AllowAllDevices = true
   238  
   239  	hostDevices, err := devices.HostDevices()
   240  	if err != nil {
   241  		return err
   242  	}
   243  	container.Devices = hostDevices
   244  
   245  	if apparmor.IsEnabled() {
   246  		container.AppArmorProfile = "unconfined"
   247  	}
   248  	return nil
   249  }
   250  
   251  func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
   252  	container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
   253  	return err
   254  }
   255  
   256  func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
   257  	if c.Resources == nil {
   258  		return
   259  	}
   260  
   261  	for _, rlimit := range c.Resources.Rlimits {
   262  		container.Rlimits = append(container.Rlimits, configs.Rlimit{
   263  			Type: rlimit.Type,
   264  			Hard: rlimit.Hard,
   265  			Soft: rlimit.Soft,
   266  		})
   267  	}
   268  }
   269  
   270  func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
   271  	userMounts := make(map[string]struct{})
   272  	for _, m := range c.Mounts {
   273  		userMounts[m.Destination] = struct{}{}
   274  	}
   275  
   276  	// Filter out mounts that are overriden by user supplied mounts
   277  	var defaultMounts []*configs.Mount
   278  	_, mountDev := userMounts["/dev"]
   279  	for _, m := range container.Mounts {
   280  		if _, ok := userMounts[m.Destination]; !ok {
   281  			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
   282  				continue
   283  			}
   284  			defaultMounts = append(defaultMounts, m)
   285  		}
   286  	}
   287  	container.Mounts = defaultMounts
   288  
   289  	for _, m := range c.Mounts {
   290  		flags := syscall.MS_BIND | syscall.MS_REC
   291  		if !m.Writable {
   292  			flags |= syscall.MS_RDONLY
   293  		}
   294  		if m.Slave {
   295  			flags |= syscall.MS_SLAVE
   296  		}
   297  
   298  		container.Mounts = append(container.Mounts, &configs.Mount{
   299  			Source:      m.Source,
   300  			Destination: m.Destination,
   301  			Device:      "bind",
   302  			Flags:       flags,
   303  		})
   304  	}
   305  	return nil
   306  }
   307  
   308  func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
   309  	container.ProcessLabel = c.ProcessLabel
   310  	container.MountLabel = c.MountLabel
   311  }