github.com/Shopify/docker@v1.13.1/libcontainerd/client_linux.go (about)

     1  package libcontainerd
     2  
import (
	"fmt"
	"io"
	"os"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	containerd "github.com/docker/containerd/api/grpc/types"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/mount"
	"github.com/golang/protobuf/ptypes"
	"github.com/golang/protobuf/ptypes/timestamp"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/net/context"
)
    20  
    21  type client struct {
    22  	clientCommon
    23  
    24  	// Platform specific properties below here.
    25  	remote        *remote
    26  	q             queue
    27  	exitNotifiers map[string]*exitNotifier
    28  	liveRestore   bool
    29  }
    30  
    31  // GetServerVersion returns the connected server version information
    32  func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
    33  	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	sv := &ServerVersion{
    39  		GetServerVersionResponse: *resp,
    40  	}
    41  
    42  	return sv, nil
    43  }
    44  
    45  // AddProcess is the handler for adding a process to an already running
    46  // container. It's called through docker exec. It returns the system pid of the
    47  // exec'd process.
    48  func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) {
    49  	clnt.lock(containerID)
    50  	defer clnt.unlock(containerID)
    51  	container, err := clnt.getContainer(containerID)
    52  	if err != nil {
    53  		return -1, err
    54  	}
    55  
    56  	spec, err := container.spec()
    57  	if err != nil {
    58  		return -1, err
    59  	}
    60  	sp := spec.Process
    61  	sp.Args = specp.Args
    62  	sp.Terminal = specp.Terminal
    63  	if len(specp.Env) > 0 {
    64  		sp.Env = specp.Env
    65  	}
    66  	if specp.Cwd != nil {
    67  		sp.Cwd = *specp.Cwd
    68  	}
    69  	if specp.User != nil {
    70  		sp.User = specs.User{
    71  			UID:            specp.User.UID,
    72  			GID:            specp.User.GID,
    73  			AdditionalGids: specp.User.AdditionalGids,
    74  		}
    75  	}
    76  	if specp.Capabilities != nil {
    77  		sp.Capabilities = specp.Capabilities
    78  	}
    79  
    80  	p := container.newProcess(processFriendlyName)
    81  
    82  	r := &containerd.AddProcessRequest{
    83  		Args:     sp.Args,
    84  		Cwd:      sp.Cwd,
    85  		Terminal: sp.Terminal,
    86  		Id:       containerID,
    87  		Env:      sp.Env,
    88  		User: &containerd.User{
    89  			Uid:            sp.User.UID,
    90  			Gid:            sp.User.GID,
    91  			AdditionalGids: sp.User.AdditionalGids,
    92  		},
    93  		Pid:             processFriendlyName,
    94  		Stdin:           p.fifo(syscall.Stdin),
    95  		Stdout:          p.fifo(syscall.Stdout),
    96  		Stderr:          p.fifo(syscall.Stderr),
    97  		Capabilities:    sp.Capabilities,
    98  		ApparmorProfile: sp.ApparmorProfile,
    99  		SelinuxLabel:    sp.SelinuxLabel,
   100  		NoNewPrivileges: sp.NoNewPrivileges,
   101  		Rlimits:         convertRlimits(sp.Rlimits),
   102  	}
   103  
   104  	fifoCtx, cancel := context.WithCancel(context.Background())
   105  	defer func() {
   106  		if err != nil {
   107  			cancel()
   108  		}
   109  	}()
   110  
   111  	iopipe, err := p.openFifos(fifoCtx, sp.Terminal)
   112  	if err != nil {
   113  		return -1, err
   114  	}
   115  
   116  	resp, err := clnt.remote.apiClient.AddProcess(ctx, r)
   117  	if err != nil {
   118  		p.closeFifos(iopipe)
   119  		return -1, err
   120  	}
   121  
   122  	var stdinOnce sync.Once
   123  	stdin := iopipe.Stdin
   124  	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
   125  		var err error
   126  		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
   127  			err = stdin.Close()
   128  			if err2 := p.sendCloseStdin(); err == nil {
   129  				err = err2
   130  			}
   131  		})
   132  		return err
   133  	})
   134  
   135  	container.processes[processFriendlyName] = p
   136  
   137  	if err := attachStdio(*iopipe); err != nil {
   138  		p.closeFifos(iopipe)
   139  		return -1, err
   140  	}
   141  
   142  	return int(resp.SystemPid), nil
   143  }
   144  
   145  func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
   146  	clnt.lock(containerID)
   147  	defer clnt.unlock(containerID)
   148  	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
   149  		Id:     containerID,
   150  		Pid:    pid,
   151  		Signal: uint32(sig),
   152  	})
   153  	return err
   154  }
   155  
   156  func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
   157  	clnt.lock(containerID)
   158  	defer clnt.unlock(containerID)
   159  	if _, err := clnt.getContainer(containerID); err != nil {
   160  		return err
   161  	}
   162  	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
   163  		Id:     containerID,
   164  		Pid:    processFriendlyName,
   165  		Width:  uint32(width),
   166  		Height: uint32(height),
   167  	})
   168  	return err
   169  }
   170  
   171  func (clnt *client) Pause(containerID string) error {
   172  	return clnt.setState(containerID, StatePause)
   173  }
   174  
   175  func (clnt *client) setState(containerID, state string) error {
   176  	clnt.lock(containerID)
   177  	container, err := clnt.getContainer(containerID)
   178  	if err != nil {
   179  		clnt.unlock(containerID)
   180  		return err
   181  	}
   182  	if container.systemPid == 0 {
   183  		clnt.unlock(containerID)
   184  		return fmt.Errorf("No active process for container %s", containerID)
   185  	}
   186  	st := "running"
   187  	if state == StatePause {
   188  		st = "paused"
   189  	}
   190  	chstate := make(chan struct{})
   191  	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
   192  		Id:     containerID,
   193  		Pid:    InitFriendlyName,
   194  		Status: st,
   195  	})
   196  	if err != nil {
   197  		clnt.unlock(containerID)
   198  		return err
   199  	}
   200  	container.pauseMonitor.append(state, chstate)
   201  	clnt.unlock(containerID)
   202  	<-chstate
   203  	return nil
   204  }
   205  
   206  func (clnt *client) Resume(containerID string) error {
   207  	return clnt.setState(containerID, StateResume)
   208  }
   209  
   210  func (clnt *client) Stats(containerID string) (*Stats, error) {
   211  	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  	return (*Stats)(resp), nil
   216  }
   217  
   218  // Take care of the old 1.11.0 behavior in case the version upgrade
   219  // happened without a clean daemon shutdown
   220  func (clnt *client) cleanupOldRootfs(containerID string) {
   221  	// Unmount and delete the bundle folder
   222  	if mts, err := mount.GetMounts(); err == nil {
   223  		for _, mts := range mts {
   224  			if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
   225  				if err := syscall.Unmount(mts.Mountpoint, syscall.MNT_DETACH); err == nil {
   226  					os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
   227  				}
   228  				break
   229  			}
   230  		}
   231  	}
   232  }
   233  
   234  func (clnt *client) setExited(containerID string, exitCode uint32) error {
   235  	clnt.lock(containerID)
   236  	defer clnt.unlock(containerID)
   237  
   238  	err := clnt.backend.StateChanged(containerID, StateInfo{
   239  		CommonStateInfo: CommonStateInfo{
   240  			State:    StateExit,
   241  			ExitCode: exitCode,
   242  		}})
   243  
   244  	clnt.cleanupOldRootfs(containerID)
   245  
   246  	return err
   247  }
   248  
   249  func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
   250  	cont, err := clnt.getContainerdContainer(containerID)
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  	pids := make([]int, len(cont.Pids))
   255  	for i, p := range cont.Pids {
   256  		pids[i] = int(p)
   257  	}
   258  	return pids, nil
   259  }
   260  
   261  // Summary returns a summary of the processes running in a container.
   262  // This is a no-op on Linux.
   263  func (clnt *client) Summary(containerID string) ([]Summary, error) {
   264  	return nil, nil
   265  }
   266  
   267  func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
   268  	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
   269  	if err != nil {
   270  		return nil, err
   271  	}
   272  	for _, cont := range resp.Containers {
   273  		if cont.Id == containerID {
   274  			return cont, nil
   275  		}
   276  	}
   277  	return nil, fmt.Errorf("invalid state response")
   278  }
   279  
   280  func (clnt *client) UpdateResources(containerID string, resources Resources) error {
   281  	clnt.lock(containerID)
   282  	defer clnt.unlock(containerID)
   283  	container, err := clnt.getContainer(containerID)
   284  	if err != nil {
   285  		return err
   286  	}
   287  	if container.systemPid == 0 {
   288  		return fmt.Errorf("No active process for container %s", containerID)
   289  	}
   290  	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
   291  		Id:        containerID,
   292  		Pid:       InitFriendlyName,
   293  		Resources: (*containerd.UpdateResource)(&resources),
   294  	})
   295  	if err != nil {
   296  		return err
   297  	}
   298  	return nil
   299  }
   300  
   301  func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
   302  	clnt.mapMutex.RLock()
   303  	defer clnt.mapMutex.RUnlock()
   304  	return clnt.exitNotifiers[containerID]
   305  }
   306  
   307  func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
   308  	clnt.mapMutex.Lock()
   309  	w, ok := clnt.exitNotifiers[containerID]
   310  	defer clnt.mapMutex.Unlock()
   311  	if !ok {
   312  		w = &exitNotifier{c: make(chan struct{}), client: clnt}
   313  		clnt.exitNotifiers[containerID] = w
   314  	}
   315  	return w
   316  }
   317  
   318  func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) {
   319  	clnt.lock(cont.Id)
   320  	defer clnt.unlock(cont.Id)
   321  
   322  	logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)
   323  
   324  	containerID := cont.Id
   325  	if _, err := clnt.getContainer(containerID); err == nil {
   326  		return fmt.Errorf("container %s is already active", containerID)
   327  	}
   328  
   329  	defer func() {
   330  		if err != nil {
   331  			clnt.deleteContainer(cont.Id)
   332  		}
   333  	}()
   334  
   335  	container := clnt.newContainer(cont.BundlePath, options...)
   336  	container.systemPid = systemPid(cont)
   337  
   338  	var terminal bool
   339  	for _, p := range cont.Processes {
   340  		if p.Pid == InitFriendlyName {
   341  			terminal = p.Terminal
   342  		}
   343  	}
   344  
   345  	fifoCtx, cancel := context.WithCancel(context.Background())
   346  	defer func() {
   347  		if err != nil {
   348  			cancel()
   349  		}
   350  	}()
   351  
   352  	iopipe, err := container.openFifos(fifoCtx, terminal)
   353  	if err != nil {
   354  		return err
   355  	}
   356  	var stdinOnce sync.Once
   357  	stdin := iopipe.Stdin
   358  	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
   359  		var err error
   360  		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
   361  			err = stdin.Close()
   362  		})
   363  		return err
   364  	})
   365  
   366  	if err := attachStdio(*iopipe); err != nil {
   367  		container.closeFifos(iopipe)
   368  		return err
   369  	}
   370  
   371  	clnt.appendContainer(container)
   372  
   373  	err = clnt.backend.StateChanged(containerID, StateInfo{
   374  		CommonStateInfo: CommonStateInfo{
   375  			State: StateRestore,
   376  			Pid:   container.systemPid,
   377  		}})
   378  
   379  	if err != nil {
   380  		container.closeFifos(iopipe)
   381  		return err
   382  	}
   383  
   384  	if lastEvent != nil {
   385  		// This should only be a pause or resume event
   386  		if lastEvent.Type == StatePause || lastEvent.Type == StateResume {
   387  			return clnt.backend.StateChanged(containerID, StateInfo{
   388  				CommonStateInfo: CommonStateInfo{
   389  					State: lastEvent.Type,
   390  					Pid:   container.systemPid,
   391  				}})
   392  		}
   393  
   394  		logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
   395  	}
   396  
   397  	return nil
   398  }
   399  
   400  func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) {
   401  	er := &containerd.EventsRequest{
   402  		Timestamp:  tsp,
   403  		StoredOnly: true,
   404  		Id:         id,
   405  	}
   406  	events, err := clnt.remote.apiClient.Events(context.Background(), er)
   407  	if err != nil {
   408  		logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err)
   409  		return nil, err
   410  	}
   411  
   412  	var ev *containerd.Event
   413  	for {
   414  		e, err := events.Recv()
   415  		if err != nil {
   416  			if err.Error() == "EOF" {
   417  				break
   418  			}
   419  			logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err)
   420  			return nil, err
   421  		}
   422  		ev = e
   423  		logrus.Debugf("libcontainerd: received past event %#v", ev)
   424  	}
   425  
   426  	return ev, nil
   427  }
   428  
   429  func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) {
   430  	ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp)
   431  	if err == nil && ev == nil {
   432  		// If ev is nil and the container is running in containerd,
   433  		// we already consumed all the event of the
   434  		// container, included the "exit" one.
   435  		// Thus, we request all events containerd has in memory for
   436  		// this container in order to get the last one (which should
   437  		// be an exit event)
   438  		logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id)
   439  		// Request all events since beginning of time
   440  		t := time.Unix(0, 0)
   441  		tsp, err := ptypes.TimestampProto(t)
   442  		if err != nil {
   443  			logrus.Errorf("libcontainerd: getLastEventSinceTime() failed to convert timestamp: %q", err)
   444  			return nil, err
   445  		}
   446  
   447  		return clnt.getContainerLastEventSinceTime(id, tsp)
   448  	}
   449  
   450  	return ev, err
   451  }
   452  
   453  func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
   454  	// Synchronize with live events
   455  	clnt.remote.Lock()
   456  	defer clnt.remote.Unlock()
   457  	// Check that containerd still knows this container.
   458  	//
   459  	// In the unlikely event that Restore for this container process
   460  	// the its past event before the main loop, the event will be
   461  	// processed twice. However, this is not an issue as all those
   462  	// events will do is change the state of the container to be
   463  	// exactly the same.
   464  	cont, err := clnt.getContainerdContainer(containerID)
   465  	// Get its last event
   466  	ev, eerr := clnt.getContainerLastEvent(containerID)
   467  	if err != nil || cont.Status == "Stopped" {
   468  		if err != nil {
   469  			logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err)
   470  		}
   471  		if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) {
   472  			// Wait a while for the exit event
   473  			timeout := time.NewTimer(10 * time.Second)
   474  			tick := time.NewTicker(100 * time.Millisecond)
   475  		stop:
   476  			for {
   477  				select {
   478  				case <-timeout.C:
   479  					break stop
   480  				case <-tick.C:
   481  					ev, eerr = clnt.getContainerLastEvent(containerID)
   482  					if eerr != nil {
   483  						break stop
   484  					}
   485  					if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
   486  						break stop
   487  					}
   488  				}
   489  			}
   490  			timeout.Stop()
   491  			tick.Stop()
   492  		}
   493  
   494  		// get the exit status for this container, if we don't have
   495  		// one, indicate an error
   496  		ec := uint32(255)
   497  		if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
   498  			ec = ev.Status
   499  		}
   500  		clnt.setExited(containerID, ec)
   501  
   502  		return nil
   503  	}
   504  
   505  	// container is still alive
   506  	if clnt.liveRestore {
   507  		if err := clnt.restore(cont, ev, attachStdio, options...); err != nil {
   508  			logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
   509  		}
   510  		return nil
   511  	}
   512  
   513  	// Kill the container if liveRestore == false
   514  	w := clnt.getOrCreateExitNotifier(containerID)
   515  	clnt.lock(cont.Id)
   516  	container := clnt.newContainer(cont.BundlePath)
   517  	container.systemPid = systemPid(cont)
   518  	clnt.appendContainer(container)
   519  	clnt.unlock(cont.Id)
   520  
   521  	container.discardFifos()
   522  
   523  	if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
   524  		logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
   525  	}
   526  	// Let the main loop handle the exit event
   527  	clnt.remote.Unlock()
   528  	select {
   529  	case <-time.After(10 * time.Second):
   530  		if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
   531  			logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
   532  		}
   533  		select {
   534  		case <-time.After(2 * time.Second):
   535  		case <-w.wait():
   536  			// relock because of the defer
   537  			clnt.remote.Lock()
   538  			return nil
   539  		}
   540  	case <-w.wait():
   541  		// relock because of the defer
   542  		clnt.remote.Lock()
   543  		return nil
   544  	}
   545  	// relock because of the defer
   546  	clnt.remote.Lock()
   547  
   548  	clnt.deleteContainer(containerID)
   549  
   550  	return clnt.setExited(containerID, uint32(255))
   551  }
   552  
   553  func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
   554  	clnt.lock(containerID)
   555  	defer clnt.unlock(containerID)
   556  	if _, err := clnt.getContainer(containerID); err != nil {
   557  		return err
   558  	}
   559  
   560  	_, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
   561  		Id: containerID,
   562  		Checkpoint: &containerd.Checkpoint{
   563  			Name:        checkpointID,
   564  			Exit:        exit,
   565  			Tcp:         true,
   566  			UnixSockets: true,
   567  			Shell:       false,
   568  			EmptyNS:     []string{"network"},
   569  		},
   570  		CheckpointDir: checkpointDir,
   571  	})
   572  	return err
   573  }
   574  
   575  func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
   576  	clnt.lock(containerID)
   577  	defer clnt.unlock(containerID)
   578  	if _, err := clnt.getContainer(containerID); err != nil {
   579  		return err
   580  	}
   581  
   582  	_, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
   583  		Id:            containerID,
   584  		Name:          checkpointID,
   585  		CheckpointDir: checkpointDir,
   586  	})
   587  	return err
   588  }
   589  
   590  func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
   591  	clnt.lock(containerID)
   592  	defer clnt.unlock(containerID)
   593  	if _, err := clnt.getContainer(containerID); err != nil {
   594  		return nil, err
   595  	}
   596  
   597  	resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
   598  		Id:            containerID,
   599  		CheckpointDir: checkpointDir,
   600  	})
   601  	if err != nil {
   602  		return nil, err
   603  	}
   604  	return (*Checkpoints)(resp), nil
   605  }