github.com/tonistiigi/docker@v0.10.1-0.20240229224939-974013b0dc6a/integration/container/restart_test.go (about)

     1  package container // import "github.com/docker/docker/integration/container"
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"runtime"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/docker/docker/api/types"
    11  	"github.com/docker/docker/api/types/container"
    12  	"github.com/docker/docker/api/types/events"
    13  	"github.com/docker/docker/api/types/filters"
    14  	"github.com/docker/docker/client"
    15  	testContainer "github.com/docker/docker/integration/internal/container"
    16  	"github.com/docker/docker/testutil"
    17  	"github.com/docker/docker/testutil/daemon"
    18  	"gotest.tools/v3/assert"
    19  	is "gotest.tools/v3/assert/cmp"
    20  	"gotest.tools/v3/poll"
    21  	"gotest.tools/v3/skip"
    22  )
    23  
    24  func TestDaemonRestartKillContainers(t *testing.T) {
    25  	skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
    26  	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
    27  	skip.If(t, testEnv.IsRootless, "rootless mode doesn't support live-restore")
    28  
    29  	ctx := testutil.StartSpan(baseContext, t)
    30  
    31  	type testCase struct {
    32  		desc          string
    33  		restartPolicy container.RestartPolicy
    34  
    35  		xRunning            bool
    36  		xRunningLiveRestore bool
    37  		xStart              bool
    38  		xHealthCheck        bool
    39  	}
    40  
    41  	for _, tc := range []testCase{
    42  		{
    43  			desc:                "container without restart policy",
    44  			xRunningLiveRestore: true,
    45  			xStart:              true,
    46  		},
    47  		{
    48  			desc:                "container with restart=always",
    49  			restartPolicy:       container.RestartPolicy{Name: "always"},
    50  			xRunning:            true,
    51  			xRunningLiveRestore: true,
    52  			xStart:              true,
    53  		},
    54  		{
    55  			desc:                "container with restart=always and with healthcheck",
    56  			restartPolicy:       container.RestartPolicy{Name: "always"},
    57  			xRunning:            true,
    58  			xRunningLiveRestore: true,
    59  			xStart:              true,
    60  			xHealthCheck:        true,
    61  		},
    62  		{
    63  			desc:          "container created should not be restarted",
    64  			restartPolicy: container.RestartPolicy{Name: "always"},
    65  		},
    66  	} {
    67  		for _, liveRestoreEnabled := range []bool{false, true} {
    68  			for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
    69  				"kill-daemon": func(t *testing.T, d *daemon.Daemon) {
    70  					err := d.Kill()
    71  					assert.NilError(t, err)
    72  				},
    73  				"stop-daemon": func(t *testing.T, d *daemon.Daemon) {
    74  					d.Stop(t)
    75  				},
    76  			} {
    77  				tc := tc
    78  				liveRestoreEnabled := liveRestoreEnabled
    79  				stopDaemon := stopDaemon
    80  				t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, tc.desc, fnName), func(t *testing.T) {
    81  					t.Parallel()
    82  
    83  					ctx := testutil.StartSpan(ctx, t)
    84  
    85  					d := daemon.New(t)
    86  					apiClient := d.NewClientT(t)
    87  
    88  					args := []string{"--iptables=false"}
    89  					if liveRestoreEnabled {
    90  						args = append(args, "--live-restore")
    91  					}
    92  
    93  					d.StartWithBusybox(ctx, t, args...)
    94  					defer d.Stop(t)
    95  
    96  					config := container.Config{Image: "busybox", Cmd: []string{"top"}}
    97  					hostConfig := container.HostConfig{RestartPolicy: tc.restartPolicy}
    98  					if tc.xHealthCheck {
    99  						config.Healthcheck = &container.HealthConfig{
   100  							Test:          []string{"CMD-SHELL", "! test -f /tmp/unhealthy"},
   101  							StartPeriod:   60 * time.Second,
   102  							StartInterval: 1 * time.Second,
   103  							Interval:      60 * time.Second,
   104  						}
   105  					}
   106  					resp, err := apiClient.ContainerCreate(ctx, &config, &hostConfig, nil, nil, "")
   107  					assert.NilError(t, err)
   108  					defer apiClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
   109  
   110  					if tc.xStart {
   111  						err = apiClient.ContainerStart(ctx, resp.ID, container.StartOptions{})
   112  						assert.NilError(t, err)
   113  						if tc.xHealthCheck {
   114  							poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
   115  							testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"touch", "/tmp/unhealthy"}).AssertSuccess(t)
   116  						}
   117  					}
   118  
   119  					stopDaemon(t, d)
   120  					startTime := time.Now()
   121  					d.Start(t, args...)
   122  
   123  					expected := tc.xRunning
   124  					if liveRestoreEnabled {
   125  						expected = tc.xRunningLiveRestore
   126  					}
   127  
   128  					poll.WaitOn(t, testContainer.RunningStateFlagIs(ctx, apiClient, resp.ID, expected), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
   129  
   130  					if tc.xHealthCheck {
   131  						// We have arranged to have the container's health probes fail until we tell it
   132  						// to become healthy, which gives us the entire StartPeriod (60s) to assert that
   133  						// the container's health state is Starting before we have to worry about racing
   134  						// the health monitor.
   135  						assert.Equal(t, testContainer.Inspect(ctx, t, apiClient, resp.ID).State.Health.Status, types.Starting)
   136  						poll.WaitOn(t, pollForNewHealthCheck(ctx, apiClient, startTime, resp.ID), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
   137  
   138  						testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"rm", "/tmp/unhealthy"}).AssertSuccess(t)
   139  						poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
   140  					}
   141  					// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
   142  				})
   143  			}
   144  		}
   145  	}
   146  }
   147  
   148  func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
   149  	return func(log poll.LogT) poll.Result {
   150  		inspect, err := client.ContainerInspect(ctx, containerID)
   151  		if err != nil {
   152  			return poll.Error(err)
   153  		}
   154  		healthChecksTotal := len(inspect.State.Health.Log)
   155  		if healthChecksTotal > 0 {
   156  			if inspect.State.Health.Log[healthChecksTotal-1].Start.After(startTime) {
   157  				return poll.Success()
   158  			}
   159  		}
   160  		return poll.Continue("waiting for a new container healthcheck")
   161  	}
   162  }
   163  
   164  // Container started with --rm should be able to be restarted.
   165  // It should be removed only if killed or stopped
   166  func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
   167  	ctx := setupTest(t)
   168  	apiClient := testEnv.APIClient()
   169  
   170  	noWaitTimeout := 0
   171  
   172  	for _, tc := range []struct {
   173  		desc  string
   174  		doSth func(ctx context.Context, containerID string) error
   175  	}{
   176  		{
   177  			desc: "kill",
   178  			doSth: func(ctx context.Context, containerID string) error {
   179  				return apiClient.ContainerKill(ctx, containerID, "SIGKILL")
   180  			},
   181  		},
   182  		{
   183  			desc: "stop",
   184  			doSth: func(ctx context.Context, containerID string) error {
   185  				return apiClient.ContainerStop(ctx, containerID, container.StopOptions{Timeout: &noWaitTimeout})
   186  			},
   187  		},
   188  	} {
   189  		tc := tc
   190  		t.Run(tc.desc, func(t *testing.T) {
   191  			testutil.StartSpan(ctx, t)
   192  			cID := testContainer.Run(ctx, t, apiClient,
   193  				testContainer.WithName("autoremove-restart-and-"+tc.desc),
   194  				testContainer.WithAutoRemove,
   195  			)
   196  			defer func() {
   197  				err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
   198  				if t.Failed() && err != nil {
   199  					t.Logf("Cleaning up test container failed with error: %v", err)
   200  				}
   201  			}()
   202  
   203  			err := apiClient.ContainerRestart(ctx, cID, container.StopOptions{Timeout: &noWaitTimeout})
   204  			assert.NilError(t, err)
   205  
   206  			inspect, err := apiClient.ContainerInspect(ctx, cID)
   207  			assert.NilError(t, err)
   208  			assert.Assert(t, inspect.State.Status != "removing", "Container should not be removing yet")
   209  
   210  			poll.WaitOn(t, testContainer.IsInState(ctx, apiClient, cID, "running"))
   211  
   212  			err = tc.doSth(ctx, cID)
   213  			assert.NilError(t, err)
   214  
   215  			poll.WaitOn(t, testContainer.IsRemoved(ctx, apiClient, cID))
   216  		})
   217  	}
   218  }
   219  
   220  // TestContainerRestartWithCancelledRequest verifies that cancelling a restart
   221  // request does not cancel the restart operation, and still starts the container
   222  // after it was stopped.
   223  //
   224  // Regression test for https://github.com/moby/moby/discussions/46682
   225  func TestContainerRestartWithCancelledRequest(t *testing.T) {
   226  	ctx := setupTest(t)
   227  	apiClient := testEnv.APIClient()
   228  
   229  	testutil.StartSpan(ctx, t)
   230  
   231  	// Create a container that ignores SIGTERM and doesn't stop immediately,
   232  	// giving us time to cancel the request.
   233  	//
   234  	// Restarting a container is "stop" (and, if needed, "kill"), then "start"
   235  	// the container. We're trying to create the scenario where the "stop" is
   236  	// handled, but the request was cancelled and therefore the "start" not
   237  	// taking place.
   238  	cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
   239  	defer func() {
   240  		err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
   241  		if t.Failed() && err != nil {
   242  			t.Logf("Cleaning up test container failed with error: %v", err)
   243  		}
   244  	}()
   245  
   246  	// Start listening for events.
   247  	messages, errs := apiClient.Events(ctx, types.EventsOptions{
   248  		Filters: filters.NewArgs(
   249  			filters.Arg("container", cID),
   250  			filters.Arg("event", string(events.ActionRestart)),
   251  		),
   252  	})
   253  
   254  	// Make restart request, but cancel the request before the container
   255  	// is (forcibly) killed.
   256  	ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
   257  	stopTimeout := 1
   258  	err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
   259  		Timeout: &stopTimeout,
   260  	})
   261  	assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
   262  	cancel()
   263  
   264  	// Validate that the restart event occurred, which is emitted
   265  	// after the restart (stop (kill) start) finished.
   266  	//
   267  	// Note that we cannot use RestartCount for this, as that's only
   268  	// used for restart-policies.
   269  	restartTimeout := 2 * time.Second
   270  	if runtime.GOOS == "windows" {
   271  		// hcs can sometimes take a long time to stop container.
   272  		restartTimeout = StopContainerWindowsPollTimeout
   273  	}
   274  	select {
   275  	case m := <-messages:
   276  		assert.Check(t, is.Equal(m.Actor.ID, cID))
   277  		assert.Check(t, is.Equal(m.Action, events.ActionRestart))
   278  	case err := <-errs:
   279  		assert.NilError(t, err)
   280  	case <-time.After(restartTimeout):
   281  		t.Errorf("timeout waiting for restart event")
   282  	}
   283  
   284  	// Container should be restarted (running).
   285  	inspect, err := apiClient.ContainerInspect(ctx, cID)
   286  	assert.NilError(t, err)
   287  	assert.Check(t, is.Equal(inspect.State.Status, "running"))
   288  }