github.com/tonistiigi/docker@v0.10.1-0.20240229224939-974013b0dc6a/integration/container/restart_test.go

package container // import "github.com/docker/docker/integration/container"

import (
    "context"
    "fmt"
    "runtime"
    "testing"
    "time"

    "github.com/docker/docker/api/types"
    "github.com/docker/docker/api/types/container"
    "github.com/docker/docker/api/types/events"
    "github.com/docker/docker/api/types/filters"
    "github.com/docker/docker/client"
    testContainer "github.com/docker/docker/integration/internal/container"
    "github.com/docker/docker/testutil"
    "github.com/docker/docker/testutil/daemon"
    "gotest.tools/v3/assert"
    is "gotest.tools/v3/assert/cmp"
    "gotest.tools/v3/poll"
    "gotest.tools/v3/skip"
)

func TestDaemonRestartKillContainers(t *testing.T) {
    skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
    skip.If(t, testEnv.DaemonInfo.OSType == "windows")
    skip.If(t, testEnv.IsRootless, "rootless mode doesn't support live-restore")

    ctx := testutil.StartSpan(baseContext, t)

    type testCase struct {
        desc          string
        restartPolicy container.RestartPolicy

        xRunning            bool
        xRunningLiveRestore bool
        xStart              bool
        xHealthCheck        bool
    }

    for _, tc := range []testCase{
        {
            desc:                "container without restart policy",
            xRunningLiveRestore: true,
            xStart:              true,
        },
        {
            desc:                "container with restart=always",
            restartPolicy:       container.RestartPolicy{Name: "always"},
            xRunning:            true,
            xRunningLiveRestore: true,
            xStart:              true,
        },
        {
            desc:                "container with restart=always and with healthcheck",
            restartPolicy:       container.RestartPolicy{Name: "always"},
            xRunning:            true,
            xRunningLiveRestore: true,
            xStart:              true,
            xHealthCheck:        true,
        },
        {
            desc:          "container created should not be restarted",
            restartPolicy: container.RestartPolicy{Name: "always"},
        },
    } {
        for _, liveRestoreEnabled := range []bool{false, true} {
            for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
                "kill-daemon": func(t *testing.T, d *daemon.Daemon) {
                    err := d.Kill()
                    assert.NilError(t, err)
                },
                "stop-daemon": func(t *testing.T, d *daemon.Daemon) {
                    d.Stop(t)
                },
            } {
                tc := tc
                liveRestoreEnabled := liveRestoreEnabled
                stopDaemon := stopDaemon
                t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, tc.desc, fnName), func(t *testing.T) {
                    t.Parallel()

                    ctx := testutil.StartSpan(ctx, t)

                    d := daemon.New(t)
                    apiClient := d.NewClientT(t)

                    args := []string{"--iptables=false"}
                    if liveRestoreEnabled {
                        args = append(args, "--live-restore")
                    }

                    d.StartWithBusybox(ctx, t, args...)
                    defer d.Stop(t)

                    config := container.Config{Image: "busybox", Cmd: []string{"top"}}
                    hostConfig := container.HostConfig{RestartPolicy: tc.restartPolicy}
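                    // The "! test -f /tmp/unhealthy" probe below fails while /tmp/unhealthy
                    // exists and succeeds otherwise, so the test can flip the container
                    // between healthy and unhealthy on demand via the ExecT calls further down.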
                    if tc.xHealthCheck {
                        config.Healthcheck = &container.HealthConfig{
                            Test:          []string{"CMD-SHELL", "! test -f /tmp/unhealthy"},
                            StartPeriod:   60 * time.Second,
                            StartInterval: 1 * time.Second,
                            Interval:      60 * time.Second,
                        }
                    }
                    resp, err := apiClient.ContainerCreate(ctx, &config, &hostConfig, nil, nil, "")
                    assert.NilError(t, err)
                    defer apiClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})

                    if tc.xStart {
                        err = apiClient.ContainerStart(ctx, resp.ID, container.StartOptions{})
                        assert.NilError(t, err)
                        if tc.xHealthCheck {
                            poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
                            testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"touch", "/tmp/unhealthy"}).AssertSuccess(t)
                        }
                    }

                    stopDaemon(t, d)
                    startTime := time.Now()
                    d.Start(t, args...)

                    expected := tc.xRunning
                    if liveRestoreEnabled {
                        expected = tc.xRunningLiveRestore
                    }

                    poll.WaitOn(t, testContainer.RunningStateFlagIs(ctx, apiClient, resp.ID, expected), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))

                    if tc.xHealthCheck {
                        // We have arranged to have the container's health probes fail until we tell it
                        // to become healthy, which gives us the entire StartPeriod (60s) to assert that
                        // the container's health state is Starting before we have to worry about racing
                        // the health monitor.
                        assert.Equal(t, testContainer.Inspect(ctx, t, apiClient, resp.ID).State.Health.Status, types.Starting)
                        poll.WaitOn(t, pollForNewHealthCheck(ctx, apiClient, startTime, resp.ID), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))

                        testContainer.ExecT(ctx, t, apiClient, resp.ID, []string{"rm", "/tmp/unhealthy"}).AssertSuccess(t)
                        poll.WaitOn(t, pollForHealthStatus(ctx, apiClient, resp.ID, types.Healthy), poll.WithDelay(100*time.Millisecond), poll.WithTimeout(30*time.Second))
                    }
                    // TODO(cpuguy83): test pause states... this seems to be rather undefined currently
                })
            }
        }
    }
}

// pollForNewHealthCheck returns a poll check that succeeds once the container's
// health log contains a probe that started after startTime, i.e. once the
// restarted daemon has run at least one new health check.
func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
    return func(log poll.LogT) poll.Result {
        inspect, err := client.ContainerInspect(ctx, containerID)
        if err != nil {
            return poll.Error(err)
        }
        healthChecksTotal := len(inspect.State.Health.Log)
        if healthChecksTotal > 0 {
            if inspect.State.Health.Log[healthChecksTotal-1].Start.After(startTime) {
                return poll.Success()
            }
        }
        return poll.Continue("waiting for a new container healthcheck")
    }
}
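
// pollForHealthStatus, used above, is not defined in this file; it is provided
// by another file in this package. For reference, a compatible helper could
// look roughly like this sketch (modeled on pollForNewHealthCheck; the real
// implementation and signature may differ):
//
//	func pollForHealthStatus(ctx context.Context, apiClient *client.Client, containerID string, want string) func(log poll.LogT) poll.Result {
//		return func(log poll.LogT) poll.Result {
//			inspect, err := apiClient.ContainerInspect(ctx, containerID)
//			if err != nil {
//				return poll.Error(err)
//			}
//			if inspect.State.Health != nil && inspect.State.Health.Status == want {
//				return poll.Success()
//			}
//			return poll.Continue("waiting for container health to be %q", want)
//		}
//	}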

// Container started with --rm should be able to be restarted.
// It should be removed only once it is killed or stopped.
func TestContainerWithAutoRemoveCanBeRestarted(t *testing.T) {
    ctx := setupTest(t)
    apiClient := testEnv.APIClient()

    noWaitTimeout := 0

    for _, tc := range []struct {
        desc  string
        doSth func(ctx context.Context, containerID string) error
    }{
        {
            desc: "kill",
            doSth: func(ctx context.Context, containerID string) error {
                return apiClient.ContainerKill(ctx, containerID, "SIGKILL")
            },
        },
        {
            desc: "stop",
            doSth: func(ctx context.Context, containerID string) error {
                return apiClient.ContainerStop(ctx, containerID, container.StopOptions{Timeout: &noWaitTimeout})
            },
        },
    } {
        tc := tc
        t.Run(tc.desc, func(t *testing.T) {
            testutil.StartSpan(ctx, t)
            cID := testContainer.Run(ctx, t, apiClient,
                testContainer.WithName("autoremove-restart-and-"+tc.desc),
                testContainer.WithAutoRemove,
            )
            defer func() {
                err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
                if t.Failed() && err != nil {
                    t.Logf("Cleaning up test container failed with error: %v", err)
                }
            }()

            err := apiClient.ContainerRestart(ctx, cID, container.StopOptions{Timeout: &noWaitTimeout})
            assert.NilError(t, err)

            inspect, err := apiClient.ContainerInspect(ctx, cID)
            assert.NilError(t, err)
            assert.Assert(t, inspect.State.Status != "removing", "Container should not be removing yet")

            poll.WaitOn(t, testContainer.IsInState(ctx, apiClient, cID, "running"))

            err = tc.doSth(ctx, cID)
            assert.NilError(t, err)

            poll.WaitOn(t, testContainer.IsRemoved(ctx, apiClient, cID))
        })
    }
}

// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
// request does not cancel the restart operation, and that the container is
// still started after it was stopped.
//
// Regression test for https://github.com/moby/moby/discussions/46682
func TestContainerRestartWithCancelledRequest(t *testing.T) {
    ctx := setupTest(t)
    apiClient := testEnv.APIClient()

    testutil.StartSpan(ctx, t)

    // Create a container that ignores SIGTERM and doesn't stop immediately,
    // giving us time to cancel the request.
    //
    // Restarting a container means "stop" (and, if needed, "kill") followed by
    // "start". We're trying to create the scenario where the "stop" is handled,
    // but the request is cancelled and the "start" therefore does not take place.
    cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
    defer func() {
        err := apiClient.ContainerRemove(ctx, cID, container.RemoveOptions{Force: true})
        if t.Failed() && err != nil {
            t.Logf("Cleaning up test container failed with error: %v", err)
        }
    }()

    // Start listening for events.
    messages, errs := apiClient.Events(ctx, types.EventsOptions{
        Filters: filters.NewArgs(
            filters.Arg("container", cID),
            filters.Arg("event", string(events.ActionRestart)),
        ),
    })

    // Make the restart request, but cancel the request before the container
    // is (forcibly) killed.
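    //
    // Under the values below, the client-side context expires after 100ms while
    // the stop timeout gives the container a full second to exit before it is
    // killed, so ContainerRestart returns context.DeadlineExceeded mid-restart
    // and the daemon is left to finish the stop+start on its own.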
    ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
    stopTimeout := 1
    err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
        Timeout: &stopTimeout,
    })
    assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
    cancel()

    // Validate that the restart event occurred, which is emitted
    // after the restart (stop (kill) start) finished.
    //
    // Note that we cannot use RestartCount for this, as that's only
    // used for restart policies.
    restartTimeout := 2 * time.Second
    if runtime.GOOS == "windows" {
        // hcs can sometimes take a long time to stop a container.
        restartTimeout = StopContainerWindowsPollTimeout
    }
    select {
    case m := <-messages:
        assert.Check(t, is.Equal(m.Actor.ID, cID))
        assert.Check(t, is.Equal(m.Action, events.ActionRestart))
    case err := <-errs:
        assert.NilError(t, err)
    case <-time.After(restartTimeout):
        t.Errorf("timeout waiting for restart event")
    }

    // The container should be restarted (running).
    inspect, err := apiClient.ContainerInspect(ctx, cID)
    assert.NilError(t, err)
    assert.Check(t, is.Equal(inspect.State.Status, "running"))
}
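
// Identifiers referenced above but not defined in this file (testEnv, baseContext,
// setupTest, pollForHealthStatus, StopContainerWindowsPollTimeout) are package-level
// helpers expected to be provided by sibling files in this integration test package.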