// Copyright 2016 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package docker

import (
	"bytes"
	"encoding/json"
	"fmt"
	"os"
	"reflect"
	"sort"
	"strings"
	"time"

	"golang.org/x/crypto/ssh"
	"golang.org/x/net/context"

	"github.com/coreos/mantle/kola/cluster"
	"github.com/coreos/mantle/kola/register"
	"github.com/coreos/mantle/lang/worker"
	"github.com/coreos/mantle/platform"
	"github.com/coreos/mantle/platform/conf"
	"github.com/coreos/mantle/util"
)

// simplifiedDockerInfo models the subset of the docker `info` API response
// that these tests assert on; all other fields in the response are ignored
// by json.Unmarshal.
type simplifiedDockerInfo struct {
	ServerVersion string
	// Driver is the storage driver in use, e.g. "overlay2" or "btrfs".
	Driver       string
	CgroupDriver string
	// Runtimes maps a runtime name (e.g. "runc") to its binary path.
	Runtimes map[string]struct {
		Path string `json:"path"`
	}
	// ContainerdCommit / RuncCommit each carry the commit actually running
	// (ID) and the commit docker was built to expect (Expected); a mismatch
	// indicates a broken docker/containerd/runc pairing.
	ContainerdCommit struct {
		ID       string
		Expected string
	}
	RuncCommit struct {
		ID       string
		Expected string
	}
	SecurityOptions []string
}

// init registers every docker kola test with the harness.
func init() {
	register.Register(&register.Test{
		Run:         dockerNetwork,
		ClusterSize: 2,
		Name:        "docker.network",
		Distros:     []string{"cl"},

		// qemu-unpriv machines cannot communicate
		ExcludePlatforms: []string{"qemu-unpriv"},
	})
	register.Register(&register.Test{
		Run:         dockerOldClient,
		ClusterSize: 0,
		Name:        "docker.oldclient",
		Distros:     []string{"cl"},
	})
	// Boot with userns remapping enabled via DOCKER_OPTS and a pre-seeded
	// dockremap user/subuid/subgid range.
	register.Register(&register.Test{
		Run:         dockerUserns,
		ClusterSize: 1,
		Name:        "docker.userns",
		Distros:     []string{"cl"},
		UserData: conf.ContainerLinuxConfig(`
systemd:
  units:
    - name: docker.service
      enable: true
      dropins:
        - name: 10-userns.conf
          contents: |-
            [Service]
            Environment=DOCKER_OPTS=--userns-remap=dockremap
storage:
  files:
    - filesystem: root
      path: /etc/subuid
      contents:
        inline: "dockremap:100000:65536"
      mode: 0644
    - filesystem: root
      path: /etc/subgid
      contents:
        inline: "dockremap:100000:65536"
      mode: 0644
passwd:
  users:
    - name: dockremap`),

		// qemu-unpriv machines cannot communicate
		ExcludePlatforms: []string{"qemu-unpriv"},
	})

	// This test covers all functionality that should be quick to run and can be
	// run:
	// 1. On an entirely default docker configuration on CL
	// 2. On a 'dirty machine' (in that other tests have already potentially run)
	//
	// Note, being able to run in parallel is desirable for these tests, but not
	// required. Parallelism should be tweaked at the subtest level in the
	// 'dockerBaseTests' implementation
	// The primary goal of using subtests here is to make things quicker to run.
	register.Register(&register.Test{
		Run:         dockerBaseTests,
		ClusterSize: 1,
		Name:        `docker.base`,
		Distros:     []string{"cl"},
	})

	register.Register(&register.Test{
		Run:         func(c cluster.TestCluster) { testDockerInfo("btrfs", c) },
		ClusterSize: 1,
		Name:        "docker.btrfs-storage",
		// Note: copied verbatim from https://github.com/coreos/docs/blob/master/os/mounting-storage.md#creating-and-mounting-a-btrfs-volume-file
		UserData: conf.ContainerLinuxConfig(`
systemd:
  units:
    - name: format-var-lib-docker.service
      enable: true
      contents: |
        [Unit]
        Before=docker.service var-lib-docker.mount
        ConditionPathExists=!/var/lib/docker.btrfs
        [Service]
        Type=oneshot
        ExecStart=/usr/bin/truncate --size=25G /var/lib/docker.btrfs
        ExecStart=/usr/sbin/mkfs.btrfs /var/lib/docker.btrfs
        [Install]
        WantedBy=multi-user.target
    - name: var-lib-docker.mount
      enable: true
      contents: |
        [Unit]
        Before=docker.service
        After=format-var-lib-docker.service
        Requires=format-var-lib-docker.service
        [Install]
        RequiredBy=docker.service
        [Mount]
        What=/var/lib/docker.btrfs
        Where=/var/lib/docker
        Type=btrfs
        Options=loop,discard`),
		Distros: []string{"cl"},
	})

	register.Register(&register.Test{
		// For a while we shipped /usr/lib/coreos/dockerd as the execstart of the
		// docker systemd unit.
		// This test verifies backwards compatibility with that unit to ensure
		// users who copied it into /etc aren't broken.
		Name:        "docker.lib-coreos-dockerd-compat",
		Run:         dockerBaseTests,
		ClusterSize: 1,
		Distros:     []string{"cl"},
		UserData: conf.ContainerLinuxConfig(`
systemd:
  units:
    - name: docker.service
      contents: |-
        [Unit]
        Description=Docker Application Container Engine
        Documentation=http://docs.docker.com
        After=containerd.service docker.socket network.target
        Requires=containerd.service docker.socket

        [Service]
        Type=notify
        EnvironmentFile=-/run/flannel/flannel_docker_opts.env

        # the default is not to use systemd for cgroups because the delegate issues still
        # exists and systemd currently does not support the cgroup feature set required
        # for containers run by docker
        ExecStart=/usr/lib/coreos/dockerd --host=fd:// --containerd=/var/run/docker/libcontainerd/docker-containerd.sock $DOCKER_OPTS $DOCKER_CGROUPS $DOCKER_OPT_BIP $DOCKER_OPT_MTU $DOCKER_OPT_IPMASQ
        ExecReload=/bin/kill -s HUP $MAINPID
        LimitNOFILE=1048576
        # Having non-zero Limit*s causes performance problems due to accounting overhead
        # in the kernel. We recommend using cgroups to do container-local accounting.
        LimitNPROC=infinity
        LimitCORE=infinity
        # Uncomment TasksMax if your systemd version supports it.
        # Only systemd 226 and above support this version.
        TasksMax=infinity
        TimeoutStartSec=0
        # set delegate yes so that systemd does not reset the cgroups of docker containers
        Delegate=yes

        [Install]
        WantedBy=multi-user.target`),
	})
	register.Register(&register.Test{
		// Ensure containerd gets back up when it dies
		Name:        "docker.containerd-restart",
		Run:         dockerContainerdRestart,
		ClusterSize: 1,
		Distros:     []string{"cl"},
		UserData: conf.ContainerLinuxConfig(`
systemd:
  units:
    - name: docker.service
      enable: true`),

		// https://github.com/coreos/mantle/issues/999
		// On the qemu-unpriv platform the DHCP provides no data, pre-systemd 241 the DHCP server sending
		// no routes to the link to spin in the configuring state. docker.service pulls in the network-online
		// target which causes the basic machine checks to fail
		ExcludePlatforms: []string{"qemu-unpriv"},
	})
}

// make a docker container out of binaries on the host
// genDockerContainer copies each named binary plus the shared libraries that
// ldd reports for it into a scratch directory, then builds a FROM-scratch
// docker image tagged `name` on machine m.
func genDockerContainer(c cluster.TestCluster, m platform.Machine, name string, binnames []string) {
	cmd := `tmpdir=$(mktemp -d); cd $tmpdir; echo -e "FROM scratch\nCOPY . /" > Dockerfile;
	        b=$(which %s); libs=$(sudo ldd $b | grep -o /lib'[^ ]*' | sort -u);
	        sudo rsync -av --relative --copy-links $b $libs ./;
	        sudo docker build -t %s .`

	c.MustSSH(m, fmt.Sprintf(cmd, strings.Join(binnames, " "), name))
}

// dockerBaseTests runs the quick checks that are safe both on a default
// configuration and on a machine other tests have already used; they run as
// subtests so the whole batch shares one cluster.
func dockerBaseTests(c cluster.TestCluster) {
	c.Run("docker-info", func(c cluster.TestCluster) {
		testDockerInfo("overlay", c)
	})
	c.Run("resources", dockerResources)
	c.Run("networks-reliably", dockerNetworksReliably)
	c.Run("user-no-caps", dockerUserNoCaps)
}

// using a simple container, exercise various docker options that set resource
// limits. also acts as a regression test for
// https://github.com/coreos/bugs/issues/1246.
246 func dockerResources(c cluster.TestCluster) { 247 m := c.Machines()[0] 248 249 genDockerContainer(c, m, "sleep", []string{"sleep"}) 250 251 dockerFmt := "docker run --rm %s sleep sleep 0.2" 252 253 dCmd := func(arg string) string { 254 return fmt.Sprintf(dockerFmt, arg) 255 } 256 257 ctx := context.Background() 258 wg := worker.NewWorkerGroup(ctx, 10) 259 260 // ref https://docs.docker.com/engine/reference/run/#runtime-constraints-on-resources 261 for _, dockerCmd := range []string{ 262 // must set memory when setting memory-swap 263 dCmd("--memory=20m --memory-swap=20m"), 264 dCmd("--memory-reservation=10m"), 265 dCmd("--kernel-memory=10m"), 266 dCmd("--cpu-shares=100"), 267 dCmd("--cpu-period=1000"), 268 dCmd("--cpuset-cpus=0"), 269 dCmd("--cpuset-mems=0"), 270 dCmd("--cpu-quota=1000"), 271 dCmd("--blkio-weight=10"), 272 // none of these work in QEMU due to apparent lack of cfq for 273 // blkio in virtual block devices. 274 //dCmd("--blkio-weight-device=/dev/vda:10"), 275 //dCmd("--device-read-bps=/dev/vda:1kb"), 276 //dCmd("--device-write-bps=/dev/vda:1kb"), 277 //dCmd("--device-read-iops=/dev/vda:10"), 278 //dCmd("--device-write-iops=/dev/vda:10"), 279 dCmd("--memory=20m --oom-kill-disable=true"), 280 dCmd("--memory-swappiness=50"), 281 dCmd("--shm-size=1m"), 282 } { 283 // lol closures 284 cmd := dockerCmd 285 286 worker := func(ctx context.Context) error { 287 // TODO: pass context thru to SSH 288 output, err := c.SSH(m, cmd) 289 if err != nil { 290 return fmt.Errorf("failed to run %q: output: %q status: %q", cmd, output, err) 291 } 292 return nil 293 } 294 295 if err := wg.Start(worker); err != nil { 296 c.Fatal(wg.WaitError(err)) 297 } 298 } 299 300 if err := wg.Wait(); err != nil { 301 c.Fatal(err) 302 } 303 } 304 305 // Ensure that docker containers can make network connections outside of the host 306 func dockerNetwork(c cluster.TestCluster) { 307 machines := c.Machines() 308 src, dest := machines[0], machines[1] 309 310 c.Log("creating ncat 
containers") 311 312 genDockerContainer(c, src, "ncat", []string{"ncat"}) 313 genDockerContainer(c, dest, "ncat", []string{"ncat"}) 314 315 listener := func(ctx context.Context) error { 316 // Will block until a message is recieved 317 out, err := c.SSH(dest, 318 `echo "HELLO FROM SERVER" | docker run -i -p 9988:9988 ncat ncat --idle-timeout 20 --listen 0.0.0.0 9988`, 319 ) 320 if err != nil { 321 return err 322 } 323 324 if !bytes.Equal(out, []byte("HELLO FROM CLIENT")) { 325 return fmt.Errorf("unexpected result from listener: %q", out) 326 } 327 328 return nil 329 } 330 331 talker := func(ctx context.Context) error { 332 // Wait until listener is ready before trying anything 333 for { 334 _, err := c.SSH(dest, "sudo lsof -i TCP:9988 -s TCP:LISTEN | grep 9988 -q") 335 if err == nil { 336 break // socket is ready 337 } 338 339 exit, ok := err.(*ssh.ExitError) 340 if !ok || exit.Waitmsg.ExitStatus() != 1 { // 1 is the expected exit of grep -q 341 return err 342 } 343 344 select { 345 case <-ctx.Done(): 346 return fmt.Errorf("timeout waiting for server") 347 default: 348 time.Sleep(100 * time.Millisecond) 349 } 350 } 351 352 srcCmd := fmt.Sprintf(`echo "HELLO FROM CLIENT" | docker run -i ncat ncat %s 9988`, dest.PrivateIP()) 353 out, err := c.SSH(src, srcCmd) 354 if err != nil { 355 return err 356 } 357 358 if !bytes.Equal(out, []byte("HELLO FROM SERVER")) { 359 return fmt.Errorf(`unexpected result from listener: "%v"`, out) 360 } 361 362 return nil 363 } 364 365 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 366 defer cancel() 367 368 if err := worker.Parallel(ctx, listener, talker); err != nil { 369 c.Fatal(err) 370 } 371 } 372 373 // Regression test for https://github.com/coreos/bugs/issues/1569 and 374 // https://github.com/coreos/docker/pull/31 375 func dockerOldClient(c cluster.TestCluster) { 376 oldclient := "/usr/lib/kola/amd64/docker-1.9.1" 377 if _, err := os.Stat(oldclient); err != nil { 378 c.Skipf("Can't find old docker client to 
test: %v", err) 379 } 380 381 m, err := c.NewMachine(nil) 382 if err != nil { 383 c.Fatal(err) 384 } 385 c.DropFile(oldclient) 386 387 genDockerContainer(c, m, "echo", []string{"echo"}) 388 389 output := c.MustSSH(m, "/home/core/docker-1.9.1 run echo echo 'IT WORKED'") 390 391 if !bytes.Equal(output, []byte("IT WORKED")) { 392 c.Fatalf("unexpected result from docker client: %q", output) 393 } 394 } 395 396 // Regression test for userns breakage under 1.12 397 func dockerUserns(c cluster.TestCluster) { 398 m := c.Machines()[0] 399 400 genDockerContainer(c, m, "userns-test", []string{"echo", "sleep"}) 401 402 c.MustSSH(m, `sudo setenforce 1`) 403 output := c.MustSSH(m, `docker run userns-test echo fj.fj`) 404 if !bytes.Equal(output, []byte("fj.fj")) { 405 c.Fatalf("expected fj.fj, got %s", string(output)) 406 } 407 408 // And just in case, verify that a container really is userns remapped 409 c.MustSSH(m, `docker run -d --name=sleepy userns-test sleep 10000`) 410 uid_map := c.MustSSH(m, `until [[ "$(docker inspect -f {{.State.Running}} sleepy)" == "true" ]]; do sleep 0.1; done; 411 pid=$(docker inspect -f {{.State.Pid}} sleepy); 412 cat /proc/$pid/uid_map; docker kill sleepy &>/dev/null`) 413 // uid_map is of the form `$mappedNamespacePidStart $realNamespacePidStart 414 // $rangeLength`. 
We expect `0 100000 65536` 415 mapParts := strings.Fields(strings.TrimSpace(string(uid_map))) 416 if len(mapParts) != 3 { 417 c.Fatalf("expected uid_map to have three parts, was: %s", string(uid_map)) 418 } 419 if mapParts[0] != "0" && mapParts[1] != "100000" { 420 c.Fatalf("unexpected userns mapping values: %v", string(uid_map)) 421 } 422 } 423 424 // Regression test for https://github.com/coreos/bugs/issues/1785 425 // Also, hopefully will catch any similar issues 426 func dockerNetworksReliably(c cluster.TestCluster) { 427 m := c.Machines()[0] 428 429 genDockerContainer(c, m, "ping", []string{"sh", "ping"}) 430 431 output := c.MustSSH(m, `for i in $(seq 1 100); do 432 echo -n "$i: " 433 docker run --rm ping sh -c 'ping -i 0.2 172.17.0.1 -w 1 >/dev/null && echo PASS || echo FAIL' 434 done`) 435 436 numPass := strings.Count(string(output), "PASS") 437 438 if numPass != 100 { 439 c.Fatalf("Expected 100 passes, but output was: %s", output) 440 } 441 442 } 443 444 // Regression test for CVE-2016-8867 445 // CVE-2016-8867 gave a container capabilities, including fowner, even if it 446 // was a non-root user. 447 // We test that a user inside a container does not have any effective nor 448 // permitted capabilities (which is what the cve was). 449 // For good measure, we also check that fs permissions deny that user from 450 // accessing /root. 451 func dockerUserNoCaps(c cluster.TestCluster) { 452 m := c.Machines()[0] 453 454 genDockerContainer(c, m, "captest", []string{"capsh", "sh", "grep", "cat", "ls"}) 455 456 output := c.MustSSH(m, `docker run --user 1000:1000 \ 457 -v /root:/root \ 458 captest sh -c \ 459 'cat /proc/self/status | grep -E "Cap(Eff|Prm)"; ls /root &>/dev/null && echo "FAIL: could read root" || echo "PASS: err reading root"'`) 460 461 outputlines := strings.Split(string(output), "\n") 462 if len(outputlines) < 3 { 463 c.Fatalf("expected two lines of caps and an an error/succcess line. 
Got %q", string(output)) 464 } 465 cap1, cap2 := strings.Fields(outputlines[0]), strings.Fields(outputlines[1]) 466 // The format of capabilities in /proc/*/status is e.g.: CapPrm:\t0000000000000000 467 // We could parse the hex to its actual capabilities, but since we're looking for none, just checking it's all 0 is good enough. 468 if len(cap1) != 2 || len(cap2) != 2 { 469 c.Fatalf("capability lines didn't have two parts: %q", string(output)) 470 } 471 if cap1[1] != "0000000000000000" || cap2[1] != "0000000000000000" { 472 c.Fatalf("Permitted / effective capabilities were non-zero: %q", string(output)) 473 } 474 475 // Finally, check for fail/success on reading /root 476 if !strings.HasPrefix(outputlines[len(outputlines)-1], "PASS: ") { 477 c.Fatalf("reading /root test failed: %q", string(output)) 478 } 479 } 480 481 // dockerContainerdRestart ensures containerd will restart if it dies. It tests that containerd is running, 482 // kills it, the tests that it came back up. 483 func dockerContainerdRestart(c cluster.TestCluster) { 484 m := c.Machines()[0] 485 486 pid := c.MustSSH(m, "systemctl show containerd -p MainPID --value") 487 if string(pid) == "0" { 488 c.Fatalf("Could not find containerd pid") 489 } 490 491 testContainerdUp(c) 492 493 // kill it 494 c.MustSSH(m, "sudo kill "+string(pid)) 495 496 // retry polling its state 497 util.Retry(12, 6*time.Second, func() error { 498 state := c.MustSSH(m, "systemctl show containerd -p SubState --value") 499 switch string(state) { 500 case "running": 501 return nil 502 case "stopped", "exited", "failed": 503 c.Fatalf("containerd entered stopped state") 504 } 505 return fmt.Errorf("containerd failed to restart") 506 }) 507 508 // verify systemd started it and that it's pid is different 509 newPid := c.MustSSH(m, "systemctl show containerd -p MainPID --value") 510 if string(newPid) == "0" { 511 c.Fatalf("Containerd is not running (could not find pid)") 512 } else if string(newPid) == string(pid) { 513 c.Fatalf("Old and 
new pid's are the same. containerd did not die") 514 } 515 516 // verify it came back and docker knows about it 517 testContainerdUp(c) 518 } 519 520 func testContainerdUp(c cluster.TestCluster) { 521 m := c.Machines()[0] 522 523 info, err := getDockerInfo(c, m) 524 if err != nil { 525 c.Fatal(err) 526 } 527 528 if info.ContainerdCommit.ID != info.ContainerdCommit.Expected { 529 c.Fatalf("Docker could not find containerd") 530 } 531 } 532 533 func getDockerInfo(c cluster.TestCluster, m platform.Machine) (simplifiedDockerInfo, error) { 534 dockerInfoJson, err := c.SSH(m, `curl -s --unix-socket /var/run/docker.sock http://docker/v1.24/info`) 535 if err != nil { 536 return simplifiedDockerInfo{}, fmt.Errorf("could not get dockerinfo: %v", err) 537 } 538 539 target := simplifiedDockerInfo{} 540 541 err = json.Unmarshal(dockerInfoJson, &target) 542 if err != nil { 543 return simplifiedDockerInfo{}, fmt.Errorf("could not unmarshal dockerInfo %q into known json: %v", string(dockerInfoJson), err) 544 } 545 546 return target, nil 547 } 548 549 // testDockerInfo test that docker info's output is as expected. the expected 550 // filesystem may be asserted as one of 'overlay', 'btrfs', 'devicemapper' 551 // depending on how the machine was launched. 552 func testDockerInfo(expectedFs string, c cluster.TestCluster) { 553 m := c.Machines()[0] 554 555 info, err := getDockerInfo(c, m) 556 if err != nil { 557 c.Fatal(err) 558 } 559 560 // Canonicalize info 561 sort.Strings(info.SecurityOptions) 562 563 // Because we prefer overlay2/overlay for different docker versions, figure 564 // out the correct driver to be testing for based on our docker version. 
565 expectedOverlayDriver := "overlay2" 566 if strings.HasPrefix(info.ServerVersion, "1.12.") || strings.HasPrefix(info.ServerVersion, "17.04.") { 567 expectedOverlayDriver = "overlay" 568 } 569 570 expectedFsDriverMap := map[string]string{ 571 "overlay": expectedOverlayDriver, 572 "btrfs": "btrfs", 573 "devicemapper": "devicemapper", 574 } 575 576 expectedFsDriver := expectedFsDriverMap[expectedFs] 577 if info.Driver != expectedFsDriver { 578 c.Errorf("unexpected driver: %v != %v", expectedFsDriver, info.Driver) 579 } 580 581 // Validations shared by all versions currently 582 if !reflect.DeepEqual(info.SecurityOptions, []string{"seccomp", "selinux"}) { 583 c.Errorf("unexpected security options: %+v", info.SecurityOptions) 584 } 585 586 if info.CgroupDriver != "cgroupfs" { 587 c.Errorf("unexpected cgroup driver %v", info.CgroupDriver) 588 } 589 590 if info.ContainerdCommit.ID != info.ContainerdCommit.Expected { 591 c.Errorf("commit mismatch for containerd: %v != %v", info.ContainerdCommit.Expected, info.ContainerdCommit.ID) 592 } 593 594 if info.RuncCommit.ID != info.RuncCommit.Expected { 595 c.Errorf("commit mismatch for runc: %v != %v", info.RuncCommit.Expected, info.RuncCommit.ID) 596 } 597 598 if runcInfo, ok := info.Runtimes["runc"]; ok { 599 if runcInfo.Path == "" { 600 c.Errorf("expected non-empty runc path") 601 } 602 } else { 603 c.Errorf("runc was not in runtimes: %+v", info.Runtimes) 604 } 605 }