github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/integration/exec_test.go (about)

     1  package integration
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"reflect"
    12  	"strconv"
    13  	"strings"
    14  	"syscall"
    15  	"testing"
    16  
    17  	"github.com/opencontainers/runc/libcontainer"
    18  	"github.com/opencontainers/runc/libcontainer/cgroups"
    19  	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    20  	"github.com/opencontainers/runc/libcontainer/configs"
    21  	"github.com/opencontainers/runc/libcontainer/userns"
    22  	"github.com/opencontainers/runc/libcontainer/utils"
    23  	"github.com/opencontainers/runtime-spec/specs-go"
    24  
    25  	"golang.org/x/sys/unix"
    26  )
    27  
    28  func TestExecPS(t *testing.T) {
    29  	testExecPS(t, false)
    30  }
    31  
    32  func TestUsernsExecPS(t *testing.T) {
    33  	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
    34  		t.Skip("Test requires userns.")
    35  	}
    36  	testExecPS(t, true)
    37  }
    38  
    39  func testExecPS(t *testing.T, userns bool) {
    40  	if testing.Short() {
    41  		return
    42  	}
    43  	config := newTemplateConfig(t, &tParam{userns: userns})
    44  
    45  	buffers := runContainerOk(t, config, "ps", "-o", "pid,user,comm")
    46  	lines := strings.Split(buffers.Stdout.String(), "\n")
    47  	if len(lines) < 2 {
    48  		t.Fatalf("more than one process running for output %q", buffers.Stdout.String())
    49  	}
    50  	expected := `1 root     ps`
    51  	actual := strings.Trim(lines[1], "\n ")
    52  	if actual != expected {
    53  		t.Fatalf("expected output %q but received %q", expected, actual)
    54  	}
    55  }
    56  
    57  func TestIPCPrivate(t *testing.T) {
    58  	if testing.Short() {
    59  		return
    60  	}
    61  
    62  	l, err := os.Readlink("/proc/1/ns/ipc")
    63  	ok(t, err)
    64  
    65  	config := newTemplateConfig(t, nil)
    66  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/ipc")
    67  
    68  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
    69  		t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l)
    70  	}
    71  }
    72  
    73  func TestIPCHost(t *testing.T) {
    74  	if testing.Short() {
    75  		return
    76  	}
    77  
    78  	l, err := os.Readlink("/proc/1/ns/ipc")
    79  	ok(t, err)
    80  
    81  	config := newTemplateConfig(t, nil)
    82  	config.Namespaces.Remove(configs.NEWIPC)
    83  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/ipc")
    84  
    85  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
    86  		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
    87  	}
    88  }
    89  
    90  func TestIPCJoinPath(t *testing.T) {
    91  	if testing.Short() {
    92  		return
    93  	}
    94  
    95  	l, err := os.Readlink("/proc/1/ns/ipc")
    96  	ok(t, err)
    97  
    98  	config := newTemplateConfig(t, nil)
    99  	config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc")
   100  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/ipc")
   101  
   102  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
   103  		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
   104  	}
   105  }
   106  
   107  func TestIPCBadPath(t *testing.T) {
   108  	if testing.Short() {
   109  		return
   110  	}
   111  
   112  	config := newTemplateConfig(t, nil)
   113  	config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc")
   114  
   115  	if _, _, err := runContainer(t, config, "true"); err == nil {
   116  		t.Fatal("container succeeded with bad ipc path")
   117  	}
   118  }
   119  
   120  func TestRlimit(t *testing.T) {
   121  	testRlimit(t, false)
   122  }
   123  
   124  func TestUsernsRlimit(t *testing.T) {
   125  	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
   126  		t.Skip("Test requires userns.")
   127  	}
   128  
   129  	testRlimit(t, true)
   130  }
   131  
   132  func testRlimit(t *testing.T, userns bool) {
   133  	if testing.Short() {
   134  		return
   135  	}
   136  
   137  	config := newTemplateConfig(t, &tParam{userns: userns})
   138  
   139  	// Ensure limit is lower than what the config requests to test that in a user namespace
   140  	// the Setrlimit call happens early enough that we still have permissions to raise the limit.
   141  	// Do not change the Cur value to be equal to the Max value, please see:
   142  	// https://github.com/opencontainers/runc/pull/4265#discussion_r1589666444
   143  	ok(t, unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{
   144  		Max: 1024,
   145  		Cur: 512,
   146  	}))
   147  
   148  	out := runContainerOk(t, config, "/bin/sh", "-c", "ulimit -n")
   149  	if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" {
   150  		t.Fatalf("expected rlimit to be 1025, got %s", limit)
   151  	}
   152  }
   153  
   154  func TestEnter(t *testing.T) {
   155  	if testing.Short() {
   156  		return
   157  	}
   158  
   159  	config := newTemplateConfig(t, nil)
   160  
   161  	container, err := newContainer(t, config)
   162  	ok(t, err)
   163  	defer destroyContainer(container)
   164  
   165  	// Execute a first process in the container
   166  	stdinR, stdinW, err := os.Pipe()
   167  	ok(t, err)
   168  
   169  	var stdout, stdout2 bytes.Buffer
   170  
   171  	pconfig := libcontainer.Process{
   172  		Cwd:    "/",
   173  		Args:   []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"},
   174  		Env:    standardEnvironment,
   175  		Stdin:  stdinR,
   176  		Stdout: &stdout,
   177  		Init:   true,
   178  	}
   179  	err = container.Run(&pconfig)
   180  	_ = stdinR.Close()
   181  	defer stdinW.Close() //nolint: errcheck
   182  	ok(t, err)
   183  	pid, err := pconfig.Pid()
   184  	ok(t, err)
   185  
   186  	// Execute another process in the container
   187  	stdinR2, stdinW2, err := os.Pipe()
   188  	ok(t, err)
   189  	pconfig2 := libcontainer.Process{
   190  		Cwd: "/",
   191  		Env: standardEnvironment,
   192  	}
   193  	pconfig2.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}
   194  	pconfig2.Stdin = stdinR2
   195  	pconfig2.Stdout = &stdout2
   196  
   197  	err = container.Run(&pconfig2)
   198  	_ = stdinR2.Close()
   199  	defer stdinW2.Close() //nolint: errcheck
   200  	ok(t, err)
   201  
   202  	pid2, err := pconfig2.Pid()
   203  	ok(t, err)
   204  
   205  	processes, err := container.Processes()
   206  	ok(t, err)
   207  
   208  	n := 0
   209  	for i := range processes {
   210  		if processes[i] == pid || processes[i] == pid2 {
   211  			n++
   212  		}
   213  	}
   214  	if n != 2 {
   215  		t.Fatal("unexpected number of processes", processes, pid, pid2)
   216  	}
   217  
   218  	// Wait processes
   219  	_ = stdinW2.Close()
   220  	waitProcess(&pconfig2, t)
   221  
   222  	_ = stdinW.Close()
   223  	waitProcess(&pconfig, t)
   224  
   225  	// Check that both processes live in the same pidns
   226  	pidns := stdout.String()
   227  	ok(t, err)
   228  
   229  	pidns2 := stdout2.String()
   230  	ok(t, err)
   231  
   232  	if pidns != pidns2 {
   233  		t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2)
   234  	}
   235  }
   236  
   237  func TestProcessEnv(t *testing.T) {
   238  	if testing.Short() {
   239  		return
   240  	}
   241  
   242  	config := newTemplateConfig(t, nil)
   243  	container, err := newContainer(t, config)
   244  	ok(t, err)
   245  	defer destroyContainer(container)
   246  
   247  	var stdout bytes.Buffer
   248  	pconfig := libcontainer.Process{
   249  		Cwd:  "/",
   250  		Args: []string{"sh", "-c", "env"},
   251  		Env: []string{
   252  			"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
   253  			"HOSTNAME=integration",
   254  			"TERM=xterm",
   255  			"FOO=BAR",
   256  		},
   257  		Stdin:  nil,
   258  		Stdout: &stdout,
   259  		Init:   true,
   260  	}
   261  	err = container.Run(&pconfig)
   262  	ok(t, err)
   263  
   264  	// Wait for process
   265  	waitProcess(&pconfig, t)
   266  
   267  	outputEnv := stdout.String()
   268  
   269  	// Check that the environment has the key/value pair we added
   270  	if !strings.Contains(outputEnv, "FOO=BAR") {
   271  		t.Fatal("Environment doesn't have the expected FOO=BAR key/value pair: ", outputEnv)
   272  	}
   273  
   274  	// Make sure that HOME is set
   275  	if !strings.Contains(outputEnv, "HOME=/root") {
   276  		t.Fatal("Environment doesn't have HOME set: ", outputEnv)
   277  	}
   278  }
   279  
   280  func TestProcessEmptyCaps(t *testing.T) {
   281  	if testing.Short() {
   282  		return
   283  	}
   284  
   285  	config := newTemplateConfig(t, nil)
   286  	config.Capabilities = nil
   287  
   288  	container, err := newContainer(t, config)
   289  	ok(t, err)
   290  	defer destroyContainer(container)
   291  
   292  	var stdout bytes.Buffer
   293  	pconfig := libcontainer.Process{
   294  		Cwd:    "/",
   295  		Args:   []string{"sh", "-c", "cat /proc/self/status"},
   296  		Env:    standardEnvironment,
   297  		Stdin:  nil,
   298  		Stdout: &stdout,
   299  		Init:   true,
   300  	}
   301  	err = container.Run(&pconfig)
   302  	ok(t, err)
   303  
   304  	// Wait for process
   305  	waitProcess(&pconfig, t)
   306  
   307  	outputStatus := stdout.String()
   308  
   309  	lines := strings.Split(outputStatus, "\n")
   310  
   311  	effectiveCapsLine := ""
   312  	for _, l := range lines {
   313  		line := strings.TrimSpace(l)
   314  		if strings.Contains(line, "CapEff:") {
   315  			effectiveCapsLine = line
   316  			break
   317  		}
   318  	}
   319  
   320  	if effectiveCapsLine == "" {
   321  		t.Fatal("Couldn't find effective caps: ", outputStatus)
   322  	}
   323  }
   324  
   325  func TestProcessCaps(t *testing.T) {
   326  	if testing.Short() {
   327  		return
   328  	}
   329  
   330  	config := newTemplateConfig(t, nil)
   331  	container, err := newContainer(t, config)
   332  	ok(t, err)
   333  	defer destroyContainer(container)
   334  
   335  	var stdout bytes.Buffer
   336  	pconfig := libcontainer.Process{
   337  		Cwd:          "/",
   338  		Args:         []string{"sh", "-c", "cat /proc/self/status"},
   339  		Env:          standardEnvironment,
   340  		Stdin:        nil,
   341  		Stdout:       &stdout,
   342  		Capabilities: &configs.Capabilities{},
   343  		Init:         true,
   344  	}
   345  	pconfig.Capabilities.Bounding = append(config.Capabilities.Bounding, "CAP_NET_ADMIN")
   346  	pconfig.Capabilities.Permitted = append(config.Capabilities.Permitted, "CAP_NET_ADMIN")
   347  	pconfig.Capabilities.Effective = append(config.Capabilities.Effective, "CAP_NET_ADMIN")
   348  	err = container.Run(&pconfig)
   349  	ok(t, err)
   350  
   351  	// Wait for process
   352  	waitProcess(&pconfig, t)
   353  
   354  	outputStatus := stdout.String()
   355  
   356  	lines := strings.Split(outputStatus, "\n")
   357  
   358  	effectiveCapsLine := ""
   359  	for _, l := range lines {
   360  		line := strings.TrimSpace(l)
   361  		if strings.Contains(line, "CapEff:") {
   362  			effectiveCapsLine = line
   363  			break
   364  		}
   365  	}
   366  
   367  	if effectiveCapsLine == "" {
   368  		t.Fatal("Couldn't find effective caps: ", outputStatus)
   369  	}
   370  
   371  	parts := strings.Split(effectiveCapsLine, ":")
   372  	effectiveCapsStr := strings.TrimSpace(parts[1])
   373  
   374  	effectiveCaps, err := strconv.ParseUint(effectiveCapsStr, 16, 64)
   375  	if err != nil {
   376  		t.Fatal("Could not parse effective caps", err)
   377  	}
   378  
   379  	const netAdminMask = 1 << unix.CAP_NET_ADMIN
   380  	if effectiveCaps&netAdminMask != netAdminMask {
   381  		t.Fatal("CAP_NET_ADMIN is not set as expected")
   382  	}
   383  }
   384  
   385  func TestAdditionalGroups(t *testing.T) {
   386  	if testing.Short() {
   387  		return
   388  	}
   389  
   390  	config := newTemplateConfig(t, nil)
   391  	container, err := newContainer(t, config)
   392  	ok(t, err)
   393  	defer destroyContainer(container)
   394  
   395  	var stdout bytes.Buffer
   396  	pconfig := libcontainer.Process{
   397  		Cwd:              "/",
   398  		Args:             []string{"sh", "-c", "id", "-Gn"},
   399  		Env:              standardEnvironment,
   400  		Stdin:            nil,
   401  		Stdout:           &stdout,
   402  		AdditionalGroups: []string{"plugdev", "audio"},
   403  		Init:             true,
   404  	}
   405  	err = container.Run(&pconfig)
   406  	ok(t, err)
   407  
   408  	// Wait for process
   409  	waitProcess(&pconfig, t)
   410  
   411  	outputGroups := stdout.String()
   412  
   413  	// Check that the groups output has the groups that we specified
   414  	if !strings.Contains(outputGroups, "audio") {
   415  		t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups)
   416  	}
   417  
   418  	if !strings.Contains(outputGroups, "plugdev") {
   419  		t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups)
   420  	}
   421  }
   422  
   423  func TestFreeze(t *testing.T) {
   424  	for _, systemd := range []bool{true, false} {
   425  		for _, set := range []bool{true, false} {
   426  			name := ""
   427  			if systemd {
   428  				name += "Systemd"
   429  			} else {
   430  				name += "FS"
   431  			}
   432  			if set {
   433  				name += "ViaSet"
   434  			} else {
   435  				name += "ViaPauseResume"
   436  			}
   437  			t.Run(name, func(t *testing.T) {
   438  				testFreeze(t, systemd, set)
   439  			})
   440  		}
   441  	}
   442  }
   443  
   444  func testFreeze(t *testing.T, withSystemd bool, useSet bool) {
   445  	if testing.Short() {
   446  		return
   447  	}
   448  	if withSystemd && !systemd.IsRunningSystemd() {
   449  		t.Skip("Test requires systemd.")
   450  	}
   451  
   452  	config := newTemplateConfig(t, &tParam{systemd: withSystemd})
   453  	container, err := newContainer(t, config)
   454  	ok(t, err)
   455  	defer destroyContainer(container)
   456  
   457  	stdinR, stdinW, err := os.Pipe()
   458  	ok(t, err)
   459  
   460  	pconfig := &libcontainer.Process{
   461  		Cwd:   "/",
   462  		Args:  []string{"cat"},
   463  		Env:   standardEnvironment,
   464  		Stdin: stdinR,
   465  		Init:  true,
   466  	}
   467  	err = container.Run(pconfig)
   468  	_ = stdinR.Close()
   469  	defer stdinW.Close() //nolint: errcheck
   470  	ok(t, err)
   471  
   472  	if !useSet {
   473  		err = container.Pause()
   474  	} else {
   475  		config.Cgroups.Resources.Freezer = configs.Frozen
   476  		err = container.Set(*config)
   477  	}
   478  	ok(t, err)
   479  
   480  	state, err := container.Status()
   481  	ok(t, err)
   482  	if state != libcontainer.Paused {
   483  		t.Fatal("Unexpected state: ", state)
   484  	}
   485  
   486  	if !useSet {
   487  		err = container.Resume()
   488  	} else {
   489  		config.Cgroups.Resources.Freezer = configs.Thawed
   490  		err = container.Set(*config)
   491  	}
   492  	ok(t, err)
   493  
   494  	_ = stdinW.Close()
   495  	waitProcess(pconfig, t)
   496  }
   497  
   498  func TestCpuShares(t *testing.T) {
   499  	testCpuShares(t, false)
   500  }
   501  
   502  func TestCpuSharesSystemd(t *testing.T) {
   503  	if !systemd.IsRunningSystemd() {
   504  		t.Skip("Test requires systemd.")
   505  	}
   506  	testCpuShares(t, true)
   507  }
   508  
   509  func testCpuShares(t *testing.T, systemd bool) {
   510  	if testing.Short() {
   511  		return
   512  	}
   513  	if cgroups.IsCgroup2UnifiedMode() {
   514  		t.Skip("cgroup v2 does not support CpuShares")
   515  	}
   516  
   517  	config := newTemplateConfig(t, &tParam{systemd: systemd})
   518  	config.Cgroups.Resources.CpuShares = 1
   519  
   520  	if _, _, err := runContainer(t, config, "ps"); err == nil {
   521  		t.Fatal("runContainer should fail with invalid CpuShares")
   522  	}
   523  }
   524  
   525  func TestPids(t *testing.T) {
   526  	testPids(t, false)
   527  }
   528  
   529  func TestPidsSystemd(t *testing.T) {
   530  	if !systemd.IsRunningSystemd() {
   531  		t.Skip("Test requires systemd.")
   532  	}
   533  	testPids(t, true)
   534  }
   535  
   536  func testPids(t *testing.T, systemd bool) {
   537  	if testing.Short() {
   538  		return
   539  	}
   540  
   541  	config := newTemplateConfig(t, &tParam{systemd: systemd})
   542  	config.Cgroups.Resources.PidsLimit = -1
   543  
   544  	// Running multiple processes, expecting it to succeed with no pids limit.
   545  	_ = runContainerOk(t, config, "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true")
   546  
   547  	// Enforce a permissive limit. This needs to be fairly hand-wavey due to the
   548  	// issues with running Go binaries with pids restrictions (see below).
   549  	config.Cgroups.Resources.PidsLimit = 64
   550  	_ = runContainerOk(t, config, "/bin/sh", "-c", `
   551  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   552  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   553  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   554  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`)
   555  
   556  	// Enforce a restrictive limit. 64 * /bin/true + 1 * shell should cause
   557  	// this to fail reliably.
   558  	config.Cgroups.Resources.PidsLimit = 64
   559  	out, _, err := runContainer(t, config, "/bin/sh", "-c", `
   560  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   561  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   562  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   563  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   564  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   565  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   566  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true |
   567  	/bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`)
   568  	if err != nil && !strings.Contains(out.String(), "sh: can't fork") {
   569  		t.Fatal(err)
   570  	}
   571  
   572  	if err == nil {
   573  		t.Fatal("expected fork() to fail with restrictive pids limit")
   574  	}
   575  
   576  	// Minimal restrictions are not really supported, due to quirks in using Go
   577  	// due to the fact that it spawns random processes. While we do our best with
   578  	// late setting cgroup values, it's just too unreliable with very small pids.max.
   579  	// As such, we don't test that case. YMMV.
   580  }
   581  
   582  func TestCgroupResourcesUnifiedErrorOnV1(t *testing.T) {
   583  	testCgroupResourcesUnifiedErrorOnV1(t, false)
   584  }
   585  
   586  func TestCgroupResourcesUnifiedErrorOnV1Systemd(t *testing.T) {
   587  	if !systemd.IsRunningSystemd() {
   588  		t.Skip("Test requires systemd.")
   589  	}
   590  	testCgroupResourcesUnifiedErrorOnV1(t, true)
   591  }
   592  
   593  func testCgroupResourcesUnifiedErrorOnV1(t *testing.T, systemd bool) {
   594  	if testing.Short() {
   595  		return
   596  	}
   597  	if cgroups.IsCgroup2UnifiedMode() {
   598  		t.Skip("requires cgroup v1")
   599  	}
   600  
   601  	config := newTemplateConfig(t, &tParam{systemd: systemd})
   602  	config.Cgroups.Resources.Unified = map[string]string{
   603  		"memory.min": "10240",
   604  	}
   605  	_, _, err := runContainer(t, config, "true")
   606  	if !strings.Contains(err.Error(), cgroups.ErrV1NoUnified.Error()) {
   607  		t.Fatalf("expected error to contain %v, got %v", cgroups.ErrV1NoUnified, err)
   608  	}
   609  }
   610  
   611  func TestCgroupResourcesUnified(t *testing.T) {
   612  	testCgroupResourcesUnified(t, false)
   613  }
   614  
   615  func TestCgroupResourcesUnifiedSystemd(t *testing.T) {
   616  	if !systemd.IsRunningSystemd() {
   617  		t.Skip("Test requires systemd.")
   618  	}
   619  	testCgroupResourcesUnified(t, true)
   620  }
   621  
   622  func testCgroupResourcesUnified(t *testing.T, systemd bool) {
   623  	if testing.Short() {
   624  		return
   625  	}
   626  	if !cgroups.IsCgroup2UnifiedMode() {
   627  		t.Skip("requires cgroup v2")
   628  	}
   629  
   630  	config := newTemplateConfig(t, &tParam{systemd: systemd})
   631  	config.Cgroups.Resources.Memory = 536870912     // 512M
   632  	config.Cgroups.Resources.MemorySwap = 536870912 // 512M, i.e. no swap
   633  	config.Namespaces.Add(configs.NEWCGROUP, "")
   634  
   635  	testCases := []struct {
   636  		name     string
   637  		cfg      map[string]string
   638  		expError string
   639  		cmd      []string
   640  		exp      string
   641  	}{
   642  		{
   643  			name: "dummy",
   644  			cmd:  []string{"true"},
   645  			exp:  "",
   646  		},
   647  		{
   648  			name: "set memory.min",
   649  			cfg:  map[string]string{"memory.min": "131072"},
   650  			cmd:  []string{"cat", "/sys/fs/cgroup/memory.min"},
   651  			exp:  "131072\n",
   652  		},
   653  		{
   654  			name: "check memory.max",
   655  			cmd:  []string{"cat", "/sys/fs/cgroup/memory.max"},
   656  			exp:  strconv.Itoa(int(config.Cgroups.Resources.Memory)) + "\n",
   657  		},
   658  
   659  		{
   660  			name: "overwrite memory.max",
   661  			cfg:  map[string]string{"memory.max": "268435456"},
   662  			cmd:  []string{"cat", "/sys/fs/cgroup/memory.max"},
   663  			exp:  "268435456\n",
   664  		},
   665  		{
   666  			name:     "no such controller error",
   667  			cfg:      map[string]string{"privet.vsem": "vam"},
   668  			expError: "controller \"privet\" not available",
   669  		},
   670  		{
   671  			name:     "slash in key error",
   672  			cfg:      map[string]string{"bad/key": "val"},
   673  			expError: "must be a file name (no slashes)",
   674  		},
   675  		{
   676  			name:     "no dot in key error",
   677  			cfg:      map[string]string{"badkey": "val"},
   678  			expError: "must be in the form CONTROLLER.PARAMETER",
   679  		},
   680  		{
   681  			name:     "read-only parameter",
   682  			cfg:      map[string]string{"pids.current": "42"},
   683  			expError: "failed to write",
   684  		},
   685  	}
   686  
   687  	for _, tc := range testCases {
   688  		config.Cgroups.Resources.Unified = tc.cfg
   689  		buffers, ret, err := runContainer(t, config, tc.cmd...)
   690  		if tc.expError != "" {
   691  			if err == nil {
   692  				t.Errorf("case %q failed: expected error, got nil", tc.name)
   693  				continue
   694  			}
   695  			if !strings.Contains(err.Error(), tc.expError) {
   696  				t.Errorf("case %q failed: expected error to contain %q, got %q", tc.name, tc.expError, err)
   697  			}
   698  			continue
   699  		}
   700  		if err != nil {
   701  			t.Errorf("case %q failed: expected no error, got %v (command: %v, status: %d, stderr: %q)",
   702  				tc.name, err, tc.cmd, ret, buffers.Stderr.String())
   703  			continue
   704  		}
   705  		if tc.exp != "" {
   706  			out := buffers.Stdout.String()
   707  			if out != tc.exp {
   708  				t.Errorf("expected %q, got %q", tc.exp, out)
   709  			}
   710  		}
   711  	}
   712  }
   713  
   714  func TestContainerState(t *testing.T) {
   715  	if testing.Short() {
   716  		return
   717  	}
   718  
   719  	l, err := os.Readlink("/proc/1/ns/ipc")
   720  	ok(t, err)
   721  
   722  	config := newTemplateConfig(t, nil)
   723  	config.Namespaces = configs.Namespaces([]configs.Namespace{
   724  		{Type: configs.NEWNS},
   725  		{Type: configs.NEWUTS},
   726  		// host for IPC
   727  		//{Type: configs.NEWIPC},
   728  		{Type: configs.NEWPID},
   729  		{Type: configs.NEWNET},
   730  	})
   731  
   732  	container, err := newContainer(t, config)
   733  	ok(t, err)
   734  	defer destroyContainer(container)
   735  
   736  	stdinR, stdinW, err := os.Pipe()
   737  	ok(t, err)
   738  
   739  	p := &libcontainer.Process{
   740  		Cwd:   "/",
   741  		Args:  []string{"cat"},
   742  		Env:   standardEnvironment,
   743  		Stdin: stdinR,
   744  		Init:  true,
   745  	}
   746  	err = container.Run(p)
   747  	ok(t, err)
   748  	_ = stdinR.Close()
   749  	defer stdinW.Close() //nolint: errcheck
   750  
   751  	st, err := container.State()
   752  	ok(t, err)
   753  
   754  	l1, err := os.Readlink(st.NamespacePaths[configs.NEWIPC])
   755  	ok(t, err)
   756  	if l1 != l {
   757  		t.Fatal("Container using non-host ipc namespace")
   758  	}
   759  	_ = stdinW.Close()
   760  	waitProcess(p, t)
   761  }
   762  
   763  func TestPassExtraFiles(t *testing.T) {
   764  	if testing.Short() {
   765  		return
   766  	}
   767  
   768  	config := newTemplateConfig(t, nil)
   769  	container, err := newContainer(t, config)
   770  	ok(t, err)
   771  	defer destroyContainer(container)
   772  
   773  	var stdout bytes.Buffer
   774  	pipeout1, pipein1, err := os.Pipe()
   775  	ok(t, err)
   776  	pipeout2, pipein2, err := os.Pipe()
   777  	ok(t, err)
   778  	process := libcontainer.Process{
   779  		Cwd:        "/",
   780  		Args:       []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"},
   781  		Env:        []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"},
   782  		ExtraFiles: []*os.File{pipein1, pipein2},
   783  		Stdin:      nil,
   784  		Stdout:     &stdout,
   785  		Init:       true,
   786  	}
   787  	err = container.Run(&process)
   788  	ok(t, err)
   789  
   790  	waitProcess(&process, t)
   791  
   792  	out := stdout.String()
   793  	// fd 5 is the directory handle for /proc/$$/fd
   794  	if out != "0 1 2 3 4 5" {
   795  		t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to init, got '%s'", out)
   796  	}
   797  	buf := []byte{0}
   798  	_, err = pipeout1.Read(buf)
   799  	ok(t, err)
   800  	out1 := string(buf)
   801  	if out1 != "1" {
   802  		t.Fatalf("expected first pipe to receive '1', got '%s'", out1)
   803  	}
   804  
   805  	_, err = pipeout2.Read(buf)
   806  	ok(t, err)
   807  	out2 := string(buf)
   808  	if out2 != "2" {
   809  		t.Fatalf("expected second pipe to receive '2', got '%s'", out2)
   810  	}
   811  }
   812  
   813  func TestSysctl(t *testing.T) {
   814  	if testing.Short() {
   815  		return
   816  	}
   817  
   818  	config := newTemplateConfig(t, nil)
   819  	config.Sysctl = map[string]string{
   820  		"kernel.shmmni": "8192",
   821  		"kernel/shmmax": "4194304",
   822  	}
   823  	const (
   824  		cmd = "cat shmmni shmmax"
   825  		exp = "8192\n4194304\n"
   826  	)
   827  
   828  	container, err := newContainer(t, config)
   829  	ok(t, err)
   830  	defer destroyContainer(container)
   831  
   832  	var stdout bytes.Buffer
   833  	pconfig := libcontainer.Process{
   834  		Cwd:    "/proc/sys/kernel",
   835  		Args:   []string{"sh", "-c", cmd},
   836  		Env:    standardEnvironment,
   837  		Stdin:  nil,
   838  		Stdout: &stdout,
   839  		Init:   true,
   840  	}
   841  	err = container.Run(&pconfig)
   842  	ok(t, err)
   843  
   844  	// Wait for process
   845  	waitProcess(&pconfig, t)
   846  
   847  	out := stdout.String()
   848  	if out != exp {
   849  		t.Fatalf("expected %s, got %s", exp, out)
   850  	}
   851  }
   852  
   853  func TestMountCgroupRO(t *testing.T) {
   854  	if testing.Short() {
   855  		return
   856  	}
   857  	config := newTemplateConfig(t, nil)
   858  	buffers := runContainerOk(t, config, "mount")
   859  
   860  	mountInfo := buffers.Stdout.String()
   861  	lines := strings.Split(mountInfo, "\n")
   862  	for _, l := range lines {
   863  		if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") {
   864  			if !strings.Contains(l, "ro") ||
   865  				!strings.Contains(l, "nosuid") ||
   866  				!strings.Contains(l, "nodev") ||
   867  				!strings.Contains(l, "noexec") {
   868  				t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l)
   869  			}
   870  			if !strings.Contains(l, "mode=755") {
   871  				t.Fatalf("Mode expected to contain 'mode=755': %s", l)
   872  			}
   873  			continue
   874  		}
   875  		if !strings.HasPrefix(l, "cgroup") {
   876  			continue
   877  		}
   878  		if !strings.Contains(l, "ro") ||
   879  			!strings.Contains(l, "nosuid") ||
   880  			!strings.Contains(l, "nodev") ||
   881  			!strings.Contains(l, "noexec") {
   882  			t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l)
   883  		}
   884  	}
   885  }
   886  
   887  func TestMountCgroupRW(t *testing.T) {
   888  	if testing.Short() {
   889  		return
   890  	}
   891  	config := newTemplateConfig(t, nil)
   892  	// clear the RO flag from cgroup mount
   893  	for _, m := range config.Mounts {
   894  		if m.Device == "cgroup" {
   895  			m.Flags = defaultMountFlags
   896  			break
   897  		}
   898  	}
   899  
   900  	buffers := runContainerOk(t, config, "mount")
   901  
   902  	mountInfo := buffers.Stdout.String()
   903  	lines := strings.Split(mountInfo, "\n")
   904  	for _, l := range lines {
   905  		if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") {
   906  			if !strings.Contains(l, "rw") ||
   907  				!strings.Contains(l, "nosuid") ||
   908  				!strings.Contains(l, "nodev") ||
   909  				!strings.Contains(l, "noexec") {
   910  				t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l)
   911  			}
   912  			if !strings.Contains(l, "mode=755") {
   913  				t.Fatalf("Mode expected to contain 'mode=755': %s", l)
   914  			}
   915  			continue
   916  		}
   917  		if !strings.HasPrefix(l, "cgroup") {
   918  			continue
   919  		}
   920  		if !strings.Contains(l, "rw") ||
   921  			!strings.Contains(l, "nosuid") ||
   922  			!strings.Contains(l, "nodev") ||
   923  			!strings.Contains(l, "noexec") {
   924  			t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l)
   925  		}
   926  	}
   927  }
   928  
   929  func TestOomScoreAdj(t *testing.T) {
   930  	if testing.Short() {
   931  		return
   932  	}
   933  
   934  	config := newTemplateConfig(t, nil)
   935  	config.OomScoreAdj = ptrInt(200)
   936  
   937  	container, err := newContainer(t, config)
   938  	ok(t, err)
   939  	defer destroyContainer(container)
   940  
   941  	var stdout bytes.Buffer
   942  	pconfig := libcontainer.Process{
   943  		Cwd:    "/",
   944  		Args:   []string{"sh", "-c", "cat /proc/self/oom_score_adj"},
   945  		Env:    standardEnvironment,
   946  		Stdin:  nil,
   947  		Stdout: &stdout,
   948  		Init:   true,
   949  	}
   950  	err = container.Run(&pconfig)
   951  	ok(t, err)
   952  
   953  	// Wait for process
   954  	waitProcess(&pconfig, t)
   955  	outputOomScoreAdj := strings.TrimSpace(stdout.String())
   956  
   957  	// Check that the oom_score_adj matches the value that was set as part of config.
   958  	if outputOomScoreAdj != strconv.Itoa(*config.OomScoreAdj) {
   959  		t.Fatalf("Expected oom_score_adj %d; got %q", *config.OomScoreAdj, outputOomScoreAdj)
   960  	}
   961  }
   962  
   963  func TestHook(t *testing.T) {
   964  	if testing.Short() {
   965  		return
   966  	}
   967  
   968  	config := newTemplateConfig(t, nil)
   969  	expectedBundle := t.TempDir()
   970  	config.Labels = append(config.Labels, "bundle="+expectedBundle)
   971  
   972  	getRootfsFromBundle := func(bundle string) (string, error) {
   973  		f, err := os.Open(filepath.Join(bundle, "config.json"))
   974  		if err != nil {
   975  			return "", err
   976  		}
   977  
   978  		var config configs.Config
   979  		if err = json.NewDecoder(f).Decode(&config); err != nil {
   980  			return "", err
   981  		}
   982  		return config.Rootfs, nil
   983  	}
   984  	createFileFromBundle := func(filename, bundle string) error {
   985  		root, err := getRootfsFromBundle(bundle)
   986  		if err != nil {
   987  			return err
   988  		}
   989  
   990  		f, err := os.Create(filepath.Join(root, filename))
   991  		if err != nil {
   992  			return err
   993  		}
   994  		return f.Close()
   995  	}
   996  
   997  	// Note FunctionHooks can't be serialized to json this means they won't be passed down to the container
   998  	// For CreateContainer and StartContainer which run in the container namespace, this means we need to pass Command Hooks.
   999  	hookFiles := map[configs.HookName]string{
  1000  		configs.Prestart:        "prestart",
  1001  		configs.CreateRuntime:   "createRuntime",
  1002  		configs.CreateContainer: "createContainer",
  1003  		configs.StartContainer:  "startContainer",
  1004  		configs.Poststart:       "poststart",
  1005  	}
  1006  
  1007  	config.Hooks = configs.Hooks{
  1008  		configs.Prestart: configs.HookList{
  1009  			configs.NewFunctionHook(func(s *specs.State) error {
  1010  				if s.Bundle != expectedBundle {
  1011  					t.Fatalf("Expected prestart hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle)
  1012  				}
  1013  				return createFileFromBundle(hookFiles[configs.Prestart], s.Bundle)
  1014  			}),
  1015  		},
  1016  		configs.CreateRuntime: configs.HookList{
  1017  			configs.NewFunctionHook(func(s *specs.State) error {
  1018  				if s.Bundle != expectedBundle {
  1019  					t.Fatalf("Expected createRuntime hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle)
  1020  				}
  1021  				return createFileFromBundle(hookFiles[configs.CreateRuntime], s.Bundle)
  1022  			}),
  1023  		},
  1024  		configs.CreateContainer: configs.HookList{
  1025  			configs.NewCommandHook(configs.Command{
  1026  				Path: "/bin/bash",
  1027  				Args: []string{"/bin/bash", "-c", fmt.Sprintf("touch ./%s", hookFiles[configs.CreateContainer])},
  1028  			}),
  1029  		},
  1030  		configs.StartContainer: configs.HookList{
  1031  			configs.NewCommandHook(configs.Command{
  1032  				Path: "/bin/sh",
  1033  				Args: []string{"/bin/sh", "-c", fmt.Sprintf("touch /%s", hookFiles[configs.StartContainer])},
  1034  			}),
  1035  		},
  1036  		configs.Poststart: configs.HookList{
  1037  			configs.NewFunctionHook(func(s *specs.State) error {
  1038  				if s.Bundle != expectedBundle {
  1039  					t.Fatalf("Expected poststart hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle)
  1040  				}
  1041  				return createFileFromBundle(hookFiles[configs.Poststart], s.Bundle)
  1042  			}),
  1043  		},
  1044  		configs.Poststop: configs.HookList{
  1045  			configs.NewFunctionHook(func(s *specs.State) error {
  1046  				if s.Bundle != expectedBundle {
  1047  					t.Fatalf("Expected poststop hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle)
  1048  				}
  1049  
  1050  				root, err := getRootfsFromBundle(s.Bundle)
  1051  				if err != nil {
  1052  					return err
  1053  				}
  1054  
  1055  				for _, hook := range hookFiles {
  1056  					if err = os.RemoveAll(filepath.Join(root, hook)); err != nil {
  1057  						return err
  1058  					}
  1059  				}
  1060  				return nil
  1061  			}),
  1062  		},
  1063  	}
  1064  
  1065  	// write config of json format into config.json under bundle
  1066  	f, err := os.OpenFile(filepath.Join(expectedBundle, "config.json"), os.O_CREATE|os.O_RDWR, 0o644)
  1067  	ok(t, err)
  1068  	ok(t, json.NewEncoder(f).Encode(config))
  1069  
  1070  	container, err := newContainer(t, config)
  1071  	ok(t, err)
  1072  
  1073  	// e.g: 'ls /prestart ...'
  1074  	cmd := "ls "
  1075  	for _, hook := range hookFiles {
  1076  		cmd += "/" + hook + " "
  1077  	}
  1078  
  1079  	var stdout bytes.Buffer
  1080  	pconfig := libcontainer.Process{
  1081  		Cwd:    "/",
  1082  		Args:   []string{"sh", "-c", cmd},
  1083  		Env:    standardEnvironment,
  1084  		Stdin:  nil,
  1085  		Stdout: &stdout,
  1086  		Init:   true,
  1087  	}
  1088  	err = container.Run(&pconfig)
  1089  	ok(t, err)
  1090  
  1091  	// Wait for process
  1092  	waitProcess(&pconfig, t)
  1093  
  1094  	if err := container.Destroy(); err != nil {
  1095  		t.Fatalf("container destroy %s", err)
  1096  	}
  1097  
  1098  	for _, hook := range []string{"prestart", "createRuntime", "poststart"} {
  1099  		fi, err := os.Stat(filepath.Join(config.Rootfs, hook))
  1100  		if err == nil || !os.IsNotExist(err) {
  1101  			t.Fatalf("expected file '%s to not exists, but it does", fi.Name())
  1102  		}
  1103  	}
  1104  }
  1105  
  1106  func TestSTDIOPermissions(t *testing.T) {
  1107  	if testing.Short() {
  1108  		return
  1109  	}
  1110  
  1111  	config := newTemplateConfig(t, nil)
  1112  	buffers := runContainerOk(t, config, "sh", "-c", "echo hi > /dev/stderr")
  1113  
  1114  	if actual := strings.Trim(buffers.Stderr.String(), "\n"); actual != "hi" {
  1115  		t.Fatalf("stderr should equal be equal %q %q", actual, "hi")
  1116  	}
  1117  }
  1118  
  1119  func unmountOp(path string) {
  1120  	_ = unix.Unmount(path, unix.MNT_DETACH)
  1121  }
  1122  
  1123  // Launch container with rootfsPropagation in rslave mode. Also
  1124  // bind mount a volume /mnt1host at /mnt1cont at the time of launch. Now do
  1125  // another mount on host (/mnt1host/mnt2host) and this new mount should
  1126  // propagate to container (/mnt1cont/mnt2host)
  1127  func TestRootfsPropagationSlaveMount(t *testing.T) {
  1128  	var mountPropagated bool
  1129  	var dir1cont string
  1130  	var dir2cont string
  1131  
  1132  	dir1cont = "/root/mnt1cont"
  1133  
  1134  	if testing.Short() {
  1135  		return
  1136  	}
  1137  	config := newTemplateConfig(t, nil)
  1138  	config.RootPropagation = unix.MS_SLAVE | unix.MS_REC
  1139  
  1140  	// Bind mount a volume.
  1141  	dir1host := t.TempDir()
  1142  
  1143  	// Make this dir a "shared" mount point. This will make sure a
  1144  	// slave relationship can be established in container.
  1145  	err := unix.Mount(dir1host, dir1host, "bind", unix.MS_BIND|unix.MS_REC, "")
  1146  	ok(t, err)
  1147  	err = unix.Mount("", dir1host, "", unix.MS_SHARED|unix.MS_REC, "")
  1148  	ok(t, err)
  1149  	defer unmountOp(dir1host)
  1150  
  1151  	config.Mounts = append(config.Mounts, &configs.Mount{
  1152  		Source:      dir1host,
  1153  		Destination: dir1cont,
  1154  		Device:      "bind",
  1155  		Flags:       unix.MS_BIND | unix.MS_REC,
  1156  	})
  1157  
  1158  	container, err := newContainer(t, config)
  1159  	ok(t, err)
  1160  	defer destroyContainer(container)
  1161  
  1162  	stdinR, stdinW, err := os.Pipe()
  1163  	ok(t, err)
  1164  
  1165  	pconfig := &libcontainer.Process{
  1166  		Cwd:   "/",
  1167  		Args:  []string{"cat"},
  1168  		Env:   standardEnvironment,
  1169  		Stdin: stdinR,
  1170  		Init:  true,
  1171  	}
  1172  
  1173  	err = container.Run(pconfig)
  1174  	_ = stdinR.Close()
  1175  	defer stdinW.Close() //nolint: errcheck
  1176  	ok(t, err)
  1177  
  1178  	// Create mnt2host under dir1host and bind mount itself on top of it.
  1179  	// This should be visible in container.
  1180  	dir2host := filepath.Join(dir1host, "mnt2host")
  1181  	err = os.Mkdir(dir2host, 0o700)
  1182  	ok(t, err)
  1183  	defer remove(dir2host)
  1184  
  1185  	err = unix.Mount(dir2host, dir2host, "bind", unix.MS_BIND, "")
  1186  	defer unmountOp(dir2host)
  1187  	ok(t, err)
  1188  
  1189  	// Run "cat /proc/self/mountinfo" in container and look at mount points.
  1190  	var stdout2 bytes.Buffer
  1191  
  1192  	stdinR2, stdinW2, err := os.Pipe()
  1193  	ok(t, err)
  1194  
  1195  	pconfig2 := &libcontainer.Process{
  1196  		Cwd:    "/",
  1197  		Args:   []string{"cat", "/proc/self/mountinfo"},
  1198  		Env:    standardEnvironment,
  1199  		Stdin:  stdinR2,
  1200  		Stdout: &stdout2,
  1201  	}
  1202  
  1203  	err = container.Run(pconfig2)
  1204  	_ = stdinR2.Close()
  1205  	defer stdinW2.Close() //nolint: errcheck
  1206  	ok(t, err)
  1207  
  1208  	_ = stdinW2.Close()
  1209  	waitProcess(pconfig2, t)
  1210  	_ = stdinW.Close()
  1211  	waitProcess(pconfig, t)
  1212  
  1213  	mountPropagated = false
  1214  	dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host))
  1215  
  1216  	propagationInfo := stdout2.String()
  1217  	lines := strings.Split(propagationInfo, "\n")
  1218  	for _, l := range lines {
  1219  		linefields := strings.Split(l, " ")
  1220  		if len(linefields) < 5 {
  1221  			continue
  1222  		}
  1223  
  1224  		if linefields[4] == dir2cont {
  1225  			mountPropagated = true
  1226  			break
  1227  		}
  1228  	}
  1229  
  1230  	if mountPropagated != true {
  1231  		t.Fatalf("Mount on host %s did not propagate in container at %s\n", dir2host, dir2cont)
  1232  	}
  1233  }
  1234  
  1235  // Launch container with rootfsPropagation 0 so no propagation flags are
  1236  // applied. Also bind mount a volume /mnt1host at /mnt1cont at the time of
  1237  // launch. Now do a mount in container (/mnt1cont/mnt2cont) and this new
  1238  // mount should propagate to host (/mnt1host/mnt2cont)
  1239  
  1240  func TestRootfsPropagationSharedMount(t *testing.T) {
  1241  	var dir1cont string
  1242  	var dir2cont string
  1243  
  1244  	dir1cont = "/root/mnt1cont"
  1245  
  1246  	if testing.Short() {
  1247  		return
  1248  	}
  1249  	config := newTemplateConfig(t, nil)
  1250  	config.RootPropagation = unix.MS_PRIVATE
  1251  
  1252  	// Bind mount a volume.
  1253  	dir1host := t.TempDir()
  1254  
  1255  	// Make this dir a "shared" mount point. This will make sure a
  1256  	// shared relationship can be established in container.
  1257  	err := unix.Mount(dir1host, dir1host, "bind", unix.MS_BIND|unix.MS_REC, "")
  1258  	ok(t, err)
  1259  	err = unix.Mount("", dir1host, "", unix.MS_SHARED|unix.MS_REC, "")
  1260  	ok(t, err)
  1261  	defer unmountOp(dir1host)
  1262  
  1263  	config.Mounts = append(config.Mounts, &configs.Mount{
  1264  		Source:      dir1host,
  1265  		Destination: dir1cont,
  1266  		Device:      "bind",
  1267  		Flags:       unix.MS_BIND | unix.MS_REC,
  1268  	})
  1269  
  1270  	container, err := newContainer(t, config)
  1271  	ok(t, err)
  1272  	defer destroyContainer(container)
  1273  
  1274  	stdinR, stdinW, err := os.Pipe()
  1275  	ok(t, err)
  1276  
  1277  	pconfig := &libcontainer.Process{
  1278  		Cwd:   "/",
  1279  		Args:  []string{"cat"},
  1280  		Env:   standardEnvironment,
  1281  		Stdin: stdinR,
  1282  		Init:  true,
  1283  	}
  1284  
  1285  	err = container.Run(pconfig)
  1286  	_ = stdinR.Close()
  1287  	defer stdinW.Close() //nolint: errcheck
  1288  	ok(t, err)
  1289  
  1290  	// Create mnt2cont under dir1host. This will become visible inside container
  1291  	// at mnt1cont/mnt2cont. Bind mount itself on top of it. This
  1292  	// should be visible on host now.
  1293  	dir2host := filepath.Join(dir1host, "mnt2cont")
  1294  	err = os.Mkdir(dir2host, 0o700)
  1295  	ok(t, err)
  1296  	defer remove(dir2host)
  1297  
  1298  	dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host))
  1299  
  1300  	// Mount something in container and see if it is visible on host.
  1301  	var stdout2 bytes.Buffer
  1302  
  1303  	stdinR2, stdinW2, err := os.Pipe()
  1304  	ok(t, err)
  1305  
  1306  	pconfig2 := &libcontainer.Process{
  1307  		Cwd:          "/",
  1308  		Args:         []string{"mount", "--bind", dir2cont, dir2cont},
  1309  		Env:          standardEnvironment,
  1310  		Stdin:        stdinR2,
  1311  		Stdout:       &stdout2,
  1312  		Capabilities: &configs.Capabilities{},
  1313  	}
  1314  
  1315  	// Provide CAP_SYS_ADMIN
  1316  	pconfig2.Capabilities.Bounding = append(config.Capabilities.Bounding, "CAP_SYS_ADMIN")
  1317  	pconfig2.Capabilities.Permitted = append(config.Capabilities.Permitted, "CAP_SYS_ADMIN")
  1318  	pconfig2.Capabilities.Effective = append(config.Capabilities.Effective, "CAP_SYS_ADMIN")
  1319  
  1320  	err = container.Run(pconfig2)
  1321  	_ = stdinR2.Close()
  1322  	defer stdinW2.Close() //nolint: errcheck
  1323  	ok(t, err)
  1324  
  1325  	// Wait for process
  1326  	_ = stdinW2.Close()
  1327  	waitProcess(pconfig2, t)
  1328  	_ = stdinW.Close()
  1329  	waitProcess(pconfig, t)
  1330  
  1331  	defer unmountOp(dir2host)
  1332  
  1333  	// Check if mount is visible on host or not.
  1334  	out, err := exec.Command("findmnt", "-n", "-f", "-oTARGET", dir2host).CombinedOutput()
  1335  	outtrim := string(bytes.TrimSpace(out))
  1336  	if err != nil {
  1337  		t.Logf("findmnt error %q: %q", err, outtrim)
  1338  	}
  1339  
  1340  	if outtrim != dir2host {
  1341  		t.Fatalf("Mount in container on %s did not propagate to host on %s. finmnt output=%s", dir2cont, dir2host, outtrim)
  1342  	}
  1343  }
  1344  
  1345  func TestPIDHost(t *testing.T) {
  1346  	if testing.Short() {
  1347  		return
  1348  	}
  1349  
  1350  	l, err := os.Readlink("/proc/1/ns/pid")
  1351  	ok(t, err)
  1352  
  1353  	config := newTemplateConfig(t, nil)
  1354  	config.Namespaces.Remove(configs.NEWPID)
  1355  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/pid")
  1356  
  1357  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
  1358  		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
  1359  	}
  1360  }
  1361  
  1362  func TestHostPidnsInitKill(t *testing.T) {
  1363  	config := newTemplateConfig(t, nil)
  1364  	// Implicitly use host pid ns.
  1365  	config.Namespaces.Remove(configs.NEWPID)
  1366  	testPidnsInitKill(t, config)
  1367  }
  1368  
  1369  func TestSharedPidnsInitKill(t *testing.T) {
  1370  	config := newTemplateConfig(t, nil)
  1371  	// Explicitly use host pid ns.
  1372  	config.Namespaces.Add(configs.NEWPID, "/proc/1/ns/pid")
  1373  	testPidnsInitKill(t, config)
  1374  }
  1375  
  1376  func testPidnsInitKill(t *testing.T, config *configs.Config) {
  1377  	if testing.Short() {
  1378  		return
  1379  	}
  1380  
  1381  	// Run a container with two long-running processes.
  1382  	container, err := newContainer(t, config)
  1383  	ok(t, err)
  1384  	defer func() {
  1385  		_ = container.Destroy()
  1386  	}()
  1387  
  1388  	process1 := &libcontainer.Process{
  1389  		Cwd:  "/",
  1390  		Args: []string{"sleep", "1h"},
  1391  		Env:  standardEnvironment,
  1392  		Init: true,
  1393  	}
  1394  	err = container.Run(process1)
  1395  	ok(t, err)
  1396  
  1397  	process2 := &libcontainer.Process{
  1398  		Cwd:  "/",
  1399  		Args: []string{"sleep", "1h"},
  1400  		Env:  standardEnvironment,
  1401  		Init: false,
  1402  	}
  1403  	err = container.Run(process2)
  1404  	ok(t, err)
  1405  
  1406  	// Kill the container.
  1407  	err = container.Signal(syscall.SIGKILL)
  1408  	ok(t, err)
  1409  	_, err = process1.Wait()
  1410  	if err == nil {
  1411  		t.Fatal("expected Wait to indicate failure")
  1412  	}
  1413  
  1414  	// The non-init process must've also been killed. If not,
  1415  	// the test will time out.
  1416  	_, err = process2.Wait()
  1417  	if err == nil {
  1418  		t.Fatal("expected Wait to indicate failure")
  1419  	}
  1420  }
  1421  
  1422  func TestInitJoinPID(t *testing.T) {
  1423  	if testing.Short() {
  1424  		return
  1425  	}
  1426  	// Execute a long-running container
  1427  	config1 := newTemplateConfig(t, nil)
  1428  	container1, err := newContainer(t, config1)
  1429  	ok(t, err)
  1430  	defer destroyContainer(container1)
  1431  
  1432  	stdinR1, stdinW1, err := os.Pipe()
  1433  	ok(t, err)
  1434  	init1 := &libcontainer.Process{
  1435  		Cwd:   "/",
  1436  		Args:  []string{"cat"},
  1437  		Env:   standardEnvironment,
  1438  		Stdin: stdinR1,
  1439  		Init:  true,
  1440  	}
  1441  	err = container1.Run(init1)
  1442  	_ = stdinR1.Close()
  1443  	defer stdinW1.Close() //nolint: errcheck
  1444  	ok(t, err)
  1445  
  1446  	// get the state of the first container
  1447  	state1, err := container1.State()
  1448  	ok(t, err)
  1449  	pidns1 := state1.NamespacePaths[configs.NEWPID]
  1450  
  1451  	// Run a container inside the existing pidns but with different cgroups
  1452  	config2 := newTemplateConfig(t, nil)
  1453  	config2.Namespaces.Add(configs.NEWPID, pidns1)
  1454  	config2.Cgroups.Path = "integration/test2"
  1455  	container2, err := newContainer(t, config2)
  1456  	ok(t, err)
  1457  	defer destroyContainer(container2)
  1458  
  1459  	stdinR2, stdinW2, err := os.Pipe()
  1460  	ok(t, err)
  1461  	init2 := &libcontainer.Process{
  1462  		Cwd:   "/",
  1463  		Args:  []string{"cat"},
  1464  		Env:   standardEnvironment,
  1465  		Stdin: stdinR2,
  1466  		Init:  true,
  1467  	}
  1468  	err = container2.Run(init2)
  1469  	_ = stdinR2.Close()
  1470  	defer stdinW2.Close() //nolint: errcheck
  1471  	ok(t, err)
  1472  	// get the state of the second container
  1473  	state2, err := container2.State()
  1474  	ok(t, err)
  1475  
  1476  	ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state1.InitProcessPid))
  1477  	ok(t, err)
  1478  	ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state2.InitProcessPid))
  1479  	ok(t, err)
  1480  	if ns1 != ns2 {
  1481  		t.Errorf("pidns(%s), wanted %s", ns2, ns1)
  1482  	}
  1483  
  1484  	// check that namespaces are not the same
  1485  	if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) {
  1486  		t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths,
  1487  			state1.NamespacePaths)
  1488  	}
  1489  	// check that pidns is joined correctly. The initial container process list
  1490  	// should contain the second container's init process
  1491  	buffers := newStdBuffers()
  1492  	ps := &libcontainer.Process{
  1493  		Cwd:    "/",
  1494  		Args:   []string{"ps"},
  1495  		Env:    standardEnvironment,
  1496  		Stdout: buffers.Stdout,
  1497  	}
  1498  	err = container1.Run(ps)
  1499  	ok(t, err)
  1500  	waitProcess(ps, t)
  1501  
  1502  	// Stop init processes one by one. Stop the second container should
  1503  	// not stop the first.
  1504  	_ = stdinW2.Close()
  1505  	waitProcess(init2, t)
  1506  	_ = stdinW1.Close()
  1507  	waitProcess(init1, t)
  1508  
  1509  	out := strings.TrimSpace(buffers.Stdout.String())
  1510  	// output of ps inside the initial PID namespace should have
  1511  	// 1 line of header,
  1512  	// 2 lines of init processes,
  1513  	// 1 line of ps process
  1514  	if len(strings.Split(out, "\n")) != 4 {
  1515  		t.Errorf("unexpected running process, output %q", out)
  1516  	}
  1517  }
  1518  
  1519  func TestInitJoinNetworkAndUser(t *testing.T) {
  1520  	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
  1521  		t.Skip("Test requires userns.")
  1522  	}
  1523  	if testing.Short() {
  1524  		return
  1525  	}
  1526  
  1527  	// Execute a long-running container
  1528  	config1 := newTemplateConfig(t, &tParam{userns: true})
  1529  	container1, err := newContainer(t, config1)
  1530  	ok(t, err)
  1531  	defer destroyContainer(container1)
  1532  
  1533  	stdinR1, stdinW1, err := os.Pipe()
  1534  	ok(t, err)
  1535  	init1 := &libcontainer.Process{
  1536  		Cwd:   "/",
  1537  		Args:  []string{"cat"},
  1538  		Env:   standardEnvironment,
  1539  		Stdin: stdinR1,
  1540  		Init:  true,
  1541  	}
  1542  	err = container1.Run(init1)
  1543  	_ = stdinR1.Close()
  1544  	defer stdinW1.Close() //nolint: errcheck
  1545  	ok(t, err)
  1546  
  1547  	// get the state of the first container
  1548  	state1, err := container1.State()
  1549  	ok(t, err)
  1550  	netns1 := state1.NamespacePaths[configs.NEWNET]
  1551  	userns1 := state1.NamespacePaths[configs.NEWUSER]
  1552  
  1553  	// Run a container inside the existing pidns but with different cgroups.
  1554  	config2 := newTemplateConfig(t, &tParam{userns: true})
  1555  	config2.Namespaces.Add(configs.NEWNET, netns1)
  1556  	config2.Namespaces.Add(configs.NEWUSER, userns1)
  1557  	// Emulate specconv.setupUserNamespace().
  1558  	uidMap, gidMap, err := userns.GetUserNamespaceMappings(userns1)
  1559  	ok(t, err)
  1560  	config2.UIDMappings = uidMap
  1561  	config2.GIDMappings = gidMap
  1562  	config2.Cgroups.Path = "integration/test2"
  1563  	container2, err := newContainer(t, config2)
  1564  	ok(t, err)
  1565  	defer destroyContainer(container2)
  1566  
  1567  	stdinR2, stdinW2, err := os.Pipe()
  1568  	ok(t, err)
  1569  	init2 := &libcontainer.Process{
  1570  		Cwd:   "/",
  1571  		Args:  []string{"cat"},
  1572  		Env:   standardEnvironment,
  1573  		Stdin: stdinR2,
  1574  		Init:  true,
  1575  	}
  1576  	err = container2.Run(init2)
  1577  	_ = stdinR2.Close()
  1578  	defer stdinW2.Close() //nolint: errcheck
  1579  	ok(t, err)
  1580  
  1581  	// get the state of the second container
  1582  	state2, err := container2.State()
  1583  	ok(t, err)
  1584  
  1585  	for _, ns := range []string{"net", "user"} {
  1586  		ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state1.InitProcessPid, ns))
  1587  		ok(t, err)
  1588  		ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state2.InitProcessPid, ns))
  1589  		ok(t, err)
  1590  		if ns1 != ns2 {
  1591  			t.Errorf("%s(%s), wanted %s", ns, ns2, ns1)
  1592  		}
  1593  	}
  1594  
  1595  	// check that namespaces are not the same
  1596  	if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) {
  1597  		t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths,
  1598  			state1.NamespacePaths)
  1599  	}
  1600  	// Stop init processes one by one. Stop the second container should
  1601  	// not stop the first.
  1602  	_ = stdinW2.Close()
  1603  	waitProcess(init2, t)
  1604  	_ = stdinW1.Close()
  1605  	waitProcess(init1, t)
  1606  }
  1607  
  1608  func TestTmpfsCopyUp(t *testing.T) {
  1609  	if testing.Short() {
  1610  		return
  1611  	}
  1612  
  1613  	config := newTemplateConfig(t, nil)
  1614  	config.Mounts = append(config.Mounts, &configs.Mount{
  1615  		Source:      "tmpfs",
  1616  		Destination: "/etc",
  1617  		Device:      "tmpfs",
  1618  		Extensions:  configs.EXT_COPYUP,
  1619  	})
  1620  
  1621  	container, err := newContainer(t, config)
  1622  	ok(t, err)
  1623  	defer destroyContainer(container)
  1624  
  1625  	var stdout bytes.Buffer
  1626  	pconfig := libcontainer.Process{
  1627  		Args:   []string{"ls", "/etc/passwd"},
  1628  		Env:    standardEnvironment,
  1629  		Stdin:  nil,
  1630  		Stdout: &stdout,
  1631  		Init:   true,
  1632  	}
  1633  	err = container.Run(&pconfig)
  1634  	ok(t, err)
  1635  
  1636  	// Wait for process
  1637  	waitProcess(&pconfig, t)
  1638  
  1639  	outputLs := stdout.String()
  1640  
  1641  	// Check that the ls output has /etc/passwd
  1642  	if !strings.Contains(outputLs, "/etc/passwd") {
  1643  		t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs)
  1644  	}
  1645  }
  1646  
  1647  func TestCGROUPPrivate(t *testing.T) {
  1648  	if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
  1649  		t.Skip("Test requires cgroupns.")
  1650  	}
  1651  	if testing.Short() {
  1652  		return
  1653  	}
  1654  
  1655  	l, err := os.Readlink("/proc/1/ns/cgroup")
  1656  	ok(t, err)
  1657  
  1658  	config := newTemplateConfig(t, nil)
  1659  	config.Namespaces.Add(configs.NEWCGROUP, "")
  1660  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/cgroup")
  1661  
  1662  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
  1663  		t.Fatalf("cgroup link should be private to the container but equals host %q %q", actual, l)
  1664  	}
  1665  }
  1666  
  1667  func TestCGROUPHost(t *testing.T) {
  1668  	if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
  1669  		t.Skip("Test requires cgroupns.")
  1670  	}
  1671  	if testing.Short() {
  1672  		return
  1673  	}
  1674  
  1675  	l, err := os.Readlink("/proc/1/ns/cgroup")
  1676  	ok(t, err)
  1677  
  1678  	config := newTemplateConfig(t, nil)
  1679  	buffers := runContainerOk(t, config, "readlink", "/proc/self/ns/cgroup")
  1680  
  1681  	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
  1682  		t.Fatalf("cgroup link not equal to host link %q %q", actual, l)
  1683  	}
  1684  }
  1685  
  1686  func TestFdLeaks(t *testing.T) {
  1687  	testFdLeaks(t, false)
  1688  }
  1689  
  1690  func TestFdLeaksSystemd(t *testing.T) {
  1691  	if !systemd.IsRunningSystemd() {
  1692  		t.Skip("Test requires systemd.")
  1693  	}
  1694  	testFdLeaks(t, true)
  1695  }
  1696  
  1697  func fdList(t *testing.T) []string {
  1698  	procSelfFd, closer := utils.ProcThreadSelf("fd")
  1699  	defer closer()
  1700  
  1701  	fdDir, err := os.Open(procSelfFd)
  1702  	ok(t, err)
  1703  	defer fdDir.Close()
  1704  
  1705  	fds, err := fdDir.Readdirnames(-1)
  1706  	ok(t, err)
  1707  
  1708  	return fds
  1709  }
  1710  
  1711  func testFdLeaks(t *testing.T, systemd bool) {
  1712  	if testing.Short() {
  1713  		return
  1714  	}
  1715  
  1716  	config := newTemplateConfig(t, &tParam{systemd: systemd})
  1717  	// Run a container once to exclude file descriptors that are only
  1718  	// opened once during the process lifetime by the library and are
  1719  	// never closed. Those are not considered leaks.
  1720  	//
  1721  	// Examples of this open-once file descriptors are:
  1722  	//  - /sys/fs/cgroup dirfd opened by prepareOpenat2 in libct/cgroups;
  1723  	//  - dbus connection opened by getConnection in libct/cgroups/systemd.
  1724  	_ = runContainerOk(t, config, "true")
  1725  	fds0 := fdList(t)
  1726  
  1727  	_ = runContainerOk(t, config, "true")
  1728  	fds1 := fdList(t)
  1729  
  1730  	if reflect.DeepEqual(fds0, fds1) {
  1731  		return
  1732  	}
  1733  	// Show the extra opened files.
  1734  
  1735  	excludedPaths := []string{
  1736  		"anon_inode:bpf-prog", // FIXME: see https://github.com/opencontainers/runc/issues/2366#issuecomment-776411392
  1737  	}
  1738  
  1739  	count := 0
  1740  
  1741  	procSelfFd, closer := utils.ProcThreadSelf("fd/")
  1742  	defer closer()
  1743  
  1744  next_fd:
  1745  	for _, fd1 := range fds1 {
  1746  		for _, fd0 := range fds0 {
  1747  			if fd0 == fd1 {
  1748  				continue next_fd
  1749  			}
  1750  		}
  1751  		dst, _ := os.Readlink(filepath.Join(procSelfFd, fd1))
  1752  		for _, ex := range excludedPaths {
  1753  			if ex == dst {
  1754  				continue next_fd
  1755  			}
  1756  		}
  1757  
  1758  		count++
  1759  		t.Logf("extra fd %s -> %s", fd1, dst)
  1760  	}
  1761  	if count > 0 {
  1762  		t.Fatalf("found %d extra fds after container.Run", count)
  1763  	}
  1764  }
  1765  
  1766  // Test that a container using user namespaces is able to bind mount a folder
  1767  // that does not have permissions for group/others.
  1768  func TestBindMountAndUser(t *testing.T) {
  1769  	if _, err := os.Stat("/proc/self/ns/user"); errors.Is(err, os.ErrNotExist) {
  1770  		t.Skip("userns is unsupported")
  1771  	}
  1772  
  1773  	if testing.Short() {
  1774  		return
  1775  	}
  1776  
  1777  	temphost := t.TempDir()
  1778  	dirhost := filepath.Join(temphost, "inaccessible", "dir")
  1779  
  1780  	err := os.MkdirAll(dirhost, 0o755)
  1781  	ok(t, err)
  1782  
  1783  	err = os.WriteFile(filepath.Join(dirhost, "foo.txt"), []byte("Hello"), 0o755)
  1784  	ok(t, err)
  1785  
  1786  	// Make this dir inaccessible to "group,others".
  1787  	err = os.Chmod(filepath.Join(temphost, "inaccessible"), 0o700)
  1788  	ok(t, err)
  1789  
  1790  	config := newTemplateConfig(t, &tParam{
  1791  		userns: true,
  1792  	})
  1793  
  1794  	// Set HostID to 1000 to avoid DAC_OVERRIDE bypassing the purpose of this test.
  1795  	config.UIDMappings[0].HostID = 1000
  1796  	config.GIDMappings[0].HostID = 1000
  1797  
  1798  	// Set the owner of rootfs to the effective IDs in the host to avoid errors
  1799  	// while creating the folders to perform the mounts.
  1800  	err = os.Chown(config.Rootfs, 1000, 1000)
  1801  	ok(t, err)
  1802  
  1803  	config.Mounts = append(config.Mounts, &configs.Mount{
  1804  		Source:      dirhost,
  1805  		Destination: "/tmp/mnt1cont",
  1806  		Device:      "bind",
  1807  		Flags:       unix.MS_BIND | unix.MS_REC,
  1808  	})
  1809  
  1810  	container, err := newContainer(t, config)
  1811  	ok(t, err)
  1812  	defer container.Destroy() //nolint: errcheck
  1813  
  1814  	var stdout bytes.Buffer
  1815  
  1816  	pconfig := libcontainer.Process{
  1817  		Cwd:    "/",
  1818  		Args:   []string{"sh", "-c", "stat /tmp/mnt1cont/foo.txt"},
  1819  		Env:    standardEnvironment,
  1820  		Stdout: &stdout,
  1821  		Init:   true,
  1822  	}
  1823  	err = container.Run(&pconfig)
  1824  	ok(t, err)
  1825  
  1826  	waitProcess(&pconfig, t)
  1827  }