github.com/criyle/go-sandbox@v0.10.3/pkg/forkexec/runner_linux.go (about)

     1  package forkexec
     2  
     3  import (
     4  	"syscall"
     5  
     6  	"github.com/criyle/go-sandbox/pkg/mount"
     7  	"github.com/criyle/go-sandbox/pkg/rlimit"
     8  )
     9  
    10  // Runner is the configuration for a child process, including the exec path,
    11  // argv and resource limits. It can create a tracee for a ptrace-based tracer.
    12  // It can also create an unshared process in another namespace.
    13  type Runner struct {
    14  	// Args and Env are the argv and env for the execve syscall of the child process.
    15  	Args []string
    16  	Env  []string
    17  
    18  	// ExecFile is the exec fd; if it is defined, fd_execve is called at the end.
    19  	ExecFile uintptr
    20  
    21  	// RLimits are the POSIX resource limits set by setrlimit.
    22  	RLimits []rlimit.RLimit
    23  
    24  	// Files is the file descriptor map for the new process, covering fds 0 to len(Files) - 1.
    25  	Files []uintptr
    26  
    27  	// WorkDir is the work path set by chdir(dir) (current working directory of the child).
    28  	// If PivotRoot is defined, the chdir is executed after changing to the new root.
    29  	WorkDir string
    30  
    31  	// Seccomp is the seccomp syscall filter applied to the child.
    32  	Seccomp *syscall.SockFprog
    33  
    34  	// CloneFlags are the unshare flags used to create Linux namespaces. They are effective
    35  	// when cloning the child, since the unshare syscall does not join the new pid group.
    36  	CloneFlags uintptr
    37  
    38  	// Mounts defines the mount syscalls performed after unsharing the mount namespace.
    39  	// They need CAP_SYS_ADMIN inside the namespace (e.g. by unsharing the user namespace).
    40  	// If PivotRoot is provided, relative targets are better for chdir-mount meta,
    41  	// and the pivot root is mounted as tmpfs before any of these mounts.
    42  	Mounts []mount.SyscallParams
    43  
    44  	// PivotRoot defines a read-only new root used after unsharing the mount namespace.
    45  	// It should be a directory given as an absolute path and should be used with Mounts.
    46  	// Call path:
    47  	// mount("tmpfs", root, "tmpfs", 0, nil)
    48  	// chdir(root)
    49  	// [do mounts]
    50  	// mkdir("old_root")
    51  	// pivot_root(root, "old_root")
    52  	// umount("old_root", MNT_DETACH)
    53  	// rmdir("old_root")
    54  	// mount("tmpfs", "/", "tmpfs", MS_BIND | MS_REMOUNT | MS_RDONLY | MS_NOATIME | MS_NOSUID, nil)
    55  	PivotRoot string
    56  
    57  	// HostName and DomainName are set after unsharing the UTS & user namespaces (CAP_SYS_ADMIN).
    58  	HostName, DomainName string
    59  
    60  	// UIDMappings / GIDMappings are for unshared user namespaces; no-op if the mapping is nil.
    61  	UIDMappings []syscall.SysProcIDMap
    62  	GIDMappings []syscall.SysProcIDMap
    63  
    64  	// Credential holds the user and group identities to be assumed
    65  	// by a child process started by StartProcess.
    66  	Credential *syscall.Credential
    67  
    68  	// SyncFunc: parent and child process sync status through a socket pair.
    69  	// SyncFunc is invoked with the child pid. If SyncFunc returns an error, the
    70  	// parent signals the child to stop and reports the error.
    71  	// SyncFunc is called right before execve, so it can track CPU usage more accurately.
    72  	SyncFunc func(int) error
    73  
    74  	// Ptrace makes the child process call ptrace(PTRACE_TRACEME).
    75  	// runtime.LockOSThread is required for the tracer to call ptrace syscalls.
    76  	Ptrace bool
    77  
    78  	// NoNewPrivs calls prctl(PR_SET_NO_NEW_PRIVS) to disable calls to setuid processes.
    79  	// It is automatically enabled when a seccomp filter is provided. NOTE(review): this comment used to say the value is set "to 0"; prctl(2) documents 1 as the enabling value - confirm.
    80  	NoNewPrivs bool
    81  
    82  	// StopBeforeSeccomp makes the child call kill(getpid(), SIGSTOP) and wait for the tracer
    83  	// to continue it, right before the calls to seccomp. It is automatically enabled when both
    84  	// a seccomp filter and ptrace are provided, since kill might not be available after
    85  	// seccomp and execve might be traced by ptrace.
    86  	// It cannot stop after seccomp since kill might not be allowed by the seccomp filter.
    87  	StopBeforeSeccomp bool
    88  
    89  	// GIDMappingsEnableSetgroups allows / disallows the setgroups syscall.
    90  	// It is denied if GIDMappings is nil.
    91  	GIDMappingsEnableSetgroups bool
    92  
    93  	// DropCaps calls cap_set(self, 0) to drop all capabilities from the
    94  	// effective, permitted and inheritable capability sets before execve.
    95  	// It should avoid calls to set ambient capabilities.
    96  	DropCaps bool
    97  
    98  	// UnshareCgroupAfterSync specifies whether to unshare the cgroup namespace after
    99  	// sync (the SyncFunc might add the child to the cgroup).
   100  	UnshareCgroupAfterSync bool
   101  
   102  	// CTTY specifies whether to set fd 0 as the controlling TTY.
   103  	CTTY bool
   104  }