github.com/criyle/go-sandbox@v0.10.3/cmd/runprog/main_linux.go (about)

// Command runprog executes a program in a restricted environment: seccomp-ptraced, namespaced or containerized.
     2  package main
     3  
     4  import (
     5  	"context"
     6  	"errors"
     7  	"flag"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"os/signal"
    12  	"sync/atomic"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/criyle/go-sandbox/cmd/runprog/config"
    17  	"github.com/criyle/go-sandbox/container"
    18  	"github.com/criyle/go-sandbox/pkg/cgroup"
    19  	"github.com/criyle/go-sandbox/pkg/forkexec"
    20  	"github.com/criyle/go-sandbox/pkg/memfd"
    21  	"github.com/criyle/go-sandbox/pkg/mount"
    22  	"github.com/criyle/go-sandbox/pkg/rlimit"
    23  	"github.com/criyle/go-sandbox/pkg/seccomp"
    24  	"github.com/criyle/go-sandbox/pkg/seccomp/libseccomp"
    25  	"github.com/criyle/go-sandbox/runner"
    26  	"github.com/criyle/go-sandbox/runner/ptrace"
    27  	"github.com/criyle/go-sandbox/runner/ptrace/filehandler"
    28  	"github.com/criyle/go-sandbox/runner/unshare"
    29  	"golang.org/x/sys/unix"
    30  )
    31  
    32  var (
    33  	addReadable, addWritable, addRawReadable, addRawWritable       arrayFlags
    34  	allowProc, unsafe, showDetails, useCGroup, memfile, cred, nucg bool
    35  	timeLimit, realTimeLimit, memoryLimit, outputLimit, stackLimit uint64
    36  	inputFileName, outputFileName, errorFileName, workPath, runt   string
    37  
    38  	pType, result string
    39  	args          []string
    40  )
    41  
    42  // container init
    43  func init() {
    44  	container.Init()
    45  }
    46  
    47  func main() {
    48  	flag.Usage = printUsage
    49  	flag.Uint64Var(&timeLimit, "tl", 1, "Set time limit (in second)")
    50  	flag.Uint64Var(&realTimeLimit, "rtl", 0, "Set real time limit (in second)")
    51  	flag.Uint64Var(&memoryLimit, "ml", 256, "Set memory limit (in mb)")
    52  	flag.Uint64Var(&outputLimit, "ol", 64, "Set output limit (in mb)")
    53  	flag.Uint64Var(&stackLimit, "sl", 1024, "Set stack limit (in mb)")
    54  	flag.StringVar(&inputFileName, "in", "", "Set input file name")
    55  	flag.StringVar(&outputFileName, "out", "", "Set output file name")
    56  	flag.StringVar(&errorFileName, "err", "", "Set error file name")
    57  	flag.StringVar(&workPath, "work-path", "", "Set the work path of the program")
    58  	flag.StringVar(&pType, "type", "default", "Set the program type (for some program such as python)")
    59  	flag.StringVar(&result, "res", "stdout", "Set the file name for output the result")
    60  	flag.Var(&addReadable, "add-readable", "Add a readable file")
    61  	flag.Var(&addWritable, "add-writable", "Add a writable file")
    62  	flag.BoolVar(&unsafe, "unsafe", false, "Don't check dangerous syscalls")
    63  	flag.BoolVar(&showDetails, "show-trace-details", false, "Show trace details")
    64  	flag.BoolVar(&allowProc, "allow-proc", false, "Allow fork, exec... etc.")
    65  	flag.Var(&addRawReadable, "add-readable-raw", "Add a readable file (don't transform to its real path)")
    66  	flag.Var(&addRawWritable, "add-writable-raw", "Add a writable file (don't transform to its real path)")
    67  	flag.BoolVar(&useCGroup, "cgroup", false, "Use cgroup to colloct resource usage")
    68  	flag.BoolVar(&memfile, "memfd", false, "Use memfd as exec file")
    69  	flag.StringVar(&runt, "runner", "ptrace", "Runner for the program (ptrace, ns, container)")
    70  	flag.BoolVar(&cred, "cred", false, "Generate credential for containers (uid=10000)")
    71  	flag.BoolVar(&nucg, "nucg", false, "don't unshare cgroup")
    72  	flag.Parse()
    73  
    74  	args = flag.Args()
    75  	if len(args) == 0 {
    76  		printUsage()
    77  	}
    78  
    79  	if realTimeLimit < timeLimit {
    80  		realTimeLimit = timeLimit + 2
    81  	}
    82  	if stackLimit > memoryLimit {
    83  		stackLimit = memoryLimit
    84  	}
    85  	if workPath == "" {
    86  		workPath, _ = os.Getwd()
    87  	}
    88  
    89  	var (
    90  		f   *os.File
    91  		err error
    92  	)
    93  	if result == "stdout" {
    94  		f = os.Stdout
    95  	} else if result == "stderr" {
    96  		f = os.Stderr
    97  	} else {
    98  		f, err = os.Create(result)
    99  		if err != nil {
   100  			debug("Failed to open result file:", err)
   101  			return
   102  		}
   103  		defer f.Close()
   104  	}
   105  
   106  	rt, err := start()
   107  	if rt == nil {
   108  		rt = &runner.Result{
   109  			Status: runner.StatusRunnerError,
   110  		}
   111  	}
   112  	if err == nil && rt.Status != runner.StatusNormal {
   113  		err = rt.Status
   114  	}
   115  	debug("setupTime: ", rt.SetUpTime)
   116  	debug("runningTime: ", rt.RunningTime)
   117  	if err != nil {
   118  		debug(err)
   119  		c, ok := err.(runner.Status)
   120  		if !ok {
   121  			c = runner.StatusRunnerError
   122  		}
   123  		// Handle fatal error from trace
   124  		fmt.Fprintf(f, "%d %d %d %d\n", getStatus(c),
   125  			int(rt.Time.Round(time.Millisecond)/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus)
   126  		if c == runner.StatusRunnerError {
   127  			os.Exit(1)
   128  		}
   129  	} else {
   130  		fmt.Fprintf(f, "%d %d %d %d\n", 0,
   131  			int(rt.Time.Round(time.Millisecond)/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus)
   132  	}
   133  }
   134  
   135  type containerRunner struct {
   136  	container.Environment
   137  	container.ExecveParam
   138  }
   139  
   140  func (r *containerRunner) Run(c context.Context) runner.Result {
   141  	return r.Environment.Execve(c, r.ExecveParam)
   142  }
   143  
   144  func start() (*runner.Result, error) {
   145  	var (
   146  		r        runner.Runner
   147  		cg       cgroup.Cgroup
   148  		err      error
   149  		execFile uintptr
   150  		rt       runner.Result
   151  	)
   152  
   153  	addRead := filehandler.GetExtraSet(addReadable, addRawReadable)
   154  	addWrite := filehandler.GetExtraSet(addWritable, addRawWritable)
   155  	args, allow, trace, h := config.GetConf(pType, workPath, args, addRead, addWrite, allowProc)
   156  
   157  	mb := mount.NewBuilder().
   158  		// basic exec and lib
   159  		WithBind("/bin", "bin", true).
   160  		WithBind("/lib", "lib", true).
   161  		WithBind("/lib64", "lib64", true).
   162  		WithBind("/usr", "usr", true).
   163  		// java wants /proc/self/exe as it need relative path for lib
   164  		// however, /proc gives interface like /proc/1/fd/3 ..
   165  		// it is fine since open that file will be a EPERM
   166  		// changing the fs uid and gid would be a good idea
   167  		WithProc().
   168  		// some compiler have multiple version
   169  		WithBind("/etc/alternatives", "etc/alternatives", true).
   170  		// fpc wants /etc/fpc.cfg
   171  		WithBind("/etc/fpc.cfg", "etc/fpc.cfg", true).
   172  		// go wants /dev/null
   173  		WithBind("/dev/null", "dev/null", false).
   174  		// ghc wants /var/lib/ghc
   175  		WithBind("/var/lib/ghc", "var/lib/ghc", true).
   176  		// work dir
   177  		WithTmpfs("w", "size=8m,nr_inodes=4k").
   178  		// tmp dir
   179  		WithTmpfs("tmp", "size=8m,nr_inodes=4k").
   180  		FilterNotExist()
   181  
   182  	mt, err := mb.FilterNotExist().Build()
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	if useCGroup {
   188  		t := cgroup.DetectType()
   189  		if t == cgroup.TypeV2 {
   190  			cgroup.EnableV2Nesting()
   191  		}
   192  		ct, err := cgroup.GetAvailableController()
   193  		if err != nil {
   194  			return nil, err
   195  		}
   196  		b, err := cgroup.New("runprog", ct)
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		debug(b)
   201  		cg, err = b.Random("runprog")
   202  		if err != nil {
   203  			return nil, err
   204  		}
   205  		defer cg.Destroy()
   206  		if err = cg.SetMemoryLimit(memoryLimit << 20); err != nil {
   207  			return nil, err
   208  		}
   209  	}
   210  
   211  	syncFunc := func(pid int) error {
   212  		if cg != nil {
   213  			if err := cg.AddProc(pid); err != nil {
   214  				return err
   215  			}
   216  		}
   217  		return nil
   218  	}
   219  
   220  	if memfile {
   221  		fin, err := os.Open(args[0])
   222  		if err != nil {
   223  			return nil, fmt.Errorf("failed to open args[0]: %v", err)
   224  		}
   225  		execf, err := memfd.DupToMemfd("run_program", fin)
   226  		if err != nil {
   227  			return nil, fmt.Errorf("dup to memfd failed: %v", err)
   228  		}
   229  		fin.Close()
   230  		defer execf.Close()
   231  		execFile = execf.Fd()
   232  		debug("memfd: ", execFile)
   233  	}
   234  
   235  	// open input / output / err files
   236  	files, err := prepareFiles(inputFileName, outputFileName, errorFileName)
   237  	if err != nil {
   238  		return nil, fmt.Errorf("failed to prepare files: %v", err)
   239  	}
   240  	defer closeFiles(files)
   241  
   242  	// if not defined, then use the original value
   243  	fds := make([]uintptr, len(files))
   244  	for i, f := range files {
   245  		if f != nil {
   246  			fds[i] = f.Fd()
   247  		} else {
   248  			fds[i] = uintptr(i)
   249  		}
   250  	}
   251  
   252  	rlims := rlimit.RLimits{
   253  		CPU:         timeLimit,
   254  		CPUHard:     realTimeLimit,
   255  		FileSize:    outputLimit << 20,
   256  		Stack:       stackLimit << 20,
   257  		Data:        memoryLimit << 20,
   258  		OpenFile:    256,
   259  		DisableCore: true,
   260  	}
   261  	debug("rlimit: ", rlims)
   262  
   263  	actionDefault := libseccomp.ActionKill
   264  	if showDetails {
   265  		actionDefault = libseccomp.ActionTrace
   266  	}
   267  	if runt != "ptrace" {
   268  		allow = append(allow, trace...)
   269  		trace = nil
   270  	}
   271  	builder := libseccomp.Builder{
   272  		Allow:   allow,
   273  		Trace:   trace,
   274  		Default: actionDefault,
   275  	}
   276  	// do not build filter for container unsafe since seccomp is not compatible with aarch64 syscalls
   277  	var filter seccomp.Filter
   278  	if !unsafe || runt != "container" {
   279  		filter, err = builder.Build()
   280  		if err != nil {
   281  			return nil, fmt.Errorf("failed to create seccomp filter %v", err)
   282  		}
   283  	}
   284  
   285  	limit := runner.Limit{
   286  		TimeLimit:   time.Duration(timeLimit) * time.Second,
   287  		MemoryLimit: runner.Size(memoryLimit << 20),
   288  	}
   289  
   290  	if runt == "container" {
   291  		var credG container.CredGenerator
   292  		if cred {
   293  			credG = newCredGen()
   294  		}
   295  		var stderr io.Writer
   296  		if showDetails {
   297  			stderr = os.Stderr
   298  		}
   299  
   300  		cloneFlag := forkexec.UnshareFlags
   301  		if nucg {
   302  			cloneFlag &= ^unix.CLONE_NEWCGROUP
   303  		}
   304  
   305  		b := container.Builder{
   306  			TmpRoot:       "dm",
   307  			Mounts:        mb.Mounts,
   308  			Stderr:        stderr,
   309  			CredGenerator: credG,
   310  			CloneFlags:    uintptr(cloneFlag),
   311  		}
   312  
   313  		m, err := b.Build()
   314  		if err != nil {
   315  			return nil, fmt.Errorf("failed to new container: %v", err)
   316  		}
   317  		defer m.Destroy()
   318  		err = m.Ping()
   319  		if err != nil {
   320  			return nil, fmt.Errorf("failed to ping container: %v", err)
   321  		}
   322  		if unsafe {
   323  			filter = nil
   324  		}
   325  		r = &containerRunner{
   326  			Environment: m,
   327  			ExecveParam: container.ExecveParam{
   328  				Args:     args,
   329  				Env:      []string{pathEnv},
   330  				Files:    fds,
   331  				ExecFile: execFile,
   332  				RLimits:  rlims.PrepareRLimit(),
   333  				Seccomp:  filter,
   334  				SyncFunc: syncFunc,
   335  			},
   336  		}
   337  	} else if runt == "ns" {
   338  		root, err := os.MkdirTemp("", "ns")
   339  		if err != nil {
   340  			return nil, fmt.Errorf("cannot make temp root for new namespace")
   341  		}
   342  		defer os.RemoveAll(root)
   343  		r = &unshare.Runner{
   344  			Args:        args,
   345  			Env:         []string{pathEnv},
   346  			ExecFile:    execFile,
   347  			WorkDir:     "/w",
   348  			Files:       fds,
   349  			RLimits:     rlims.PrepareRLimit(),
   350  			Limit:       limit,
   351  			Seccomp:     filter,
   352  			Root:        root,
   353  			Mounts:      mt,
   354  			ShowDetails: showDetails,
   355  			SyncFunc:    syncFunc,
   356  			HostName:    "run_program",
   357  			DomainName:  "run_program",
   358  		}
   359  	} else if runt == "ptrace" {
   360  		r = &ptrace.Runner{
   361  			Args:        args,
   362  			Env:         []string{pathEnv},
   363  			ExecFile:    execFile,
   364  			WorkDir:     workPath,
   365  			RLimits:     rlims.PrepareRLimit(),
   366  			Limit:       limit,
   367  			Files:       fds,
   368  			Seccomp:     filter,
   369  			ShowDetails: showDetails,
   370  			Unsafe:      unsafe,
   371  			Handler:     h,
   372  			SyncFunc:    syncFunc,
   373  		}
   374  	} else {
   375  		return nil, fmt.Errorf("invalid runner type: %s", runt)
   376  	}
   377  
   378  	// gracefully shutdown
   379  	sig := make(chan os.Signal, 1)
   380  	signal.Notify(sig, os.Interrupt)
   381  
   382  	// Run tracer
   383  	sTime := time.Now()
   384  	c, cancel := context.WithTimeout(context.Background(), time.Duration(int64(realTimeLimit)*int64(time.Second)))
   385  	defer cancel()
   386  
   387  	s := make(chan runner.Result, 1)
   388  	go func() {
   389  		s <- r.Run(c)
   390  	}()
   391  	rTime := time.Now()
   392  
   393  	select {
   394  	case <-sig:
   395  		cancel()
   396  		rt = <-s
   397  		rt.Status = runner.StatusRunnerError
   398  
   399  	case rt = <-s:
   400  	}
   401  	eTime := time.Now()
   402  
   403  	if rt.SetUpTime == 0 {
   404  		rt.SetUpTime = rTime.Sub(sTime)
   405  		rt.RunningTime = eTime.Sub(rTime)
   406  	}
   407  
   408  	debug("results:", rt, err)
   409  
   410  	if useCGroup {
   411  		cpu, err := cg.CPUUsage()
   412  		if err != nil {
   413  			return nil, fmt.Errorf("cgroup cpu: %v", err)
   414  		}
   415  		// max memory usage may not exist in cgroup v2
   416  		memory, err := cg.MemoryMaxUsage()
   417  		if err != nil && !errors.Is(err, os.ErrNotExist) {
   418  			return nil, fmt.Errorf("cgroup memory: %v", err)
   419  		}
   420  		debug("cgroup: cpu: ", cpu, " memory: ", memory)
   421  		rt.Time = time.Duration(cpu)
   422  		if memory > 0 {
   423  			rt.Memory = runner.Size(memory)
   424  		}
   425  		debug("cgroup:", rt)
   426  	}
   427  	return &rt, nil
   428  }
   429  
   430  type credGen struct {
   431  	cur uint32
   432  }
   433  
   434  func newCredGen() *credGen {
   435  	return &credGen{cur: 10000}
   436  }
   437  
   438  func (c *credGen) Get() syscall.Credential {
   439  	n := atomic.AddUint32(&c.cur, 1)
   440  	return syscall.Credential{
   441  		Uid: n,
   442  		Gid: n,
   443  	}
   444  }