github.com/criyle/go-sandbox@v0.10.3/cmd/runprog/main_linux.go (about) 1 // Command runprog executes program defined restricted environment including seccomp-ptraced, namespaced and containerized. 2 package main 3 4 import ( 5 "context" 6 "errors" 7 "flag" 8 "fmt" 9 "io" 10 "os" 11 "os/signal" 12 "sync/atomic" 13 "syscall" 14 "time" 15 16 "github.com/criyle/go-sandbox/cmd/runprog/config" 17 "github.com/criyle/go-sandbox/container" 18 "github.com/criyle/go-sandbox/pkg/cgroup" 19 "github.com/criyle/go-sandbox/pkg/forkexec" 20 "github.com/criyle/go-sandbox/pkg/memfd" 21 "github.com/criyle/go-sandbox/pkg/mount" 22 "github.com/criyle/go-sandbox/pkg/rlimit" 23 "github.com/criyle/go-sandbox/pkg/seccomp" 24 "github.com/criyle/go-sandbox/pkg/seccomp/libseccomp" 25 "github.com/criyle/go-sandbox/runner" 26 "github.com/criyle/go-sandbox/runner/ptrace" 27 "github.com/criyle/go-sandbox/runner/ptrace/filehandler" 28 "github.com/criyle/go-sandbox/runner/unshare" 29 "golang.org/x/sys/unix" 30 ) 31 32 var ( 33 addReadable, addWritable, addRawReadable, addRawWritable arrayFlags 34 allowProc, unsafe, showDetails, useCGroup, memfile, cred, nucg bool 35 timeLimit, realTimeLimit, memoryLimit, outputLimit, stackLimit uint64 36 inputFileName, outputFileName, errorFileName, workPath, runt string 37 38 pType, result string 39 args []string 40 ) 41 42 // container init 43 func init() { 44 container.Init() 45 } 46 47 func main() { 48 flag.Usage = printUsage 49 flag.Uint64Var(&timeLimit, "tl", 1, "Set time limit (in second)") 50 flag.Uint64Var(&realTimeLimit, "rtl", 0, "Set real time limit (in second)") 51 flag.Uint64Var(&memoryLimit, "ml", 256, "Set memory limit (in mb)") 52 flag.Uint64Var(&outputLimit, "ol", 64, "Set output limit (in mb)") 53 flag.Uint64Var(&stackLimit, "sl", 1024, "Set stack limit (in mb)") 54 flag.StringVar(&inputFileName, "in", "", "Set input file name") 55 flag.StringVar(&outputFileName, "out", "", "Set output file name") 56 flag.StringVar(&errorFileName, "err", "", "Set error file name") 57 flag.StringVar(&workPath, "work-path", "", "Set the work path of the program") 58 flag.StringVar(&pType, "type", "default", "Set the program type (for some program such as python)") 59 flag.StringVar(&result, "res", "stdout", "Set the file name for output the result") 60 flag.Var(&addReadable, "add-readable", "Add a readable file") 61 flag.Var(&addWritable, "add-writable", "Add a writable file") 62 flag.BoolVar(&unsafe, "unsafe", false, "Don't check dangerous syscalls") 63 flag.BoolVar(&showDetails, "show-trace-details", false, "Show trace details") 64 flag.BoolVar(&allowProc, "allow-proc", false, "Allow fork, exec... etc.") 65 flag.Var(&addRawReadable, "add-readable-raw", "Add a readable file (don't transform to its real path)") 66 flag.Var(&addRawWritable, "add-writable-raw", "Add a writable file (don't transform to its real path)") 67 flag.BoolVar(&useCGroup, "cgroup", false, "Use cgroup to colloct resource usage") 68 flag.BoolVar(&memfile, "memfd", false, "Use memfd as exec file") 69 flag.StringVar(&runt, "runner", "ptrace", "Runner for the program (ptrace, ns, container)") 70 flag.BoolVar(&cred, "cred", false, "Generate credential for containers (uid=10000)") 71 flag.BoolVar(&nucg, "nucg", false, "don't unshare cgroup") 72 flag.Parse() 73 74 args = flag.Args() 75 if len(args) == 0 { 76 printUsage() 77 } 78 79 if realTimeLimit < timeLimit { 80 realTimeLimit = timeLimit + 2 81 } 82 if stackLimit > memoryLimit { 83 stackLimit = memoryLimit 84 } 85 if workPath == "" { 86 workPath, _ = os.Getwd() 87 } 88 89 var ( 90 f *os.File 91 err error 92 ) 93 if result == "stdout" { 94 f = os.Stdout 95 } else if result == "stderr" { 96 f = os.Stderr 97 } else { 98 f, err = os.Create(result) 99 if err != nil { 100 debug("Failed to open result file:", err) 101 return 102 } 103 defer f.Close() 104 } 105 106 rt, err := start() 107 if rt == nil { 108 rt = &runner.Result{ 109 Status: runner.StatusRunnerError, 110 } 111 } 112 if err == nil && rt.Status != runner.StatusNormal { 113 err = rt.Status 114 } 115 debug("setupTime: ", rt.SetUpTime) 116 debug("runningTime: ", rt.RunningTime) 117 if err != nil { 118 debug(err) 119 c, ok := err.(runner.Status) 120 if !ok { 121 c = runner.StatusRunnerError 122 } 123 // Handle fatal error from trace 124 fmt.Fprintf(f, "%d %d %d %d\n", getStatus(c), 125 int(rt.Time.Round(time.Millisecond)/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus) 126 if c == runner.StatusRunnerError { 127 os.Exit(1) 128 } 129 } else { 130 fmt.Fprintf(f, "%d %d %d %d\n", 0, 131 int(rt.Time.Round(time.Millisecond)/time.Millisecond), uint64(rt.Memory)>>10, rt.ExitStatus) 132 } 133 } 134 135 type containerRunner struct { 136 container.Environment 137 container.ExecveParam 138 } 139 140 func (r *containerRunner) Run(c context.Context) runner.Result { 141 return r.Environment.Execve(c, r.ExecveParam) 142 } 143 144 func start() (*runner.Result, error) { 145 var ( 146 r runner.Runner 147 cg cgroup.Cgroup 148 err error 149 execFile uintptr 150 rt runner.Result 151 ) 152 153 addRead := filehandler.GetExtraSet(addReadable, addRawReadable) 154 addWrite := filehandler.GetExtraSet(addWritable, addRawWritable) 155 args, allow, trace, h := config.GetConf(pType, workPath, args, addRead, addWrite, allowProc) 156 157 mb := mount.NewBuilder(). 158 // basic exec and lib 159 WithBind("/bin", "bin", true). 160 WithBind("/lib", "lib", true). 161 WithBind("/lib64", "lib64", true). 162 WithBind("/usr", "usr", true). 163 // java wants /proc/self/exe as it need relative path for lib 164 // however, /proc gives interface like /proc/1/fd/3 .. 165 // it is fine since open that file will be a EPERM 166 // changing the fs uid and gid would be a good idea 167 WithProc(). 168 // some compiler have multiple version 169 WithBind("/etc/alternatives", "etc/alternatives", true). 170 // fpc wants /etc/fpc.cfg 171 WithBind("/etc/fpc.cfg", "etc/fpc.cfg", true). 172 // go wants /dev/null 173 WithBind("/dev/null", "dev/null", false). 174 // ghc wants /var/lib/ghc 175 WithBind("/var/lib/ghc", "var/lib/ghc", true). 176 // work dir 177 WithTmpfs("w", "size=8m,nr_inodes=4k"). 178 // tmp dir 179 WithTmpfs("tmp", "size=8m,nr_inodes=4k"). 180 FilterNotExist() 181 182 mt, err := mb.FilterNotExist().Build() 183 if err != nil { 184 return nil, err 185 } 186 187 if useCGroup { 188 t := cgroup.DetectType() 189 if t == cgroup.TypeV2 { 190 cgroup.EnableV2Nesting() 191 } 192 ct, err := cgroup.GetAvailableController() 193 if err != nil { 194 return nil, err 195 } 196 b, err := cgroup.New("runprog", ct) 197 if err != nil { 198 return nil, err 199 } 200 debug(b) 201 cg, err = b.Random("runprog") 202 if err != nil { 203 return nil, err 204 } 205 defer cg.Destroy() 206 if err = cg.SetMemoryLimit(memoryLimit << 20); err != nil { 207 return nil, err 208 } 209 } 210 211 syncFunc := func(pid int) error { 212 if cg != nil { 213 if err := cg.AddProc(pid); err != nil { 214 return err 215 } 216 } 217 return nil 218 } 219 220 if memfile { 221 fin, err := os.Open(args[0]) 222 if err != nil { 223 return nil, fmt.Errorf("failed to open args[0]: %v", err) 224 } 225 execf, err := memfd.DupToMemfd("run_program", fin) 226 if err != nil { 227 return nil, fmt.Errorf("dup to memfd failed: %v", err) 228 } 229 fin.Close() 230 defer execf.Close() 231 execFile = execf.Fd() 232 debug("memfd: ", execFile) 233 } 234 235 // open input / output / err files 236 files, err := prepareFiles(inputFileName, outputFileName, errorFileName) 237 if err != nil { 238 return nil, fmt.Errorf("failed to prepare files: %v", err) 239 } 240 defer closeFiles(files) 241 242 // if not defined, then use the original value 243 fds := make([]uintptr, len(files)) 244 for i, f := range files { 245 if f != nil { 246 fds[i] = f.Fd() 247 } else { 248 fds[i] = uintptr(i) 249 } 250 } 251 252 rlims := rlimit.RLimits{ 253 CPU: timeLimit, 254 CPUHard: realTimeLimit, 255 FileSize: outputLimit << 20, 256 Stack: stackLimit << 20, 257 Data: memoryLimit << 20, 258 OpenFile: 256, 259 DisableCore: true, 260 } 261 debug("rlimit: ", rlims) 262 263 actionDefault := libseccomp.ActionKill 264 if showDetails { 265 actionDefault = libseccomp.ActionTrace 266 } 267 if runt != "ptrace" { 268 allow = append(allow, trace...) 269 trace = nil 270 } 271 builder := libseccomp.Builder{ 272 Allow: allow, 273 Trace: trace, 274 Default: actionDefault, 275 } 276 // do not build filter for container unsafe since seccomp is not compatible with aarch64 syscalls 277 var filter seccomp.Filter 278 if !unsafe || runt != "container" { 279 filter, err = builder.Build() 280 if err != nil { 281 return nil, fmt.Errorf("failed to create seccomp filter %v", err) 282 } 283 } 284 285 limit := runner.Limit{ 286 TimeLimit: time.Duration(timeLimit) * time.Second, 287 MemoryLimit: runner.Size(memoryLimit << 20), 288 } 289 290 if runt == "container" { 291 var credG container.CredGenerator 292 if cred { 293 credG = newCredGen() 294 } 295 var stderr io.Writer 296 if showDetails { 297 stderr = os.Stderr 298 } 299 300 cloneFlag := forkexec.UnshareFlags 301 if nucg { 302 cloneFlag &= ^unix.CLONE_NEWCGROUP 303 } 304 305 b := container.Builder{ 306 TmpRoot: "dm", 307 Mounts: mb.Mounts, 308 Stderr: stderr, 309 CredGenerator: credG, 310 CloneFlags: uintptr(cloneFlag), 311 } 312 313 m, err := b.Build() 314 if err != nil { 315 return nil, fmt.Errorf("failed to new container: %v", err) 316 } 317 defer m.Destroy() 318 err = m.Ping() 319 if err != nil { 320 return nil, fmt.Errorf("failed to ping container: %v", err) 321 } 322 if unsafe { 323 filter = nil 324 } 325 r = &containerRunner{ 326 Environment: m, 327 ExecveParam: container.ExecveParam{ 328 Args: args, 329 Env: []string{pathEnv}, 330 Files: fds, 331 ExecFile: execFile, 332 RLimits: rlims.PrepareRLimit(), 333 Seccomp: filter, 334 SyncFunc: syncFunc, 335 }, 336 } 337 } else if runt == "ns" { 338 root, err := os.MkdirTemp("", "ns") 339 if err != nil { 340 return nil, fmt.Errorf("cannot make temp root for new namespace") 341 } 342 defer os.RemoveAll(root) 343 r = &unshare.Runner{ 344 Args: args, 345 Env: []string{pathEnv}, 346 ExecFile: execFile, 347 WorkDir: "/w", 348 Files: fds, 349 RLimits: rlims.PrepareRLimit(), 350 Limit: limit, 351 Seccomp: filter, 352 Root: root, 353 Mounts: mt, 354 ShowDetails: showDetails, 355 SyncFunc: syncFunc, 356 HostName: "run_program", 357 DomainName: "run_program", 358 } 359 } else if runt == "ptrace" { 360 r = &ptrace.Runner{ 361 Args: args, 362 Env: []string{pathEnv}, 363 ExecFile: execFile, 364 WorkDir: workPath, 365 RLimits: rlims.PrepareRLimit(), 366 Limit: limit, 367 Files: fds, 368 Seccomp: filter, 369 ShowDetails: showDetails, 370 Unsafe: unsafe, 371 Handler: h, 372 SyncFunc: syncFunc, 373 } 374 } else { 375 return nil, fmt.Errorf("invalid runner type: %s", runt) 376 } 377 378 // gracefully shutdown 379 sig := make(chan os.Signal, 1) 380 signal.Notify(sig, os.Interrupt) 381 382 // Run tracer 383 sTime := time.Now() 384 c, cancel := context.WithTimeout(context.Background(), time.Duration(int64(realTimeLimit)*int64(time.Second))) 385 defer cancel() 386 387 s := make(chan runner.Result, 1) 388 go func() { 389 s <- r.Run(c) 390 }() 391 rTime := time.Now() 392 393 select { 394 case <-sig: 395 cancel() 396 rt = <-s 397 rt.Status = runner.StatusRunnerError 398 399 case rt = <-s: 400 } 401 eTime := time.Now() 402 403 if rt.SetUpTime == 0 { 404 rt.SetUpTime = rTime.Sub(sTime) 405 rt.RunningTime = eTime.Sub(rTime) 406 } 407 408 debug("results:", rt, err) 409 410 if useCGroup { 411 cpu, err := cg.CPUUsage() 412 if err != nil { 413 return nil, fmt.Errorf("cgroup cpu: %v", err) 414 } 415 // max memory usage may not exist in cgroup v2 416 memory, err := cg.MemoryMaxUsage() 417 if err != nil && !errors.Is(err, os.ErrNotExist) { 418 return nil, fmt.Errorf("cgroup memory: %v", err) 419 } 420 debug("cgroup: cpu: ", cpu, " memory: ", memory) 421 rt.Time = time.Duration(cpu) 422 if memory > 0 { 423 rt.Memory = runner.Size(memory) 424 } 425 debug("cgroup:", rt) 426 } 427 return &rt, nil 428 } 429 430 type credGen struct { 431 cur uint32 432 } 433 434 func newCredGen() *credGen { 435 return &credGen{cur: 10000} 436 } 437 438 func (c *credGen) Get() syscall.Credential { 439 n := atomic.AddUint32(&c.cur, 1) 440 return syscall.Credential{ 441 Uid: n, 442 Gid: n, 443 } 444 }