github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/strace/strace.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package strace implements the logic to print out the input and the return value
    16  // of each traced syscall.
    17  package strace
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/SagerNet/gvisor/pkg/abi"
    26  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    27  	"github.com/SagerNet/gvisor/pkg/bits"
    28  	"github.com/SagerNet/gvisor/pkg/eventchannel"
    29  	"github.com/SagerNet/gvisor/pkg/marshal/primitive"
    30  	"github.com/SagerNet/gvisor/pkg/seccomp"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    33  	pb "github.com/SagerNet/gvisor/pkg/sentry/strace/strace_go_proto"
    34  	slinux "github.com/SagerNet/gvisor/pkg/sentry/syscalls/linux"
    35  
    36  	"github.com/SagerNet/gvisor/pkg/hostarch"
    37  )
    38  
    39  // DefaultLogMaximumSize is the default LogMaximumSize.
    40  const DefaultLogMaximumSize = 1024
    41  
    42  // LogMaximumSize determines the maximum display size for data blobs (read,
    43  // write, etc.).
    44  var LogMaximumSize uint = DefaultLogMaximumSize
    45  
    46  // EventMaximumSize determines the maximum size for data blobs (read, write,
    47  // etc.) sent over the event channel. Default is 0 because most clients cannot
    48  // do anything useful with binary text dump of byte array arguments.
    49  var EventMaximumSize uint
    50  
// ItimerTypes are the possible itimer types, keyed by their numeric value
// and mapped to the symbolic name printed for an ItimerType argument.
var ItimerTypes = abi.ValueSet{
	linux.ITIMER_REAL:    "ITIMER_REAL",
	linux.ITIMER_VIRTUAL: "ITIMER_VIRTUAL",
	linux.ITIMER_PROF:    "ITIMER_PROF",
}
    57  
    58  func hexNum(num uint64) string {
    59  	return "0x" + strconv.FormatUint(num, 16)
    60  }
    61  
    62  func hexArg(arg arch.SyscallArgument) string {
    63  	return hexNum(arg.Uint64())
    64  }
    65  
    66  func iovecs(t *kernel.Task, addr hostarch.Addr, iovcnt int, printContent bool, maxBytes uint64) string {
    67  	if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
    68  		return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr)
    69  	}
    70  	ars, err := t.CopyInIovecs(addr, iovcnt)
    71  	if err != nil {
    72  		return fmt.Sprintf("%#x (error decoding iovecs: %v)", addr, err)
    73  	}
    74  
    75  	var totalBytes uint64
    76  	var truncated bool
    77  	iovs := make([]string, iovcnt)
    78  	for i := 0; !ars.IsEmpty(); i, ars = i+1, ars.Tail() {
    79  		ar := ars.Head()
    80  		if ar.Length() == 0 || !printContent {
    81  			iovs[i] = fmt.Sprintf("{base=%#x, len=%d}", ar.Start, ar.Length())
    82  			continue
    83  		}
    84  
    85  		size := uint64(ar.Length())
    86  		if truncated || totalBytes+size > maxBytes {
    87  			truncated = true
    88  			size = maxBytes - totalBytes
    89  		} else {
    90  			totalBytes += uint64(ar.Length())
    91  		}
    92  
    93  		b := make([]byte, size)
    94  		amt, err := t.CopyInBytes(ar.Start, b)
    95  		if err != nil {
    96  			iovs[i] = fmt.Sprintf("{base=%#x, len=%d, %q..., error decoding string: %v}", ar.Start, ar.Length(), b[:amt], err)
    97  			continue
    98  		}
    99  
   100  		dot := ""
   101  		if truncated {
   102  			// Indicate truncation.
   103  			dot = "..."
   104  		}
   105  		iovs[i] = fmt.Sprintf("{base=%#x, len=%d, %q%s}", ar.Start, ar.Length(), b[:amt], dot)
   106  	}
   107  
   108  	return fmt.Sprintf("%#x %s", addr, strings.Join(iovs, ", "))
   109  }
   110  
   111  func dump(t *kernel.Task, addr hostarch.Addr, size uint, maximumBlobSize uint) string {
   112  	origSize := size
   113  	if size > maximumBlobSize {
   114  		size = maximumBlobSize
   115  	}
   116  	if size == 0 {
   117  		return ""
   118  	}
   119  
   120  	b := make([]byte, size)
   121  	amt, err := t.CopyInBytes(addr, b)
   122  	if err != nil {
   123  		return fmt.Sprintf("%#x (error decoding string: %s)", addr, err)
   124  	}
   125  
   126  	dot := ""
   127  	if uint(amt) < origSize {
   128  		// ... if we truncated the dump.
   129  		dot = "..."
   130  	}
   131  
   132  	return fmt.Sprintf("%#x %q%s", addr, b[:amt], dot)
   133  }
   134  
   135  func path(t *kernel.Task, addr hostarch.Addr) string {
   136  	if addr == 0 {
   137  		return "<null>"
   138  	}
   139  	path, err := t.CopyInString(addr, linux.PATH_MAX)
   140  	if err != nil {
   141  		return fmt.Sprintf("%#x (error decoding path: %s)", addr, err)
   142  	}
   143  	return fmt.Sprintf("%#x %s", addr, path)
   144  }
   145  
   146  func fd(t *kernel.Task, fd int32) string {
   147  	if kernel.VFS2Enabled {
   148  		return fdVFS2(t, fd)
   149  	}
   150  
   151  	root := t.FSContext().RootDirectory()
   152  	if root != nil {
   153  		defer root.DecRef(t)
   154  	}
   155  
   156  	if fd == linux.AT_FDCWD {
   157  		wd := t.FSContext().WorkingDirectory()
   158  		var name string
   159  		if wd != nil {
   160  			defer wd.DecRef(t)
   161  			name, _ = wd.FullName(root)
   162  		} else {
   163  			name = "(unknown cwd)"
   164  		}
   165  		return fmt.Sprintf("AT_FDCWD %s", name)
   166  	}
   167  
   168  	file := t.GetFile(fd)
   169  	if file == nil {
   170  		// Cast FD to uint64 to avoid printing negative hex.
   171  		return fmt.Sprintf("%#x (bad FD)", uint64(fd))
   172  	}
   173  	defer file.DecRef(t)
   174  
   175  	name, _ := file.Dirent.FullName(root)
   176  	return fmt.Sprintf("%#x %s", fd, name)
   177  }
   178  
   179  func fdVFS2(t *kernel.Task, fd int32) string {
   180  	root := t.FSContext().RootDirectoryVFS2()
   181  	defer root.DecRef(t)
   182  
   183  	vfsObj := root.Mount().Filesystem().VirtualFilesystem()
   184  	if fd == linux.AT_FDCWD {
   185  		wd := t.FSContext().WorkingDirectoryVFS2()
   186  		defer wd.DecRef(t)
   187  
   188  		name, _ := vfsObj.PathnameWithDeleted(t, root, wd)
   189  		return fmt.Sprintf("AT_FDCWD %s", name)
   190  	}
   191  
   192  	file := t.GetFileVFS2(fd)
   193  	if file == nil {
   194  		// Cast FD to uint64 to avoid printing negative hex.
   195  		return fmt.Sprintf("%#x (bad FD)", uint64(fd))
   196  	}
   197  	defer file.DecRef(t)
   198  
   199  	name, _ := vfsObj.PathnameWithDeleted(t, root, file.VirtualDentry())
   200  	return fmt.Sprintf("%#x %s", fd, name)
   201  }
   202  
   203  func fdpair(t *kernel.Task, addr hostarch.Addr) string {
   204  	var fds [2]int32
   205  	_, err := primitive.CopyInt32SliceIn(t, addr, fds[:])
   206  	if err != nil {
   207  		return fmt.Sprintf("%#x (error decoding fds: %s)", addr, err)
   208  	}
   209  
   210  	return fmt.Sprintf("%#x [%d %d]", addr, fds[0], fds[1])
   211  }
   212  
   213  func uname(t *kernel.Task, addr hostarch.Addr) string {
   214  	var u linux.UtsName
   215  	if _, err := u.CopyIn(t, addr); err != nil {
   216  		return fmt.Sprintf("%#x (error decoding utsname: %s)", addr, err)
   217  	}
   218  
   219  	return fmt.Sprintf("%#x %s", addr, u)
   220  }
   221  
   222  func utimensTimespec(t *kernel.Task, addr hostarch.Addr) string {
   223  	if addr == 0 {
   224  		return "null"
   225  	}
   226  
   227  	var tim linux.Timespec
   228  	if _, err := tim.CopyIn(t, addr); err != nil {
   229  		return fmt.Sprintf("%#x (error decoding timespec: %s)", addr, err)
   230  	}
   231  
   232  	var ns string
   233  	switch tim.Nsec {
   234  	case linux.UTIME_NOW:
   235  		ns = "UTIME_NOW"
   236  	case linux.UTIME_OMIT:
   237  		ns = "UTIME_OMIT"
   238  	default:
   239  		ns = fmt.Sprintf("%v", tim.Nsec)
   240  	}
   241  	return fmt.Sprintf("%#x {sec=%v nsec=%s}", addr, tim.Sec, ns)
   242  }
   243  
   244  func timespec(t *kernel.Task, addr hostarch.Addr) string {
   245  	if addr == 0 {
   246  		return "null"
   247  	}
   248  
   249  	var tim linux.Timespec
   250  	if _, err := tim.CopyIn(t, addr); err != nil {
   251  		return fmt.Sprintf("%#x (error decoding timespec: %s)", addr, err)
   252  	}
   253  	return fmt.Sprintf("%#x {sec=%v nsec=%v}", addr, tim.Sec, tim.Nsec)
   254  }
   255  
   256  func timeval(t *kernel.Task, addr hostarch.Addr) string {
   257  	if addr == 0 {
   258  		return "null"
   259  	}
   260  
   261  	var tim linux.Timeval
   262  	if _, err := tim.CopyIn(t, addr); err != nil {
   263  		return fmt.Sprintf("%#x (error decoding timeval: %s)", addr, err)
   264  	}
   265  
   266  	return fmt.Sprintf("%#x {sec=%v usec=%v}", addr, tim.Sec, tim.Usec)
   267  }
   268  
   269  func utimbuf(t *kernel.Task, addr hostarch.Addr) string {
   270  	if addr == 0 {
   271  		return "null"
   272  	}
   273  
   274  	var utim linux.Utime
   275  	if _, err := utim.CopyIn(t, addr); err != nil {
   276  		return fmt.Sprintf("%#x (error decoding utimbuf: %s)", addr, err)
   277  	}
   278  
   279  	return fmt.Sprintf("%#x {actime=%v, modtime=%v}", addr, utim.Actime, utim.Modtime)
   280  }
   281  
   282  func stat(t *kernel.Task, addr hostarch.Addr) string {
   283  	if addr == 0 {
   284  		return "null"
   285  	}
   286  
   287  	var stat linux.Stat
   288  	if _, err := stat.CopyIn(t, addr); err != nil {
   289  		return fmt.Sprintf("%#x (error decoding stat: %s)", addr, err)
   290  	}
   291  	return fmt.Sprintf("%#x {dev=%d, ino=%d, mode=%s, nlink=%d, uid=%d, gid=%d, rdev=%d, size=%d, blksize=%d, blocks=%d, atime=%s, mtime=%s, ctime=%s}", addr, stat.Dev, stat.Ino, linux.FileMode(stat.Mode), stat.Nlink, stat.UID, stat.GID, stat.Rdev, stat.Size, stat.Blksize, stat.Blocks, time.Unix(stat.ATime.Sec, stat.ATime.Nsec), time.Unix(stat.MTime.Sec, stat.MTime.Nsec), time.Unix(stat.CTime.Sec, stat.CTime.Nsec))
   292  }
   293  
   294  func itimerval(t *kernel.Task, addr hostarch.Addr) string {
   295  	if addr == 0 {
   296  		return "null"
   297  	}
   298  
   299  	interval := timeval(t, addr)
   300  	value := timeval(t, addr+hostarch.Addr((*linux.Timeval)(nil).SizeBytes()))
   301  	return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value)
   302  }
   303  
   304  func itimerspec(t *kernel.Task, addr hostarch.Addr) string {
   305  	if addr == 0 {
   306  		return "null"
   307  	}
   308  
   309  	interval := timespec(t, addr)
   310  	value := timespec(t, addr+hostarch.Addr((*linux.Timespec)(nil).SizeBytes()))
   311  	return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value)
   312  }
   313  
   314  func stringVector(t *kernel.Task, addr hostarch.Addr) string {
   315  	vec, err := t.CopyInVector(addr, slinux.ExecMaxElemSize, slinux.ExecMaxTotalSize)
   316  	if err != nil {
   317  		return fmt.Sprintf("%#x {error copying vector: %v}", addr, err)
   318  	}
   319  	s := fmt.Sprintf("%#x [", addr)
   320  	for i, v := range vec {
   321  		if i != 0 {
   322  			s += ", "
   323  		}
   324  		s += fmt.Sprintf("%q", v)
   325  	}
   326  	s += "]"
   327  	return s
   328  }
   329  
   330  func rusage(t *kernel.Task, addr hostarch.Addr) string {
   331  	if addr == 0 {
   332  		return "null"
   333  	}
   334  
   335  	var ru linux.Rusage
   336  	if _, err := ru.CopyIn(t, addr); err != nil {
   337  		return fmt.Sprintf("%#x (error decoding rusage: %s)", addr, err)
   338  	}
   339  	return fmt.Sprintf("%#x %+v", addr, ru)
   340  }
   341  
   342  func capHeader(t *kernel.Task, addr hostarch.Addr) string {
   343  	if addr == 0 {
   344  		return "null"
   345  	}
   346  
   347  	var hdr linux.CapUserHeader
   348  	if _, err := hdr.CopyIn(t, addr); err != nil {
   349  		return fmt.Sprintf("%#x (error decoding header: %s)", addr, err)
   350  	}
   351  
   352  	var version string
   353  	switch hdr.Version {
   354  	case linux.LINUX_CAPABILITY_VERSION_1:
   355  		version = "1"
   356  	case linux.LINUX_CAPABILITY_VERSION_2:
   357  		version = "2"
   358  	case linux.LINUX_CAPABILITY_VERSION_3:
   359  		version = "3"
   360  	default:
   361  		version = strconv.FormatUint(uint64(hdr.Version), 16)
   362  	}
   363  
   364  	return fmt.Sprintf("%#x {Version: %s, Pid: %d}", addr, version, hdr.Pid)
   365  }
   366  
   367  func capData(t *kernel.Task, hdrAddr, dataAddr hostarch.Addr) string {
   368  	if dataAddr == 0 {
   369  		return "null"
   370  	}
   371  
   372  	var hdr linux.CapUserHeader
   373  	if _, err := hdr.CopyIn(t, hdrAddr); err != nil {
   374  		return fmt.Sprintf("%#x (error decoding header: %v)", dataAddr, err)
   375  	}
   376  
   377  	var p, i, e uint64
   378  
   379  	switch hdr.Version {
   380  	case linux.LINUX_CAPABILITY_VERSION_1:
   381  		var data linux.CapUserData
   382  		if _, err := data.CopyIn(t, dataAddr); err != nil {
   383  			return fmt.Sprintf("%#x (error decoding data: %v)", dataAddr, err)
   384  		}
   385  		p = uint64(data.Permitted)
   386  		i = uint64(data.Inheritable)
   387  		e = uint64(data.Effective)
   388  	case linux.LINUX_CAPABILITY_VERSION_2, linux.LINUX_CAPABILITY_VERSION_3:
   389  		var data [2]linux.CapUserData
   390  		if _, err := linux.CopyCapUserDataSliceIn(t, dataAddr, data[:]); err != nil {
   391  			return fmt.Sprintf("%#x (error decoding data: %v)", dataAddr, err)
   392  		}
   393  		p = uint64(data[0].Permitted) | (uint64(data[1].Permitted) << 32)
   394  		i = uint64(data[0].Inheritable) | (uint64(data[1].Inheritable) << 32)
   395  		e = uint64(data[0].Effective) | (uint64(data[1].Effective) << 32)
   396  	default:
   397  		return fmt.Sprintf("%#x (unknown version %d)", dataAddr, hdr.Version)
   398  	}
   399  
   400  	return fmt.Sprintf("%#x {Permitted: %s, Inheritable: %s, Effective: %s}", dataAddr, CapabilityBitset.Parse(p), CapabilityBitset.Parse(i), CapabilityBitset.Parse(e))
   401  }
   402  
// pre fills in the pre-execution arguments for a system call. If an argument
// cannot be interpreted before the system call is executed, then a hex value
// will be used.
//
// Exactly one string is appended per argument that has an entry in i.format,
// so the returned slice has min(len(args), len(i.format)) elements.
// maximumBlobSize bounds how much buffer content is included for formats
// that dump memory.
func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlobSize uint) []string {
	var output []string

	for arg := range args {
		// Arguments beyond the end of the format description are not shown.
		if arg >= len(i.format) {
			break
		}
		// Several formats below also read a neighbouring argument: args[arg+1]
		// for a length or count, args[arg-1] / args[arg-2] for a level, name,
		// family or header. The indices are not checked here.
		switch i.format[arg] {
		case FD:
			output = append(output, fd(t, args[arg].Int()))
		case WriteBuffer:
			output = append(output, dump(t, args[arg].Pointer(), args[arg+1].SizeT(), maximumBlobSize))
		case WriteIOVec:
			output = append(output, iovecs(t, args[arg].Pointer(), int(args[arg+1].Int()), true /* content */, uint64(maximumBlobSize)))
		case IOVec:
			output = append(output, iovecs(t, args[arg].Pointer(), int(args[arg+1].Int()), false /* content */, uint64(maximumBlobSize)))
		case SendMsgHdr:
			output = append(output, msghdr(t, args[arg].Pointer(), true /* content */, uint64(maximumBlobSize)))
		case RecvMsgHdr:
			output = append(output, msghdr(t, args[arg].Pointer(), false /* content */, uint64(maximumBlobSize)))
		case Path:
			output = append(output, path(t, args[arg].Pointer()))
		case ExecveStringVector:
			output = append(output, stringVector(t, args[arg].Pointer()))
		case SetSockOptVal:
			output = append(output, sockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Uint64() /* optLen */, maximumBlobSize))
		case SockOptLevel:
			output = append(output, sockOptLevels.Parse(args[arg].Uint64()))
		case SockOptName:
			output = append(output, sockOptNames[args[arg-1].Uint64() /* level */].Parse(args[arg].Uint64()))
		case SockAddr:
			output = append(output, sockAddr(t, args[arg].Pointer(), uint32(args[arg+1].Uint64())))
		case SockLen:
			output = append(output, sockLenPointer(t, args[arg].Pointer()))
		case SockFamily:
			output = append(output, SocketFamily.Parse(uint64(args[arg].Int())))
		case SockType:
			output = append(output, sockType(args[arg].Int()))
		case SockProtocol:
			output = append(output, sockProtocol(args[arg-2].Int(), args[arg].Int()))
		case SockFlags:
			output = append(output, sockFlags(args[arg].Int()))
		case Timespec:
			output = append(output, timespec(t, args[arg].Pointer()))
		case UTimeTimespec:
			output = append(output, utimensTimespec(t, args[arg].Pointer()))
		case ItimerVal:
			output = append(output, itimerval(t, args[arg].Pointer()))
		case ItimerSpec:
			output = append(output, itimerspec(t, args[arg].Pointer()))
		case Timeval:
			output = append(output, timeval(t, args[arg].Pointer()))
		case Utimbuf:
			output = append(output, utimbuf(t, args[arg].Pointer()))
		case CloneFlags:
			output = append(output, CloneFlagSet.Parse(uint64(args[arg].Uint())))
		case OpenFlags:
			output = append(output, open(uint64(args[arg].Uint())))
		case Mode:
			output = append(output, linux.FileMode(args[arg].ModeT()).String())
		case FutexOp:
			output = append(output, futex(uint64(args[arg].Uint())))
		case PtraceRequest:
			output = append(output, PtraceRequestSet.Parse(args[arg].Uint64()))
		case ItimerType:
			output = append(output, ItimerTypes.Parse(uint64(args[arg].Int())))
		case Signal:
			output = append(output, signalNames.ParseDecimal(args[arg].Uint64()))
		case SignalMaskAction:
			output = append(output, signalMaskActions.Parse(uint64(args[arg].Int())))
		case SigSet:
			output = append(output, sigSet(t, args[arg].Pointer()))
		case SigAction:
			output = append(output, sigAction(t, args[arg].Pointer()))
		case CapHeader:
			output = append(output, capHeader(t, args[arg].Pointer()))
		case CapData:
			output = append(output, capData(t, args[arg-1].Pointer(), args[arg].Pointer()))
		case PollFDs:
			output = append(output, pollFDs(t, args[arg].Pointer(), uint(args[arg+1].Uint()), false))
		case EpollCtlOp:
			output = append(output, epollCtlOps.Parse(uint64(args[arg].Int())))
		case EpollEvent:
			output = append(output, epollEvent(t, args[arg].Pointer()))
		case EpollEvents:
			output = append(output, epollEvents(t, args[arg].Pointer(), 0 /* numEvents */, uint64(maximumBlobSize)))
		case SelectFDSet:
			// The first syscall argument is passed along with the set pointer.
			output = append(output, fdSet(t, int(args[0].Int()), args[arg].Pointer()))
		case MmapProt:
			output = append(output, ProtectionFlagSet.Parse(uint64(args[arg].Uint())))
		case MmapFlags:
			output = append(output, MmapFlagSet.Parse(uint64(args[arg].Uint())))
		case Oct:
			output = append(output, "0o"+strconv.FormatUint(args[arg].Uint64(), 8))
		case Hex:
			fallthrough
		default:
			// Anything without a dedicated formatter is shown as raw hex.
			output = append(output, hexArg(args[arg]))
		}
	}

	return output
}
   510  
// post fills in the post-execution arguments for a system call. This modifies
// the given output slice in place with arguments that may only be interpreted
// after the system call has been executed.
//
// rval is the syscall's return value; it is used as the byte or event count
// for the read-style formats. Entries whose format has no case below are left
// as they were. maximumBlobSize bounds how much buffer content is included.
func (i *SyscallInfo) post(t *kernel.Task, args arch.SyscallArguments, rval uintptr, output []string, maximumBlobSize uint) {
	for arg := range output {
		// Only arguments described by the format can be re-interpreted.
		if arg >= len(i.format) {
			break
		}
		switch i.format[arg] {
		case ReadBuffer:
			output[arg] = dump(t, args[arg].Pointer(), uint(rval), maximumBlobSize)
		case ReadIOVec:
			// Show at most rval bytes, further capped by maximumBlobSize.
			printLength := uint64(rval)
			if printLength > uint64(maximumBlobSize) {
				printLength = uint64(maximumBlobSize)
			}
			output[arg] = iovecs(t, args[arg].Pointer(), int(args[arg+1].Int()), true /* content */, printLength)
		case WriteIOVec, IOVec, WriteBuffer:
			// We already have a big blast from write.
			output[arg] = "..."
		case SendMsgHdr:
			output[arg] = msghdr(t, args[arg].Pointer(), false /* content */, uint64(maximumBlobSize))
		case RecvMsgHdr:
			output[arg] = msghdr(t, args[arg].Pointer(), true /* content */, uint64(maximumBlobSize))
		case PostPath:
			output[arg] = path(t, args[arg].Pointer())
		case PipeFDs:
			output[arg] = fdpair(t, args[arg].Pointer())
		case Uname:
			output[arg] = uname(t, args[arg].Pointer())
		case Stat:
			output[arg] = stat(t, args[arg].Pointer())
		case PostSockAddr:
			output[arg] = postSockAddr(t, args[arg].Pointer(), args[arg+1].Pointer())
		case SockLen:
			output[arg] = sockLenPointer(t, args[arg].Pointer())
		case PostTimespec:
			output[arg] = timespec(t, args[arg].Pointer())
		case PostItimerVal:
			output[arg] = itimerval(t, args[arg].Pointer())
		case PostItimerSpec:
			output[arg] = itimerspec(t, args[arg].Pointer())
		case Timeval:
			output[arg] = timeval(t, args[arg].Pointer())
		case Rusage:
			output[arg] = rusage(t, args[arg].Pointer())
		case PostSigSet:
			output[arg] = sigSet(t, args[arg].Pointer())
		case PostSigAction:
			output[arg] = sigAction(t, args[arg].Pointer())
		case PostCapData:
			output[arg] = capData(t, args[arg-1].Pointer(), args[arg].Pointer())
		case PollFDs:
			output[arg] = pollFDs(t, args[arg].Pointer(), uint(args[arg+1].Uint()), true)
		case EpollEvents:
			output[arg] = epollEvents(t, args[arg].Pointer(), uint64(rval), uint64(maximumBlobSize))
		case GetSockOptVal:
			output[arg] = getSockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Pointer() /* optLen */, maximumBlobSize, rval)
		case SetSockOptVal:
			// No need to print the value again. While it usually
			// isn't, the string version of this arg can be long.
			output[arg] = hexArg(args[arg])
		}
	}
}
   576  
   577  // printEntry prints the given system call entry.
   578  func (i *SyscallInfo) printEnter(t *kernel.Task, args arch.SyscallArguments) []string {
   579  	output := i.pre(t, args, LogMaximumSize)
   580  
   581  	switch len(output) {
   582  	case 0:
   583  		t.Infof("%s E %s()", t.Name(), i.name)
   584  	case 1:
   585  		t.Infof("%s E %s(%s)", t.Name(), i.name,
   586  			output[0])
   587  	case 2:
   588  		t.Infof("%s E %s(%s, %s)", t.Name(), i.name,
   589  			output[0], output[1])
   590  	case 3:
   591  		t.Infof("%s E %s(%s, %s, %s)", t.Name(), i.name,
   592  			output[0], output[1], output[2])
   593  	case 4:
   594  		t.Infof("%s E %s(%s, %s, %s, %s)", t.Name(), i.name,
   595  			output[0], output[1], output[2], output[3])
   596  	case 5:
   597  		t.Infof("%s E %s(%s, %s, %s, %s, %s)", t.Name(), i.name,
   598  			output[0], output[1], output[2], output[3], output[4])
   599  	case 6:
   600  		t.Infof("%s E %s(%s, %s, %s, %s, %s, %s)", t.Name(), i.name,
   601  			output[0], output[1], output[2], output[3], output[4], output[5])
   602  	}
   603  
   604  	return output
   605  }
   606  
   607  // printExit prints the given system call exit.
   608  func (i *SyscallInfo) printExit(t *kernel.Task, elapsed time.Duration, output []string, args arch.SyscallArguments, retval uintptr, err error, errno int) {
   609  	var rval string
   610  	if err == nil {
   611  		// Fill in the output after successful execution.
   612  		i.post(t, args, retval, output, LogMaximumSize)
   613  		rval = fmt.Sprintf("%#x (%v)", retval, elapsed)
   614  	} else {
   615  		rval = fmt.Sprintf("%#x errno=%d (%s) (%v)", retval, errno, err, elapsed)
   616  	}
   617  
   618  	switch len(output) {
   619  	case 0:
   620  		t.Infof("%s X %s() = %s", t.Name(), i.name,
   621  			rval)
   622  	case 1:
   623  		t.Infof("%s X %s(%s) = %s", t.Name(), i.name,
   624  			output[0], rval)
   625  	case 2:
   626  		t.Infof("%s X %s(%s, %s) = %s", t.Name(), i.name,
   627  			output[0], output[1], rval)
   628  	case 3:
   629  		t.Infof("%s X %s(%s, %s, %s) = %s", t.Name(), i.name,
   630  			output[0], output[1], output[2], rval)
   631  	case 4:
   632  		t.Infof("%s X %s(%s, %s, %s, %s) = %s", t.Name(), i.name,
   633  			output[0], output[1], output[2], output[3], rval)
   634  	case 5:
   635  		t.Infof("%s X %s(%s, %s, %s, %s, %s) = %s", t.Name(), i.name,
   636  			output[0], output[1], output[2], output[3], output[4], rval)
   637  	case 6:
   638  		t.Infof("%s X %s(%s, %s, %s, %s, %s, %s) = %s", t.Name(), i.name,
   639  			output[0], output[1], output[2], output[3], output[4], output[5], rval)
   640  	}
   641  }
   642  
   643  // sendEnter sends the syscall enter to event log.
   644  func (i *SyscallInfo) sendEnter(t *kernel.Task, args arch.SyscallArguments) []string {
   645  	output := i.pre(t, args, EventMaximumSize)
   646  
   647  	event := pb.Strace{
   648  		Process:  t.Name(),
   649  		Function: i.name,
   650  		Info: &pb.Strace_Enter{
   651  			Enter: &pb.StraceEnter{},
   652  		},
   653  	}
   654  	for _, arg := range output {
   655  		event.Args = append(event.Args, arg)
   656  	}
   657  	eventchannel.Emit(&event)
   658  
   659  	return output
   660  }
   661  
   662  // sendExit sends the syscall exit to event log.
   663  func (i *SyscallInfo) sendExit(t *kernel.Task, elapsed time.Duration, output []string, args arch.SyscallArguments, rval uintptr, err error, errno int) {
   664  	if err == nil {
   665  		// Fill in the output after successful execution.
   666  		i.post(t, args, rval, output, EventMaximumSize)
   667  	}
   668  
   669  	exit := &pb.StraceExit{
   670  		Return:    fmt.Sprintf("%#x", rval),
   671  		ElapsedNs: elapsed.Nanoseconds(),
   672  	}
   673  	if err != nil {
   674  		exit.Error = err.Error()
   675  		exit.ErrNo = int64(errno)
   676  	}
   677  	event := pb.Strace{
   678  		Process:  t.Name(),
   679  		Function: i.name,
   680  		Info:     &pb.Strace_Exit{Exit: exit},
   681  	}
   682  	for _, arg := range output {
   683  		event.Args = append(event.Args, arg)
   684  	}
   685  	eventchannel.Emit(&event)
   686  }
   687  
// syscallContext is the state created by SyscallEnter and handed back to
// SyscallExit for the same syscall.
type syscallContext struct {
	info        SyscallInfo           // description (name and format) of the traced syscall
	args        arch.SyscallArguments // arguments the syscall was entered with
	start       time.Time             // time recorded at the end of SyscallEnter
	logOutput   []string              // formatted arguments produced for the text log, if enabled
	eventOutput []string              // formatted arguments produced for the event channel, if enabled
	flags       uint32                // strace enable flags passed to SyscallEnter
}
   696  
   697  // SyscallEnter implements kernel.Stracer.SyscallEnter. It logs the syscall
   698  // entry trace.
   699  func (s SyscallMap) SyscallEnter(t *kernel.Task, sysno uintptr, args arch.SyscallArguments, flags uint32) interface{} {
   700  	info, ok := s[sysno]
   701  	if !ok {
   702  		info = SyscallInfo{
   703  			name:   fmt.Sprintf("sys_%d", sysno),
   704  			format: defaultFormat,
   705  		}
   706  	}
   707  
   708  	var output, eventOutput []string
   709  	if bits.IsOn32(flags, kernel.StraceEnableLog) {
   710  		output = info.printEnter(t, args)
   711  	}
   712  	if bits.IsOn32(flags, kernel.StraceEnableEvent) {
   713  		eventOutput = info.sendEnter(t, args)
   714  	}
   715  
   716  	return &syscallContext{
   717  		info:        info,
   718  		args:        args,
   719  		start:       time.Now(),
   720  		logOutput:   output,
   721  		eventOutput: eventOutput,
   722  		flags:       flags,
   723  	}
   724  }
   725  
   726  // SyscallExit implements kernel.Stracer.SyscallExit. It logs the syscall
   727  // exit trace.
   728  func (s SyscallMap) SyscallExit(context interface{}, t *kernel.Task, sysno, rval uintptr, err error) {
   729  	errno := kernel.ExtractErrno(err, int(sysno))
   730  	c := context.(*syscallContext)
   731  
   732  	elapsed := time.Since(c.start)
   733  	if bits.IsOn32(c.flags, kernel.StraceEnableLog) {
   734  		c.info.printExit(t, elapsed, c.logOutput, c.args, rval, err, errno)
   735  	}
   736  	if bits.IsOn32(c.flags, kernel.StraceEnableEvent) {
   737  		c.info.sendExit(t, elapsed, c.eventOutput, c.args, rval, err, errno)
   738  	}
   739  }
   740  
   741  // ConvertToSysnoMap converts the names to a map keyed on the syscall number
   742  // and value set to true.
   743  //
   744  // The map is in a convenient format to pass to SyscallFlagsTable.Enable().
   745  func (s SyscallMap) ConvertToSysnoMap(syscalls []string) (map[uintptr]bool, error) {
   746  	if syscalls == nil {
   747  		// Sentinel: no list.
   748  		return nil, nil
   749  	}
   750  
   751  	l := make(map[uintptr]bool)
   752  	for _, sc := range syscalls {
   753  		// Try to match this system call.
   754  		sysno, ok := s.ConvertToSysno(sc)
   755  		if !ok {
   756  			return nil, fmt.Errorf("syscall %q not found", sc)
   757  		}
   758  		l[sysno] = true
   759  	}
   760  
   761  	// Success.
   762  	return l, nil
   763  }
   764  
   765  // ConvertToSysno converts the name to system call number. Returns false
   766  // if syscall with same name is not found.
   767  func (s SyscallMap) ConvertToSysno(syscall string) (uintptr, bool) {
   768  	for sysno, info := range s {
   769  		if info.name != "" && info.name == syscall {
   770  			return sysno, true
   771  		}
   772  	}
   773  	return 0, false
   774  }
   775  
   776  // Name returns the syscall name.
   777  func (s SyscallMap) Name(sysno uintptr) string {
   778  	if info, ok := s[sysno]; ok {
   779  		return info.name
   780  	}
   781  	return fmt.Sprintf("sys_%d", sysno)
   782  }
   783  
   784  // Initialize prepares all syscall tables for use by this package.
   785  //
   786  // N.B. This is not in an init function because we can't be sure all syscall
   787  // tables are registered with the kernel when init runs.
   788  func Initialize() {
   789  	for _, table := range kernel.SyscallTables() {
   790  		// Is this known?
   791  		sys, ok := Lookup(table.OS, table.Arch)
   792  		if !ok {
   793  			continue
   794  		}
   795  
   796  		table.Stracer = sys
   797  	}
   798  }
   799  
   800  // SinkType defines where to send straces to.
   801  type SinkType uint32
   802  
   803  const (
   804  	// SinkTypeLog sends straces to text log
   805  	SinkTypeLog SinkType = 1 << iota
   806  
   807  	// SinkTypeEvent sends strace to event log
   808  	SinkTypeEvent
   809  )
   810  
   811  func convertToSyscallFlag(sinks SinkType) uint32 {
   812  	ret := uint32(0)
   813  	if bits.IsOn32(uint32(sinks), uint32(SinkTypeLog)) {
   814  		ret |= kernel.StraceEnableLog
   815  	}
   816  	if bits.IsOn32(uint32(sinks), uint32(SinkTypeEvent)) {
   817  		ret |= kernel.StraceEnableEvent
   818  	}
   819  	return ret
   820  }
   821  
   822  // Enable enables the syscalls in whitelist in all syscall tables.
   823  //
   824  // Preconditions: Initialize has been called.
   825  func Enable(whitelist []string, sinks SinkType) error {
   826  	flags := convertToSyscallFlag(sinks)
   827  	for _, table := range kernel.SyscallTables() {
   828  		// Is this known?
   829  		sys, ok := Lookup(table.OS, table.Arch)
   830  		if !ok {
   831  			continue
   832  		}
   833  
   834  		// Convert to a set of system calls numbers.
   835  		wl, err := sys.ConvertToSysnoMap(whitelist)
   836  		if err != nil {
   837  			return err
   838  		}
   839  
   840  		table.FeatureEnable.Enable(flags, wl, true)
   841  	}
   842  
   843  	// Done.
   844  	return nil
   845  }
   846  
   847  // Disable will disable Strace for all system calls and missing syscalls.
   848  //
   849  // Preconditions: Initialize has been called.
   850  func Disable(sinks SinkType) {
   851  	flags := convertToSyscallFlag(sinks)
   852  	for _, table := range kernel.SyscallTables() {
   853  		// Strace will be disabled for all syscalls including missing.
   854  		table.FeatureEnable.Enable(flags, nil, false)
   855  	}
   856  }
   857  
   858  // EnableAll enables all syscalls in all syscall tables.
   859  //
   860  // Preconditions: Initialize has been called.
   861  func EnableAll(sinks SinkType) {
   862  	flags := convertToSyscallFlag(sinks)
   863  	for _, table := range kernel.SyscallTables() {
   864  		// Is this known?
   865  		if _, ok := Lookup(table.OS, table.Arch); !ok {
   866  			continue
   867  		}
   868  
   869  		table.FeatureEnable.EnableAll(flags)
   870  	}
   871  }
   872  
   873  func init() {
   874  	t, ok := Lookup(abi.Host, arch.Host)
   875  	if ok {
   876  		// Provide the native table as the lookup for seccomp
   877  		// debugging. This is best-effort. This is provided this way to
   878  		// avoid dependencies from seccomp to this package.
   879  		seccomp.SyscallName = t.Name
   880  	}
   881  }