github.com/jspc/eggos@v0.5.1-0.20221028160421-556c75c878a5/kernel/syscall.go (about)

     1  package kernel
     2  
     3  import (
     4  	"syscall"
     5  	"unsafe"
     6  
     7  	"github.com/jspc/eggos/drivers/qemu"
     8  	"github.com/jspc/eggos/drivers/uart"
     9  	"github.com/jspc/eggos/kernel/isyscall"
    10  	"github.com/jspc/eggos/kernel/mm"
    11  	"github.com/jspc/eggos/kernel/sys"
    12  	"github.com/jspc/eggos/kernel/trap"
    13  	"github.com/jspc/eggos/log"
    14  	"golang.org/x/sys/unix"
    15  	"gvisor.dev/gvisor/pkg/abi/linux"
    16  	"gvisor.dev/gvisor/pkg/abi/linux/errno"
    17  )
    18  
// Model-specific register numbers and flag bits used to set up the SYSCALL
// instruction and the per-thread FS base.
const (
	_MSR_LSTAR = 0xc0000082 // receives the address of syscallEntry in syscallInit
	_MSR_STAR  = 0xc0000081 // receives the syscall segment selector in syscallInit
	_MSR_FSTAR = 0xc0000084 // flags mask written in syscallInit so IF is cleared on entry

	_MSR_IA32_EFER = 0xc0000080 // read-modify-written in syscallInit to set _EFER_SCE
	_MSR_FS_BASE   = 0xc0000100 // written by sysArchPrctl on ARCH_SET_FS
	_MSR_GS_BASE   = 0xc0000101

	_EFER_SCE = 1 << 0 // Enable SYSCALL.
)
    30  
// Syscall numbers private to this kernel. They are listed in kernelCalls, so
// canForward never forwards them, and doSyscall dispatches each one to the
// handler named below.
const (
	SYS_WAIT_IRQ     = 500 // handled by sysWaitIRQ
	SYS_WAIT_SYSCALL = 501 // handled by sysWaitSyscall
	SYS_FIXED_MMAP   = 502 // handled by sysFixedMmap
	SYS_EPOLL_NOTIFY = 503 // handled by sysEpollNotify
)
    37  
const (
	// Copied from the Go runtime; these are the values readgstatus results
	// are compared against. They must be kept in sync with the runtime
	// version this kernel is built with.
	_Grunning = 2
	_Gsyscall = 3
)
    43  
    44  var (
    45  	bootstrapDone = false
    46  
    47  	kernelCalls = [...]uintptr{
    48  		syscall.SYS_ARCH_PRCTL,
    49  		syscall.SYS_MMAP,
    50  		syscall.SYS_MUNMAP,
    51  		syscall.SYS_CLOCK_GETTIME,
    52  		syscall.SYS_RT_SIGPROCMASK,
    53  		syscall.SYS_SIGALTSTACK,
    54  		syscall.SYS_RT_SIGACTION,
    55  		syscall.SYS_GETTID,
    56  		syscall.SYS_CLONE,
    57  		syscall.SYS_FUTEX,
    58  		syscall.SYS_NANOSLEEP,
    59  		syscall.SYS_SCHED_YIELD,
    60  		syscall.SYS_MADVISE,
    61  		syscall.SYS_EXIT_GROUP,
    62  
    63  		// TODO: real random
    64  		unix.SYS_GETRANDOM,
    65  
    66  		// may removed in the future
    67  		syscall.SYS_EPOLL_CREATE1,
    68  		syscall.SYS_EPOLL_CTL,
    69  		syscall.SYS_EPOLL_WAIT,
    70  		syscall.SYS_EPOLL_PWAIT,
    71  		syscall.SYS_PIPE2,
    72  
    73  		SYS_WAIT_IRQ,
    74  		SYS_WAIT_SYSCALL,
    75  		SYS_FIXED_MMAP,
    76  		SYS_EPOLL_NOTIFY,
    77  	}
    78  )
    79  
// syscallEntry is the entry point whose address syscallInit writes to
// _MSR_LSTAR. It has no Go body here; the implementation lives outside this
// file (presumably in assembly).
//
//go:nosplit
func syscallEntry()
    82  
// getg returns the value that syscallIntr hands to readgstatus, presumably
// the address of the current goroutine's g (TODO confirm against the
// implementation, which is not in this file).
//
//go:nosplit
func getg() uintptr
    85  
// readgstatus is bound to runtime.readgstatus through go:linkname. The
// result is compared against _Grunning in syscallIntr.
//
//go:linkname readgstatus runtime.readgstatus
func readgstatus(uintptr) uint32
    88  
    89  //go:nosplit
    90  func syscallIntr() {
    91  	my := Mythread()
    92  	tf := my.tf
    93  	my.systf = *tf
    94  
    95  	req := tf.SyscallRequest()
    96  	doInKernel := !(bootstrapDone && canForward(&req))
    97  	if doInKernel {
    98  		doSyscall(&req)
    99  		return
   100  	}
   101  
   102  	// use tricks to get whether the current g has p
   103  	status := readgstatus(getg())
   104  	if status != _Grunning {
   105  		forwardCall(&req)
   106  	} else {
   107  		// tf.AX = doForwardSyscall(tf.AX, tf.BX, tf.CX, tf.DX, tf.SI, tf.DI, tf.BP)
   108  		// making all forwarded syscall as blocked syscall, so the syscall task can acquire a P
   109  		// make the caller call blocksyscall, it will call syscallIntr again with syscall status.
   110  		my.systf = *tf
   111  		changeReturnPC(tf, sys.FuncPC(blocksyscall))
   112  		return
   113  	}
   114  }
   115  
   116  //go:nosplit
   117  func canForward(req *isyscall.Request) bool {
   118  	no := req.NO()
   119  	my := Mythread()
   120  	// syscall thread can't call self
   121  	if syscalltask != 0 && my == syscalltask.ptr() {
   122  		return false
   123  	}
   124  
   125  	switch no {
   126  	case syscall.SYS_WRITE:
   127  		// handle panic write
   128  		if req.Arg(0) == 2 {
   129  			return false
   130  		}
   131  		// handle pipe write
   132  		if req.Arg(0) == pipeWriteFd {
   133  			return false
   134  		}
   135  	case syscall.SYS_READ:
   136  		// handle pipe read
   137  		if req.Arg(0) == pipeReadFd {
   138  			return false
   139  		}
   140  	}
   141  
   142  	for i := 0; i < len(kernelCalls); i++ {
   143  		if no == kernelCalls[i] {
   144  			return false
   145  		}
   146  	}
   147  	return true
   148  }
   149  
   150  //go:nosplit
   151  func blocksyscall() {
   152  	tf := &Mythread().systf
   153  	c := tf.SyscallRequest()
   154  	ret, _, errno := syscall.Syscall6(c.NO(), c.Arg(0), c.Arg(1), c.Arg(2),
   155  		c.Arg(3), c.Arg(4), c.Arg(5))
   156  	if errno != 0 {
   157  		sys.SetAX(-uintptr(errno))
   158  	} else {
   159  		sys.SetAX(ret)
   160  	}
   161  }
   162  
   163  //go:nosplit
   164  func panicNosys() {
   165  	req := Mythread().systf.SyscallRequest()
   166  	log.PrintStr("syscall not found:")
   167  	log.PrintStr(syscallName(int(req.NO())))
   168  	log.PrintStr("\n")
   169  	throw("")
   170  }
   171  
   172  //go:nosplit
   173  func doSyscall(req *isyscall.Request) {
   174  	no := req.NO()
   175  	req.SetRet(0)
   176  
   177  	// if no != syscall.SYS_SCHED_YIELD {
   178  	// 	log.PrintStr("call ")
   179  	// 	if int(no) < len(sysnum) {
   180  	// 		log.PrintStr(sysnum[no])
   181  	// 	} else {
   182  	// 		log.PrintHex(no)
   183  	// 	}
   184  	// 	log.PrintStr("\n")
   185  	// }
   186  	switch no {
   187  	case syscall.SYS_ARCH_PRCTL:
   188  		sysArchPrctl(req)
   189  	case syscall.SYS_SCHED_GETAFFINITY:
   190  		req.SetRet(0)
   191  	case syscall.SYS_OPENAT:
   192  		req.SetRet(isyscall.Errno(errno.ENOSYS))
   193  	case syscall.SYS_MMAP:
   194  		sysMmap(req)
   195  	case syscall.SYS_MUNMAP:
   196  		sysMunmap(req)
   197  	case syscall.SYS_MADVISE:
   198  	case syscall.SYS_READ:
   199  		sysRead(req)
   200  	case syscall.SYS_WRITE:
   201  		sysWrite(req)
   202  	case syscall.SYS_CLOSE:
   203  	case syscall.SYS_CLOCK_GETTIME:
   204  		sysClockGetTime(req)
   205  	case syscall.SYS_RT_SIGPROCMASK:
   206  	case syscall.SYS_SIGALTSTACK:
   207  	case syscall.SYS_RT_SIGACTION:
   208  	case syscall.SYS_GETTID:
   209  		req.SetRet(uintptr(Mythread().id))
   210  	case syscall.SYS_CLONE:
   211  		sysClone(req)
   212  	case syscall.SYS_FUTEX:
   213  		sysFutex(req)
   214  	case syscall.SYS_NANOSLEEP:
   215  		sysNanosleep(req)
   216  	case syscall.SYS_SCHED_YIELD:
   217  		Yield()
   218  	case syscall.SYS_EXIT_GROUP:
   219  		sysExitGroup(req)
   220  
   221  	case unix.SYS_GETRANDOM:
   222  		req.SetRet(req.Arg(1))
   223  
   224  	case syscall.SYS_EPOLL_CREATE1:
   225  		sysEpollCreate(req)
   226  	case syscall.SYS_EPOLL_CTL:
   227  		sysEpollCtl(req)
   228  	case syscall.SYS_EPOLL_WAIT, syscall.SYS_EPOLL_PWAIT:
   229  		sysEpollWait(req)
   230  	case syscall.SYS_PIPE2:
   231  		sysPipe2(req)
   232  
   233  	case SYS_WAIT_IRQ:
   234  		sysWaitIRQ(req)
   235  	case SYS_WAIT_SYSCALL:
   236  		sysWaitSyscall(req)
   237  	case SYS_FIXED_MMAP:
   238  		sysFixedMmap(req)
   239  	case SYS_EPOLL_NOTIFY:
   240  		sysEpollNotify(req)
   241  
   242  	default:
   243  		req.SetRet(isyscall.Errno(errno.ENOSYS))
   244  		if no == syscall.SYS_PIPE2 {
   245  			changeReturnPC(Mythread().tf, sys.FuncPC(panicNosys))
   246  		}
   247  	}
   248  }
   249  
   250  //go:nosplit
   251  func sysArchPrctl(req *isyscall.Request) {
   252  	switch req.Arg(0) {
   253  	case linux.ARCH_SET_FS:
   254  		wrmsr(_MSR_FS_BASE, req.Arg(1))
   255  		Mythread().fsBase = req.Arg(1)
   256  	default:
   257  		preparePanic(Mythread().tf)
   258  		req.SetRet(errno.EINVAL)
   259  	}
   260  }
   261  
   262  //go:nosplit
   263  func sysMmap(req *isyscall.Request) {
   264  	addr := req.Arg(0)
   265  	n := req.Arg(1)
   266  	prot := req.Arg(2)
   267  	// called on sysReserve
   268  	if prot == syscall.PROT_NONE {
   269  		if addr == 0 {
   270  			req.SetRet(mm.Sbrk(n))
   271  		}
   272  		return
   273  	}
   274  
   275  	// called on sysMap and sysAlloc
   276  	req.SetRet(mm.Mmap(addr, n))
   277  	return
   278  }
   279  
   280  //go:nosplit
   281  func sysMunmap(req *isyscall.Request) {
   282  	addr := req.Arg(0)
   283  	n := req.Arg(1)
   284  	mm.Munmap(addr, n)
   285  }
   286  
   287  //go:nosplit
   288  func sysRead(req *isyscall.Request) {
   289  	fd := req.Arg(0)
   290  	if fd == pipeReadFd {
   291  		sysPipeRead(req)
   292  		return
   293  	}
   294  	req.SetRet(isyscall.Errno(errno.EINVAL))
   295  	return
   296  }
   297  
   298  //go:nosplit
   299  func sysWrite(req *isyscall.Request) {
   300  	fd := req.Arg(0)
   301  	buf := req.Arg(1)
   302  	len := req.Arg(2)
   303  	switch fd {
   304  	case 2:
   305  		buffer := sys.UnsafeBuffer(buf, int(len))
   306  		uart.Write(buffer)
   307  		req.SetRet(len)
   308  		return
   309  	case pipeWriteFd:
   310  		sysPipeWrite(req)
   311  		return
   312  	default:
   313  		req.SetErrorNO(syscall.EINVAL)
   314  	}
   315  }
   316  
   317  //go:nosplit
   318  func sysClockGetTime(req *isyscall.Request) {
   319  	ts := (*linux.Timespec)(unsafe.Pointer(req.Arg(1)))
   320  	*ts = clocktime()
   321  }
   322  
   323  //go:nosplit
   324  func sysClone(req *isyscall.Request) {
   325  	pc := Mythread().tf.IP
   326  	flags := req.Arg(0)
   327  	stack := req.Arg(1)
   328  	tls := req.Arg(4)
   329  	tid := clone(pc, stack, flags, tls)
   330  	req.SetRet(uintptr(tid))
   331  }
   332  
   333  //go:nosplit
   334  func sysFutex(req *isyscall.Request) {
   335  	addr := (*uintptr)(unsafe.Pointer(req.Arg(0)))
   336  	op := req.Arg(1)
   337  	val := req.Arg(2)
   338  	ts := (*linux.Timespec)(unsafe.Pointer(req.Arg(3)))
   339  	futex(addr, op, val, ts)
   340  }
   341  
   342  //go:nosplit
   343  func sysNanosleep(req *isyscall.Request) {
   344  	tc := (*linux.Timespec)(unsafe.Pointer(req.Arg(0)))
   345  	nanosleep(tc)
   346  }
   347  
// sysEpollCreate answers epoll_create1 with the fixed descriptor epollFd.
// The request's arguments are not examined.
//
//go:nosplit
func sysEpollCreate(req *isyscall.Request) {
	req.SetRet(epollFd)
}
   352  
   353  //go:nosplit
   354  func sysEpollCtl(req *isyscall.Request) {
   355  	efd := req.Arg(0)
   356  	op := req.Arg(1)
   357  	fd := req.Arg(2)
   358  	desc := req.Arg(3)
   359  	req.SetRet(epollCtl(efd, op, fd, desc))
   360  }
   361  
   362  //go:nosplit
   363  func sysEpollWait(req *isyscall.Request) {
   364  	efd := req.Arg(0)
   365  	evs := req.Arg(1)
   366  	len := req.Arg(2)
   367  	_ms := req.Arg(3)
   368  	req.SetRet(epollWait(efd, evs, len, _ms))
   369  }
   370  
   371  //go:nosplit
   372  func sysWaitIRQ(req *isyscall.Request) {
   373  	req.SetRet(waitIRQ())
   374  }
   375  
   376  //go:nosplit
   377  func sysWaitSyscall(req *isyscall.Request) {
   378  	req.SetRet(fetchPendingCall())
   379  }
   380  
   381  //go:nosplit
   382  func sysFixedMmap(req *isyscall.Request) {
   383  	vaddr := req.Arg(0)
   384  	paddr := req.Arg(1)
   385  	len := req.Arg(2)
   386  	mm.Fixmap(vaddr, paddr, len)
   387  }
   388  
   389  //go:nosplit
   390  func sysExitGroup(req *isyscall.Request) {
   391  	qemu.Exit(int(req.Arg(0)))
   392  	for {
   393  	}
   394  }
   395  
// sysEpollNotify handles SYS_EPOLL_NOTIFY by passing the request's first two
// arguments to epollNotify. It does not set a return value itself.
//
//go:nosplit
func sysEpollNotify(req *isyscall.Request) {
	epollNotify(req.Arg(0), req.Arg(1))
}
   400  
// vdsoGettimeofdaySym is the virtual address at which vdsoInit maps memory
// and installs a copy of vdsoGettimeofday.
// NOTE(review): this appears to be the address Linux uses for the legacy
// vsyscall page on x86-64 — confirm that is the intent.
const vdsoGettimeofdaySym = 0xffffffffff600000
   402  
// vdsoGettimeofday is the routine whose first 0x100 bytes vdsoInit copies to
// vdsoGettimeofdaySym. It has no Go body here; the implementation lives
// outside this file (presumably in assembly).
//
//go:nosplit
func vdsoGettimeofday()
   405  
   406  //go:nosplit
   407  func vdsoInit() {
   408  	dst := sys.UnsafeBuffer(mm.Mmap(vdsoGettimeofdaySym, 0x100), 0x100)
   409  	src := sys.UnsafeBuffer(sys.FuncPC(vdsoGettimeofday), 0x100)
   410  	copy(dst, src)
   411  }
   412  
   413  //go:nosplit
   414  func syscallInit() {
   415  	// write syscall selector
   416  	wrmsr(_MSR_STAR, 8<<32)
   417  	// clear IF when enter syscall
   418  	wrmsr(_MSR_FSTAR, 0x200)
   419  	// set syscall entry
   420  	wrmsr(_MSR_LSTAR, sys.FuncPC(syscallEntry))
   421  
   422  	// Enable SYSCALL instruction.
   423  	efer := rdmsr(_MSR_IA32_EFER)
   424  	wrmsr(_MSR_IA32_EFER, efer|_EFER_SCE)
   425  
   426  	trap.Register(0x80, syscallIntr)
   427  	epollInit()
   428  	vdsoInit()
   429  }