github.com/icexin/eggos@v0.4.2-0.20220216025428-78b167e4f349/kernel/syscall.go (about) 1 package kernel 2 3 import ( 4 "syscall" 5 "unsafe" 6 7 "github.com/icexin/eggos/drivers/qemu" 8 "github.com/icexin/eggos/drivers/uart" 9 "github.com/icexin/eggos/kernel/isyscall" 10 "github.com/icexin/eggos/kernel/mm" 11 "github.com/icexin/eggos/kernel/sys" 12 "github.com/icexin/eggos/kernel/trap" 13 "github.com/icexin/eggos/log" 14 "golang.org/x/sys/unix" 15 "gvisor.dev/gvisor/pkg/abi/linux" 16 "gvisor.dev/gvisor/pkg/abi/linux/errno" 17 ) 18 19 const ( 20 _MSR_LSTAR = 0xc0000082 21 _MSR_STAR = 0xc0000081 22 _MSR_FSTAR = 0xc0000084 23 24 _MSR_IA32_EFER = 0xc0000080 25 _MSR_FS_BASE = 0xc0000100 26 _MSR_GS_BASE = 0xc0000101 27 28 _EFER_SCE = 1 << 0 // Enable SYSCALL. 29 ) 30 31 const ( 32 SYS_WAIT_IRQ = 500 33 SYS_WAIT_SYSCALL = 501 34 SYS_FIXED_MMAP = 502 35 SYS_EPOLL_NOTIFY = 503 36 ) 37 38 const ( 39 // copy from runtime, need by readgstatus 40 _Grunning = 2 41 _Gsyscall = 3 42 ) 43 44 var ( 45 bootstrapDone = false 46 47 kernelCalls = [...]uintptr{ 48 syscall.SYS_ARCH_PRCTL, 49 syscall.SYS_MMAP, 50 syscall.SYS_MUNMAP, 51 syscall.SYS_CLOCK_GETTIME, 52 syscall.SYS_RT_SIGPROCMASK, 53 syscall.SYS_SIGALTSTACK, 54 syscall.SYS_RT_SIGACTION, 55 syscall.SYS_GETTID, 56 syscall.SYS_CLONE, 57 syscall.SYS_FUTEX, 58 syscall.SYS_NANOSLEEP, 59 syscall.SYS_SCHED_YIELD, 60 syscall.SYS_MADVISE, 61 syscall.SYS_EXIT_GROUP, 62 63 // TODO: real random 64 unix.SYS_GETRANDOM, 65 66 // may removed in the future 67 syscall.SYS_EPOLL_CREATE1, 68 syscall.SYS_EPOLL_CTL, 69 syscall.SYS_EPOLL_WAIT, 70 syscall.SYS_EPOLL_PWAIT, 71 syscall.SYS_PIPE2, 72 73 SYS_WAIT_IRQ, 74 SYS_WAIT_SYSCALL, 75 SYS_FIXED_MMAP, 76 SYS_EPOLL_NOTIFY, 77 } 78 ) 79 80 //go:nosplit 81 func syscallEntry() 82 83 //go:nosplit 84 func getg() uintptr 85 86 //go:linkname readgstatus runtime.readgstatus 87 func readgstatus(uintptr) uint32 88 89 //go:nosplit 90 func syscallIntr() { 91 my := Mythread() 92 tf := my.tf 93 my.systf = *tf 94 95 req := tf.SyscallRequest() 96 doInKernel := !(bootstrapDone && canForward(&req)) 97 if doInKernel { 98 doSyscall(&req) 99 return 100 } 101 102 // use tricks to get whether the current g has p 103 status := readgstatus(getg()) 104 if status != _Grunning { 105 forwardCall(&req) 106 } else { 107 // tf.AX = doForwardSyscall(tf.AX, tf.BX, tf.CX, tf.DX, tf.SI, tf.DI, tf.BP) 108 // making all forwarded syscall as blocked syscall, so the syscall task can acquire a P 109 // make the caller call blocksyscall, it will call syscallIntr again with syscall status. 110 my.systf = *tf 111 changeReturnPC(tf, sys.FuncPC(blocksyscall)) 112 return 113 } 114 } 115 116 //go:nosplit 117 func canForward(req *isyscall.Request) bool { 118 no := req.NO() 119 my := Mythread() 120 // syscall thread can't call self 121 if syscalltask != 0 && my == syscalltask.ptr() { 122 return false 123 } 124 125 switch no { 126 case syscall.SYS_WRITE: 127 // handle panic write 128 if req.Arg(0) == 2 { 129 return false 130 } 131 // handle pipe write 132 if req.Arg(0) == pipeWriteFd { 133 return false 134 } 135 case syscall.SYS_READ: 136 // handle pipe read 137 if req.Arg(0) == pipeReadFd { 138 return false 139 } 140 } 141 142 for i := 0; i < len(kernelCalls); i++ { 143 if no == kernelCalls[i] { 144 return false 145 } 146 } 147 return true 148 } 149 150 //go:nosplit 151 func blocksyscall() { 152 tf := &Mythread().systf 153 c := tf.SyscallRequest() 154 ret, _, errno := syscall.Syscall6(c.NO(), c.Arg(0), c.Arg(1), c.Arg(2), 155 c.Arg(3), c.Arg(4), c.Arg(5)) 156 if errno != 0 { 157 sys.SetAX(-uintptr(errno)) 158 } else { 159 sys.SetAX(ret) 160 } 161 } 162 163 //go:nosplit 164 func panicNosys() { 165 req := Mythread().systf.SyscallRequest() 166 log.PrintStr("syscall not found:") 167 log.PrintStr(syscallName(int(req.NO()))) 168 log.PrintStr("\n") 169 throw("") 170 } 171 172 //go:nosplit 173 func doSyscall(req *isyscall.Request) { 174 no := req.NO() 175 req.SetRet(0) 176 177 // if no != syscall.SYS_SCHED_YIELD { 178 // log.PrintStr("call ") 179 // if int(no) < len(sysnum) { 180 // log.PrintStr(sysnum[no]) 181 // } else { 182 // log.PrintHex(no) 183 // } 184 // log.PrintStr("\n") 185 // } 186 switch no { 187 case syscall.SYS_ARCH_PRCTL: 188 sysArchPrctl(req) 189 case syscall.SYS_SCHED_GETAFFINITY: 190 req.SetRet(0) 191 case syscall.SYS_OPENAT: 192 req.SetRet(isyscall.Errno(errno.ENOSYS)) 193 case syscall.SYS_MMAP: 194 sysMmap(req) 195 case syscall.SYS_MUNMAP: 196 sysMunmap(req) 197 case syscall.SYS_MADVISE: 198 case syscall.SYS_READ: 199 sysRead(req) 200 case syscall.SYS_WRITE: 201 sysWrite(req) 202 case syscall.SYS_CLOSE: 203 case syscall.SYS_CLOCK_GETTIME: 204 sysClockGetTime(req) 205 case syscall.SYS_RT_SIGPROCMASK: 206 case syscall.SYS_SIGALTSTACK: 207 case syscall.SYS_RT_SIGACTION: 208 case syscall.SYS_GETTID: 209 req.SetRet(uintptr(Mythread().id)) 210 case syscall.SYS_CLONE: 211 sysClone(req) 212 case syscall.SYS_FUTEX: 213 sysFutex(req) 214 case syscall.SYS_NANOSLEEP: 215 sysNanosleep(req) 216 case syscall.SYS_SCHED_YIELD: 217 Yield() 218 case syscall.SYS_EXIT_GROUP: 219 sysExitGroup(req) 220 221 case unix.SYS_GETRANDOM: 222 req.SetRet(req.Arg(1)) 223 224 case syscall.SYS_EPOLL_CREATE1: 225 sysEpollCreate(req) 226 case syscall.SYS_EPOLL_CTL: 227 sysEpollCtl(req) 228 case syscall.SYS_EPOLL_WAIT, syscall.SYS_EPOLL_PWAIT: 229 sysEpollWait(req) 230 case syscall.SYS_PIPE2: 231 sysPipe2(req) 232 233 case SYS_WAIT_IRQ: 234 sysWaitIRQ(req) 235 case SYS_WAIT_SYSCALL: 236 sysWaitSyscall(req) 237 case SYS_FIXED_MMAP: 238 sysFixedMmap(req) 239 case SYS_EPOLL_NOTIFY: 240 sysEpollNotify(req) 241 242 default: 243 req.SetRet(isyscall.Errno(errno.ENOSYS)) 244 if no == syscall.SYS_PIPE2 { 245 changeReturnPC(Mythread().tf, sys.FuncPC(panicNosys)) 246 } 247 } 248 } 249 250 //go:nosplit 251 func sysArchPrctl(req *isyscall.Request) { 252 switch req.Arg(0) { 253 case linux.ARCH_SET_FS: 254 wrmsr(_MSR_FS_BASE, req.Arg(1)) 255 Mythread().fsBase = req.Arg(1) 256 default: 257 preparePanic(Mythread().tf) 258 req.SetRet(errno.EINVAL) 259 } 260 } 261 262 //go:nosplit 263 func sysMmap(req *isyscall.Request) { 264 addr := req.Arg(0) 265 n := req.Arg(1) 266 prot := req.Arg(2) 267 // called on sysReserve 268 if prot == syscall.PROT_NONE { 269 if addr == 0 { 270 req.SetRet(mm.Sbrk(n)) 271 } 272 return 273 } 274 275 // called on sysMap and sysAlloc 276 req.SetRet(mm.Mmap(addr, n)) 277 return 278 } 279 280 //go:nosplit 281 func sysMunmap(req *isyscall.Request) { 282 addr := req.Arg(0) 283 n := req.Arg(1) 284 mm.Munmap(addr, n) 285 } 286 287 //go:nosplit 288 func sysRead(req *isyscall.Request) { 289 fd := req.Arg(0) 290 if fd == pipeReadFd { 291 sysPipeRead(req) 292 return 293 } 294 req.SetRet(isyscall.Errno(errno.EINVAL)) 295 return 296 } 297 298 //go:nosplit 299 func sysWrite(req *isyscall.Request) { 300 fd := req.Arg(0) 301 buf := req.Arg(1) 302 len := req.Arg(2) 303 switch fd { 304 case 2: 305 buffer := sys.UnsafeBuffer(buf, int(len)) 306 uart.Write(buffer) 307 req.SetRet(len) 308 return 309 case pipeWriteFd: 310 sysPipeWrite(req) 311 return 312 default: 313 req.SetErrorNO(syscall.EINVAL) 314 } 315 } 316 317 //go:nosplit 318 func sysClockGetTime(req *isyscall.Request) { 319 ts := (*linux.Timespec)(unsafe.Pointer(req.Arg(1))) 320 *ts = clocktime() 321 } 322 323 //go:nosplit 324 func sysClone(req *isyscall.Request) { 325 pc := Mythread().tf.IP 326 flags := req.Arg(0) 327 stack := req.Arg(1) 328 tls := req.Arg(4) 329 tid := clone(pc, stack, flags, tls) 330 req.SetRet(uintptr(tid)) 331 } 332 333 //go:nosplit 334 func sysFutex(req *isyscall.Request) { 335 addr := (*uintptr)(unsafe.Pointer(req.Arg(0))) 336 op := req.Arg(1) 337 val := req.Arg(2) 338 ts := (*linux.Timespec)(unsafe.Pointer(req.Arg(3))) 339 futex(addr, op, val, ts) 340 } 341 342 //go:nosplit 343 func sysNanosleep(req *isyscall.Request) { 344 tc := (*linux.Timespec)(unsafe.Pointer(req.Arg(0))) 345 nanosleep(tc) 346 } 347 348 //go:nosplit 349 func sysEpollCreate(req *isyscall.Request) { 350 req.SetRet(epollFd) 351 } 352 353 //go:nosplit 354 func sysEpollCtl(req *isyscall.Request) { 355 efd := req.Arg(0) 356 op := req.Arg(1) 357 fd := req.Arg(2) 358 desc := req.Arg(3) 359 req.SetRet(epollCtl(efd, op, fd, desc)) 360 } 361 362 //go:nosplit 363 func sysEpollWait(req *isyscall.Request) { 364 efd := req.Arg(0) 365 evs := req.Arg(1) 366 len := req.Arg(2) 367 _ms := req.Arg(3) 368 req.SetRet(epollWait(efd, evs, len, _ms)) 369 } 370 371 //go:nosplit 372 func sysWaitIRQ(req *isyscall.Request) { 373 req.SetRet(waitIRQ()) 374 } 375 376 //go:nosplit 377 func sysWaitSyscall(req *isyscall.Request) { 378 req.SetRet(fetchPendingCall()) 379 } 380 381 //go:nosplit 382 func sysFixedMmap(req *isyscall.Request) { 383 vaddr := req.Arg(0) 384 paddr := req.Arg(1) 385 len := req.Arg(2) 386 mm.Fixmap(vaddr, paddr, len) 387 } 388 389 //go:nosplit 390 func sysExitGroup(req *isyscall.Request) { 391 qemu.Exit(int(req.Arg(0))) 392 for { 393 } 394 } 395 396 //go:nosplit 397 func sysEpollNotify(req *isyscall.Request) { 398 epollNotify(req.Arg(0), req.Arg(1)) 399 } 400 401 const vdsoGettimeofdaySym = 0xffffffffff600000 402 403 //go:nosplit 404 func vdsoGettimeofday() 405 406 //go:nosplit 407 func vdsoInit() { 408 dst := sys.UnsafeBuffer(mm.Mmap(vdsoGettimeofdaySym, 0x100), 0x100) 409 src := sys.UnsafeBuffer(sys.FuncPC(vdsoGettimeofday), 0x100) 410 copy(dst, src) 411 } 412 413 //go:nosplit 414 func syscallInit() { 415 // write syscall selector 416 wrmsr(_MSR_STAR, 8<<32) 417 // clear IF when enter syscall 418 wrmsr(_MSR_FSTAR, _FLAGS_IF) 419 // set syscall entry 420 wrmsr(_MSR_LSTAR, sys.FuncPC(syscallEntry)) 421 422 // Enable SYSCALL instruction. 423 efer := rdmsr(_MSR_IA32_EFER) 424 wrmsr(_MSR_IA32_EFER, efer|_EFER_SCE) 425 426 trap.Register(0x80, syscallIntr) 427 epollInit() 428 vdsoInit() 429 }