gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/platform/systrap/sysmsg_thread.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package systrap 16 17 import ( 18 "fmt" 19 20 "golang.org/x/sys/unix" 21 "gvisor.dev/gvisor/pkg/abi/linux" 22 "gvisor.dev/gvisor/pkg/bpf" 23 "gvisor.dev/gvisor/pkg/log" 24 "gvisor.dev/gvisor/pkg/seccomp" 25 "gvisor.dev/gvisor/pkg/sentry/arch" 26 "gvisor.dev/gvisor/pkg/sentry/memmap" 27 "gvisor.dev/gvisor/pkg/sentry/platform/systrap/sysmsg" 28 ) 29 30 // sysmsgThread describes a sysmsg stub thread which isn't traced 31 // and communicates with the Sentry via the sysmsg protocol. 32 // 33 // This type of thread is used to execute user processes. 34 type sysmsgThread struct { 35 // subproc is a link to the subprocess which is used to call native 36 // system calls. 37 subproc *subprocess 38 39 // thread is a thread identifier. 40 thread *thread 41 42 // msg is a pointer to a shared sysmsg structure in the Sentry address 43 // space which is used to communicate with the thread. 44 msg *sysmsg.Msg 45 46 // context is the last context that ran on this thread. 47 context *platformContext 48 49 // stackRange is a sysmsg stack in the memory file. 50 stackRange memmap.FileRange 51 52 // fpuStateToMsgOffset is the offset of a thread fpu state relative to sysmsg. 53 fpuStateToMsgOffset uint64 54 } 55 56 // sysmsgPerThreadMemAddr returns a sysmsg stack address in the thread address 57 // space. 58 func (p *sysmsgThread) sysmsgPerThreadMemAddr() uintptr { 59 return stubSysmsgStack + sysmsg.PerThreadMemSize*uintptr(p.thread.sysmsgStackID) 60 } 61 62 // mapStack maps a sysmsg stack into the thread address space. 63 func (p *sysmsgThread) mapStack(addr uintptr, readOnly bool) error { 64 prot := uintptr(unix.PROT_READ) 65 if !readOnly { 66 prot |= unix.PROT_WRITE 67 } 68 _, err := p.thread.syscallIgnoreInterrupt(&p.thread.initRegs, unix.SYS_MMAP, 69 arch.SyscallArgument{Value: addr}, 70 arch.SyscallArgument{Value: uintptr(p.stackRange.Length())}, 71 arch.SyscallArgument{Value: prot}, 72 arch.SyscallArgument{Value: unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED}, 73 arch.SyscallArgument{Value: uintptr(p.subproc.memoryFile.FD())}, 74 arch.SyscallArgument{Value: uintptr(p.stackRange.Start)}) 75 return err 76 } 77 78 // mapPrivateStack maps a private stack into the thread address space. 79 func (p *sysmsgThread) mapPrivateStack(addr uintptr, size uintptr) error { 80 prot := uintptr(unix.PROT_READ | unix.PROT_WRITE) 81 _, err := p.thread.syscallIgnoreInterrupt(&p.thread.initRegs, unix.SYS_MMAP, 82 arch.SyscallArgument{Value: addr}, 83 arch.SyscallArgument{Value: size}, 84 arch.SyscallArgument{Value: prot}, 85 arch.SyscallArgument{Value: unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED}, 86 arch.SyscallArgument{Value: 0}, 87 arch.SyscallArgument{Value: 0}) 88 return err 89 } 90 91 func (p *sysmsgThread) Debugf(format string, v ...any) { 92 if !log.IsLogging(log.Debug) { 93 return 94 } 95 msg := p.msg 96 postfix := fmt.Sprintf(": %s", msg) 97 p.thread.Debugf(format+postfix, v...) 98 } 99 100 func sysmsgSyscallNotifyRules() []bpf.Instruction { 101 rules := []seccomp.RuleSet{ 102 seccomp.RuleSet{ 103 Rules: seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{ 104 unix.SYS_EXIT_GROUP: seccomp.MatchAll{}, 105 }), 106 Action: linux.SECCOMP_RET_USER_NOTIF, 107 }, 108 } 109 instrs, _, err := seccomp.BuildProgram(rules, seccomp.ProgramOptions{ 110 DefaultAction: linux.SECCOMP_RET_ALLOW, 111 BadArchAction: linux.SECCOMP_RET_ALLOW, 112 }) 113 if err != nil { 114 panic(fmt.Sprintf("failed to build rules for sysmsg threads: %v", err)) 115 } 116 return instrs 117 } 118 119 func sysmsgThreadRules(stubStart uintptr) []bpf.Instruction { 120 rules := []seccomp.RuleSet{} 121 rules = appendSysThreadArchSeccompRules(rules) 122 rules = append(rules, []seccomp.RuleSet{ 123 // Allow instructions from the sysmsg code stub, which is limited by one page. 124 { 125 Rules: seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{ 126 unix.SYS_FUTEX: seccomp.Or{ 127 seccomp.PerArg{ 128 seccomp.GreaterThan(stubStart), 129 seccomp.EqualTo(linux.FUTEX_WAKE), 130 seccomp.EqualTo(1), 131 seccomp.EqualTo(0), 132 seccomp.EqualTo(0), 133 seccomp.EqualTo(0), 134 seccomp.GreaterThan(stubStart), // rip 135 }, 136 seccomp.PerArg{ 137 seccomp.GreaterThan(stubStart), 138 seccomp.EqualTo(linux.FUTEX_WAIT), 139 seccomp.AnyValue{}, 140 seccomp.EqualTo(0), 141 seccomp.EqualTo(0), 142 seccomp.EqualTo(0), 143 seccomp.GreaterThan(stubStart), // rip 144 }, 145 }, 146 unix.SYS_RT_SIGRETURN: seccomp.PerArg{ 147 seccomp.AnyValue{}, 148 seccomp.AnyValue{}, 149 seccomp.AnyValue{}, 150 seccomp.AnyValue{}, 151 seccomp.AnyValue{}, 152 seccomp.AnyValue{}, 153 seccomp.GreaterThan(stubStart), // rip 154 }, 155 unix.SYS_SCHED_YIELD: seccomp.PerArg{ 156 seccomp.AnyValue{}, 157 seccomp.AnyValue{}, 158 seccomp.AnyValue{}, 159 seccomp.AnyValue{}, 160 seccomp.AnyValue{}, 161 seccomp.AnyValue{}, 162 seccomp.GreaterThan(stubStart), // rip 163 }, 164 }), 165 Action: linux.SECCOMP_RET_ALLOW, 166 }, 167 }...) 168 instrs, _, err := seccomp.BuildProgram(rules, seccomp.ProgramOptions{ 169 DefaultAction: linux.SECCOMP_RET_TRAP, 170 BadArchAction: linux.SECCOMP_RET_TRAP, 171 }) 172 if err != nil { 173 panic(fmt.Sprintf("failed to build rules for sysmsg threads: %v", err)) 174 } 175 176 return instrs 177 }