// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build amd64 i386

package fpu

import (
	"io"

	"golang.org/x/sys/unix"
	"github.com/SagerNet/gvisor/pkg/cpuid"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// initX86FPState (defined in asm files) sets up initial state.
//
// data must point to the first byte of a buffer at least as large and as
// aligned as one returned by newX86FPStateSlice. useXsave reports whether the
// host supports XSAVE (cf. cpuid.FeatureSet.UseXsave); presumably it selects
// the state-image format written — confirm against the asm implementation.
func initX86FPState(data *byte, useXsave bool)

// newX86FPStateSlice returns an uninitialized State slice whose length is the
// host's extended state size and whose backing array satisfies the alignment
// reported by cpuid.ExtendedStateSize.
func newX86FPStateSlice() State {
	size, align := cpuid.HostFeatureSet().ExtendedStateSize()
	capacity := size
	// Always use at least 4096 bytes.
	//
	// For the KVM platform, this state is a fixed 4096 bytes, so make sure
	// that the underlying array is at _least_ that size otherwise we will
	// corrupt random memory. This is not a pleasant thing to debug.
	if capacity < 4096 {
		capacity = 4096
	}
	// Note: the returned slice's length is size, but its capacity (and the
	// backing array) may be larger; see above.
	return alignedBytes(capacity, align)[:size]
}

// NewState returns an initialized floating point state.
//
// The returned state is large enough to store all floating point state
// supported by host, even if the app won't use much of it due to a restricted
// FeatureSet. Since they may still be able to see state not advertised by
// CPUID we must ensure it does not contain any sentry state.
func NewState() State {
	f := newX86FPStateSlice()
	// Overwrite the (uninitialized) buffer with a clean initial image so
	// that no sentry data is visible through state components the app's
	// FeatureSet does not advertise (see doc comment above).
	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
	return f
}

// Fork creates and returns an identical copy of the x86 floating point state.
func (s *State) Fork() State {
	// n and *s are both produced by newX86FPStateSlice on this host, so
	// they have the same length and this copies the entire state.
	n := newX86FPStateSlice()
	copy(n, *s)
	return n
}

// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
const ptraceFPRegsSize = 512

// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
//
// It writes the 512-byte FXSAVE (legacy x87/SSE) prefix of the state to dst.
func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
	// Refuse destinations smaller than the fixed-size user_i387_struct.
	if maxlen < ptraceFPRegsSize {
		return 0, syserror.EFAULT
	}

	return dst.Write((*s)[:ptraceFPRegsSize])
}

// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
//
// It reads a full 512-byte FXSAVE area from src into the state.
func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
	// Refuse sources smaller than the fixed-size user_i387_struct.
	if maxlen < ptraceFPRegsSize {
		return 0, syserror.EFAULT
	}

	var f [ptraceFPRegsSize]byte
	n, err := io.ReadFull(src, f[:])
	if err != nil {
		return 0, err
	}
	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(State(f[:]))
	// N.B. this only copies the beginning of the FP state, which
	// corresponds to the FXSAVE area.
	copy(*s, f[:])
	return n, nil
}

const (
	// mxcsrOffset is the offset in bytes of the MXCSR field from the start of
	// the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
	// Area")
	mxcsrOffset = 24

	// mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
	// start of the FXSAVE area.
	mxcsrMaskOffset = 28
)

var (
	// mxcsrMask is the host's MXCSR_MASK, lazily computed by sanitizeMXCSR
	// under initMXCSRMask.
	mxcsrMask     uint32
	initMXCSRMask sync.Once
)

const (
	// minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
	// to the size of the XSAVE legacy area (512 bytes) plus the size of the
	// XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
	// X86_XSTATE_SSE_SIZE.
	minXstateBytes = 512 + 64

	// userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
	// field in Linux's struct user_xstateregs, which is the type manipulated
	// by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
	// userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
	userXstateXCR0Offset = 464

	// xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
	// XSAVE area.
	xstateBVOffset = 512

	// xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
	// XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
	// a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
	// header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
	// Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
	// exceptions resulting from invalid values; we aren't. Linux also never
	// uses the compacted format when doing XSAVE and doesn't even define the
	// compaction extensions to XSAVE as a CPU feature, so for simplicity we
	// assume no one is using them.
	xsaveHeaderZeroedOffset = 512 + 8
	xsaveHeaderZeroedBytes  = 64 - 8
)

// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
// generates a general-protection fault (#GP) in response to an attempt to set
// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
// 10.5.1.2 "SSE State")
func sanitizeMXCSR(f State) {
	mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:])
	initMXCSRMask.Do(func() {
		// Determine the host's MXCSR_MASK once: initX86FPState fills in a
		// scratch FXSAVE-sized image, from which we read the recorded
		// MXCSR_MASK field.
		temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
		initX86FPState(&temp[0], false /* useXsave */)
		mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
		if mxcsrMask == 0 {
			// "If the value of the MXCSR_MASK field is 00000000H, then the
			// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
			// Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
			// Register"
			mxcsrMask = 0xffbf
		}
	})
	mxcsr &= mxcsrMask
	hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
}

// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGSET, NT_X86_XSTATE) by
// writing the floating point registers from this state to dst and returning the
// number of bytes written, which must be less than or equal to maxlen.
func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
	// N.B. s.x86FPState may contain more state than the application
	// expects. We only copy the subset that would be in their XSAVE area.
	ess, _ := featureSet.ExtendedStateSize()
	f := make([]byte, ess)
	copy(f, *s)
	// "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
	// reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
	// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
	// mask. GDB relies on this: see
	// gdb/x86-linux-nat.c:x86_linux_read_description().
	hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
	// Silently truncate (rather than error) if the destination is smaller
	// than the app's XSAVE area; the returned byte count reflects this.
	if len(f) > maxlen {
		f = f[:maxlen]
	}
	return dst.Write(f)
}

// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) by
// reading floating point registers from src and returning the number of bytes
// read, which must be less than or equal to maxlen.
func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
	// Allow users to pass an xstate register set smaller than ours (they can
	// mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
	// Also allow users to pass a register set larger than ours; anything after
	// their ExtendedStateSize will be ignored. (I think Linux technically
	// permits setting a register set smaller than minXstateBytes, but it has
	// the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
	if maxlen < minXstateBytes {
		return 0, unix.EFAULT
	}
	ess, _ := featureSet.ExtendedStateSize()
	if maxlen > int(ess) {
		maxlen = int(ess)
	}
	f := make([]byte, maxlen)
	if _, err := io.ReadFull(src, f); err != nil {
		return 0, err
	}
	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(State(f))
	// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
	xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:])
	xstateBV &= featureSet.ValidXCR0Mask()
	hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
	// Force XCOMP_BV and reserved bytes in the XSAVE header to 0. See the
	// comment on xsaveHeaderZeroedOffset for why: we can't recover from an
	// XRSTOR #GP caused by invalid header values.
	reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
	for i := range reserved {
		reserved[i] = 0
	}
	return copy(*s, f), nil
}

// SetMXCSR sets the MXCSR control/status register in the state.
223 func (s *State) SetMXCSR(mxcsr uint32) { 224 hostarch.ByteOrder.PutUint32((*s)[mxcsrOffset:], mxcsr) 225 } 226 227 // BytePointer returns a pointer to the first byte of the state. 228 // 229 //go:nosplit 230 func (s *State) BytePointer() *byte { 231 return &(*s)[0] 232 } 233 234 // XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87 235 // and SSE state, so this is the equivalent XSTATE_BV value. 236 const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE 237 238 // AfterLoad converts the loaded state to the format that compatible with the 239 // current processor. 240 func (s *State) AfterLoad() { 241 old := *s 242 243 // Recreate the slice. This is done to ensure that it is aligned 244 // appropriately in memory, and large enough to accommodate any new 245 // state that may be saved by the new CPU. Even if extraneous new state 246 // is saved, the state we care about is guaranteed to be a subset of 247 // new state. Later optimizations can use less space when using a 248 // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has 249 // more info. 250 *s = NewState() 251 252 // x86FPState always contains all the FP state supported by the host. 253 // We may have come from a newer machine that supports additional state 254 // which we cannot restore. 255 // 256 // The x86 FP state areas are backwards compatible, so we can simply 257 // truncate the additional floating point state. 258 // 259 // Applications should not depend on the truncated state because it 260 // should relate only to features that were not exposed in the app 261 // FeatureSet. However, because we do not *prevent* them from using 262 // this state, we must verify here that there is no in-use state 263 // (according to XSTATE_BV) which we do not support. 264 if len(*s) < len(old) { 265 // What do we support? 
266 supportedBV := fxsaveBV 267 if fs := cpuid.HostFeatureSet(); fs.UseXsave() { 268 supportedBV = fs.ValidXCR0Mask() 269 } 270 271 // What was in use? 272 savedBV := fxsaveBV 273 if len(old) >= xstateBVOffset+8 { 274 savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:]) 275 } 276 277 // Supported features must be a superset of saved features. 278 if savedBV&^supportedBV != 0 { 279 panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV}) 280 } 281 } 282 283 // Copy to the new, aligned location. 284 copy(*s, old) 285 }