github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/arch/fpu/fpu_amd64.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build amd64 || i386 16 // +build amd64 i386 17 18 package fpu 19 20 import ( 21 "fmt" 22 "io" 23 24 "golang.org/x/sys/unix" 25 "github.com/MerlinKodo/gvisor/pkg/cpuid" 26 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 27 "github.com/MerlinKodo/gvisor/pkg/hostarch" 28 "github.com/MerlinKodo/gvisor/pkg/safecopy" 29 "github.com/MerlinKodo/gvisor/pkg/sync" 30 ) 31 32 // FPSoftwareFrame is equivalent to struct _fpx_sw_bytes, the data stored by 33 // Linux in bytes 464:511 of the fxsave/xsave frame. 34 // 35 // +marshal 36 type FPSoftwareFrame struct { 37 Magic1 uint32 38 ExtendedSize uint32 39 Xfeatures uint64 40 XstateSize uint32 41 Padding [7]uint32 42 } 43 44 // From Linux's arch/x86/include/uapi/asm/sigcontext.h. 45 const ( 46 // FP_XSTATE_MAGIC1 is the value of FPSoftwareFrame.Magic1. 47 FP_XSTATE_MAGIC1 = 0x46505853 48 // FP_SW_FRAME_OFFSET is the offset of FPSoftwareFrame in the 49 // fxsave/xsave area. 50 FP_SW_FRAME_OFFSET = 464 51 52 // FP_XSTATE_MAGIC2 is the value written to the 4 bytes inserted by 53 // Linux after the fxsave/xsave area in the signal frame. 54 FP_XSTATE_MAGIC2 = 0x46505845 55 // FP_XSTATE_MAGIC2_SIZE is the size of FP_XSTATE_MAGIC2. 56 FP_XSTATE_MAGIC2_SIZE = 4 57 ) 58 59 // From Linux's arch/x86/include/asm/fpu/types.h. 60 const ( 61 // XFEATURE_MASK_FPSSE is xsave features that are always enabled in 62 // signal frame fpstate. 63 XFEATURE_MASK_FPSSE = 0x3 64 65 // FXSAVE_AREA_SIZE is the size of the FXSAVE area. 66 FXSAVE_AREA_SIZE = 512 67 ) 68 69 // initX86FPState (defined in asm files) sets up initial state. 70 func initX86FPState(data *byte, useXsave bool) 71 72 func newX86FPStateSlice() State { 73 size, align := cpuid.HostFeatureSet().ExtendedStateSize() 74 capacity := size + FP_XSTATE_MAGIC2_SIZE 75 // Always use at least 4096 bytes. 76 // 77 // For the KVM platform, this state is a fixed 4096 bytes, so make sure 78 // that the underlying array is at _least_ that size otherwise we will 79 // corrupt random memory. This is not a pleasant thing to debug. 80 if capacity < 4096 { 81 capacity = 4096 82 } 83 return alignedBytes(capacity, align)[:size+FP_XSTATE_MAGIC2_SIZE] 84 } 85 86 // Slice returns the byte array that contains only the fpu state. `s` has the 87 // fpu state and FP_XSTATE_MAGIC2. 88 func (s State) Slice() []byte { 89 return s[:len(s)-FP_XSTATE_MAGIC2_SIZE] 90 } 91 92 // NewState returns an initialized floating point state. 93 // 94 // The returned state is large enough to store all floating point state 95 // supported by host, even if the app won't use much of it due to a restricted 96 // FeatureSet. Since they may still be able to see state not advertised by 97 // CPUID we must ensure it does not contain any sentry state. 98 func NewState() State { 99 f := newX86FPStateSlice() 100 initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave()) 101 return f 102 } 103 104 // Fork creates and returns an identical copy of the x86 floating point state. 105 func (s *State) Fork() State { 106 n := newX86FPStateSlice() 107 copy(n, *s) 108 return n 109 } 110 111 // Reset resets s to its initial state. 112 func (s *State) Reset() { 113 f := *s 114 for i := range f { 115 f[i] = 0 116 } 117 initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave()) 118 } 119 120 var ( 121 hostXCR0Mask uint64 122 hostFPSize uint 123 hostUseXsave bool 124 initHostStateOnce sync.Once 125 ) 126 127 // InitHostState initializes host parameters. 128 func InitHostState() { 129 initHostStateOnce.Do(func() { 130 featureSet := cpuid.HostFeatureSet() 131 hostXCR0Mask = featureSet.ValidXCR0Mask() 132 hostUseXsave = featureSet.UseXsave() 133 hostFPSize, _ = featureSet.ExtendedStateSize() 134 }) 135 } 136 137 // ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type 138 // manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently, 139 // ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area. 140 const ptraceFPRegsSize = 512 141 142 // PtraceGetFPRegs implements Context.PtraceGetFPRegs. 143 func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) { 144 if maxlen < ptraceFPRegsSize { 145 return 0, linuxerr.EFAULT 146 } 147 148 return dst.Write((*s)[:ptraceFPRegsSize]) 149 } 150 151 // PtraceSetFPRegs implements Context.PtraceSetFPRegs. 152 func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) { 153 if maxlen < ptraceFPRegsSize { 154 return 0, linuxerr.EFAULT 155 } 156 157 var f [ptraceFPRegsSize]byte 158 n, err := io.ReadFull(src, f[:]) 159 if err != nil { 160 return 0, err 161 } 162 // Force reserved bits in MXCSR to 0. This is consistent with Linux. 163 sanitizeMXCSR(State(f[:])) 164 // N.B. this only copies the beginning of the FP state, which 165 // corresponds to the FXSAVE area. 166 copy(*s, f[:]) 167 return n, nil 168 } 169 170 const ( 171 // mxcsrOffset is the offset in bytes of the MXCSR field from the start of 172 // the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE 173 // Area") 174 mxcsrOffset = 24 175 176 // mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the 177 // start of the FXSAVE area. 178 mxcsrMaskOffset = 28 179 ) 180 181 const ( 182 // minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal 183 // to the size of the XSAVE legacy area (512 bytes) plus the size of the 184 // XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's 185 // X86_XSTATE_SSE_SIZE. 186 minXstateBytes = 512 + 64 187 188 // userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD 189 // field in Linux's struct user_xstateregs, which is the type manipulated 190 // by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently, 191 // userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET. 192 userXstateXCR0Offset = 464 193 194 // xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86 195 // XSAVE area. 196 xstateBVOffset = 512 197 198 // xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the 199 // XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is 200 // a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE 201 // header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header". 202 // Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP 203 // exceptions resulting from invalid values; we aren't. Linux also never 204 // uses the compacted format when doing XSAVE and doesn't even define the 205 // compaction extensions to XSAVE as a CPU feature, so for simplicity we 206 // assume no one is using them. 207 xsaveHeaderZeroedOffset = 512 + 8 208 xsaveHeaderZeroedBytes = 64 - 8 209 ) 210 211 // PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by 212 // writing the floating point registers from this state to dst and returning the 213 // number of bytes written, which must be less than or equal to maxlen. 214 func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet cpuid.FeatureSet) (int, error) { 215 // N.B. s.x86FPState may contain more state than the application 216 // expects. We only copy the subset that would be in their XSAVE area. 217 ess, _ := featureSet.ExtendedStateSize() 218 f := make([]byte, ess) 219 copy(f, *s) 220 // "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are 221 // reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE 222 // Area". Linux uses the first 8 bytes of this area to store the OS XSTATE 223 // mask. GDB relies on this: see 224 // gdb/x86-linux-nat.c:x86_linux_read_description(). 225 hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask()) 226 if len(f) > maxlen { 227 f = f[:maxlen] 228 } 229 return dst.Write(f) 230 } 231 232 // PtraceSetXstateRegs implements ptrace(PTRACE_SETREGS, NT_X86_XSTATE) by 233 // reading floating point registers from src and returning the number of bytes 234 // read, which must be less than or equal to maxlen. 235 func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet cpuid.FeatureSet) (int, error) { 236 // Allow users to pass an xstate register set smaller than ours (they can 237 // mask bits out of XSTATE_BV), as long as it's at least minXstateBytes. 238 // Also allow users to pass a register set larger than ours; anything after 239 // their ExtendedStateSize will be ignored. (I think Linux technically 240 // permits setting a register set smaller than minXstateBytes, but it has 241 // the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().) 242 if maxlen < minXstateBytes { 243 return 0, unix.EFAULT 244 } 245 ess, _ := featureSet.ExtendedStateSize() 246 if maxlen > int(ess) { 247 maxlen = int(ess) 248 } 249 f := make([]byte, maxlen) 250 if _, err := io.ReadFull(src, f); err != nil { 251 return 0, err 252 } 253 n := copy(*s, f) 254 s.SanitizeUser(featureSet) 255 return n, nil 256 } 257 258 // SanitizeUser mutates s to ensure that restoring it is safe. 259 func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) { 260 f := *s 261 262 // Force reserved bits in MXCSR to 0. This is consistent with Linux. 263 sanitizeMXCSR(f) 264 265 if len(f) >= minXstateBytes { 266 // Users can't enable *more* XCR0 bits than what we, and the CPU, support. 267 xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:]) 268 xstateBV &= featureSet.ValidXCR0Mask() 269 hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) 270 // Force XCOMP_BV and reserved bytes in the XSAVE header to 0. 271 reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes] 272 for i := range reserved { 273 reserved[i] = 0 274 } 275 } 276 } 277 278 var ( 279 mxcsrMask uint32 280 initMXCSRMask sync.Once 281 ) 282 283 // sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR 284 // generates a general-protection fault (#GP) in response to an attempt to set 285 // any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section 286 // 10.5.1.2 "SSE State") 287 func sanitizeMXCSR(f State) { 288 mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:]) 289 initMXCSRMask.Do(func() { 290 temp := State(alignedBytes(uint(ptraceFPRegsSize), 16)) 291 initX86FPState(&temp[0], false /* useXsave */) 292 mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:]) 293 if mxcsrMask == 0 { 294 // "If the value of the MXCSR_MASK field is 00000000H, then the 295 // MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM 296 // Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR 297 // Register" 298 mxcsrMask = 0xffbf 299 } 300 }) 301 mxcsr &= mxcsrMask 302 hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr) 303 } 304 305 // SetMXCSR sets the MXCSR control/status register in the state. 306 func (s *State) SetMXCSR(mxcsr uint32) { 307 hostarch.ByteOrder.PutUint32((*s)[mxcsrOffset:], mxcsr) 308 } 309 310 // GetMXCSR gets the MXCSR control/status register in the state. 311 func (s *State) GetMXCSR() uint32 { 312 return hostarch.ByteOrder.Uint32((*s)[mxcsrOffset:]) 313 } 314 315 // BytePointer returns a pointer to the first byte of the state. 316 // 317 //go:nosplit 318 func (s *State) BytePointer() *byte { 319 return &(*s)[0] 320 } 321 322 // XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87 323 // and SSE state, so this is the equivalent XSTATE_BV value. 324 const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE 325 326 // AfterLoad converts the loaded state to the format that compatible with the 327 // current processor. 328 func (s *State) AfterLoad() { 329 old := s.Slice() 330 331 // Recreate the slice. This is done to ensure that it is aligned 332 // appropriately in memory, and large enough to accommodate any new 333 // state that may be saved by the new CPU. Even if extraneous new state 334 // is saved, the state we care about is guaranteed to be a subset of 335 // new state. Later optimizations can use less space when using a 336 // smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has 337 // more info. 338 *s = NewState() 339 340 // x86FPState always contains all the FP state supported by the host. 341 // We may have come from a newer machine that supports additional state 342 // which we cannot restore. 343 // 344 // The x86 FP state areas are backwards compatible, so we can simply 345 // truncate the additional floating point state. 346 // 347 // Applications should not depend on the truncated state because it 348 // should relate only to features that were not exposed in the app 349 // FeatureSet. However, because we do not *prevent* them from using 350 // this state, we must verify here that there is no in-use state 351 // (according to XSTATE_BV) which we do not support. 352 // What do we support? 353 supportedBV := fxsaveBV 354 hostFeatureSet := cpuid.HostFeatureSet() 355 if hostFeatureSet.UseXsave() { 356 supportedBV = hostFeatureSet.ValidXCR0Mask() 357 } 358 359 // What was in use? 360 savedBV := fxsaveBV 361 if len(old) >= xstateBVOffset+8 { 362 savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:]) 363 } 364 365 // Supported features must be a superset of saved features. 366 if savedBV&^supportedBV != 0 { 367 panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV}) 368 } 369 370 // Copy to the new, aligned location. 371 copy(*s, old) 372 373 mxcsrBefore := s.GetMXCSR() 374 sanitizeMXCSR(*s) 375 mxcsrAfter := s.GetMXCSR() 376 if mxcsrBefore != mxcsrAfter { 377 panic(fmt.Sprintf("incompatible mxcsr value: %x (%x)", mxcsrBefore, mxcsrAfter)) 378 } 379 if hostFeatureSet.UseXsave() { 380 if err := safecopy.CheckXstate(s.BytePointer()); err != nil { 381 panic(fmt.Sprintf("incompatible state: %s (%#v)", err, *s)) 382 } 383 } 384 }