// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build amd64 || i386
// +build amd64 i386

package fpu

import (
	"fmt"
	"io"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/cpuid"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/safecopy"
	"gvisor.dev/gvisor/pkg/sync"
)

// FPSoftwareFrame is equivalent to struct _fpx_sw_bytes, the data stored by
// Linux in bytes 464:511 of the fxsave/xsave frame.
//
// +marshal
type FPSoftwareFrame struct {
	// Magic1 holds FP_XSTATE_MAGIC1 when extended (xsave) state is present.
	Magic1 uint32
	// ExtendedSize is the size of the extended signal frame; per Linux's
	// _fpx_sw_bytes this includes the trailing FP_XSTATE_MAGIC2.
	ExtendedSize uint32
	// Xfeatures is the mask of xsave features saved in the frame.
	Xfeatures uint64
	// XstateSize is the size of the xsave area proper, excluding
	// FP_XSTATE_MAGIC2 (per Linux's _fpx_sw_bytes).
	XstateSize uint32
	// Padding fills the struct out to 48 bytes (bytes 464:511 of the frame).
	Padding [7]uint32
}

// From Linux's arch/x86/include/uapi/asm/sigcontext.h.
const (
	// FP_XSTATE_MAGIC1 is the value of FPSoftwareFrame.Magic1.
	FP_XSTATE_MAGIC1 = 0x46505853
	// FP_SW_FRAME_OFFSET is the offset of FPSoftwareFrame in the
	// fxsave/xsave area.
	FP_SW_FRAME_OFFSET = 464

	// FP_XSTATE_MAGIC2 is the value written to the 4 bytes inserted by
	// Linux after the fxsave/xsave area in the signal frame.
	FP_XSTATE_MAGIC2 = 0x46505845
	// FP_XSTATE_MAGIC2_SIZE is the size of FP_XSTATE_MAGIC2.
	FP_XSTATE_MAGIC2_SIZE = 4
)

// From Linux's arch/x86/include/asm/fpu/types.h.
const (
	// XFEATURE_MASK_FPSSE is xsave features that are always enabled in
	// signal frame fpstate.
	XFEATURE_MASK_FPSSE = 0x3

	// FXSAVE_AREA_SIZE is the size of the FXSAVE area.
	FXSAVE_AREA_SIZE = 512
)

// initX86FPState (defined in asm files) sets up initial state.
func initX86FPState(data *byte, useXsave bool)

// newX86FPStateSlice returns an aligned State slice sized for the host's
// extended state minus the (unsupported) AMX portion, with extra capacity
// for FP_XSTATE_MAGIC2 and a 4096-byte minimum required by the KVM platform.
func newX86FPStateSlice() State {
	maxsize, align := cpuid.HostFeatureSet().ExtendedStateSize()
	// We need capacity to be large enough to hold AMX bytes because of
	// ptrace. PTRACE_SETREGSET/GETREGSET assume that AMX portions should
	// always be used.
	// TODO(gvisor.dev/issues/9896): Implement AMX Support.
	capacity := maxsize + FP_XSTATE_MAGIC2_SIZE
	size := maxsize - cpuid.HostFeatureSet().AMXExtendedStateSize()
	// Always use at least 4096 bytes.
	//
	// For the KVM platform, this state is a fixed 4096 bytes, so make sure
	// that the underlying array is at _least_ that size otherwise we will
	// corrupt random memory. This is not a pleasant thing to debug.
	if capacity < 4096 {
		capacity = 4096
	}
	// The returned slice's length is the usable FP state plus the magic2
	// word; the backing array keeps the full (possibly larger) capacity.
	return alignedBytes(capacity, align)[:size+FP_XSTATE_MAGIC2_SIZE]
}

// Slice returns the byte array that contains only the fpu state. `s` has the
// fpu state and FP_XSTATE_MAGIC2.
func (s State) Slice() []byte {
	return s[:len(s)-FP_XSTATE_MAGIC2_SIZE]
}

// NewState returns an initialized floating point state.
//
// The returned state is large enough to store all floating point state
// supported by host, even if the app won't use much of it due to a restricted
// FeatureSet. Since they may still be able to see state not advertised by
// CPUID we must ensure it does not contain any sentry state.
func NewState() State {
	f := newX86FPStateSlice()
	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
	return f
}

// Fork creates and returns an identical copy of the x86 floating point state.
func (s *State) Fork() State {
	n := newX86FPStateSlice()
	copy(n, *s)
	return n
}

// Reset resets s to its initial state.
func (s *State) Reset() {
	f := *s
	clear(f)
	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
}

// Host FP parameters, captured once by InitHostState.
var (
	hostXCR0Mask      uint64
	hostFPSize        uint
	hostUseXsave      bool
	initHostStateOnce sync.Once
)

// InitHostState initializes host parameters.
func InitHostState() {
	initHostStateOnce.Do(func() {
		featureSet := cpuid.HostFeatureSet()
		hostXCR0Mask = featureSet.ValidXCR0Mask()
		hostUseXsave = featureSet.UseXsave()
		hostFPSize, _ = featureSet.ExtendedStateSize()
		// TODO(gvisor.dev/issues/9896): Implement AMX Support.
		hostFPSize = hostFPSize - featureSet.AMXExtendedStateSize()
	})
}

// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
const ptraceFPRegsSize = 512

// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
	if maxlen < ptraceFPRegsSize {
		return 0, linuxerr.EFAULT
	}

	return dst.Write((*s)[:ptraceFPRegsSize])
}

// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
	if maxlen < ptraceFPRegsSize {
		return 0, linuxerr.EFAULT
	}

	var f [ptraceFPRegsSize]byte
	n, err := io.ReadFull(src, f[:])
	if err != nil {
		return 0, err
	}
	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(State(f[:]))
	// N.B. this only copies the beginning of the FP state, which
	// corresponds to the FXSAVE area.
	copy(*s, f[:])
	return n, nil
}

const (
	// mxcsrOffset is the offset in bytes of the MXCSR field from the start of
	// the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
	// Area")
	mxcsrOffset = 24

	// mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
	// start of the FXSAVE area.
	mxcsrMaskOffset = 28
)

const (
	// minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
	// to the size of the XSAVE legacy area (512 bytes) plus the size of the
	// XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
	// X86_XSTATE_SSE_SIZE.
	minXstateBytes = 512 + 64

	// userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
	// field in Linux's struct user_xstateregs, which is the type manipulated
	// by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
	// userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
	userXstateXCR0Offset = 464

	// xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
	// XSAVE area.
	xstateBVOffset = 512
	// xcompBVOffset is the offset in bytes of the XCOMP_BV field in an x86
	// XSAVE area (bytes 15:8 of the XSAVE header).
	xcompBVOffset = 520

	// xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
	// XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
	// a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
	// header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
	// Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
	// exceptions resulting from invalid values; we aren't. Linux also never
	// uses the compacted format when doing XSAVE and doesn't even define the
	// compaction extensions to XSAVE as a CPU feature, so for simplicity we
	// assume no one is using them.
	xsaveHeaderZeroedOffset = 512 + 8
	xsaveHeaderZeroedBytes  = 64 - 8
)

// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGSET, NT_X86_XSTATE) by
// writing the floating point registers from this state to dst and returning the
// number of bytes written, which must be less than or equal to maxlen.
func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet cpuid.FeatureSet) (int, error) {
	// N.B. s.x86FPState may contain more state than the application
	// expects. We only copy the subset that would be in their XSAVE area.
	ess, _ := featureSet.ExtendedStateSize()
	f := make([]byte, ess)
	copy(f, *s)
	// "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
	// reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
	// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
	// mask. GDB relies on this: see
	// gdb/x86-linux-nat.c:x86_linux_read_description().
	hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
	if len(f) > maxlen {
		f = f[:maxlen]
	}
	return dst.Write(f)
}

// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) by
// reading floating point registers from src and returning the number of bytes
// read, which must be less than or equal to maxlen.
func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet cpuid.FeatureSet) (int, error) {
	// Allow users to pass an xstate register set smaller than ours (they can
	// mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
	// Also allow users to pass a register set larger than ours; anything after
	// their ExtendedStateSize will be ignored. (I think Linux technically
	// permits setting a register set smaller than minXstateBytes, but it has
	// the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
	if maxlen < minXstateBytes {
		return 0, unix.EFAULT
	}
	ess, _ := featureSet.ExtendedStateSize()
	if maxlen > int(ess) {
		maxlen = int(ess)
	}
	f := make([]byte, maxlen)
	if _, err := io.ReadFull(src, f); err != nil {
		return 0, err
	}
	// copy truncates to len(*s) if the caller passed more than we hold.
	n := copy(*s, f)
	s.SanitizeUser(featureSet)
	return n, nil
}

// SanitizeUser mutates s to ensure that restoring it is safe.
func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) {
	f := *s

	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(f)

	if len(f) >= minXstateBytes {
		// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
		xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:])
		xstateBV &= featureSet.ValidXCR0Mask()
		hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
		// Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
		reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
		clear(reserved)
	}
}

// mxcsrMask caches the host's MXCSR_MASK; computed lazily by sanitizeMXCSR.
var (
	mxcsrMask     uint32
	initMXCSRMask sync.Once
)

// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
// generates a general-protection fault (#GP) in response to an attempt to set
// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
// 10.5.1.2 "SSE State")
func sanitizeMXCSR(f State) {
	mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:])
	initMXCSRMask.Do(func() {
		// Read MXCSR_MASK from a freshly-initialized FXSAVE area rather
		// than hardcoding it, since it is host-dependent.
		temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
		initX86FPState(&temp[0], false /* useXsave */)
		mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
		if mxcsrMask == 0 {
			// "If the value of the MXCSR_MASK field is 00000000H, then the
			// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
			// Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
			// Register"
			mxcsrMask = 0xffbf
		}
	})
	mxcsr &= mxcsrMask
	hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
}

// SetMXCSR sets the MXCSR control/status register in the state.
func (s *State) SetMXCSR(mxcsr uint32) {
	hostarch.ByteOrder.PutUint32((*s)[mxcsrOffset:], mxcsr)
}

// GetMXCSR gets the MXCSR control/status register in the state.
func (s *State) GetMXCSR() uint32 {
	return hostarch.ByteOrder.Uint32((*s)[mxcsrOffset:])
}

// BytePointer returns a pointer to the first byte of the state.
//
//go:nosplit
func (s *State) BytePointer() *byte {
	return &(*s)[0]
}

// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
// and SSE state, so this is the equivalent XSTATE_BV value.
const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE

// AfterLoad converts the loaded state to a format that is compatible with the
// current processor.
func (s *State) AfterLoad() {
	old := s.Slice()

	// Recreate the slice. This is done to ensure that it is aligned
	// appropriately in memory, and large enough to accommodate any new
	// state that may be saved by the new CPU. Even if extraneous new state
	// is saved, the state we care about is guaranteed to be a subset of
	// new state. Later optimizations can use less space when using a
	// smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
	// more info.
	*s = NewState()

	// x86FPState always contains all the FP state supported by the host.
	// We may have come from a newer machine that supports additional state
	// which we cannot restore.
	//
	// The x86 FP state areas are backwards compatible, so we can simply
	// truncate the additional floating point state.
	//
	// Applications should not depend on the truncated state because it
	// should relate only to features that were not exposed in the app
	// FeatureSet. However, because we do not *prevent* them from using
	// this state, we must verify here that there is no in-use state
	// (according to XSTATE_BV) which we do not support.

	// What do we support?
	supportedBV := fxsaveBV
	hostFeatureSet := cpuid.HostFeatureSet()
	if hostFeatureSet.UseXsave() {
		supportedBV = hostFeatureSet.ValidXCR0Mask()
	}

	// What was in use?
	savedBV := fxsaveBV
	if len(old) >= xstateBVOffset+8 {
		savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:])
	}

	// Supported features must be a superset of saved features.
	if savedBV&^supportedBV != 0 {
		panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
	}

	// Copy to the new, aligned location.
	copy(*s, old)

	// If sanitization changed MXCSR, the saved state set reserved bits
	// that this host cannot restore; fail loudly rather than corrupting.
	mxcsrBefore := s.GetMXCSR()
	sanitizeMXCSR(*s)
	mxcsrAfter := s.GetMXCSR()
	if mxcsrBefore != mxcsrAfter {
		panic(fmt.Sprintf("incompatible mxcsr value: %x (%x)", mxcsrBefore, mxcsrAfter))
	}
	if hostFeatureSet.UseXsave() {
		// Verify that XRSTOR of the restored state will not fault.
		if err := safecopy.CheckXstate(s.BytePointer()); err != nil {
			xcompBV := uint64(0)
			if len(old) >= xcompBVOffset+8 {
				xcompBV = hostarch.ByteOrder.Uint64(old[xcompBVOffset:])
			}
			panic(fmt.Sprintf("incompatible state: %s\nlen(old)=%d len(new)=%d supportedBV=%#x XSTATE_BV=%#x XCOMP_BV=%#x", err, len(old), len(*s), supportedBV, savedBV, xcompBV))
		}
	}
}