github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/arch/fpu/fpu_amd64.go

// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build amd64 || i386
// +build amd64 i386

package fpu

import (
	"fmt"
	"io"

	"golang.org/x/sys/unix"

	"github.com/metacubex/gvisor/pkg/cpuid"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/hostarch"
	"github.com/metacubex/gvisor/pkg/safecopy"
	"github.com/metacubex/gvisor/pkg/sync"
)

// FPSoftwareFrame is equivalent to struct _fpx_sw_bytes, the data stored by
// Linux in bytes 464:511 of the fxsave/xsave frame.
//
// +marshal
type FPSoftwareFrame struct {
	Magic1       uint32
	ExtendedSize uint32
	Xfeatures    uint64
	XstateSize   uint32
	Padding      [7]uint32
}

// From Linux's arch/x86/include/uapi/asm/sigcontext.h.
const (
	// FP_XSTATE_MAGIC1 is the value of FPSoftwareFrame.Magic1.
	FP_XSTATE_MAGIC1 = 0x46505853
	// FP_SW_FRAME_OFFSET is the offset of FPSoftwareFrame in the
	// fxsave/xsave area.
	FP_SW_FRAME_OFFSET = 464

	// FP_XSTATE_MAGIC2 is the value written to the 4 bytes inserted by
	// Linux after the fxsave/xsave area in the signal frame.
	FP_XSTATE_MAGIC2 = 0x46505845
	// FP_XSTATE_MAGIC2_SIZE is the size of FP_XSTATE_MAGIC2.
	FP_XSTATE_MAGIC2_SIZE = 4
)

// From Linux's arch/x86/include/asm/fpu/types.h.
const (
	// XFEATURE_MASK_FPSSE is xsave features that are always enabled in
	// signal frame fpstate.
	XFEATURE_MASK_FPSSE = 0x3

	// FXSAVE_AREA_SIZE is the size of the FXSAVE area.
	FXSAVE_AREA_SIZE = 512
)

// initX86FPState (defined in asm files) sets up initial state.
func initX86FPState(data *byte, useXsave bool)

func newX86FPStateSlice() State {
	maxsize, align := cpuid.HostFeatureSet().ExtendedStateSize()
	// We need capacity to be large enough to hold AMX bytes because of
	// ptrace. PTRACE_SETREGSET/GETREGSET assume that AMX portions should
	// always be used.
	// TODO(gvisor.dev/issues/9896): Implement AMX Support.
	capacity := maxsize + FP_XSTATE_MAGIC2_SIZE
	size := maxsize - cpuid.HostFeatureSet().AMXExtendedStateSize()
	// Always use at least 4096 bytes.
	//
	// For the KVM platform, this state is a fixed 4096 bytes, so make sure
	// that the underlying array is at _least_ that size otherwise we will
	// corrupt random memory. This is not a pleasant thing to debug.
	if capacity < 4096 {
		capacity = 4096
	}
	return alignedBytes(capacity, align)[:size+FP_XSTATE_MAGIC2_SIZE]
}

// Slice returns the byte slice that contains only the fpu state. `s` has the
// fpu state and FP_XSTATE_MAGIC2.
func (s State) Slice() []byte {
	return s[:len(s)-FP_XSTATE_MAGIC2_SIZE]
}

// NewState returns an initialized floating point state.
//
// The returned state is large enough to store all floating point state
// supported by the host, even if the app won't use much of it due to a
// restricted FeatureSet. Since the app may still be able to see state not
// advertised by CPUID, we must ensure it does not contain any sentry state.
func NewState() State {
	f := newX86FPStateSlice()
	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
	return f
}

// Fork creates and returns an identical copy of the x86 floating point state.
func (s *State) Fork() State {
	n := newX86FPStateSlice()
	copy(n, *s)
	return n
}

// Reset resets s to its initial state.
func (s *State) Reset() {
	f := *s
	for i := range f {
		f[i] = 0
	}
	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
}

var (
	hostXCR0Mask      uint64
	hostFPSize        uint
	hostUseXsave      bool
	initHostStateOnce sync.Once
)

// InitHostState initializes host parameters.
func InitHostState() {
	initHostStateOnce.Do(func() {
		featureSet := cpuid.HostFeatureSet()
		hostXCR0Mask = featureSet.ValidXCR0Mask()
		hostUseXsave = featureSet.UseXsave()
		hostFPSize, _ = featureSet.ExtendedStateSize()
		// TODO(gvisor.dev/issues/9896): Implement AMX Support.
		hostFPSize = hostFPSize - featureSet.AMXExtendedStateSize()
	})
}

// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area.
const ptraceFPRegsSize = 512

// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
	if maxlen < ptraceFPRegsSize {
		return 0, linuxerr.EFAULT
	}

	return dst.Write((*s)[:ptraceFPRegsSize])
}

// PtraceSetFPRegs implements Context.PtraceSetFPRegs.
func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
	if maxlen < ptraceFPRegsSize {
		return 0, linuxerr.EFAULT
	}

	var f [ptraceFPRegsSize]byte
	n, err := io.ReadFull(src, f[:])
	if err != nil {
		return 0, err
	}
	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(State(f[:]))
	// N.B. this only copies the beginning of the FP state, which
	// corresponds to the FXSAVE area.
	copy(*s, f[:])
	return n, nil
}

const (
	// mxcsrOffset is the offset in bytes of the MXCSR field from the start of
	// the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
	// Area")
	mxcsrOffset = 24

	// mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
	// start of the FXSAVE area.
	mxcsrMaskOffset = 28
)

const (
	// minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
	// to the size of the XSAVE legacy area (512 bytes) plus the size of the
	// XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
	// X86_XSTATE_SSE_SIZE.
	minXstateBytes = 512 + 64

	// userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
	// field in Linux's struct user_xstateregs, which is the type manipulated
	// by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
	// userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
	userXstateXCR0Offset = 464

	// xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
	// XSAVE area.
	xstateBVOffset = 512
	xcompBVOffset  = 520

	// xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
	// XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
	// a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
	// header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
	// Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
	// exceptions resulting from invalid values; we aren't. Linux also never
	// uses the compacted format when doing XSAVE and doesn't even define the
	// compaction extensions to XSAVE as a CPU feature, so for simplicity we
	// assume no one is using them.
	xsaveHeaderZeroedOffset = 512 + 8
	xsaveHeaderZeroedBytes  = 64 - 8
)

// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGSET, NT_X86_XSTATE) by
// writing the floating point registers from this state to dst and returning the
// number of bytes written, which must be less than or equal to maxlen.
func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet cpuid.FeatureSet) (int, error) {
	// N.B. s.x86FPState may contain more state than the application
	// expects. We only copy the subset that would be in their XSAVE area.
	ess, _ := featureSet.ExtendedStateSize()
	f := make([]byte, ess)
	copy(f, *s)
	// "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
	// reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
	// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
	// mask. GDB relies on this: see
	// gdb/x86-linux-nat.c:x86_linux_read_description().
	hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
	if len(f) > maxlen {
		f = f[:maxlen]
	}
	return dst.Write(f)
}

// PtraceSetXstateRegs implements ptrace(PTRACE_SETREGSET, NT_X86_XSTATE) by
// reading floating point registers from src and returning the number of bytes
// read, which must be less than or equal to maxlen.
func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet cpuid.FeatureSet) (int, error) {
	// Allow users to pass an xstate register set smaller than ours (they can
	// mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
	// Also allow users to pass a register set larger than ours; anything after
	// their ExtendedStateSize will be ignored. (I think Linux technically
	// permits setting a register set smaller than minXstateBytes, but it has
	// the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
	if maxlen < minXstateBytes {
		return 0, unix.EFAULT
	}
	ess, _ := featureSet.ExtendedStateSize()
	if maxlen > int(ess) {
		maxlen = int(ess)
	}
	f := make([]byte, maxlen)
	if _, err := io.ReadFull(src, f); err != nil {
		return 0, err
	}
	n := copy(*s, f)
	s.SanitizeUser(featureSet)
	return n, nil
}

// SanitizeUser mutates s to ensure that restoring it is safe.
func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) {
	f := *s

	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
	sanitizeMXCSR(f)

	if len(f) >= minXstateBytes {
		// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
		xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:])
		xstateBV &= featureSet.ValidXCR0Mask()
		hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
		// Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
		reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
		for i := range reserved {
			reserved[i] = 0
		}
	}
}

var (
	mxcsrMask     uint32
	initMXCSRMask sync.Once
)

// sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
// generates a general-protection fault (#GP) in response to an attempt to set
// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
// 10.5.1.2 "SSE State")
func sanitizeMXCSR(f State) {
	mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:])
	initMXCSRMask.Do(func() {
		temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
		initX86FPState(&temp[0], false /* useXsave */)
		mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
		if mxcsrMask == 0 {
			// "If the value of the MXCSR_MASK field is 00000000H, then the
			// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
			// Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
			// Register"
			mxcsrMask = 0xffbf
		}
	})
	mxcsr &= mxcsrMask
	hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
}

// SetMXCSR sets the MXCSR control/status register in the state.
func (s *State) SetMXCSR(mxcsr uint32) {
	hostarch.ByteOrder.PutUint32((*s)[mxcsrOffset:], mxcsr)
}

// GetMXCSR gets the MXCSR control/status register in the state.
func (s *State) GetMXCSR() uint32 {
	return hostarch.ByteOrder.Uint32((*s)[mxcsrOffset:])
}

// BytePointer returns a pointer to the first byte of the state.
//
//go:nosplit
func (s *State) BytePointer() *byte {
	return &(*s)[0]
}

// XSTATE_BV does not exist if FXSAVE is used, but FXSAVE implicitly saves x87
// and SSE state, so this is the equivalent XSTATE_BV value.
const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE

// AfterLoad converts the loaded state to a format that is compatible with the
// current processor.
func (s *State) AfterLoad() {
	old := s.Slice()

	// Recreate the slice. This is done to ensure that it is aligned
	// appropriately in memory, and large enough to accommodate any new
	// state that may be saved by the new CPU. Even if extraneous new state
	// is saved, the state we care about is guaranteed to be a subset of
	// new state. Later optimizations can use less space when using a
	// smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
	// more info.
	*s = NewState()

	// x86FPState always contains all the FP state supported by the host.
	// We may have come from a newer machine that supports additional state
	// which we cannot restore.
	//
	// The x86 FP state areas are backwards compatible, so we can simply
	// truncate the additional floating point state.
	//
	// Applications should not depend on the truncated state because it
	// should relate only to features that were not exposed in the app
	// FeatureSet. However, because we do not *prevent* them from using
	// this state, we must verify here that there is no in-use state
	// (according to XSTATE_BV) which we do not support.

	// What do we support?
	supportedBV := fxsaveBV
	hostFeatureSet := cpuid.HostFeatureSet()
	if hostFeatureSet.UseXsave() {
		supportedBV = hostFeatureSet.ValidXCR0Mask()
	}

	// What was in use?
	savedBV := fxsaveBV
	if len(old) >= xstateBVOffset+8 {
		savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:])
	}

	// Supported features must be a superset of saved features.
	if savedBV&^supportedBV != 0 {
		panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
	}

	// Copy to the new, aligned location.
	copy(*s, old)

	mxcsrBefore := s.GetMXCSR()
	sanitizeMXCSR(*s)
	mxcsrAfter := s.GetMXCSR()
	if mxcsrBefore != mxcsrAfter {
		panic(fmt.Sprintf("incompatible mxcsr value: %x (%x)", mxcsrBefore, mxcsrAfter))
	}
	if hostFeatureSet.UseXsave() {
		if err := safecopy.CheckXstate(s.BytePointer()); err != nil {
			xcompBV := uint64(0)
			if len(old) >= xcompBVOffset+8 {
				xcompBV = hostarch.ByteOrder.Uint64(old[xcompBVOffset:])
			}
			panic(fmt.Sprintf("incompatible state: %s\nlen(old)=%d len(new)=%d supportedBV=%#x XSTATE_BV=%#x XCOMP_BV=%#x", err, len(old), len(*s), supportedBV, savedBV, xcompBV))
		}
	}
}
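
// Illustrative usage sketch: a hypothetical external caller might allocate
// and manage FPU state with the exported helpers in this file roughly as
// follows. Only the fpu package calls come from this file; the surrounding
// task context is assumed, and 0x1f80 is the documented x86 power-on MXCSR
// value (all SSE exceptions masked).
//
//	fpu.InitHostState()      // cache host XCR0 mask, FP size, and XSAVE support
//	st := fpu.NewState()     // host-sized, initialized floating point state
//	st.SetMXCSR(0x1f80)      // set MXCSR to the architectural default
//	child := st.Fork()       // identical copy, e.g. for a forked task
//	st.Reset()               // return st to its initial state
//	_ = child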