github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/arch/fpu/fpu_amd64.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build amd64 i386
    16  
    17  package fpu
    18  
    19  import (
    20  	"io"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/SagerNet/gvisor/pkg/cpuid"
    24  	"github.com/SagerNet/gvisor/pkg/hostarch"
    25  	"github.com/SagerNet/gvisor/pkg/sync"
    26  	"github.com/SagerNet/gvisor/pkg/syserror"
    27  )
    28  
// initX86FPState (defined in asm files) sets up initial state.
//
// data must point to the first byte of a buffer large enough to hold the
// host's floating point state (at least the 512-byte FXSAVE area;
// NOTE(review): presumably the full XSAVE area when useXsave is set —
// confirm against the assembly implementation).
func initX86FPState(data *byte, useXsave bool)
    31  
    32  func newX86FPStateSlice() State {
    33  	size, align := cpuid.HostFeatureSet().ExtendedStateSize()
    34  	capacity := size
    35  	// Always use at least 4096 bytes.
    36  	//
    37  	// For the KVM platform, this state is a fixed 4096 bytes, so make sure
    38  	// that the underlying array is at _least_ that size otherwise we will
    39  	// corrupt random memory. This is not a pleasant thing to debug.
    40  	if capacity < 4096 {
    41  		capacity = 4096
    42  	}
    43  	return alignedBytes(capacity, align)[:size]
    44  }
    45  
    46  // NewState returns an initialized floating point state.
    47  //
    48  // The returned state is large enough to store all floating point state
    49  // supported by host, even if the app won't use much of it due to a restricted
    50  // FeatureSet. Since they may still be able to see state not advertised by
    51  // CPUID we must ensure it does not contain any sentry state.
    52  func NewState() State {
    53  	f := newX86FPStateSlice()
    54  	initX86FPState(&f[0], cpuid.HostFeatureSet().UseXsave())
    55  	return f
    56  }
    57  
    58  // Fork creates and returns an identical copy of the x86 floating point state.
    59  func (s *State) Fork() State {
    60  	n := newX86FPStateSlice()
    61  	copy(n, *s)
    62  	return n
    63  }
    64  
// ptraceFPRegsSize is the size in bytes of Linux's user_i387_struct, the type
// manipulated by PTRACE_GETFPREGS and PTRACE_SETFPREGS on x86. Equivalently,
// ptraceFPRegsSize is the size in bytes of the x86 FXSAVE area. (Intel SDM
// Vol. 1, Section 10.5.1 "FXSAVE Area".)
const ptraceFPRegsSize = 512
    69  
    70  // PtraceGetFPRegs implements Context.PtraceGetFPRegs.
    71  func (s *State) PtraceGetFPRegs(dst io.Writer, maxlen int) (int, error) {
    72  	if maxlen < ptraceFPRegsSize {
    73  		return 0, syserror.EFAULT
    74  	}
    75  
    76  	return dst.Write((*s)[:ptraceFPRegsSize])
    77  }
    78  
    79  // PtraceSetFPRegs implements Context.PtraceSetFPRegs.
    80  func (s *State) PtraceSetFPRegs(src io.Reader, maxlen int) (int, error) {
    81  	if maxlen < ptraceFPRegsSize {
    82  		return 0, syserror.EFAULT
    83  	}
    84  
    85  	var f [ptraceFPRegsSize]byte
    86  	n, err := io.ReadFull(src, f[:])
    87  	if err != nil {
    88  		return 0, err
    89  	}
    90  	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
    91  	sanitizeMXCSR(State(f[:]))
    92  	// N.B. this only copies the beginning of the FP state, which
    93  	// corresponds to the FXSAVE area.
    94  	copy(*s, f[:])
    95  	return n, nil
    96  }
    97  
const (
	// mxcsrOffset is the offset in bytes of the MXCSR field from the start of
	// the FXSAVE area. (Intel SDM Vol. 1, Table 10-2 "Format of an FXSAVE
	// Area")
	mxcsrOffset = 24

	// mxcsrMaskOffset is the offset in bytes of the MXCSR_MASK field from the
	// start of the FXSAVE area. (Same table as mxcsrOffset above.)
	mxcsrMaskOffset = 28
)
   108  
var (
	// mxcsrMask is the host's MXCSR_MASK value (the set of writable MXCSR
	// bits), computed lazily on first use by sanitizeMXCSR.
	mxcsrMask     uint32
	// initMXCSRMask guards the one-time computation of mxcsrMask.
	initMXCSRMask sync.Once
)
   113  
// Layout constants for the x86 XSAVE area and Linux's ptrace xstate regset.
const (
	// minXstateBytes is the minimum size in bytes of an x86 XSAVE area, equal
	// to the size of the XSAVE legacy area (512 bytes) plus the size of the
	// XSAVE header (64 bytes). Equivalently, minXstateBytes is GDB's
	// X86_XSTATE_SSE_SIZE.
	minXstateBytes = 512 + 64

	// userXstateXCR0Offset is the offset in bytes of the USER_XSTATE_XCR0_WORD
	// field in Linux's struct user_xstateregs, which is the type manipulated
	// by ptrace(PTRACE_GET/SETREGSET, NT_X86_XSTATE). Equivalently,
	// userXstateXCR0Offset is GDB's I386_LINUX_XSAVE_XCR0_OFFSET.
	userXstateXCR0Offset = 464

	// xstateBVOffset is the offset in bytes of the XSTATE_BV field in an x86
	// XSAVE area (i.e. the first 8 bytes of the XSAVE header).
	xstateBVOffset = 512

	// xsaveHeaderZeroedOffset and xsaveHeaderZeroedBytes indicate parts of the
	// XSAVE header that we coerce to zero: "Bytes 15:8 of the XSAVE header is
	// a state-component bitmap called XCOMP_BV. ... Bytes 63:16 of the XSAVE
	// header are reserved." - Intel SDM Vol. 1, Section 13.4.2 "XSAVE Header".
	// Linux ignores XCOMP_BV, but it's able to recover from XRSTOR #GP
	// exceptions resulting from invalid values; we aren't. Linux also never
	// uses the compacted format when doing XSAVE and doesn't even define the
	// compaction extensions to XSAVE as a CPU feature, so for simplicity we
	// assume no one is using them.
	xsaveHeaderZeroedOffset = 512 + 8
	xsaveHeaderZeroedBytes  = 64 - 8
)
   143  
   144  // sanitizeMXCSR coerces reserved bits in the MXCSR field of f to 0. ("FXRSTOR
   145  // generates a general-protection fault (#GP) in response to an attempt to set
   146  // any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
   147  // 10.5.1.2 "SSE State")
   148  func sanitizeMXCSR(f State) {
   149  	mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:])
   150  	initMXCSRMask.Do(func() {
   151  		temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
   152  		initX86FPState(&temp[0], false /* useXsave */)
   153  		mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
   154  		if mxcsrMask == 0 {
   155  			// "If the value of the MXCSR_MASK field is 00000000H, then the
   156  			// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
   157  			// Vol. 1, Section 11.6.6 "Guidelines for Writing to the MXCSR
   158  			// Register"
   159  			mxcsrMask = 0xffbf
   160  		}
   161  	})
   162  	mxcsr &= mxcsrMask
   163  	hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
   164  }
   165  
   166  // PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by
   167  // writing the floating point registers from this state to dst and returning the
   168  // number of bytes written, which must be less than or equal to maxlen.
   169  func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
   170  	// N.B. s.x86FPState may contain more state than the application
   171  	// expects. We only copy the subset that would be in their XSAVE area.
   172  	ess, _ := featureSet.ExtendedStateSize()
   173  	f := make([]byte, ess)
   174  	copy(f, *s)
   175  	// "The XSAVE feature set does not use bytes 511:416; bytes 463:416 are
   176  	// reserved." - Intel SDM Vol 1., Section 13.4.1 "Legacy Region of an XSAVE
   177  	// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
   178  	// mask. GDB relies on this: see
   179  	// gdb/x86-linux-nat.c:x86_linux_read_description().
   180  	hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
   181  	if len(f) > maxlen {
   182  		f = f[:maxlen]
   183  	}
   184  	return dst.Write(f)
   185  }
   186  
   187  // PtraceSetXstateRegs implements ptrace(PTRACE_SETREGS, NT_X86_XSTATE) by
   188  // reading floating point registers from src and returning the number of bytes
   189  // read, which must be less than or equal to maxlen.
   190  func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid.FeatureSet) (int, error) {
   191  	// Allow users to pass an xstate register set smaller than ours (they can
   192  	// mask bits out of XSTATE_BV), as long as it's at least minXstateBytes.
   193  	// Also allow users to pass a register set larger than ours; anything after
   194  	// their ExtendedStateSize will be ignored. (I think Linux technically
   195  	// permits setting a register set smaller than minXstateBytes, but it has
   196  	// the same silent truncation behavior in kernel/ptrace.c:ptrace_regset().)
   197  	if maxlen < minXstateBytes {
   198  		return 0, unix.EFAULT
   199  	}
   200  	ess, _ := featureSet.ExtendedStateSize()
   201  	if maxlen > int(ess) {
   202  		maxlen = int(ess)
   203  	}
   204  	f := make([]byte, maxlen)
   205  	if _, err := io.ReadFull(src, f); err != nil {
   206  		return 0, err
   207  	}
   208  	// Force reserved bits in MXCSR to 0. This is consistent with Linux.
   209  	sanitizeMXCSR(State(f))
   210  	// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
   211  	xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:])
   212  	xstateBV &= featureSet.ValidXCR0Mask()
   213  	hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
   214  	// Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
   215  	reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
   216  	for i := range reserved {
   217  		reserved[i] = 0
   218  	}
   219  	return copy(*s, f), nil
   220  }
   221  
   222  // SetMXCSR sets the MXCSR control/status register in the state.
   223  func (s *State) SetMXCSR(mxcsr uint32) {
   224  	hostarch.ByteOrder.PutUint32((*s)[mxcsrOffset:], mxcsr)
   225  }
   226  
   227  // BytePointer returns a pointer to the first byte of the state.
   228  //
   229  //go:nosplit
   230  func (s *State) BytePointer() *byte {
   231  	return &(*s)[0]
   232  }
   233  
// fxsaveBV is the XSTATE_BV value equivalent to plain FXSAVE: XSTATE_BV does
// not exist if FXSAVE is used, but FXSAVE implicitly saves x87 and SSE state,
// so this is the corresponding state-component bitmap.
const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
   237  
   238  // AfterLoad converts the loaded state to the format that compatible with the
   239  // current processor.
   240  func (s *State) AfterLoad() {
   241  	old := *s
   242  
   243  	// Recreate the slice. This is done to ensure that it is aligned
   244  	// appropriately in memory, and large enough to accommodate any new
   245  	// state that may be saved by the new CPU. Even if extraneous new state
   246  	// is saved, the state we care about is guaranteed to be a subset of
   247  	// new state. Later optimizations can use less space when using a
   248  	// smaller state component bitmap. Intel SDM Volume 1 Chapter 13 has
   249  	// more info.
   250  	*s = NewState()
   251  
   252  	// x86FPState always contains all the FP state supported by the host.
   253  	// We may have come from a newer machine that supports additional state
   254  	// which we cannot restore.
   255  	//
   256  	// The x86 FP state areas are backwards compatible, so we can simply
   257  	// truncate the additional floating point state.
   258  	//
   259  	// Applications should not depend on the truncated state because it
   260  	// should relate only to features that were not exposed in the app
   261  	// FeatureSet. However, because we do not *prevent* them from using
   262  	// this state, we must verify here that there is no in-use state
   263  	// (according to XSTATE_BV) which we do not support.
   264  	if len(*s) < len(old) {
   265  		// What do we support?
   266  		supportedBV := fxsaveBV
   267  		if fs := cpuid.HostFeatureSet(); fs.UseXsave() {
   268  			supportedBV = fs.ValidXCR0Mask()
   269  		}
   270  
   271  		// What was in use?
   272  		savedBV := fxsaveBV
   273  		if len(old) >= xstateBVOffset+8 {
   274  			savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:])
   275  		}
   276  
   277  		// Supported features must be a superset of saved features.
   278  		if savedBV&^supportedBV != 0 {
   279  			panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV})
   280  		}
   281  	}
   282  
   283  	// Copy to the new, aligned location.
   284  	copy(*s, old)
   285  }